mirror of
https://github.com/samsonjs/compiler.git
synced 2026-03-25 08:45:52 +00:00
[big commit] use variable proxies to defer address calculation
This commit is contained in:
parent
19d79c8836
commit
0c21d1abc6
4 changed files with 325 additions and 109 deletions
297
asm/binary.rb
297
asm/binary.rb
|
|
@ -4,8 +4,15 @@
|
|||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
#
|
||||
# Refer to the Intel[1] or AMD documentation on x86 for explanations
|
||||
# of Mod-R/M encoding, the Scale-Index-Base (SIB) byte, opcode groups.
|
||||
#
|
||||
# The start and exit shell codes were obtained by disassembling
|
||||
# minimal binaries on the respective platforms.
|
||||
|
||||
require 'asm/asm'
|
||||
require 'asm/varproxy'
|
||||
|
||||
module Assembler
|
||||
|
||||
|
|
@ -25,8 +32,9 @@ module Assembler
|
|||
SignedInt = MinSigned..MaxSigned
|
||||
SignedByte = -128..127
|
||||
|
||||
# This is used for encoding instructions. Just as the generated asm
|
||||
# contains "BITS 32", binary is generated for 32-bit protected mode.
|
||||
# This is used for encoding instructions. Just as the equivalent
|
||||
# assembly would contain "BITS 32", binary is generated for 32-bit
|
||||
# protected mode.
|
||||
DefaultOperandSize = :dword
|
||||
|
||||
SizeMap = {:byte => 8, :word => 16, :dword => 32}
|
||||
|
|
@ -50,63 +58,150 @@ module Assembler
|
|||
]
|
||||
}
|
||||
|
||||
attr_reader :eip
|
||||
attr_reader :ip
|
||||
|
||||
def initialize(platform, symtab, objwriter)
|
||||
def initialize(platform, symtab, objwriter_class)
|
||||
super(platform)
|
||||
@symtab = symtab
|
||||
@objwriter = objwriter
|
||||
@binary = [] # Byte array of machine code.
|
||||
@eip = 0 # Our instruction pointer, or the number of bytes written.
|
||||
@objwriter_class = objwriter_class
|
||||
# @objwriter = objwriter
|
||||
|
||||
# Almost a byte array, except for addresses.
|
||||
#
|
||||
# Addresses take the form [:<type>, <name>]
|
||||
# where <type> is one of: var, const, or label
|
||||
#
|
||||
# NOTE the type is redundant because of VariableProxy#const?
|
||||
# and labels are just strings.
|
||||
#
|
||||
# however, we could accept strings for variable names
|
||||
# if we keep the type tag. something to think about.
|
||||
@ir = []
|
||||
|
||||
# Our instruction pointer, or the number of bytes written.
|
||||
@ip = 0
|
||||
|
||||
# Map locations in the byte array to var proxies so we can
|
||||
# resolve address operations on the 2nd pass.
|
||||
@proxies = {}
|
||||
|
||||
# Always include the _main entry point in our symbol table. It begins at the
|
||||
# beginning of the __TEXT segment, 0x0.
|
||||
@symtab.deflabel('_main', @eip)
|
||||
@symtab.deflabel('_main', @ip)
|
||||
|
||||
X86_start[@platform].each {|byte| emit_byte(byte)}
|
||||
end
|
||||
|
||||
def output
|
||||
resolve_labels
|
||||
blobs = X86_start[@platform] + @binary + X86_exit[@platform]
|
||||
binary = blobs.pack('c*')
|
||||
@objwriter.text(binary)
|
||||
@objwriter.const(@symtab.const_data)
|
||||
@objwriter.bss(@symtab.bss_size)
|
||||
@objwriter.symtab(@symtab)
|
||||
@objwriter.serialize
|
||||
X86_exit[@platform].each {|byte| emit_byte(byte)}
|
||||
|
||||
byte_array = resolve_labels
|
||||
|
||||
#puts "1st pass: " + byte_array.inspect if DEBUG_OUTPUT
|
||||
|
||||
binary = package(byte_array)
|
||||
|
||||
@symtab.calculate_offsets(binary.length)
|
||||
if DEBUG_OUTPUT
|
||||
puts ">>> text offset: 0x#{@symtab.text_offset.to_s(16)}"
|
||||
puts ">>> const offset: 0x#{@symtab.const_offset.to_s(16)}"
|
||||
puts ">>> bss offset: 0x#{@symtab.bss_offset.to_s(16)}"
|
||||
end
|
||||
|
||||
# Now that we know where everything lies do the 2nd pass
|
||||
# calculating and filling in final var and const addresses.
|
||||
#
|
||||
# outline:
|
||||
# - resolve all variable proxies in @proxies replacing
|
||||
# the 4 bytes (0xff) with the real address
|
||||
|
||||
bss_offset = @symtab.bss_offset
|
||||
const_offset = @symtab.const_offset
|
||||
@proxies.each do |i, proxy|
|
||||
#puts ">>> Resolving #{proxy.name}" if DEBUG_OUTPUT
|
||||
var = @symtab.var(proxy.name)
|
||||
base_addr = if proxy.const?
|
||||
const_offset + @symtab.const(proxy.name)
|
||||
else
|
||||
bss_offset + @symtab.var(proxy.name)
|
||||
end
|
||||
#puts ">>> Replacing #{byte_array[i,4].map{|x|'0x' + x.to_s(16)}.inspect} with #{num_to_quad(proxy.resolve(base_addr)).map{|x|'0x' + x.to_s(16)}.inspect}" if DEBUG_OUTPUT
|
||||
byte_array[i, 4] = num_to_quad(proxy.resolve(base_addr))
|
||||
end
|
||||
|
||||
binary = package(byte_array)
|
||||
|
||||
#puts "2nd pass: " + byte_array.inspect if DEBUG_OUTPUT
|
||||
|
||||
objwriter = @objwriter_class.new
|
||||
objwriter.text(binary)
|
||||
objwriter.const(@symtab.const_data) if @symtab.const_size > 0
|
||||
objwriter.bss(@symtab.bss_size) if @symtab.bss_size > 0
|
||||
objwriter.reloc(@symtab.reloc_info)
|
||||
objwriter.symtab(@symtab)
|
||||
objwriter.serialize
|
||||
end
|
||||
|
||||
def resolve_labels
|
||||
bytes_read = 0
|
||||
@binary.each_with_index do |x, i|
|
||||
bytes = []
|
||||
@ir.each_with_index do |x, i|
|
||||
if x.is_a?(Numeric)
|
||||
bytes << x
|
||||
bytes_read += 1
|
||||
|
||||
elsif addr?(x)
|
||||
@binary[i, 1] = x[1..-1]
|
||||
bytes_read += 1
|
||||
# remember this so we can replace the bogus addr later
|
||||
@proxies[bytes_read] = x[1]
|
||||
|
||||
else # label to resolve
|
||||
# add a relocation entry for this address
|
||||
@symtab.reloc(bytes_read)
|
||||
|
||||
# fill in said bogus addr
|
||||
bytes += [0xff, 0xff, 0xff, 0xff]
|
||||
|
||||
bytes_read += 4
|
||||
|
||||
|
||||
# TODO find out if we should calculate addrs as offsets rather than
|
||||
# absolute as they are done now. (ok for Mach-O, maybe not ELF)
|
||||
elsif label?(x)
|
||||
# the actual eip points to the next instruction already, so should we.
|
||||
real_eip = bytes_read + 4
|
||||
addr = @symtab.lookup_label(x) - real_eip # dest - src to get relative addr
|
||||
puts "resolved label: #{x} = 0x#{@symtab.lookup_label(x).to_s(16)} (rel: 0x#{addr.to_s(16)}, eip = 0x#{real_eip.to_s(16)}, bytes_read = 0x#{bytes_read.to_s(16)})" if DEBUG_OUTPUT
|
||||
@binary[i, 1] = num_to_quad(addr)
|
||||
# count the first byte just written, the rest are counted normally
|
||||
bytes_read += 1
|
||||
real_ip = bytes_read + 4
|
||||
name = x[1]
|
||||
addr = @symtab.lookup_label(name) - real_ip # dest - src to get relative addr
|
||||
#puts "resolved label: #{x} = 0x#{@symtab.lookup_label(name).to_s(16)} (rel: 0x#{addr.to_s(16)}, ip = 0x#{real_ip.to_s(16)}, bytes_read = 0x#{bytes_read.to_s(16)})" if DEBUG_OUTPUT
|
||||
|
||||
|
||||
bytes += num_to_quad(addr)
|
||||
bytes_read += 4
|
||||
|
||||
else
|
||||
raise "unknown value in the IR at #{bytes_read} - #{x.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
return bytes
|
||||
end
|
||||
|
||||
def package(bytes)
|
||||
bytes.pack('c*')
|
||||
end
|
||||
|
||||
# Silly semantics, but labels don't count as an address since they
|
||||
# don't need to be deferred.
|
||||
def addr?(x)
|
||||
x.is_a?(Array) && x[0] == :addr
|
||||
end
|
||||
|
||||
def addr_size(addr)
|
||||
addr.length - 1
|
||||
x.is_a?(Array) && [:var, :const].include?(x[0])
|
||||
end
|
||||
|
||||
def label?(x)
|
||||
x.is_a?(Array) && x[0] == :label
|
||||
end
|
||||
|
||||
# XXX this should probably evaluate the value somehow
|
||||
def defconst(name, bytes, value)
|
||||
@symtab.defconst(name, bytes, value)
|
||||
return const(name)
|
||||
end
|
||||
|
||||
# Define a variable with the given name and size in bytes.
|
||||
|
|
@ -116,27 +211,49 @@ module Assembler
|
|||
else
|
||||
STDERR.puts "[warning] attempted to redefine #{name}"
|
||||
end
|
||||
return var(name)
|
||||
end
|
||||
|
||||
# These methods are all delegated to the symbol table.
|
||||
%w[var var? const const?].each do |method|
|
||||
define_method(method) do |name|
|
||||
@symtab.send(method, name)
|
||||
def var(name)
|
||||
STDERR.puts "[error] undefined variable #{name}" unless var?(name)
|
||||
# TODO bail on undefined vars
|
||||
VariableProxy.new(name)
|
||||
end
|
||||
|
||||
# Return a VariableProxy (flagged as a constant) for the named constant.
#
# The proxy only records the name; nothing is looked up here beyond the
# existence check.  If const?(name) is false an error is printed to
# STDERR but a proxy is still returned (see the TODO below).
def const(name)
  # Report the missing symbol as a constant, not a variable, so the
  # diagnostic points at the right kind of definition.
  STDERR.puts "[error] undefined constant #{name}" unless const?(name)
  # TODO bail on undefined consts
  VariableProxy.new(name, true)
end
|
||||
|
||||
def var?(name)
|
||||
@symtab.var?(name)
|
||||
end
|
||||
|
||||
def const?(name)
|
||||
@symtab.const?(name)
|
||||
end
|
||||
|
||||
# Define a variable unless it exists.
|
||||
def var!(name, bytes=4)
|
||||
if var?(name)
|
||||
var(name)
|
||||
else
|
||||
defvar(name, bytes)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Count the bytes that were encoded in the given block.
|
||||
def asm
|
||||
# stash the current number of bytes written
|
||||
instruction_offset = @eip
|
||||
instruction_offset = @ip
|
||||
|
||||
print "0x#{@eip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT
|
||||
print "0x#{@ip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT
|
||||
|
||||
yield
|
||||
|
||||
# return the number of bytes written
|
||||
@eip - instruction_offset
|
||||
@ip - instruction_offset
|
||||
|
||||
puts if DEBUG_OUTPUT
|
||||
end
|
||||
|
|
@ -160,26 +277,38 @@ module Assembler
|
|||
# make sure it's a byte
|
||||
raise "not a byte: #{byte.inspect}" unless byte == byte & 0xff
|
||||
|
||||
byte = byte & 0xff
|
||||
byte = byte & 0xff
|
||||
### end of pointless code
|
||||
|
||||
print (byte >= 0 && byte < 0x10 ? '0' : '') + byte.to_s(16) + ' ' if DEBUG_OUTPUT
|
||||
|
||||
@binary << byte
|
||||
@eip += 1
|
||||
@ir << byte
|
||||
@ip += 1
|
||||
end
|
||||
|
||||
def emit_addr(addr)
|
||||
@eip += addr.length
|
||||
addr.insert(0, :addr)
|
||||
puts addr.inspect if DEBUG_OUTPUT
|
||||
@binary << addr
|
||||
# addresses are emitted as arrays of bytes, prefixed with :var, :const, or :label
|
||||
def emit_addr(type, name)
|
||||
placeholder = [type, name]
|
||||
puts placeholder.inspect if DEBUG_OUTPUT
|
||||
@ir << placeholder
|
||||
|
||||
# all addresses are 32-bits and jumps are all 32-bit relative
|
||||
@ip += 4
|
||||
end
|
||||
|
||||
def emit_future_addr(label)
|
||||
print "<#{label}> " if DEBUG_OUTPUT
|
||||
@binary << label
|
||||
@eip += 4 # all jumps are 32-bit relative for now
|
||||
def emit_var(name_or_proxy)
|
||||
proxy = name_or_proxy.is_a?(VariableProxy) ? name_or_proxy : var(name_or_proxy)
|
||||
emit_addr(:var, proxy)
|
||||
end
|
||||
|
||||
# Emit a deferred address placeholder for a constant.
#
# Accepts either a VariableProxy (used as-is, keeping any arithmetic
# recorded on it) or a constant name, which is wrapped via const().
# The placeholder is emitted through emit_addr with the :const tag.
#
# The parameter is named like emit_var's; the previous signature called it
# `name` while the body read `name_or_proxy`, raising NameError on every call.
def emit_const(name_or_proxy)
  proxy = name_or_proxy.is_a?(VariableProxy) ? name_or_proxy : const(name_or_proxy)
  emit_addr(:const, proxy)
end
|
||||
|
||||
def emit_label(name)
|
||||
print "<#{name}> " if DEBUG_OUTPUT
|
||||
emit_addr(:label, name)
|
||||
end
|
||||
|
||||
def emit_dword(num)
|
||||
|
|
@ -190,9 +319,9 @@ module Assembler
|
|||
@symtab.unique_label(suffix)
|
||||
end
|
||||
|
||||
def emit_label(name)
|
||||
puts "\n#{name} (0x#{@eip.to_s(16)}):" if DEBUG_OUTPUT
|
||||
@symtab.deflabel(name, @eip)
|
||||
def deflabel(name)
|
||||
puts "\n#{name} (0x#{@ip.to_s(16)}):" if DEBUG_OUTPUT
|
||||
@symtab.deflabel(name, @ip)
|
||||
end
|
||||
|
||||
def emit_modrm(addr, reg=0)
|
||||
|
|
@ -201,12 +330,14 @@ module Assembler
|
|||
disp8 = nil
|
||||
disp32 = nil
|
||||
sib = nil
|
||||
var = nil # variable proxy
|
||||
|
||||
# effective address
|
||||
if addr.is_a?(Array)
|
||||
eff_addr = addr[1] || addr[0] # works with or without size prefix
|
||||
raise "invalid effective address: #{addr.inspect}" unless eff_addr
|
||||
case eff_addr
|
||||
|
||||
when RegisterProxy
|
||||
|
||||
# Simple register addressing, e.g. [ESI].
|
||||
|
|
@ -266,6 +397,11 @@ module Assembler
|
|||
rm = 5 # 101
|
||||
disp32 = eff_addr
|
||||
|
||||
when VariableProxy
|
||||
mod = 0
|
||||
rm = 5
|
||||
var = eff_addr
|
||||
|
||||
else
|
||||
raise "unsupported effective address: #{addr.inspect}"
|
||||
end
|
||||
|
|
@ -275,14 +411,22 @@ module Assembler
|
|||
mod = 3
|
||||
rm = addr.regnum
|
||||
|
||||
# XXX TODO elsif addr.respond_to?(:name)
|
||||
# (VariableProxy) => [:(var|const), addr.name]
|
||||
#
|
||||
# i.e. a pointer to that var
|
||||
|
||||
else
|
||||
raise "unsupported effective address: #{addr.inspect}"
|
||||
end
|
||||
|
||||
emit_byte((mod << 6) | (reg << 3) | rm)
|
||||
emit_byte(sib) if sib
|
||||
emit_addr([disp8]) if disp8
|
||||
emit_addr(num_to_quad(disp32)) if disp32
|
||||
|
||||
emit_byte(disp8) if disp8
|
||||
|
||||
emit_dword(disp32) if disp32
|
||||
emit_var(var) if var
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -311,12 +455,25 @@ module Assembler
|
|||
op.is_a?(Numeric) && op >= -(2 ** bits / 2) && op <= (2 ** bits - 1)
|
||||
end
|
||||
|
||||
# Return true if op is a valid operand of the specified size.
|
||||
# (:byte, :word, :dword)
|
||||
#
|
||||
# Valid operands are:
|
||||
#
|
||||
# * registers
|
||||
#
|
||||
# * effective addresses (wrapped in an array to look like nasm code)
|
||||
#
|
||||
# XXX This method is pretty ugly.
|
||||
def rm?(op, size=DefaultOperandSize)
|
||||
register?(op, size) || op.is_a?(Array) && (op.size == 1 || op[0] == size)
|
||||
register?(op, size) ||
|
||||
(op.is_a?(Array) &&
|
||||
(op.size == 1 && [Numeric, RegisterProxy, VariableProxy].any?{|c| c == op[0].class}) ||
|
||||
(op.size == 2 && rm?(op[1])))
|
||||
end
|
||||
|
||||
def offset?(addr, size=DefaultOperandSize)
|
||||
addr.is_a?(Array) && addr[0].is_a?(Numeric)
|
||||
addr.is_a?(Array) && (addr[0].is_a?(Numeric) || addr[0].is_a?(VariableProxy))
|
||||
end
|
||||
|
||||
def constant?(op)
|
||||
|
|
@ -382,7 +539,7 @@ module Assembler
|
|||
|
||||
# This is an array of arguments to be passed to emit_modrm, if it is set.
|
||||
modrm = nil
|
||||
|
||||
|
||||
# version 1: mov r32, imm32
|
||||
if register?(dest) && immediate?(src)
|
||||
opcode = 0xb8 + dest.regnum # dest encoded in instruction
|
||||
|
|
@ -434,10 +591,20 @@ module Assembler
|
|||
raise "unsupported MOV instruction, #{dest.inspect}, #{src.inspect}"
|
||||
end
|
||||
|
||||
dword = immediate || offset
|
||||
|
||||
asm do
|
||||
emit_byte(opcode)
|
||||
emit_modrm(*modrm) if modrm
|
||||
emit_dword(immediate || offset) if immediate || offset
|
||||
if dword.is_a?(VariableProxy)
|
||||
if dword.const?
|
||||
emit_const(dword)
|
||||
else
|
||||
emit_var(dword)
|
||||
end
|
||||
elsif dword
|
||||
emit_dword(dword)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -446,7 +613,7 @@ module Assembler
|
|||
|
||||
# movzx Gv, ??
|
||||
if register?(dest)
|
||||
|
||||
|
||||
opcode = case
|
||||
when rm?(src, :byte): 0xb6 # movzx Gv, Eb
|
||||
when rm?(src, :word): 0xb7 # movzx Gv, Ew
|
||||
|
|
@ -742,7 +909,7 @@ module Assembler
|
|||
def jmp(label)
|
||||
asm do
|
||||
emit_byte(0xe9)
|
||||
emit_future_addr(label)
|
||||
emit_label(label)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -768,7 +935,7 @@ module Assembler
|
|||
asm do
|
||||
emit_byte(0x0f)
|
||||
emit_byte(opcode)
|
||||
emit_future_addr(label)
|
||||
emit_label(label)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -807,8 +974,8 @@ module Assembler
|
|||
|
||||
# NOTE: LOOP only accepts a 1-byte signed offset. Don't use it.
|
||||
def loop_(label)
|
||||
real_eip = @eip + 2 # loop instruction is 2 bytes
|
||||
delta = @symtab.lookup_label(label) - real_eip
|
||||
real_ip = @ip + 2 # loop instruction is 2 bytes
|
||||
delta = @symtab.lookup_label(label) - real_ip
|
||||
unless SignedByte === delta
|
||||
raise "LOOP can only jump -128 to 127 bytes, #{label} is #{delta} bytes away"
|
||||
end
|
||||
|
|
|
|||
41
asm/varproxy.rb
Normal file
41
asm/varproxy.rb
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
module Assembler

  # Wrap a variable's address so that we can perform arithmetic on it
  # before resolving it when we know where things will go in memory.
  # All we do is catch arithmetic ops and then provide a means to
  # resolve a final address by replaying them later.
  #
  # e.g. [symtab.var('i')] or [symtab.var('i') * 2]
  #
  # Proxies are treated as immutable values: every operator returns a new
  # proxy and leaves the receiver untouched.
  class VariableProxy

    # Name of the variable or constant this proxy stands for.
    attr_reader :name

    # Recorded operations, in application order.  Each entry is an array
    # of the form [operator, *arguments], e.g. ['+', 4].
    attr_accessor :ops

    # name  - the symbol-table name being proxied
    # const - true when the name refers to a constant rather than a variable
    def initialize(name, const=false)
      @name = name
      @const = const
      @ops = []
    end

    # Each supported operator returns a new proxy that remembers the
    # operation instead of performing it.
    %w[+ * / - % & |].each do |op|
      define_method(op) do |*args|
        new_proxy = self.class.new(@name, @const)
        # Carry over the ops already recorded on this proxy so chained
        # expressions such as (v + 4) * 2 replay every step.  Array#+
        # builds a fresh array, so the receiver's list is not shared.
        new_proxy.ops = @ops + [[op, *args]]
        new_proxy
      end
    end

    # Replay the recorded operations, in order, starting from base_addr
    # and return the result.  With no recorded ops this is base_addr.
    #
    # XXX should this perhaps use the offset instead?
    def resolve(base_addr)
      @ops.inject(base_addr) do |addr, op|
        addr.send(*op)
      end
    end

    # True when this proxy was created for a constant.
    def const?
      @const
    end

  end

end
|
||||
8
build.rb
8
build.rb
|
|
@ -87,14 +87,14 @@ end
|
|||
|
||||
def build(filename, platform='linux', binformat='elf')
|
||||
objfile = base(filename) + '.o'
|
||||
symtab, objwriter =
|
||||
symtab, objwriter_class =
|
||||
case binformat
|
||||
when 'elf': [Assembler::ELFSymtab.new, Assembler::ELFFile.new]
|
||||
when 'macho': [Assembler::MachOSymtab.new, Assembler::MachOFile.new]
|
||||
when 'elf': [Assembler::ELFSymtab.new, Assembler::ELFFile]
|
||||
when 'macho': [Assembler::MachOSymtab.new, Assembler::MachOFile]
|
||||
else
|
||||
raise "unsupported binary format: #{binformat}"
|
||||
end
|
||||
compile(filename, objfile, Assembler::Binary.new(platform, symtab, objwriter))
|
||||
compile(filename, objfile, Assembler::Binary.new(platform, symtab, objwriter_class))
|
||||
exefile = link(objfile, platform)
|
||||
return exefile
|
||||
end
|
||||
|
|
|
|||
88
compiler.rb
88
compiler.rb
|
|
@ -13,6 +13,7 @@
|
|||
# require 'unroller'
|
||||
|
||||
require 'asm/registers'
|
||||
require 'asm/varproxy'
|
||||
|
||||
class ParseError < StandardError
|
||||
attr_reader :caller, :context
|
||||
|
|
@ -34,22 +35,19 @@ class Compiler
|
|||
attr_reader :asm
|
||||
|
||||
def initialize(input, asm)
|
||||
# XXX for development only!
|
||||
@indent = 0 # for pretty printing
|
||||
|
||||
@look = '' # Next lookahead char.
|
||||
@token = nil # Type of last read token.
|
||||
@value = nil # Value of last read token.
|
||||
@input = input # Stream to read from.
|
||||
|
||||
@asm = asm
|
||||
@asm = asm # assembler
|
||||
|
||||
# seed the lexer
|
||||
get_char
|
||||
end
|
||||
|
||||
def compile
|
||||
block
|
||||
block # parse a block of code
|
||||
expected(:'end of file') unless eof?
|
||||
asm.output
|
||||
end
|
||||
|
|
@ -267,7 +265,7 @@ class Compiler
|
|||
asm.cmp(reg, 0) # if false do nothing
|
||||
asm.jz(end_label)
|
||||
asm.mov(reg, -1) # truthy, make it true
|
||||
asm.emit_label(end_label)
|
||||
asm.deflabel(end_label)
|
||||
end
|
||||
|
||||
def relation
|
||||
|
|
@ -336,11 +334,11 @@ class Compiler
|
|||
asm.not_(EAX) if invert # (or true if inverted)
|
||||
asm.jmp(end_label)
|
||||
|
||||
asm.emit_label(true_label)
|
||||
asm.deflabel(true_label)
|
||||
asm.xor(EAX, EAX) # return true
|
||||
asm.not_(EAX) unless invert # (or false if inverted)
|
||||
|
||||
asm.emit_label(end_label)
|
||||
asm.deflabel(end_label)
|
||||
end
|
||||
|
||||
# a: <on the stack>
|
||||
|
|
@ -387,11 +385,14 @@ class Compiler
|
|||
name = @value
|
||||
match('=')
|
||||
boolean_expression
|
||||
asm.defvar(name) unless asm.var?(name)
|
||||
asm.mov([asm.var(name)], EAX)
|
||||
lval = asm.var!(name)
|
||||
asm.mov([lval], EAX)
|
||||
end
|
||||
|
||||
# Parse a code block.
|
||||
#
|
||||
# TODO replace the case..when with a lookup table
|
||||
# (might be exposed in the language later)
|
||||
def block(label=nil)
|
||||
scan
|
||||
until @value == 'else' || @value == 'end' || eof?
|
||||
|
|
@ -438,13 +439,13 @@ class Compiler
|
|||
skip_any_whitespace
|
||||
end_label = asm.mklabel(:endif) # now we need the 2nd label
|
||||
asm.jmp(end_label)
|
||||
asm.emit_label(else_label)
|
||||
asm.deflabel(else_label)
|
||||
@indent += 1
|
||||
block(label)
|
||||
@indent -= 1
|
||||
end
|
||||
match_word('end')
|
||||
asm.emit_label(end_label)
|
||||
asm.deflabel(end_label)
|
||||
end
|
||||
|
||||
# Used to implement the Two-Label-Loops (while, until, repeat).
|
||||
|
|
@ -455,7 +456,7 @@ class Compiler
|
|||
def simple_loop(name)
|
||||
start_label = asm.mklabel(:"#{name}_loop")
|
||||
end_label = asm.mklabel(:"end_#{name}")
|
||||
asm.emit_label(start_label)
|
||||
asm.deflabel(start_label)
|
||||
|
||||
yield(end_label)
|
||||
|
||||
|
|
@ -464,7 +465,7 @@ class Compiler
|
|||
@indent -= 1
|
||||
match_word('end')
|
||||
asm.jmp(start_label)
|
||||
asm.emit_label(end_label)
|
||||
asm.deflabel(end_label)
|
||||
end
|
||||
|
||||
def condition_loop(name, jump_instruction)
|
||||
|
|
@ -494,13 +495,13 @@ class Compiler
|
|||
# s = s + x
|
||||
# e
|
||||
def for_stmt
|
||||
counter = get_name
|
||||
asm.defvar(counter)
|
||||
name = get_name
|
||||
counter = asm.defvar(name)
|
||||
match('=')
|
||||
boolean_expression # initial value
|
||||
asm.sub(EAX, 1) # pre-decrement because of the
|
||||
# following pre-increment
|
||||
asm.mov([asm.var(counter)], EAX) # stash the counter in memory
|
||||
asm.mov([counter], EAX) # stash the counter in memory
|
||||
match_word('to', :scan => true)
|
||||
boolean_expression # final value
|
||||
skip_any_whitespace
|
||||
|
|
@ -508,9 +509,9 @@ class Compiler
|
|||
final = [ESP]
|
||||
|
||||
simple_loop('for') do |end_label|
|
||||
asm.mov(ECX, [asm.var(counter)]) # get the counter
|
||||
asm.mov(ECX, [counter]) # get the counter
|
||||
asm.add(ECX, 1) # increment
|
||||
asm.mov([asm.var(counter)], ECX) # store the counter
|
||||
asm.mov([counter], ECX) # store the counter
|
||||
asm.cmp(final, ECX) # check if we're done
|
||||
asm.jz(end_label) # if so jump to the end
|
||||
end
|
||||
|
|
@ -529,7 +530,7 @@ class Compiler
|
|||
|
||||
start_label = asm.mklabel(:do)
|
||||
end_label = asm.mklabel(:enddo)
|
||||
asm.emit_label(start_label)
|
||||
asm.deflabel(start_label)
|
||||
|
||||
asm.push(ECX)
|
||||
|
||||
|
|
@ -548,7 +549,7 @@ class Compiler
|
|||
# always clean up the stack after.
|
||||
asm.sub(ESP, 4)
|
||||
|
||||
asm.emit_label(end_label)
|
||||
asm.deflabel(end_label)
|
||||
|
||||
# If there was a break we have to clean up the stack here. If
|
||||
# there was no break we clean up the phony push above.
|
||||
|
|
@ -573,35 +574,42 @@ class Compiler
|
|||
|
||||
# print eax in hex format
|
||||
def print_stmt
|
||||
# variable names
|
||||
d = 'DIGITS'
|
||||
h = 'HEX'
|
||||
# variables
|
||||
d = '__DIGITS'
|
||||
h = '__HEX'
|
||||
|
||||
digits = if asm.var?(d)
|
||||
asm.var(d)
|
||||
else
|
||||
d_var = asm.defvar(d, 4)
|
||||
asm.block do
|
||||
# define a lookup table of digits
|
||||
mov([d_var], 0x33323130)
|
||||
mov([d_var+4], 0x37363534)
|
||||
mov([d_var+8], 0x62613938)
|
||||
mov([d_var+12], 0x66656463)
|
||||
end
|
||||
d_var
|
||||
end
|
||||
|
||||
# 3 dwords == 12 chars
|
||||
hex = asm.var!(h, 3)
|
||||
|
||||
asm.block do
|
||||
# define a lookup table of digits
|
||||
unless var?(d)
|
||||
defvar(d, 4)
|
||||
mov([var(d)], 0x33323130)
|
||||
mov([var(d)+4], 0x37363534)
|
||||
mov([var(d)+8], 0x62613938)
|
||||
mov([var(d)+12], 0x66656463)
|
||||
end
|
||||
# 3 dwords == 12 chars
|
||||
defvar(h, 3) unless var?(h)
|
||||
# TODO check sign and prepend '-' if negative
|
||||
mov([var(h)], 0x7830) # "0x" == [48, 120]
|
||||
mov([var(h)+10], 0xa) # newline + null terminator
|
||||
mov([hex], 0x7830) # "0x" == [48, 120]
|
||||
mov([hex+10], 0xa) # newline + null terminator
|
||||
end
|
||||
boolean_expression
|
||||
asm.block do
|
||||
# convert eax to a hex string
|
||||
lea(ESI, [var(d)])
|
||||
lea(EDI, [var(h)+9])
|
||||
lea(ESI, [digits])
|
||||
lea(EDI, [hex+9])
|
||||
# build the string backwards (right to left), byte by byte
|
||||
mov(ECX, 4)
|
||||
end
|
||||
asm.emit_label(loop_label=asm.mklabel)
|
||||
asm.block do
|
||||
deflabel(loop_label=mklabel)
|
||||
# low nybble of nth byte
|
||||
movzx(EBX, AL)
|
||||
and_(BL, 0x0f) # isolate low nybble
|
||||
|
|
@ -619,7 +627,7 @@ class Compiler
|
|||
loop_(loop_label)
|
||||
# write(int fd, char *s, int n)
|
||||
mov(EAX, 4) # SYS_write
|
||||
lea(ECX, [var(h)]) # ecx = &s
|
||||
lea(ECX, [hex]) # ecx = &s
|
||||
args = [1, # fd = 1 (STDOUT)
|
||||
ECX, # s = &s
|
||||
11] # n = 11 (excluding term, max # of chars to print)
|
||||
|
|
|
|||
Loading…
Reference in a new issue