compiler/asm/binary.rb

320 lines
8.5 KiB
Ruby

# A very basic x86 assembler library for Ruby. Generally the
# instructions implemented are the minimum needed by the compiler this
# is written for. x86 is just too big.
#
# sjs
# may 2009
module Assembler
# Define a method named `emit_byte` and one named `binary_size` and
# include this module. Calling the assembler methods will output
# x86 machine code ... hopefully. So far it's incomplete and
# binaries just segfault.
class Binary
# This structure allows for x86 registers of all sizes. The
# number of the register is the index of the array in which it was
# found.
Registers = [ [:eax, :ax, :al], # 0
[:ecx, :cx, :cl], # 1
[:edx, :dx, :dl], # 2
[:ebx, :bx, :bl], # 3
[:esp, :sp, :ah], # 4
[:ebp, :bp, :ch], # 5
[:esi, :si, :dh], # 6
[:edi, :di, :bh] # 7
]
# Regex to match any x86 register name.
RegisterRegex = '(e?[acdb]x|e?[sb]p|e?[sd]i|[acdb][hl])'
# Match a literal number in binary, octal, decimal, or hex
NumberRegex = '(0[xXbB]?)?[0-9a-fA-F]+'
# Match a variable name.
NameRegex = '[a-zA-Z][a-zA-Z0-9]*'
# 0.size gives the real answer, we only do x86 though
MachineBytes = 4
MachineBits = MachineBytes * 8
MinSigned = -1 * 2**(MachineBits-1)
MaxSigned = 2**(MachineBits-1) - 1
MinUnsigned = 0
MaxUnsigned = 2**MachineBits - 1
SignedRange = MinSigned..MaxSigned
# Count the bytes that were encoded in the given block.
def asm
# stash the current number of bytes written
instruction_offset = bytes_written
yield
# return the number of bytes written
bytes_written - instruction_offset
end
def emit_dword(num)
num_to_quad(num).each {|byte| emit_byte(byte)}
end
# 0-2: r/m
# 3-5: reg/opcode
# 6-7: mod
#
# dest and src are tuples of the form [type, value] where type is
# any of :reg, :rm32, :imm32. Max _one_ :rm32 arg per call.
def emit_modrm(dest, src, override)
if dest[0] == :reg
reg = override[:op] || regnum(dest[1])
# mod == 11 (register content)
if src[0] == :reg
mod = 3
rm = regnum(src[1])
# mod == 00 (pointer)
elsif src[0] == :rm32
mod = 0
parts = decode_addr(src[1])
rm = case parts[0]
# mod == 00 (direct pointer e.g. [eax])
when :reg
regnum(parts[1])
when :sib
sib = parts[1..-1]
4
when :disp
disp = parts[1]
5
end
end
elsif src[0] == :reg
reg = override[:op] || regnum(src[1])
else
raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
end
emit_byte((mod << 6) & (reg << 3) & rm)
emit_sib(sib) if defined? sib
emit_dword(disp) if defined? disp
end
def emit_sib(sib)
scale, index, base = *sib
if [1,2,4,8].include?(scale)
scale = log2(scale)
else
raise "unsupported SIB scale: #{scale}, should be [1, 2, 4, 8]"
end
emit_byte((scale << 6) & (index << 3) & base)
end
def register?(op)
Registers.each_with_index { |list,i| return i if list.include?(op) }
nil
end
def regnum(op)
num = register?
raise "not a register: #{op.inspect}" unless num
num
end
def immediate?(op)
op.is_a?(Numeric) || (op.is_a?(String) && op.match(/^#{NumberRegex}$/))
end
def rm32?(op)
offset?(op) || op.respond_to?(:match) && op.match(/^
\[
#{RegisterRegex} # base register
(\+#{RegisterRegex} # optional index register
(\*[1248])? # optional scale
)?
\]
$/x)
end
# 6 versions of the mov instruction are supported:
# 1. mov reg32, immediate32 (0xb8+destreg, imm32)
# 2. mov reg32, r/m32 (0x8b, mod r/m, maybe sib)
# 2a. mov eax, memoffset32 (0xa1, disp32)
# 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib)
# 3a. mov memoffset32, eax (0xa3, disp32)
# 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32)
def x86_mov(dest, src)
dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '
asm do
# version 1: mov r32, imm32
if register?(dest) && immediate?(src)
emit_byte(0xb8 + regnum(dest)) # dest encoded in instruction
emit_dword(parse_num(src))
# version 2: mov r32, r/m32
elsif register?(dest) && rm32?(src)
# version 2a: mov eax, moffs32
if dest == :eax && offset?(src)
emit_byte(0xa1)
num = decode_addr(src)[1]
emit_dword(num)
else
emit_byte(0x8b)
emit_modrm([:reg, dest], [:rm32, src])
end
# version 3: mov r/m32, r32
elsif rm32?(dest) && register?(src)
# version 3a: mov moffs32, eax
if offset?(dest) && src == :eax
emit_byte(0xa3)
num = decode_addr(dest)[1]
emit_dword(num)
else
emit_byte(0x89)
emit_modrm([:rm32, dest], [:reg, src])
end
# version 4: mov r/m32, imm32
elsif rm32?(dest) && immediate?(src)
emit_byte(0xc7)
emit_modrm([:rm32, dest], [:imm32, src], :op => 0)
else
puts "rm32?(dest): #{rm32?(dest)}\t\trm32?(src): #{rm32?(src)}"
puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}"
puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}"
puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}"
#raise "unsupported mov format: mov #{dest}, #{src}"
puts "!!! unsupported mov format: mov #{dest}, #{src}"
end
end # asm do
end
def x86_add(dest, src)
end
def x86_sub(dest, src)
end
def x86_imul(op)
end
def x86_idiv(op)
end
def x86_inc(op)
asm do
if register?(op)
emit_byte(0x40 + regnum(op))
elsif rm32?(op)
emit_byte(0xff)
emit_modrm(...)
else
raise "unsupported op #{op}, wanted r32 or r/m32"
end
end
end
def x86_push(reg)
end
def x86_cmp(a, b)
end
def offset?(addr)
addr.respond_to?(:match) && addr.match(/^\[(#{NameRegex}|#{NumberRegex})\]$/)
end
def decode_addr(addr)
addr = addr[1..-2] # strip brackets
if matches = addr.match(/^#{NameRegex}$/)
unless loc = @vars[matches[0]]
raise "undefined variable #{matches[0]}"
end
[:disp, @bss_offset + loc]
elsif matches = addr.match(/^#{NumberRegex}$/)
[:disp, parse_num(matches[0])]
elsif addr.index('*')
bi, scale = *addr.split('*')
base, index = *bi.split('+')
[:sib, scale.to_i, index.to_sym, base.to_sym]
elsif addr.index('+')
base, index = *addr.split('+')
[:sib, 1, index.to_sym, base.to_sym]
else
[:reg, addr.to_sym]
end
end
# Parse a number from a string. Used by emit_dword.
def parse_num(str)
# If it's not a string it's a number, just return it.
return str unless str.is_a?(String)
str.downcase!
base = 10 # default to base 10
if str[0, 1] == '0'
base = case str[1, 1]
when 'x'
16
when 'b'
str.slice!(2..-1)
2
else
8
end
end
str.to_i(base)
end
# Convert a number to a quad of bytes, discarding excess bits.
# Little endian!
def num_to_quad(num)
[
num & 0xff,
(num >> 8) & 0xff,
(num >> 16) & 0xff,
(num >> 24) & 0xff
]
end
def log2(x, tol=1e-13)
result = 0.0
# Integer part
while x < 1
resultp -= 1
x *= 2
end
while x >= 2
result += 1
x /= 2
end
# Fractional part
fp = 1.0
while fp >= tol
fp /= 2
x *= x
if x >= 2
x /= 2
result += fp
end
end
result
end
end # module Binary
end # module Assembler