mirror of
https://github.com/samsonjs/compiler.git
synced 2026-03-25 08:45:52 +00:00
320 lines
8.5 KiB
Ruby
320 lines
8.5 KiB
Ruby
# A very basic x86 assembler library for Ruby. Generally the
|
|
# instructions implemented are the minimum needed by the compiler this
|
|
# is written for. x86 is just too big.
|
|
#
|
|
# sjs
|
|
# may 2009
|
|
|
|
module Assembler
|
|
|
|
# Define a method named `emit_byte` and one named `binary_size` and
|
|
# include this module. Calling the assembler methods will output
|
|
# x86 machine code ... hopefully. So far it's incomplete and
|
|
# binaries just segfault.
|
|
class Binary
|
|
|
|
# This structure allows for x86 registers of all sizes. The
|
|
# number of the register is the index of the array in which it was
|
|
# found.
|
|
Registers = [ [:eax, :ax, :al], # 0
|
|
[:ecx, :cx, :cl], # 1
|
|
[:edx, :dx, :dl], # 2
|
|
[:ebx, :bx, :bl], # 3
|
|
[:esp, :sp, :ah], # 4
|
|
[:ebp, :bp, :ch], # 5
|
|
[:esi, :si, :dh], # 6
|
|
[:edi, :di, :bh] # 7
|
|
]
|
|
|
|
# Regex to match any x86 register name.
|
|
RegisterRegex = '(e?[acdb]x|e?[sb]p|e?[sd]i|[acdb][hl])'
|
|
|
|
# Match a literal number in binary, octal, decimal, or hex
|
|
NumberRegex = '(0[xXbB]?)?[0-9a-fA-F]+'
|
|
|
|
# Match a variable name.
|
|
NameRegex = '[a-zA-Z][a-zA-Z0-9]*'
|
|
|
|
# 0.size gives the real answer, we only do x86 though
|
|
MachineBytes = 4
|
|
MachineBits = MachineBytes * 8
|
|
MinSigned = -1 * 2**(MachineBits-1)
|
|
MaxSigned = 2**(MachineBits-1) - 1
|
|
MinUnsigned = 0
|
|
MaxUnsigned = 2**MachineBits - 1
|
|
SignedRange = MinSigned..MaxSigned
|
|
|
|
|
|
|
|
# Count the bytes that were encoded in the given block.
|
|
def asm
|
|
# stash the current number of bytes written
|
|
instruction_offset = bytes_written
|
|
|
|
yield
|
|
|
|
# return the number of bytes written
|
|
bytes_written - instruction_offset
|
|
end
|
|
|
|
|
|
def emit_dword(num)
|
|
num_to_quad(num).each {|byte| emit_byte(byte)}
|
|
end
|
|
|
|
# 0-2: r/m
|
|
# 3-5: reg/opcode
|
|
# 6-7: mod
|
|
#
|
|
# dest and src are tuples of the form [type, value] where type is
|
|
# any of :reg, :rm32, :imm32. Max _one_ :rm32 arg per call.
|
|
def emit_modrm(dest, src, override)
|
|
if dest[0] == :reg
|
|
reg = override[:op] || regnum(dest[1])
|
|
|
|
# mod == 11 (register content)
|
|
if src[0] == :reg
|
|
mod = 3
|
|
rm = regnum(src[1])
|
|
|
|
# mod == 00 (pointer)
|
|
elsif src[0] == :rm32
|
|
mod = 0
|
|
parts = decode_addr(src[1])
|
|
rm = case parts[0]
|
|
# mod == 00 (direct pointer e.g. [eax])
|
|
when :reg
|
|
regnum(parts[1])
|
|
when :sib
|
|
sib = parts[1..-1]
|
|
4
|
|
when :disp
|
|
disp = parts[1]
|
|
5
|
|
end
|
|
end
|
|
elsif src[0] == :reg
|
|
reg = override[:op] || regnum(src[1])
|
|
else
|
|
raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
|
|
end
|
|
emit_byte((mod << 6) & (reg << 3) & rm)
|
|
emit_sib(sib) if defined? sib
|
|
emit_dword(disp) if defined? disp
|
|
end
|
|
|
|
def emit_sib(sib)
|
|
scale, index, base = *sib
|
|
if [1,2,4,8].include?(scale)
|
|
scale = log2(scale)
|
|
else
|
|
raise "unsupported SIB scale: #{scale}, should be [1, 2, 4, 8]"
|
|
end
|
|
emit_byte((scale << 6) & (index << 3) & base)
|
|
end
|
|
|
|
def register?(op)
|
|
Registers.each_with_index { |list,i| return i if list.include?(op) }
|
|
nil
|
|
end
|
|
|
|
def regnum(op)
|
|
num = register?
|
|
raise "not a register: #{op.inspect}" unless num
|
|
num
|
|
end
|
|
|
|
def immediate?(op)
|
|
op.is_a?(Numeric) || (op.is_a?(String) && op.match(/^#{NumberRegex}$/))
|
|
end
|
|
|
|
def rm32?(op)
|
|
offset?(op) || op.respond_to?(:match) && op.match(/^
|
|
\[
|
|
#{RegisterRegex} # base register
|
|
(\+#{RegisterRegex} # optional index register
|
|
(\*[1248])? # optional scale
|
|
)?
|
|
\]
|
|
$/x)
|
|
end
|
|
|
|
# 6 versions of the mov instruction are supported:
|
|
# 1. mov reg32, immediate32 (0xb8+destreg, imm32)
|
|
# 2. mov reg32, r/m32 (0x8b, mod r/m, maybe sib)
|
|
# 2a. mov eax, memoffset32 (0xa1, disp32)
|
|
# 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib)
|
|
# 3a. mov memoffset32, eax (0xa3, disp32)
|
|
# 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32)
|
|
def x86_mov(dest, src)
|
|
dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
|
|
src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '
|
|
|
|
asm do
|
|
|
|
# version 1: mov r32, imm32
|
|
if register?(dest) && immediate?(src)
|
|
emit_byte(0xb8 + regnum(dest)) # dest encoded in instruction
|
|
emit_dword(parse_num(src))
|
|
|
|
# version 2: mov r32, r/m32
|
|
elsif register?(dest) && rm32?(src)
|
|
# version 2a: mov eax, moffs32
|
|
if dest == :eax && offset?(src)
|
|
emit_byte(0xa1)
|
|
num = decode_addr(src)[1]
|
|
emit_dword(num)
|
|
else
|
|
emit_byte(0x8b)
|
|
emit_modrm([:reg, dest], [:rm32, src])
|
|
end
|
|
|
|
# version 3: mov r/m32, r32
|
|
elsif rm32?(dest) && register?(src)
|
|
# version 3a: mov moffs32, eax
|
|
if offset?(dest) && src == :eax
|
|
emit_byte(0xa3)
|
|
num = decode_addr(dest)[1]
|
|
emit_dword(num)
|
|
else
|
|
emit_byte(0x89)
|
|
emit_modrm([:rm32, dest], [:reg, src])
|
|
end
|
|
|
|
# version 4: mov r/m32, imm32
|
|
elsif rm32?(dest) && immediate?(src)
|
|
emit_byte(0xc7)
|
|
emit_modrm([:rm32, dest], [:imm32, src], :op => 0)
|
|
else
|
|
puts "rm32?(dest): #{rm32?(dest)}\t\trm32?(src): #{rm32?(src)}"
|
|
puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}"
|
|
puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}"
|
|
puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}"
|
|
#raise "unsupported mov format: mov #{dest}, #{src}"
|
|
puts "!!! unsupported mov format: mov #{dest}, #{src}"
|
|
end
|
|
|
|
end # asm do
|
|
|
|
end
|
|
|
|
|
|
def x86_add(dest, src)
|
|
end
|
|
|
|
def x86_sub(dest, src)
|
|
end
|
|
|
|
def x86_imul(op)
|
|
end
|
|
|
|
def x86_idiv(op)
|
|
end
|
|
|
|
def x86_inc(op)
|
|
asm do
|
|
if register?(op)
|
|
emit_byte(0x40 + regnum(op))
|
|
elsif rm32?(op)
|
|
emit_byte(0xff)
|
|
emit_modrm(...)
|
|
else
|
|
raise "unsupported op #{op}, wanted r32 or r/m32"
|
|
end
|
|
end
|
|
end
|
|
|
|
def x86_push(reg)
|
|
end
|
|
|
|
def x86_cmp(a, b)
|
|
end
|
|
|
|
|
|
def offset?(addr)
|
|
addr.respond_to?(:match) && addr.match(/^\[(#{NameRegex}|#{NumberRegex})\]$/)
|
|
end
|
|
|
|
def decode_addr(addr)
|
|
addr = addr[1..-2] # strip brackets
|
|
|
|
if matches = addr.match(/^#{NameRegex}$/)
|
|
unless loc = @vars[matches[0]]
|
|
raise "undefined variable #{matches[0]}"
|
|
end
|
|
[:disp, @bss_offset + loc]
|
|
elsif matches = addr.match(/^#{NumberRegex}$/)
|
|
[:disp, parse_num(matches[0])]
|
|
elsif addr.index('*')
|
|
bi, scale = *addr.split('*')
|
|
base, index = *bi.split('+')
|
|
[:sib, scale.to_i, index.to_sym, base.to_sym]
|
|
elsif addr.index('+')
|
|
base, index = *addr.split('+')
|
|
[:sib, 1, index.to_sym, base.to_sym]
|
|
else
|
|
[:reg, addr.to_sym]
|
|
end
|
|
end
|
|
|
|
# Parse a number from a string. Used by emit_dword.
|
|
def parse_num(str)
|
|
# If it's not a string it's a number, just return it.
|
|
return str unless str.is_a?(String)
|
|
|
|
str.downcase!
|
|
base = 10 # default to base 10
|
|
if str[0, 1] == '0'
|
|
base = case str[1, 1]
|
|
when 'x'
|
|
16
|
|
when 'b'
|
|
str.slice!(2..-1)
|
|
2
|
|
else
|
|
8
|
|
end
|
|
end
|
|
str.to_i(base)
|
|
end
|
|
|
|
# Convert a number to a quad of bytes, discarding excess bits.
|
|
# Little endian!
|
|
def num_to_quad(num)
|
|
[
|
|
num & 0xff,
|
|
(num >> 8) & 0xff,
|
|
(num >> 16) & 0xff,
|
|
(num >> 24) & 0xff
|
|
]
|
|
end
|
|
|
|
def log2(x, tol=1e-13)
|
|
result = 0.0
|
|
|
|
# Integer part
|
|
while x < 1
|
|
resultp -= 1
|
|
x *= 2
|
|
end
|
|
while x >= 2
|
|
result += 1
|
|
x /= 2
|
|
end
|
|
|
|
# Fractional part
|
|
fp = 1.0
|
|
while fp >= tol
|
|
fp /= 2
|
|
x *= x
|
|
if x >= 2
|
|
x /= 2
|
|
result += fp
|
|
end
|
|
end
|
|
result
|
|
end
|
|
|
|
end # module Binary
|
|
|
|
end # module Assembler
|