mirror of
https://github.com/samsonjs/compiler.git
synced 2026-03-25 08:45:52 +00:00
[BUNK] moving to macbook, broken commit.
This commit is contained in:
commit
76d4d2be3a
19 changed files with 1066 additions and 156 deletions
16
.gitignore
vendored
16
.gitignore
vendored
|
|
@ -14,4 +14,18 @@ test/test_repeat
|
|||
test/test_do
|
||||
test/test_for
|
||||
test/test_break
|
||||
test/test_print
|
||||
test/test_print
|
||||
|
||||
*.o
|
||||
test.asm
|
||||
elfwriter
|
||||
lea
|
||||
min
|
||||
min.bin
|
||||
min_elf
|
||||
mov
|
||||
show_elf_magic
|
||||
test.bin
|
||||
test_elf.bin
|
||||
test_elf
|
||||
min2
|
||||
|
|
|
|||
14
Makefile
14
Makefile
|
|
@ -1,5 +1,17 @@
|
|||
test:
|
||||
cd test && make all
|
||||
|
||||
elfwriter: elfwriter.c
|
||||
gcc -o elfwriter elfwriter.c -lelf
|
||||
|
||||
.PHONY: test
|
||||
test_elf: elfwriter build
|
||||
./elfwriter test.bin 4 test_elf.o
|
||||
ld -o test_elf test_elf.o
|
||||
./test_elf
|
||||
|
||||
clean:
|
||||
@rm -f elfwriter
|
||||
@rm -f test_elf.o
|
||||
@rm -f test_elf
|
||||
|
||||
.PHONY: test
|
||||
|
|
|
|||
124
asm.rb
124
asm.rb
|
|
@ -1,124 +0,0 @@
|
|||
# A very basic x86 assembler library for Ruby. Generally the
|
||||
# instructions implemented are the minimum needed by the compiler this
|
||||
# is written for. x86 is just too big.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
|
||||
module Assembler

  # Text back end: every x86_* method formats one line of nasm-style
  # assembly and passes it to `emit`, which the including class must
  # define.
  module X86

    # mov dest, src -- numeric sources are rendered as hex literals.
    def x86_mov(dest, src)
      operand = src.is_a?(Numeric) ? "0x#{src.to_s(16)}" : src
      emit('mov %s, %s' % [dest, operand])
    end

    def x86_movzx(dest, src)
      emit('movzx %s, %s' % [dest, src])
    end

    def x86_add(dest, src)
      emit('add %s, %s' % [dest, src])
    end

    def x86_sub(dest, src)
      emit('sub %s, %s' % [dest, src])
    end

    def x86_imul(op)
      emit('imul %s' % [op])
    end

    def x86_idiv(op)
      emit('idiv %s' % [op])
    end

    def x86_inc(op)
      emit('inc %s' % [op])
    end

    def x86_dec(op)
      emit('dec %s' % [op])
    end

    def x86_push(reg)
      emit('push %s' % [reg])
    end

    def x86_pop(reg)
      emit('pop %s' % [reg])
    end

    def x86_call(label)
      emit('call %s' % [label])
    end

    def x86_neg(reg)
      emit('neg %s' % [reg])
    end

    def x86_not(rm32)
      emit('not %s' % [rm32])
    end

    def x86_xchg(op1, op2)
      emit('xchg %s, %s' % [op1, op2])
    end

    def x86_and(op1, op2)
      emit('and %s, %s' % [op1, op2])
    end

    def x86_or(op1, op2)
      emit('or %s, %s' % [op1, op2])
    end

    def x86_xor(op1, op2)
      emit('xor %s, %s' % [op1, op2])
    end

    def x86_jz(label)
      emit('jz %s' % [label])
    end

    def x86_jnz(label)
      emit('jnz %s' % [label])
    end

    def x86_jmp(label)
      emit('jmp %s' % [label])
    end

    def x86_jl(label)
      emit('jl %s' % [label])
    end

    def x86_cmp(a, b)
      emit('cmp %s, %s' % [a, b])
    end

    def x86_lea(a, b)
      emit('lea %s, %s' % [a, b])
    end

    def x86_shr(a, b)
      emit('shr %s, %s' % [a, b])
    end

    def x86_loop(label)
      emit('loop %s' % [label])
    end

    # int num -- the interrupt number is always rendered in hex.
    def x86_int(num)
      emit('int 0x%s' % [num.to_s(16)])
    end

  end

end
|
||||
9
asm/asm.rb
Normal file
9
asm/asm.rb
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# Assembler container module. Sub modules are Text and Binary, which
|
||||
# both export the same interface for generating either assembly or
|
||||
# machine code for x86.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
|
||||
# Intentionally empty: this file only declares the namespace. The
# back ends in asm/text.rb and asm/binary.rb reopen it.
module Assembler
end
|
||||
320
asm/binary.rb
Normal file
320
asm/binary.rb
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
# A very basic x86 assembler library for Ruby. Generally the
|
||||
# instructions implemented are the minimum needed by the compiler this
|
||||
# is written for. x86 is just too big.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
|
||||
module Assembler

  # Encodes a small subset of 32-bit x86 directly to machine code.
  #
  # The class has no output buffer of its own. Whoever uses it (normally
  # a subclass) must provide:
  #
  #   emit_byte(byte)  - append one byte to the output
  #   bytes_written    - number of bytes emitted so far
  #   @vars            - Hash of variable name => offset, read by decode_addr
  #   @bss_offset      - base address added to the offsets in @vars
  #
  # Operand conventions: registers are Symbols (:eax), memory operands
  # are bracketed Strings ("[eax]", "[ebx+ecx*4]", "[0xdeadbeef]",
  # "[name]"), immediates are Numerics or numeric Strings.
  class Binary

    # This structure allows for x86 registers of all sizes.  The
    # number of the register is the index of the array in which it was
    # found.
    Registers = [ [:eax, :ax, :al],  # 0
                  [:ecx, :cx, :cl],  # 1
                  [:edx, :dx, :dl],  # 2
                  [:ebx, :bx, :bl],  # 3
                  [:esp, :sp, :ah],  # 4
                  [:ebp, :bp, :ch],  # 5
                  [:esi, :si, :dh],  # 6
                  [:edi, :di, :bh]   # 7
                ]

    # Regex to match any x86 register name.
    RegisterRegex = '(e?[acdb]x|e?[sb]p|e?[sd]i|[acdb][hl])'

    # Match a literal number in binary, octal, decimal, or hex
    NumberRegex = '(0[xXbB]?)?[0-9a-fA-F]+'

    # Match a variable name.
    NameRegex = '[a-zA-Z][a-zA-Z0-9]*'

    # 0.size gives the real answer, we only do x86 though
    MachineBytes = 4
    MachineBits = MachineBytes * 8
    MinSigned = -1 * 2**(MachineBits-1)
    MaxSigned = 2**(MachineBits-1) - 1
    MinUnsigned = 0
    MaxUnsigned = 2**MachineBits - 1
    SignedRange = MinSigned..MaxSigned


    # Run the given block and return the number of bytes it emitted.
    def asm
      instruction_offset = bytes_written
      yield
      bytes_written - instruction_offset
    end


    # Emit a 32-bit value as four bytes, little endian.
    def emit_dword(num)
      num_to_quad(num).each {|byte| emit_byte(byte)}
    end

    # Emit a ModR/M byte plus the SIB byte or disp32 it calls for.
    #
    #   bits 0-2: r/m
    #   bits 3-5: reg/opcode
    #   bits 6-7: mod
    #
    # dest and src are tuples of the form [type, value] where type is
    # any of :reg, :rm32, :imm32.  Max _one_ :rm32 arg per call.  The
    # :rm32 operand fills the r/m field and the :reg operand fills the
    # reg field.  When both are :reg, dest fills reg and src fills r/m.
    #
    # override[:op], when given, is an opcode extension (/digit) placed
    # in the reg field instead of a register number.  With it, src may
    # be nil or an :imm32 tuple; its value is not encoded here.
    #
    # Raises RuntimeError for operand combinations or addresses that
    # cannot be encoded.
    def emit_modrm(dest, src, override={})
      dest_type, dest_val = dest
      src_type, src_val = src
      sib = disp = nil

      if dest_type == :reg && src_type == :reg
        # mod == 11 (register content)
        mod = 3
        reg = override[:op] || regnum(dest_val)
        rm = regnum(src_val)
      else
        if dest_type == :rm32
          addr = dest_val
          reg = override[:op]
          reg ||= regnum(src_val) if src_type == :reg
        elsif dest_type == :reg && src_type == :rm32
          addr = src_val
          reg = override[:op] || regnum(dest_val)
        end
        unless addr && reg
          raise "unsupported mod r/m byte! dest=#{dest.inspect} src=#{src.inspect}"
        end

        # mod == 00 (pointer, no displacement except the disp32 form)
        mod = 0
        kind, *parts = decode_addr(addr)
        case kind
        when :reg
          # direct pointer e.g. [eax]
          rm = regnum(parts[0])
          # With mod == 00, r/m 4 means "SIB follows" and r/m 5 means
          # "disp32 only", so [esp] and [ebp] need other encodings.
          if rm == 4 || rm == 5
            raise "unsupported address #{addr}: esp/ebp cannot be a plain base register"
          end
        when :sib
          rm = 4
          sib = parts
        when :disp
          rm = 5
          disp = parts[0]
        end
      end

      emit_byte((mod << 6) | (reg << 3) | rm)
      emit_sib(sib) if sib
      emit_dword(disp) if disp
    end

    # Emit a SIB byte.  sib is [scale, index, base] where scale is one
    # of 1, 2, 4, 8 and index/base are register names (or numbers).
    def emit_sib(sib)
      scale, index, base = *sib
      # The position in this list is log2(scale), i.e. the 2-bit field.
      scale_bits = [1, 2, 4, 8].index(scale)
      unless scale_bits
        raise "unsupported SIB scale: #{scale}, should be [1, 2, 4, 8]"
      end
      index = regnum(index) unless index.is_a?(Integer)
      base = regnum(base) unless base.is_a?(Integer)
      # Index 4 encodes "no index"; base 5 with mod == 00 encodes
      # "disp32, no base".  Neither is what the caller asked for.
      raise "esp cannot be used as a SIB index" if index == 4
      raise "ebp as a SIB base needs a displacement" if base == 5
      emit_byte((scale_bits << 6) | (index << 3) | base)
    end

    # Return the register number of op, or nil if op is not a register.
    # Note that 0 (eax) is truthy in Ruby.
    def register?(op)
      Registers.index { |names| names.include?(op) }
    end

    # Like register? but raises if op is not a register.
    def regnum(op)
      num = register?(op)
      raise "not a register: #{op.inspect}" unless num
      num
    end

    # True for Numerics and for Strings that look like number literals.
    def immediate?(op)
      op.is_a?(Numeric) || (op.is_a?(String) && op.match(/\A#{NumberRegex}\z/))
    end

    # True for bracketed memory operands: an absolute offset or
    # variable, or base[+index[*scale]] register addressing.
    def rm32?(op)
      offset?(op) || op.respond_to?(:match) && op.match(/\A
        \[
          #{RegisterRegex}     # base register
          (\+#{RegisterRegex}  # optional index register
            (\*[1248])?        # optional scale
          )?
        \]
      \z/x)
    end

    # Supported forms of the mov instruction:
    # 1.  mov reg32, immediate32   (0xb8+destreg, imm32)
    # 1a. mov reg32, reg32         (0x89, mod r/m)
    # 2.  mov reg32, r/m32         (0x8b, mod r/m, maybe sib)
    # 2a. mov eax, memoffset32     (0xa1, disp32)
    # 3.  mov r/m32, reg32         (0x89, mod r/m, maybe sib)
    # 3a. mov memoffset32, eax     (0xa3, disp32)
    # 4.  mov r/m32, immediate32   (0xc7, mod r/m, maybe sib, imm32)
    #
    # A leading "dword " on either operand is ignored.  Returns the
    # number of bytes emitted.  Unsupported operand combinations are
    # reported on stdout and emit nothing.
    def x86_mov(dest, src)
      dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
      src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '

      asm do

        # version 1: mov r32, imm32
        if register?(dest) && immediate?(src)
          emit_byte(0xb8 + regnum(dest)) # dest encoded in instruction
          emit_dword(parse_num(src))

        # version 1a: mov r32, r32.  Encoded as mov r/m32, r32 so the
        # source goes in the reg field and the destination in r/m.
        elsif register?(dest) && register?(src)
          emit_byte(0x89)
          emit_modrm([:reg, src], [:reg, dest])

        # version 2: mov r32, r/m32
        elsif register?(dest) && rm32?(src)
          # version 2a: mov eax, moffs32
          if dest == :eax && offset?(src)
            emit_byte(0xa1)
            num = decode_addr(src)[1]
            emit_dword(num)
          else
            emit_byte(0x8b)
            emit_modrm([:reg, dest], [:rm32, src])
          end

        # version 3: mov r/m32, r32
        elsif rm32?(dest) && register?(src)
          # version 3a: mov moffs32, eax
          if offset?(dest) && src == :eax
            emit_byte(0xa3)
            num = decode_addr(dest)[1]
            emit_dword(num)
          else
            emit_byte(0x89)
            emit_modrm([:rm32, dest], [:reg, src])
          end

        # version 4: mov r/m32, imm32
        elsif rm32?(dest) && immediate?(src)
          emit_byte(0xc7)
          emit_modrm([:rm32, dest], [:imm32, src], :op => 0)
          emit_dword(parse_num(src))

        else
          puts "rm32?(dest): #{rm32?(dest)}\t\trm32?(src): #{rm32?(src)}"
          puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}"
          puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}"
          puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}"
          #raise "unsupported mov format: mov #{dest}, #{src}"
          puts "!!! unsupported mov format: mov #{dest}, #{src}"
        end

      end # asm do

    end


    # Not implemented yet.
    def x86_add(dest, src)
    end

    # Not implemented yet.
    def x86_sub(dest, src)
    end

    # Not implemented yet.
    def x86_imul(op)
    end

    # Not implemented yet.
    def x86_idiv(op)
    end

    # inc r32 (0x40+reg) or inc r/m32 (0xff /0).  Returns the number
    # of bytes emitted; raises for any other operand.
    def x86_inc(op)
      asm do
        if register?(op)
          emit_byte(0x40 + regnum(op))
        elsif rm32?(op)
          emit_byte(0xff)
          emit_modrm([:rm32, op], nil, :op => 0)
        else
          raise "unsupported op #{op}, wanted r32 or r/m32"
        end
      end
    end

    # Not implemented yet.
    def x86_push(reg)
    end

    # Not implemented yet.
    def x86_cmp(a, b)
    end


    # True when addr is "[name]" or "[number]", i.e. an absolute
    # memory offset.  A bracketed register such as "[eax]" also looks
    # like a name, so it is ruled out first.
    def offset?(addr)
      return false unless addr.respond_to?(:match)
      return false if addr.match(/\A\[#{RegisterRegex}\]\z/)
      addr.match(/\A\[(#{NameRegex}|#{NumberRegex})\]\z/)
    end

    # Split a bracketed memory operand into one of:
    #   [:reg, register]
    #   [:disp, address]               (variable or literal number)
    #   [:sib, scale, index, base]
    # Raises if the operand names an undefined variable.
    def decode_addr(addr)
      addr = addr[1..-2]   # strip brackets

      # Registers are checked before names because every register name
      # also matches NameRegex.
      if addr.match(/\A#{RegisterRegex}\z/)
        [:reg, addr.to_sym]
      elsif matches = addr.match(/\A#{NameRegex}\z/)
        unless loc = @vars[matches[0]]
          raise "undefined variable #{matches[0]}"
        end
        [:disp, @bss_offset + loc]
      elsif matches = addr.match(/\A#{NumberRegex}\z/)
        [:disp, parse_num(matches[0])]
      elsif addr.index('*')
        bi, scale = *addr.split('*')
        base, index = *bi.split('+')
        [:sib, scale.to_i, index.to_sym, base.to_sym]
      elsif addr.index('+')
        base, index = *addr.split('+')
        [:sib, 1, index.to_sym, base.to_sym]
      else
        [:reg, addr.to_sym]
      end
    end

    # Parse a number from a string.  Used by emit_dword.
    #
    # A leading "0x" means hex, "0b" binary, any other leading "0"
    # octal; everything else is decimal.  Non-strings are returned
    # unchanged.  The argument is not modified.
    def parse_num(str)
      # If it's not a string it's a number, just return it.
      return str unless str.is_a?(String)

      digits = str.downcase
      base = 10                 # default to base 10
      if digits[0, 1] == '0'
        base = case digits[1, 1]
               when 'x' then 16
               when 'b' then 2
               else 8
               end
      end
      # String#to_i accepts the 0x / 0b prefix that matches the base.
      digits.to_i(base)
    end

    # Convert a number to a quad of bytes, discarding excess bits.
    # Little endian!
    def num_to_quad(num)
      [
       num & 0xff,
       (num >> 8) & 0xff,
       (num >> 16) & 0xff,
       (num >> 24) & 0xff
      ]
    end

    # Base-2 logarithm of x as a Float, computed bit by bit until the
    # bit weight drops below tol.  Raises ArgumentError unless x > 0.
    def log2(x, tol=1e-13)
      raise ArgumentError, "log2 is undefined for #{x}" unless x > 0

      x = x.to_f   # Integer division would discard the fraction
      result = 0.0

      # Integer part
      while x < 1
        result -= 1
        x *= 2
      end
      while x >= 2
        result += 1
        x /= 2
      end

      # Fractional part
      fp = 1.0
      while fp >= tol
        fp /= 2
        x *= x
        if x >= 2
          x /= 2
          result += fp
        end
      end
      result
    end

  end # class Binary

end # module Assembler
|
||||
127
asm/text.rb
Normal file
127
asm/text.rb
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# A subset of x86 assembly.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
|
||||
module Assembler

  # Text back end.  Each x86_* method formats a single line of
  # nasm-style assembly and passes it to `emit`, leaving the actual
  # machine-code generation to nasm.  The including class must already
  # define `emit` at the point where it includes this module.
  module Text

    # Refuse to be mixed into a class that has no `emit` method.
    def self.included(other)
      return if other.instance_methods.include?(:emit)
      raise "#{name} requires the including class define the emit method"
    end


    # mov dest, src -- numeric sources are rendered as hex literals.
    def x86_mov(dest, src)
      operand = src.is_a?(Numeric) ? "0x#{src.to_s(16)}" : src
      emit('mov %s, %s' % [dest, operand])
    end

    def x86_movzx(dest, src)
      emit('movzx %s, %s' % [dest, src])
    end

    def x86_add(dest, src)
      emit('add %s, %s' % [dest, src])
    end

    def x86_sub(dest, src)
      emit('sub %s, %s' % [dest, src])
    end

    def x86_imul(op)
      emit('imul %s' % [op])
    end

    def x86_idiv(op)
      emit('idiv %s' % [op])
    end

    def x86_inc(op)
      emit('inc %s' % [op])
    end

    def x86_dec(op)
      emit('dec %s' % [op])
    end

    def x86_push(reg)
      emit('push %s' % [reg])
    end

    def x86_pop(reg)
      emit('pop %s' % [reg])
    end

    def x86_call(label)
      emit('call %s' % [label])
    end

    def x86_neg(reg)
      emit('neg %s' % [reg])
    end

    def x86_not(rm32)
      emit('not %s' % [rm32])
    end

    def x86_xchg(op1, op2)
      emit('xchg %s, %s' % [op1, op2])
    end

    def x86_and(op1, op2)
      emit('and %s, %s' % [op1, op2])
    end

    def x86_or(op1, op2)
      emit('or %s, %s' % [op1, op2])
    end

    def x86_xor(op1, op2)
      emit('xor %s, %s' % [op1, op2])
    end

    def x86_jz(label)
      emit('jz %s' % [label])
    end

    def x86_jnz(label)
      emit('jnz %s' % [label])
    end

    def x86_jmp(label)
      emit('jmp %s' % [label])
    end

    def x86_jl(label)
      emit('jl %s' % [label])
    end

    def x86_cmp(a, b)
      emit('cmp %s, %s' % [a, b])
    end

    def x86_lea(a, b)
      emit('lea %s, %s' % [a, b])
    end

    def x86_shr(a, b)
      emit('shr %s, %s' % [a, b])
    end

    def x86_loop(label)
      emit('loop %s' % [label])
    end

    # int num -- the interrupt number is always rendered in hex.
    def x86_int(num)
      emit('int 0x%s' % [num.to_s(16)])
    end

  end

end
|
||||
48
build.rb
48
build.rb
|
|
@ -4,6 +4,12 @@ ROOT = __FILE__.sub(/\/build\.rb$/, '') unless defined? ROOT
|
|||
|
||||
require 'compiler'
|
||||
|
||||
|
||||
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
|
||||
0xb8, 1, 0, 0, 0, # mov eax, 1
|
||||
0xcd, 0x80 # int 0x80
|
||||
].pack('c*')
|
||||
|
||||
def main
|
||||
filename = ARGV[0].to_s
|
||||
raise "can't read #{filename}" unless File.readable?(filename)
|
||||
|
|
@ -25,18 +31,32 @@ def interpolate(templatefile, data)
|
|||
end
|
||||
end
|
||||
|
||||
# input: filename
|
||||
# output: filename
|
||||
def compile(filename)
|
||||
data, bss, code = nil
|
||||
# filename: input filename
|
||||
# format: output format, nasm or binary
|
||||
# returns: output filename
|
||||
def compile(filename, format='asm')
|
||||
|
||||
# compile to asm or binary
|
||||
output = nil
|
||||
File.open(filename, 'r') do |input|
|
||||
compiler = Compiler.new(input)
|
||||
data, bss, code = compiler.compile
|
||||
compiler = Compiler.new(input, format)
|
||||
output = compiler.compile
|
||||
end
|
||||
if format == 'asm'
|
||||
mode = 'w'
|
||||
data, bss, code = *output
|
||||
output = interpolate("#{ROOT}/template.asm",
|
||||
:data => data, :bss => bss, :code => code)
|
||||
else
|
||||
mode = 'wb'
|
||||
output += X86_exit
|
||||
end
|
||||
outfile = "#{base(filename)}.#{format}"
|
||||
File.open(outfile, mode) do |out|
|
||||
if format == 'asm'
|
||||
out.puts(output)
|
||||
end
|
||||
end
|
||||
asm = interpolate("#{ROOT}/template.asm",
|
||||
:data => data, :bss => bss, :code => code)
|
||||
outfile = "#{base(filename)}.asm"
|
||||
File.open(outfile, 'w') { |out| out.puts(asm) }
|
||||
return outfile
|
||||
|
||||
rescue ParseError => e
|
||||
|
|
@ -69,8 +89,12 @@ def link(filename)
|
|||
return f
|
||||
end
|
||||
|
||||
def build(filename)
|
||||
link( asm( compile(filename) ) )
|
||||
def build(filename, format='asm')
|
||||
if format == 'asm'
|
||||
link( asm( compile(filename) ) )
|
||||
else # binary
|
||||
link( compile(filename, format='bin') )
|
||||
end
|
||||
end
|
||||
|
||||
def run(filename)
|
||||
|
|
|
|||
81
compiler.rb
81
compiler.rb
|
|
@ -13,6 +13,7 @@
|
|||
# require 'unroller'
|
||||
|
||||
require 'asm'
|
||||
require 'opcode'
|
||||
|
||||
class ParseError < StandardError
|
||||
attr_reader :caller, :context
|
||||
|
|
@ -23,37 +24,58 @@ class ParseError < StandardError
|
|||
end
|
||||
|
||||
class Compiler
|
||||
# This module uses our `emit` method to output x86 code for nasm.
|
||||
include Assembler::X86
|
||||
|
||||
attr_reader :data, :bss, :code
|
||||
# This module uses our `emit_byte` method to output x86 machine code
|
||||
# directly using the assembler library.
|
||||
# include Assembler::Binary
|
||||
|
||||
Keywords = %w[
|
||||
if else end while until repeat for to do break
|
||||
print
|
||||
]
|
||||
|
||||
def initialize(input=STDIN)
|
||||
@look = '' # lookahead char
|
||||
@token = nil # type of last read token
|
||||
@value = nil # value of last read token
|
||||
@input = input # stream to read from
|
||||
@data = '' # data section
|
||||
@bss = '' # bss section
|
||||
@code = '' # code section
|
||||
@vars = {} # defined variables
|
||||
@num_labels = 0 # used to generate unique labels
|
||||
@num_labels_with_suffix = Hash.new(0)
|
||||
attr_reader :data, :bss, :code
|
||||
|
||||
def initialize(input, asm=Assembler::Text.new)
|
||||
# XXX for development only!
|
||||
@indent = 0 # for pretty printing
|
||||
|
||||
@look = '' # Next lookahead char.
|
||||
@token = nil # Type of last read token.
|
||||
@value = nil # Value of last read token.
|
||||
@input = input # Stream to read from.
|
||||
@data = '' # Data section.
|
||||
@bss = '' # BSS section.
|
||||
@code = '' # Code section.
|
||||
@binary = [] # Byte array of machine code.
|
||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
||||
@num_labels = 0 # Used to generate unique labels.
|
||||
@num_labels_with_suffix = Hash.new(0)
|
||||
|
||||
@header_size = 0x100 # ELF, Linux, x86
|
||||
@text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86).
|
||||
@text_size = 0x02be00 # Size of text section.
|
||||
@data_offset = @text_offset + @text_size # Offset of data section.
|
||||
@data_size = 0x4e00 # Size of data section.
|
||||
@bss_offset = @data_offset + @data_size # Offset of bss section.
|
||||
@bss_size = 0 # Size of bss section.
|
||||
|
||||
# Labels for the assembler. Maps names to locations.
|
||||
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
|
||||
|
||||
@asm = asm
|
||||
|
||||
# seed the lexer
|
||||
get_char
|
||||
end
|
||||
|
||||
def asm
|
||||
@asm
|
||||
end
|
||||
|
||||
def compile
|
||||
block
|
||||
expected(:'end of file') unless eof?
|
||||
[@data, @bss, @code]
|
||||
asm.output
|
||||
end
|
||||
|
||||
# Scan the input stream for the next token.
|
||||
|
|
@ -790,7 +812,8 @@ class Compiler
|
|||
def defvar(name, dwords=1)
|
||||
unless var?(name)
|
||||
@bss << "#{name}: resd #{dwords}\n"
|
||||
@vars[name] = name
|
||||
@vars[name] = @bss_size
|
||||
@bss_size += dwords
|
||||
else
|
||||
STDERR.puts "[warning] attempted to redefine #{name}"
|
||||
end
|
||||
|
|
@ -804,14 +827,30 @@ class Compiler
|
|||
@vars[name]
|
||||
end
|
||||
|
||||
# Emit a line of code wrapped between a tab and a newline.
|
||||
# Emit a line of code wrapped between a tab and a newline. Required
|
||||
# by Assembler::Text.
|
||||
def emit(code, options={})
|
||||
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
||||
@code << "#{tab}#{code}\n"
|
||||
end
|
||||
|
||||
# emit_byte and bytes_written are required by Assembler::Binary.
|
||||
def emit_byte(byte)
|
||||
@binary << byte
|
||||
end
|
||||
def bytes_written
|
||||
@binary.size
|
||||
end
|
||||
|
||||
|
||||
def emit_label(name=unique_label)
|
||||
emit("#{name}:", :tab => nil)
|
||||
|
||||
@labels[name] = @binary.length
|
||||
end
|
||||
|
||||
def resolve_label(label)
|
||||
@labels[label]
|
||||
end
|
||||
|
||||
# Generate a unique label.
|
||||
|
|
@ -833,6 +872,12 @@ class Compiler
|
|||
' ' * (real_indent * 4)
|
||||
end
|
||||
|
||||
# Pack the array into a byte string.
|
||||
def binary
|
||||
@binary.pack('c*')
|
||||
end
|
||||
|
||||
|
||||
def pushing(reg)
|
||||
x86_push(reg)
|
||||
yield
|
||||
|
|
|
|||
288
elfwriter.c
Normal file
288
elfwriter.c
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
#include <libelf.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
/* _exit(0) */
|
||||
/* uint8_t shell_code[] = { */
|
||||
/* 0xbb, 0, 0, 0, 0, /\* mov ebx, 0 *\/ */
|
||||
/* 0xb8, 1, 0, 0, 0, /\* mov eax, 1 *\/ */
|
||||
/* 0xcd, 0x80 /\* int 0x80 *\/ */
|
||||
/* }; */
|
||||
|
||||
/* uint32_t hash_words[] = { */
|
||||
/* 0x12345678, */
|
||||
/* 0xdeadc0de, */
|
||||
/* 0x1234abcd */
|
||||
/* }; */
|
||||
|
||||
/* Fixed memory layout of the generated executable.  Each address is
 * the previous region's address plus that region's size.  Composite
 * values are parenthesized so they expand correctly inside larger
 * expressions (e.g. `2 * data_addr`).
 */
#define header_size 0x100
#define text_addr   (0x8048000 + header_size)
#define text_size   0x02be00
#define data_addr   (text_addr + text_size)
#define data_size   0x4e00
#define bss_addr    (data_addr + data_size)

/* Size of the .bss section; set from the command line in main(). */
size_t bss_size = 0;
|
||||
|
||||
/* Section header string table: NUL-separated section names.
 *
 *   offset  0: ""  (empty name)
 *   offset  1: ".text"
 *   offset  7: ".bss"
 *   offset 12: ".shstrtab"
 *
 * The literal's implicit terminator is the final NUL, so the array is
 * 22 bytes long.
 */
char string_table[] = "\0.text\0.bss\0.shstrtab";
|
||||
|
||||
|
||||
/* Write a static 32-bit x86 ELF binary to filename. The file is
|
||||
* clobbered without confirmation!
|
||||
*/
|
||||
/* Build an ELF file with libelf and write it to `filename`.
 *
 * filename   path of the output file (created or truncated)
 * code       bytes placed in the .text section
 * code_size  number of bytes in `code`
 *
 * The file gets three sections (.text, .bss, .shstrtab) and three
 * PT_LOAD program headers (text, data, bss) at the fixed addresses
 * defined by the *_addr / *_size macros.  The size of .bss comes from
 * the global `bss_size`.
 *
 * Returns 0 on success and a negative code identifying the failed
 * step otherwise.  Codes -6, -7 and -8 are shared by the .text and
 * .bss steps, and -12 by elf_getscn and the first elf_update.
 *
 * NOTE(review): the error paths return without calling elf_end() or
 * close(), so `fd` and `elf` stay open when a step fails.
 */
int
elf_write(const char *filename, uint8_t *code, size_t code_size)
{
    int fd;
    size_t shstrndx;
    Elf *elf;
    Elf_Scn *scn;
    Elf_Data *data;
    Elf32_Ehdr *ehdr;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;

    if (elf_version(EV_CURRENT) == EV_NONE) {
        printf("Failed to initialize ELF library!\n");
        return -1;
    }
    if ((fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, 0666)) < 0) {
        printf("Can't open %s for writing.\n", filename);
        perror("[elf_write]");
        return -2;
    }
    if ((elf = elf_begin(fd, ELF_C_WRITE, (Elf *)0)) == 0) {
        printf("elf_begin failed!\n");
        return -3;
    }


    /**************
     * ELF Header *
     **************/

    if ((ehdr = elf32_newehdr(elf)) == NULL) {
        printf("elf32_newehdr failed!\n");
        return -4;
    }
    ehdr->e_ident[EI_DATA] = ELFDATA2LSB; /* 2's complement, little endian */
    ehdr->e_type = ET_EXEC;
    ehdr->e_machine = EM_386; /* x86 */

    /* Image starts at 0x8048000, x86 32-bit abi.  We need a bit
     * of room for headers and such.  TODO figure out how much
     * room is needed!
     *
     * Current entry point is .text section.
     */
    ehdr->e_entry = text_addr;


    /*******************
     * Program Headers *
     *******************/

    /* 3 segments => 3 program headers (text, data, bss) */
    if ((phdr = elf32_newphdr(elf, 3)) == NULL) {
        printf("elf32_newphdr failed!\n");
        return -5;
    }


    /*****************
     * .text section *
     *****************/

    if ((scn = elf_newscn(elf)) == NULL) {
        printf("elf_newscn failed!\n");
        return -6;
    }
    if ((data = elf_newdata(scn)) == NULL) {
        printf("elf_newdata failed!\n");
        return -7;
    }
    /* The section's contents are the caller's code buffer. */
    data->d_align = 16;
    data->d_buf = code;
    data->d_off = 0LL;
    data->d_type = ELF_T_BYTE;
    data->d_size = code_size;
    data->d_version = EV_CURRENT;

    if ((shdr = elf32_getshdr(scn)) == NULL) {
        printf("elf32_getshdr failed!\n");
        return -8;
    }
    shdr->sh_name = 1; /* offset of ".text" in string_table */
    shdr->sh_type = SHT_PROGBITS;
    shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
    shdr->sh_addr = text_addr;


    /****************
     * .bss section *
     ****************/

    if ((scn = elf_newscn(elf)) == NULL) {
        printf("elf_newscn failed!\n");
        return -6;
    }
    if ((data = elf_newdata(scn)) == NULL) {
        printf("elf_newdata failed!\n");
        return -7;
    }
    /* No d_buf is assigned here; the section type below is SHT_NOBITS. */
    data->d_align = 4;
    data->d_off = 0LL;
    data->d_type = ELF_T_BYTE;
    data->d_size = bss_size;
    data->d_version = EV_CURRENT;

    if ((shdr = elf32_getshdr(scn)) == NULL) {
        printf("elf32_getshdr failed!\n");
        return -8;
    }
    shdr->sh_name = 7; /* offset of ".bss" in string_table */
    shdr->sh_type = SHT_NOBITS;
    shdr->sh_flags = SHF_WRITE | SHF_ALLOC;
    shdr->sh_addr = bss_addr;


    /*******************************
     * section header string table *
     *******************************/

    if ((scn = elf_newscn(elf)) == NULL) {
        printf("elf_newscn failed!\n");
        return -9;
    }
    if ((data = elf_newdata(scn)) == NULL) {
        printf("elf_newdata failed!\n");
        return -10;
    }
    data->d_align = 1;
    data->d_buf = string_table;
    data->d_off = 0LL;
    data->d_type = ELF_T_BYTE;
    data->d_size = sizeof(string_table);
    data->d_version = EV_CURRENT;

    if ((shdr = elf32_getshdr(scn)) == NULL) {
        printf("elf32_getshdr failed!\n");
        return -11;
    }
    shdr->sh_name = 12; /* offset of ".shstrtab" in string_table */
    shdr->sh_type = SHT_STRTAB;
    shdr->sh_flags = SHF_STRINGS | SHF_ALLOC;
    shdr->sh_entsize = 0;


    /* int elf_setshstrndx(Elf *e, Elf32_Ehdr *eh, size_t shstrndx) */
    /* Record which section holds the section names.  The branch for
     * indexes >= SHN_LORESERVE only marks section 0 dirty; the sh_link
     * store it would need is commented out below. */
    shstrndx = elf_ndxscn(scn);
    if (shstrndx >= SHN_LORESERVE) {
        if ((scn = elf_getscn(elf, 0)) == NULL) {
            printf("elf_getscn failed!\n");
            return -12;
        }
        /* assert(scn->s_ndx == SHN_UNDEF); */
        /* scn->s_shdr.s_shdr32.sh_link = shstrndx; */
        elf_flagshdr(scn, ELF_C_SET, ELF_F_DIRTY);
        shstrndx = SHN_XINDEX;
    }
    ehdr->e_shstrndx = shstrndx;

    /* NOTE(review): presumably this first update (ELF_C_NULL) only
     * computes the file layout before the program headers are filled
     * in -- confirm against the libelf documentation. */
    if (elf_update(elf, ELF_C_NULL) < 0) {
        printf("elf_update failed!\n");
        return -12;
    }

    /* phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; */
    /* phdr->p_type = PT_PHDR; */
    /* phdr->p_offset = ehdr->e_phoff; */
    /* phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); */

    /* text segment */
    phdr->p_vaddr = text_addr;
    phdr->p_type = PT_LOAD;
    phdr->p_offset = header_size;
    phdr->p_filesz = text_size;
    phdr->p_memsz = text_size;
    phdr->p_flags = PF_R | PF_X;
    phdr->p_align = 0x1000;

    /* data segment */
    phdr++;
    phdr->p_vaddr = data_addr;
    phdr->p_type = PT_LOAD;
    phdr->p_offset = header_size + text_size;
    phdr->p_filesz = data_size;
    phdr->p_memsz = data_size + 0x1024; /* XXX unsure why the abi specifies + 0x1024 */
    phdr->p_flags = PF_R | PF_W | PF_X;
    phdr->p_align = 0x1000;

    /* bss segment */
    phdr++;
    phdr->p_vaddr = bss_addr;
    phdr->p_type = PT_LOAD;
    phdr->p_offset = header_size + text_size + data_size;
    phdr->p_filesz = bss_size;
    phdr->p_memsz = bss_size;
    phdr->p_flags = PF_R | PF_W;
    phdr->p_align = 0x1000;

    /* The program headers were edited in place, so flag them dirty
     * before the final write. */
    elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY);

    if (elf_update(elf, ELF_C_WRITE) < 0) {
        printf("elf_update failed!\n");
        return -13;
    }

    elf_end(elf);
    close(fd);
    return 0;
}
|
||||
|
||||
int
|
||||
main(int argc, const char *argv[])
|
||||
{
|
||||
int result;
|
||||
pid_t pid;
|
||||
FILE *fd;
|
||||
uint8_t *code = NULL;
|
||||
size_t code_size = 0, chunk_size = 1024, bytes_read;
|
||||
|
||||
if (argc < 4) {
|
||||
printf("usage: %s <input> <bss_size> <output>\n", argv[0]);
|
||||
printf(" Wraps the input file in an ELF binary.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bss_size = strtoul(argv[2], 0, 10);
|
||||
|
||||
if ((fd = fopen(argv[1], "r")) < 0) {
|
||||
printf("[error] can't open %s for reading.\n", argv[1]);
|
||||
perror("[main]");
|
||||
return 2;
|
||||
}
|
||||
while (!feof(fd) && !ferror(fd)) {
|
||||
code = realloc(code, code_size + chunk_size);
|
||||
bytes_read = fread(code+code_size, 1, chunk_size, fd);
|
||||
code_size += bytes_read;
|
||||
}
|
||||
fclose(fd);
|
||||
|
||||
printf("Writing x86 ELF binary to %s...\n", argv[3]);
|
||||
result = elf_write(argv[3], code, code_size);
|
||||
if (result < 0) {
|
||||
printf("[error] elf_write failed.\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
12
lea.asm
Normal file
12
lea.asm
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
;;; Sample lea instructions covering the addressing forms: base plus
;;; scaled index, base plus index, a bare register, and an absolute
;;; address.
;;; NOTE(review): presumably assembled with nasm to get reference
;;; encodings for the binary assembler -- confirm.
BITS 32

lea eax, [ebx+ecx*4]
lea ebx, [eax+ecx*4]
lea eax, [ecx+ebx*4]
lea eax, [ecx+ebx*8]
lea eax, [ecx+ebx]
lea eax, [0x1000+10*4]
lea eax, [eax]
lea eax, [ecx]
lea ecx, [eax]
lea eax, [0xdeadbeef]
|
||||
4
min.asm
Normal file
4
min.asm
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
;;; Minimal program: _exit(0).
BITS 32
mov ebx,0               ; exit code
mov eax,1
int 0x80
|
||||
1
min.code
Normal file
1
min.code
Normal file
|
|
@ -0,0 +1 @@
|
|||
a=0
|
||||
4
min2.asm
Normal file
4
min2.asm
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
;;; Like min.asm, but the exit code is whatever is in eax on entry.
BITS 32
mov ebx,eax             ; exit code
mov eax,1
int 0x80
|
||||
89
mov.asm
Normal file
89
mov.asm
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
BITS 32
|
||||
|
||||
;;; 00000000 b8 78 56 34 12 b9 78 56 34 12 ba 78 56 34 12 bb |.xV4..xV4..xV4..|
|
||||
;;; 00000010 78 56 34 12 89 c0 89 c8 89 d0 89 d8 89 c1 89 c9 |xV4.............|
|
||||
;;; 00000020 89 d1 89 d9 89 c2 89 ca 89 d2 89 da 89 c3 89 cb |................|
|
||||
;;; 00000030 89 d3 89 db a1 ef be ad de 8b 0d ef be ad de 8b |................|
|
||||
;;; 00000040 15 ef be ad de 8b 1d ef be ad de a3 ef be ad de |................|
|
||||
;;; 00000050 89 0d ef be ad de 89 15 ef be ad de 89 1d ef be |................|
|
||||
;;; 00000060 ad de 8b 00 8b 01 8b 02 8b 03 8b 08 8b 09 8b 0a |................|
|
||||
;;; 00000070 8b 0b 8b 10 8b 11 8b 12 8b 13 8b 18 8b 19 8b 1a |................|
|
||||
;;; 00000080 8b 1b 89 00 89 01 89 02 89 03 89 08 89 09 89 0a |................|
|
||||
;;; 00000090 89 0b 89 10 89 11 89 12 89 13 89 18 89 19 89 1a |................|
|
||||
;;; 000000a0 89 1b |..|
|
||||
;;; 000000a2
|
||||
|
||||
mov eax, 0x12345678 ; b8 78 56 34 12
|
||||
mov ecx, 0x12345678 ; b9 78 56 34 12
|
||||
mov edx, 0x12345678 ; ba 78 56 34 12
|
||||
mov ebx, 0x12345678 ; bb 78 56 34 12
|
||||
|
||||
mov eax, eax ; 89 c0
|
||||
mov eax, ecx ; 89 c8
|
||||
mov eax, edx ; 89 d0
|
||||
mov eax, ebx ; 89 d8
|
||||
|
||||
mov ecx, eax ; 89 c1
|
||||
mov ecx, ecx ; 89 c9
|
||||
mov ecx, edx ; 89 d1
|
||||
mov ecx, ebx ; 89 d9
|
||||
|
||||
mov edx, eax ; 89 c2
|
||||
mov edx, ecx ; 89 ca
|
||||
mov edx, edx ; 89 d2
|
||||
mov edx, ebx ; 89 da
|
||||
|
||||
mov ebx, eax ; 89 c3
|
||||
mov ebx, ecx ; 89 cb
|
||||
mov ebx, edx ; 89 d3
|
||||
mov ebx, ebx ; 89 db
|
||||
|
||||
mov eax, dword [0xdeadbeef] ; a1 ef be ad de
|
||||
mov ecx, dword [0xdeadbeef] ; 8b 0d ef be ad de
|
||||
mov edx, dword [0xdeadbeef] ; 8b 15 ef be ad de
|
||||
mov ebx, dword [0xdeadbeef] ; 8b 1d ef be ad de
|
||||
|
||||
mov [0xdeadbeef], eax ; a3 ef be ad de
|
||||
mov [0xdeadbeef], ecx ; 89 0d ef be ad de
|
||||
mov [0xdeadbeef], edx ; 89 15 ef be ad de
|
||||
mov [0xdeadbeef], ebx ; 89 1d ef be ad de
|
||||
|
||||
mov eax, dword [eax] ; 8b 00
|
||||
mov eax, dword [ecx] ; 8b 01
|
||||
mov eax, dword [edx] ; 8b 02
|
||||
mov eax, dword [ebx] ; 8b 03
|
||||
|
||||
mov ecx, dword [eax] ; 8b 08
|
||||
mov ecx, dword [ecx] ; 8b 09
|
||||
mov ecx, dword [edx] ; 8b 0a
|
||||
mov ecx, dword [ebx] ; 8b 0b
|
||||
|
||||
mov edx, dword [eax] ; 8b 10
|
||||
mov edx, dword [ecx] ; 8b 11
|
||||
mov edx, dword [edx] ; 8b 12
|
||||
mov edx, dword [ebx] ; 8b 13
|
||||
|
||||
mov ebx, dword [eax] ; 8b 18
|
||||
mov ebx, dword [ecx] ; 8b 19
|
||||
mov ebx, dword [edx] ; 8b 1a
|
||||
mov ebx, dword [ebx] ; 8b 1b
|
||||
|
||||
mov [eax], eax ; 89 00
|
||||
mov [ecx], eax ; 89 01
|
||||
mov [edx], eax ; 89 02
|
||||
mov [ebx], eax ; 89 03
|
||||
|
||||
mov [eax], ecx ; 89 08
|
||||
mov [ecx], ecx ; 89 09
|
||||
mov [edx], ecx ; 89 0a
|
||||
mov [ebx], ecx ; 89 0b
|
||||
|
||||
mov [eax], edx ; 89 10
|
||||
mov [ecx], edx ; 89 11
|
||||
mov [edx], edx ; 89 12
|
||||
mov [ebx], edx ; 89 13
|
||||
|
||||
mov [eax], ebx ; 89 18
|
||||
mov [ecx], ebx ; 89 19
|
||||
mov [edx], ebx ; 89 1a
|
||||
mov [ebx], ebx ; 89 1b
|
||||
25
opcode.rb
Normal file
25
opcode.rb
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# One encoded instruction, held as its separate byte fields.
#
# Each field in Attrs may be left unset (nil), hold a single byte value, or
# hold an Array of byte values. Fields are emitted in the order listed in
# Attrs.
class OpCode
  Attrs = [:prefix, :op, :modrm, :sib, :extra].freeze
  attr_accessor(*Attrs)

  # attrs - Hash mapping any of the Attrs names to a byte or Array of bytes.
  #         Names that are missing leave the field unset.
  def initialize(attrs = {})
    Attrs.each do |attr|
      send("#{attr}=", attrs[attr])
    end
  end

  # Returns the number of bytes the instruction encodes to. Unset fields
  # contribute nothing, so this always equals binary.size.
  def size
    encoded_bytes.size
  end

  # Returns the encoded instruction as a binary String, one unsigned byte
  # per value.
  def binary
    encoded_bytes.pack('C*')
  end

  private

  # Flat list of the byte values of every field that is set, in Attrs order.
  def encoded_bytes
    Attrs.map { |attr| send(attr) }.flatten.compact
  end
end
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
BITS 32
|
||||
GLOBAL _start
|
||||
SECTION .text
|
||||
_start:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
BITS 32
|
||||
GLOBAL _start
|
||||
SECTION .data
|
||||
{data}
|
||||
|
|
|
|||
47
test.rb
Normal file
47
test.rb
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
require 'compiler'
|
||||
require 'stringio'
|
||||
|
||||
# Machine code written after the compiled program so the process terminates,
# passing the value in eax along as the exit code. Packed as unsigned bytes
# and frozen so the shared constant cannot be modified by accident.
X86_exit = [
  0x89, 0xc3,       # mov ebx, eax (exit code)
  0xb8, 1, 0, 0, 0, # mov eax, 1
  0xcd, 0x80        # int 0x80
].pack('C*').freeze
|
||||
|
||||
# Prints +msg+ to standard error.
def error(msg)
  STDERR.puts(msg)
end
|
||||
|
||||
# Runs the compiler over +input+ and returns the result of Compiler#parse.
#
# If a ParseError is raised, its message, context and caller details are
# printed to standard error via +error+ and the process exits with status 1.
def parse(input)
  begin
    Compiler.new(input).parse # tuple of [data, bss, code, binary]
  rescue ParseError => failure
    error("[error] #{failure.message}")
    error("[context] #{failure.context}")
    # error("Aborting!")
    error(failure.caller)
    exit(1)
  end
end
|
||||
|
||||
# Fills in a template: for each entry in +data+, the first "{token}"
# placeholder in the text is replaced by that entry's value.
#
# template - String containing "{name}" placeholders.
# data     - Hash mapping placeholder names to replacement values.
#
# Returns a new String; +template+ itself is left untouched.
def interpolate(template, data)
  data.inject(template) do |text, (token, replacement)|
    # The block form of sub inserts the replacement literally. Passing it as
    # a string argument would expand backslash sequences such as \0 or \1,
    # corrupting generated code or data that contains backslashes.
    text.sub("{#{token}}") { replacement.to_s }
  end
end
|
||||
|
||||
# Compiles a program and writes the results to disk.
#
# arg - path of the source file to compile. When it does not name a readable
#       file, a small built-in sample program is compiled instead.
#
# Writes the assembly listing (template.asm with its {data}, {bss} and {code}
# placeholders filled in) to test.asm, and the machine code followed by
# X86_exit to test.bin.
def main(arg)
  input = if File.readable?(arg)
            File.open(arg)
          else
            # StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n")
            StringIO.new("abc=999\nabc-888\n")
          end

  begin
    data, bss, code, binary = *parse(input)
  ensure
    # Release the source file once parsing is over, also when parse exits.
    input.close unless input.closed?
  end

  template = File.read("template.asm")
  asm = interpolate(template, :data => data, :bss => bss, :code => code)
  File.open("test.asm", "w") { |f| f.puts(asm) }
  File.open("test.bin", "wb") do |f|
    f.write(binary)
    f.write(X86_exit)
  end
end
|
||||
|
||||
main(ARGV[0].to_s)
|
||||
11
x86.txt
Normal file
11
x86.txt
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
mov (0x66) {
|
||||
reg32, reg32 (0x89) {
|
||||
op2 - src
|
||||
|
||||
eax ecx edx ebx
|
||||
op1 eax c0 c8 d0 d8
|
||||
dest ecx c1 c9 d1 d9
|
||||
edx c2 ca d2 da
|
||||
ebx c3 cb d3 db
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue