mirror of
https://github.com/samsonjs/compiler.git
synced 2026-04-27 14:57:45 +00:00
[NEW] First hints of cross-platform support. Compiles to Mach-O on Darwin with nasm and gcc.
There is no binary assembler support for Darwin yet! I'm not sure when I will dive into the details of generating a Mach-O binary from Ruby or C.
[MERGED] Binary assembler support. It *should* work on ELF, but it needs testing on Linux.
This commit is contained in:
parent
76d4d2be3a
commit
a4506bab10
16 changed files with 437 additions and 407 deletions
42
asm/asm.rb
42
asm/asm.rb
|
|
@ -6,4 +6,46 @@
|
||||||
# may 2009
|
# may 2009
|
||||||
|
|
||||||
module Assembler
|
module Assembler
|
||||||
|
|
||||||
|
# Abstract base class holding the state and helpers shared by the
# different code generators: the symbol table and the label
# bookkeeping.  It also sketches the interface a concrete assembler
# must implement to be useful (see `output`).
class AssemblerBase

  # Set up the shared bookkeeping.  Any arguments are accepted and
  # ignored, so subclasses can call a bare `super` from initializers
  # that take their own parameters.
  def initialize(*args)
    @vars = {}                             # Symbol table, maps names to locations in BSS.
    @num_labels = 0                        # Used to generate unique labels.
    @num_labels_with_suffix = Hash.new(0)  # Per-suffix counters used by `label`.

    # Maps names to locations.  Looking up an unknown label raises.
    @labels = Hash.new {|h, key| raise "undefined label: #{key}"}
  end

  # Evaluate the given block in the context of this assembler, so the
  # block can call the assembler methods directly.
  #
  # args: accepted and ignored.
  # Returns the value of the block.
  # Raises ArgumentError when no block is given.  (instance_eval would
  # raise ArgumentError as well, but with a misleading "wrong number
  # of arguments" message.)
  def block(*args, &blk)
    raise ArgumentError, "#{self.class}#block requires a block" unless blk
    instance_eval(&blk)
  end

  # Return the assembled program.  Subclasses must override this; the
  # base implementation always raises.
  def output
    raise "#{self.class} is supposed to implement this method!"
  end

  # Look up the location recorded for the variable `name`.
  # Returns nil when the variable has not been defined.
  def var(name)
    @vars[name]
  end
  # `var?` is the same lookup; callers use its truthiness as
  # "is this variable defined?".
  alias_method :var?, :var

  # Generate a unique label.
  #
  # suffix: optional tag.  When given, the label gets "_<suffix>_<n>"
  #         appended, where n counts the labels generated so far with
  #         that suffix.
  # Returns the label name, e.g. "L000002_cmp_1".
  def label(suffix=nil)
    @num_labels += 1
    if suffix
      @num_labels_with_suffix[suffix] += 1
      suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
    end
    # A nil suffix formats as the empty string.
    format("L%06d%s", @num_labels, suffix)
  end

end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
||||||
105
asm/binary.rb
105
asm/binary.rb
|
|
@ -5,13 +5,18 @@
|
||||||
# sjs
|
# sjs
|
||||||
# may 2009
|
# may 2009
|
||||||
|
|
||||||
|
ROOT = __FILE__.sub(/\/asm\/binary\.rb$/, '') unless defined? ROOT
|
||||||
|
$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT)
|
||||||
|
|
||||||
|
require 'asm/asm'
|
||||||
|
|
||||||
module Assembler
|
module Assembler
|
||||||
|
|
||||||
# Define a method named `emit_byte` and one named `binary_size` and
|
# Define a method named `emit_byte` and one named `binary_size` and
|
||||||
# include this module. Calling the assembler methods will output
|
# include this module. Calling the assembler methods will output
|
||||||
# x86 machine code ... hopefully. So far it's incomplete and
|
# x86 machine code ... hopefully. So far it's incomplete and
|
||||||
# binaries just segfault.
|
# binaries just segfault.
|
||||||
class Binary
|
class Binary < AssemblerBase
|
||||||
|
|
||||||
# This structure allows for x86 registers of all sizes. The
|
# This structure allows for x86 registers of all sizes. The
|
||||||
# number of the register is the index of the array in which it was
|
# number of the register is the index of the array in which it was
|
||||||
|
|
@ -44,7 +49,72 @@ module Assembler
|
||||||
MaxUnsigned = 2**MachineBits - 1
|
MaxUnsigned = 2**MachineBits - 1
|
||||||
SignedRange = MinSigned..MaxSigned
|
SignedRange = MinSigned..MaxSigned
|
||||||
|
|
||||||
|
X86_exit = {
|
||||||
|
'linux' => [0x89, 0xc3, # mov ebx, eax (exit code)
|
||||||
|
0xb8, 1, 0, 0, 0, # mov eax, 1
|
||||||
|
0xcd, 0x80 # int 0x80
|
||||||
|
].pack('c*'),
|
||||||
|
|
||||||
|
'darwin' => [0x50, # push eax (exit code)
|
||||||
|
0xb8, 1, 0, 0, 0, # mov eax, 1
|
||||||
|
0xcd, 0x80 # int 0x80
|
||||||
|
].pack('c*')
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a binary assembler for the given platform and binary format.
#
# platform:  stored in @platform; init_sections branches on it and
#            output uses it to pick the exit sequence from X86_exit.
# binformat: stored in @binformat; init_sections checks it against
#            the platform.
#
# The bare `super` forwards both arguments to the superclass
# initializer.  init_sections runs last because it reads @platform
# and @binformat.
def initialize(platform='linux', binformat='elf')
|
||||||
|
super
|
||||||
|
@binary = [] # Byte array of machine code.
|
||||||
|
@platform = platform
|
||||||
|
@binformat = binformat
|
||||||
|
init_sections
|
||||||
|
end
|
||||||
|
|
||||||
|
# Compute the layout of the output sections for the target platform.
#
# Reads @platform and @binformat and sets @header_size, @text_offset,
# @text_size, @data_offset, @data_size, @bss_offset and @bss_size.
#
# Raises RuntimeError when the platform is unknown or when the binary
# format does not match the platform (linux => elf, darwin => macho).
def init_sections
  case @platform

  when 'linux'
    unless @binformat == 'elf'
      raise "unsupported binary format for #{@platform}: #{@binformat}"
    end
    @header_size = 0x100                      # ELF, Linux
    @text_offset = 0x08048000 + @header_size  # Offset of text section in memory

  when 'darwin'
    unless @binformat == 'macho'
      raise "unsupported binary format for #{@platform}: #{@binformat}"
    end
    # NOTE(review): these are the same values as the Linux/ELF branch;
    # confirm they are right for Mach-O before relying on them.
    @header_size = 0x100                      # Mach-O, Darwin
    @text_offset = 0x08048000 + @header_size  # Offset of text section in memory

  else
    # Interpolate the instance variable: there is no local `platform`
    # in this method, so referencing one raised NameError instead of
    # this error.
    raise "unsupported platform: #{@platform}"
  end

  @text_size = 0x02be00                       # Size of text section.
  @data_offset = @text_offset + @text_size    # Offset of data section.
  @data_size = 0x4e00                         # Size of data section.
  @bss_offset = @data_offset + @data_size     # Offset of bss section.
  @bss_size = 0                               # Size of bss section.
end
|
||||||
|
|
||||||
|
# Return the assembled program as a string: the bytes collected in
# @binary packed with 'c*', followed by the exit sequence that
# X86_exit holds for @platform.
def output
|
||||||
|
@binary.pack('c*') + X86_exit[@platform]
|
||||||
|
end
|
||||||
|
|
||||||
|
# Define a constant in the .data section.
# Not implemented in this assembler yet: every call raises, and the
# arguments are not used.
|
||||||
|
def const(name, value)
|
||||||
|
raise "unimplemented!"
|
||||||
|
end
|
||||||
|
|
||||||
|
# Define a variable with the given name and size (in dwords).
#
# The location recorded in @vars is the value of @bss_size before the
# variable is added, i.e. an index counted in dwords.  @bss_size then
# grows by `dwords`.
#
# Redefining an existing variable leaves the table untouched and
# prints a warning to STDERR.
#
# Returns the new @bss_size, or nil when the variable already existed.
def defvar(name, dwords=1)
  if var?(name)
    STDERR.puts "[warning] attempted to redefine #{name}"
  else
    @vars[name] = @bss_size
    @bss_size += dwords
  end
end
|
||||||
|
|
||||||
|
# Generate a unique label via the superclass and record the current
# value of bytes_written as its location in @labels.
#
# suffix: forwarded to the superclass by the bare `super`.
# Returns the label name.
def label(suffix=nil)
|
||||||
|
name = super
|
||||||
|
@labels[name] = bytes_written
|
||||||
|
return name
|
||||||
|
end
|
||||||
|
|
||||||
# Count the bytes that were encoded in the given block.
|
# Count the bytes that were encoded in the given block.
|
||||||
def asm
|
def asm
|
||||||
|
|
@ -56,7 +126,18 @@ module Assembler
|
||||||
# return the number of bytes written
|
# return the number of bytes written
|
||||||
bytes_written - instruction_offset
|
bytes_written - instruction_offset
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Append one byte to @binary, the byte array of machine code.
def emit_byte(byte)
|
||||||
|
@binary << byte
|
||||||
|
end
|
||||||
|
|
||||||
|
# Number of bytes emitted so far, i.e. the size of @binary.
def bytes_written
|
||||||
|
@binary.size
|
||||||
|
end
|
||||||
|
|
||||||
|
# Record the current length of @binary as the location of the label
# `name` in @labels.  When no name is given, a fresh one is generated
# by calling `label`.
def emit_label(name=label)
|
||||||
|
@labels[name] = @binary.length
|
||||||
|
end
|
||||||
|
|
||||||
def emit_dword(num)
|
def emit_dword(num)
|
||||||
num_to_quad(num).each {|byte| emit_byte(byte)}
|
num_to_quad(num).each {|byte| emit_byte(byte)}
|
||||||
|
|
@ -119,7 +200,7 @@ module Assembler
|
||||||
end
|
end
|
||||||
|
|
||||||
def regnum(op)
|
def regnum(op)
|
||||||
num = register?
|
num = register?(op)
|
||||||
raise "not a register: #{op.inspect}" unless num
|
raise "not a register: #{op.inspect}" unless num
|
||||||
num
|
num
|
||||||
end
|
end
|
||||||
|
|
@ -146,7 +227,7 @@ module Assembler
|
||||||
# 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib)
|
# 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib)
|
||||||
# 3a. mov memoffset32, eax (0xa3, disp32)
|
# 3a. mov memoffset32, eax (0xa3, disp32)
|
||||||
# 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32)
|
# 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32)
|
||||||
def x86_mov(dest, src)
|
def mov(dest, src)
|
||||||
dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
|
dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
|
||||||
src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '
|
src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '
|
||||||
|
|
||||||
|
|
@ -199,35 +280,35 @@ module Assembler
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def x86_add(dest, src)
|
def add(dest, src)
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_sub(dest, src)
|
def sub(dest, src)
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_imul(op)
|
def imul(op)
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_idiv(op)
|
def idiv(op)
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_inc(op)
|
def inc(op)
|
||||||
asm do
|
asm do
|
||||||
if register?(op)
|
if register?(op)
|
||||||
emit_byte(0x40 + regnum(op))
|
emit_byte(0x40 + regnum(op))
|
||||||
elsif rm32?(op)
|
elsif rm32?(op)
|
||||||
emit_byte(0xff)
|
emit_byte(0xff)
|
||||||
emit_modrm(...)
|
# emit_modrm(...)
|
||||||
else
|
else
|
||||||
raise "unsupported op #{op}, wanted r32 or r/m32"
|
raise "unsupported op #{op}, wanted r32 or r/m32"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_push(reg)
|
def push(reg)
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_cmp(a, b)
|
def cmp(a, b)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
117
asm/text.rb
117
asm/text.rb
|
|
@ -3,123 +3,166 @@
|
||||||
# sjs
|
# sjs
|
||||||
# may 2009
|
# may 2009
|
||||||
|
|
||||||
|
ROOT = __FILE__.sub(/\/asm\/text\.rb$/, '') unless defined? ROOT
|
||||||
|
$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT)
|
||||||
|
|
||||||
|
require 'asm/asm'
|
||||||
|
|
||||||
module Assembler
|
module Assembler
|
||||||
|
|
||||||
# Define a method named `emit` and include this module. Calling
|
# Assembler methods output nasm-friendly x86 asm code, line by
|
||||||
# the assembler methods will output nasm-friendly x86 asm code,
|
# line. This is dead easy and we can trust nasm to compile
|
||||||
# line by line. This is dead easy and we can trust nasm to
|
# correct machine code, which isn't trivial.
|
||||||
# compile correct machine code, which is tricky.
|
class Text < AssemblerBase
|
||||||
module Text
|
|
||||||
|
|
||||||
def self.included(other)
|
def initialize(platform='linux')
|
||||||
im = other.instance_methods
|
super
|
||||||
unless im.include?(:emit)
|
@data = ''
|
||||||
raise "#{self.name} requires the including class define the emit method"
|
@bss = ''
|
||||||
|
@code = ''
|
||||||
|
@templatefile = "#{ROOT}/template.#{platform}.asm"
|
||||||
|
raise "unsupported platform: #{platform}" unless File.readable?(@templatefile)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Define a constant in the .data section.
#
# Appends one "<name>\tequ <value>" line to @data.  Each definition
# is terminated with a newline so that consecutive constants end up
# on separate lines, the same way defvar terminates its lines in @bss.
#
# Returns @data.
def const(name, value)
  @data << "#{name}\tequ #{value}\n"
end
|
||||||
|
|
||||||
|
# Define a variable with the given name and size (in dwords).
|
||||||
|
def defvar(name, dwords=1)
|
||||||
|
unless var?(name)
|
||||||
|
@bss << "#{name}: resd #{dwords}\n"
|
||||||
|
@vars[name] = name
|
||||||
|
else
|
||||||
|
STDERR.puts "[warning] attempted to redefine #{name}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Emit a line of code wrapped between a tab and a newline.
|
||||||
def x86_mov(dest, src)
|
def emit(code, options={})
|
||||||
emit("mov #{dest}, #{src.is_a?(Numeric) ? "0x#{src.to_s(16)}" : src}")
|
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
||||||
|
@code << "#{tab}#{code}\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_movzx(dest, src)
|
# Generate a unique label via the superclass and register it in
# @labels, mapped to its own name.
#
# suffix: forwarded to the superclass by the bare `super`.
# Returns the label name.
def label(suffix=nil)
|
||||||
|
name = super
|
||||||
|
@labels[name] = name
|
||||||
|
return name
|
||||||
|
end
|
||||||
|
|
||||||
|
# Return the complete assembly source: the contents of @templatefile
# with the first "{data}", "{bss}" and "{code}" placeholders replaced
# by @data, @bss and @code respectively.
#
# The block form of `sub` is used on purpose.  With a String
# replacement, `sub` interprets backslash sequences such as \0, \1
# and \\ in the replacement text, which would corrupt generated
# assembly that contains a backslash.  A block's return value is
# inserted literally.
def output
  File.read(@templatefile).
    sub("{data}") { @data }.
    sub("{bss}")  { @bss }.
    sub("{code}") { @code }
end
|
||||||
|
|
||||||
|
# Emit "<name>:" as a line of code.  :tab => nil is passed to emit so
# the label line does not get the default leading tab.  When no name
# is given, a fresh one is generated by calling `label`.
def emit_label(name=label)
|
||||||
|
emit("#{name}:", :tab => nil)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Emit a "mov <dest>, <src>" line.  When src is Numeric, its base-16
# form is appended as a trailing "; 0x..." assembly comment.
# NOTE(review): for a negative Integer, to_s(16) keeps the sign, so
# the comment reads e.g. "0x-1"; this only affects the comment text.
def mov(dest, src)
|
||||||
|
emit("mov #{dest}, #{src}#{src.is_a?(Numeric) ? " ; 0x#{src.to_s(16)}" : ''}")
|
||||||
|
end
|
||||||
|
|
||||||
|
def movzx(dest, src)
|
||||||
emit("movzx #{dest}, #{src}")
|
emit("movzx #{dest}, #{src}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_add(dest, src)
|
def add(dest, src)
|
||||||
emit("add #{dest}, #{src}")
|
emit("add #{dest}, #{src}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_sub(dest, src)
|
def sub(dest, src)
|
||||||
emit("sub #{dest}, #{src}")
|
emit("sub #{dest}, #{src}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_imul(op)
|
def imul(op)
|
||||||
emit("imul #{op}")
|
emit("imul #{op}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_idiv(op)
|
def idiv(op)
|
||||||
emit("idiv #{op}")
|
emit("idiv #{op}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_inc(op)
|
def inc(op)
|
||||||
emit("inc #{op}")
|
emit("inc #{op}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_dec(op)
|
def dec(op)
|
||||||
emit("dec #{op}")
|
emit("dec #{op}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_push(reg)
|
def push(reg)
|
||||||
emit("push #{reg}")
|
emit("push #{reg}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_pop(reg)
|
def pop(reg)
|
||||||
emit("pop #{reg}")
|
emit("pop #{reg}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_call(label)
|
def call(label)
|
||||||
emit("call #{label}")
|
emit("call #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_neg(reg)
|
def neg(reg)
|
||||||
emit("neg #{reg}")
|
emit("neg #{reg}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_not(rm32)
|
def not(rm32)
|
||||||
emit("not #{rm32}")
|
emit("not #{rm32}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_xchg(op1, op2)
|
def xchg(op1, op2)
|
||||||
emit("xchg #{op1}, #{op2}")
|
emit("xchg #{op1}, #{op2}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_and(op1, op2)
|
def and_(op1, op2)
|
||||||
emit("and #{op1}, #{op2}")
|
emit("and #{op1}, #{op2}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_or(op1, op2)
|
def or(op1, op2)
|
||||||
emit("or #{op1}, #{op2}")
|
emit("or #{op1}, #{op2}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_xor(op1, op2)
|
def xor(op1, op2)
|
||||||
emit("xor #{op1}, #{op2}")
|
emit("xor #{op1}, #{op2}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_jz(label)
|
def jz(label)
|
||||||
emit("jz #{label}")
|
emit("jz #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_jnz(label)
|
def jnz(label)
|
||||||
emit("jnz #{label}")
|
emit("jnz #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_jmp(label)
|
def jmp(label)
|
||||||
emit("jmp #{label}")
|
emit("jmp #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_jl(label)
|
def jl(label)
|
||||||
emit("jl #{label}")
|
emit("jl #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_cmp(a, b)
|
def cmp(a, b)
|
||||||
emit("cmp #{a}, #{b}")
|
emit("cmp #{a}, #{b}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_lea(a, b)
|
def lea(a, b)
|
||||||
emit("lea #{a}, #{b}")
|
emit("lea #{a}, #{b}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_shr(a, b)
|
def shr(a, b)
|
||||||
emit("shr #{a}, #{b}")
|
emit("shr #{a}, #{b}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_loop(label)
|
def loop_(label)
|
||||||
emit("loop #{label}")
|
emit("loop #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def x86_int(num)
|
def int(num)
|
||||||
emit("int 0x#{num.to_s(16)}")
|
emit("int 0x#{num.to_s(16)}")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
84
build.rb
84
build.rb
|
|
@ -1,14 +1,8 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
ROOT = __FILE__.sub(/\/build\.rb$/, '') unless defined? ROOT
|
|
||||||
|
|
||||||
require 'compiler'
|
require 'compiler'
|
||||||
|
require 'asm/text'
|
||||||
|
require 'asm/binary'
|
||||||
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
|
|
||||||
0xb8, 1, 0, 0, 0, # mov eax, 1
|
|
||||||
0xcd, 0x80 # int 0x80
|
|
||||||
].pack('c*')
|
|
||||||
|
|
||||||
def main
|
def main
|
||||||
filename = ARGV[0].to_s
|
filename = ARGV[0].to_s
|
||||||
|
|
@ -23,39 +17,21 @@ def base(filename)
|
||||||
filename.sub(/\.[^.]*$/, '')
|
filename.sub(/\.[^.]*$/, '')
|
||||||
end
|
end
|
||||||
|
|
||||||
def interpolate(templatefile, data)
|
|
||||||
template = File.read(templatefile)
|
|
||||||
data.inject(template) do |template, mapping|
|
|
||||||
token, replacement = *mapping
|
|
||||||
template.sub("{#{token}}", replacement)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# filename: input filename
|
# filename: input filename
|
||||||
# format: output format, nasm or binary
|
# asm: assembler to use
|
||||||
# returns: output filename
|
# returns: output filename
|
||||||
def compile(filename, format='asm')
|
def compile(filename, asm, binformat='elf')
|
||||||
|
|
||||||
# compile to asm or binary
|
|
||||||
output = nil
|
|
||||||
File.open(filename, 'r') do |input|
|
File.open(filename, 'r') do |input|
|
||||||
compiler = Compiler.new(input, format)
|
compiler = Compiler.new(input, asm, binformat)
|
||||||
output = compiler.compile
|
compiler.compile
|
||||||
end
|
end
|
||||||
if format == 'asm'
|
|
||||||
mode = 'w'
|
ext = asm.class.name.split('::').last[0,3].downcase == 'bin' ? 'bin' : 'asm'
|
||||||
data, bss, code = *output
|
outfile = "#{base(filename)}.#{ext}"
|
||||||
output = interpolate("#{ROOT}/template.asm",
|
File.open(outfile, 'wb') do |out|
|
||||||
:data => data, :bss => bss, :code => code)
|
out.puts(asm.output)
|
||||||
else
|
|
||||||
mode = 'wb'
|
|
||||||
output += X86_exit
|
|
||||||
end
|
|
||||||
outfile = "#{base(filename)}.#{format}"
|
|
||||||
File.open(outfile, mode) do |out|
|
|
||||||
if format == 'asm'
|
|
||||||
out.puts(output)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
return outfile
|
return outfile
|
||||||
|
|
||||||
|
|
@ -68,33 +44,47 @@ rescue ParseError => e
|
||||||
end
|
end
|
||||||
|
|
||||||
# assemble using nasm, return resulting filename.
|
# assemble using nasm, return resulting filename.
|
||||||
def asm(filename)
|
def asm(filename, binformat='elf')
|
||||||
f = base(filename)
|
f = base(filename)
|
||||||
outfile = "#{f}.o"
|
outfile = "#{f}.o"
|
||||||
output = `nasm -f elf -g -o #{outfile} #{filename}`
|
output = `nasm -f #{binformat} -g -o #{outfile} #{filename}`
|
||||||
if $?.exitstatus != 0
|
if $?.exitstatus != 0
|
||||||
raise "nasm failed: #{$?.exitstatus}", output
|
puts output
|
||||||
|
raise "nasm failed: #{$?.exitstatus}"
|
||||||
end
|
end
|
||||||
return outfile
|
return outfile
|
||||||
end
|
end
|
||||||
|
|
||||||
# link with ld, return resulting filename.
|
# link with ld, return resulting filename.
|
||||||
def link(filename)
|
def link(filename, platform='linux')
|
||||||
f = base(filename)
|
f = base(filename)
|
||||||
output = `ld -o #{f} #{filename}`
|
cmd, args = *case platform
|
||||||
|
when 'darwin': ['gcc', '-arch i386']
|
||||||
|
when 'linux': ['ld', '']
|
||||||
|
else
|
||||||
|
raise "unsupported platform: #{platform}"
|
||||||
|
end
|
||||||
|
output = `#{cmd} #{args} -o #{f} #{filename}`
|
||||||
if $?.exitstatus != 0
|
if $?.exitstatus != 0
|
||||||
raise "ld failed: #{$?.exitstatus}", output
|
puts output
|
||||||
|
raise "ld failed: #{$?.exitstatus}"
|
||||||
end
|
end
|
||||||
`chmod +x #{f}`
|
`chmod +x #{f}`
|
||||||
return f
|
return f
|
||||||
end
|
end
|
||||||
|
|
||||||
def build(filename, format='asm')
|
# TODO Use a dependency injection framework for the assembler, and
|
||||||
if format == 'asm'
|
# other parts as things become more modular.
|
||||||
link( asm( compile(filename) ) )
|
def build(filename, platform='linux', format='asm', binformat='elf')
|
||||||
else # binary
|
bin = if format == 'asm'
|
||||||
link( compile(filename, format='bin') )
|
code = compile(filename, Assembler::Text.new(platform))
|
||||||
end
|
obj = asm( code, binformat )
|
||||||
|
link( obj, platform )
|
||||||
|
else # binary
|
||||||
|
obj = compile(filename, Assembler::Binary.new(platform), binformat)
|
||||||
|
link( obj, platform )
|
||||||
|
end
|
||||||
|
return bin
|
||||||
end
|
end
|
||||||
|
|
||||||
def run(filename)
|
def run(filename)
|
||||||
|
|
|
||||||
338
compiler.rb
338
compiler.rb
|
|
@ -12,9 +12,6 @@
|
||||||
# require 'rubygems'
|
# require 'rubygems'
|
||||||
# require 'unroller'
|
# require 'unroller'
|
||||||
|
|
||||||
require 'asm'
|
|
||||||
require 'opcode'
|
|
||||||
|
|
||||||
class ParseError < StandardError
|
class ParseError < StandardError
|
||||||
attr_reader :caller, :context
|
attr_reader :caller, :context
|
||||||
def initialize(caller, context=nil)
|
def initialize(caller, context=nil)
|
||||||
|
|
@ -24,43 +21,27 @@ class ParseError < StandardError
|
||||||
end
|
end
|
||||||
|
|
||||||
class Compiler
|
class Compiler
|
||||||
# This module uses our `emit_byte` method to output x86 machine code
|
|
||||||
# directly using the assembler library.
|
|
||||||
# include Assembler::Binary
|
|
||||||
|
|
||||||
Keywords = %w[
|
Keywords = %w[
|
||||||
if else end while until repeat for to do break
|
if else end while until repeat for to do break
|
||||||
print
|
print
|
||||||
]
|
]
|
||||||
|
|
||||||
attr_reader :data, :bss, :code
|
attr_reader :asm
|
||||||
|
|
||||||
def initialize(input, asm=Assembler::Text.new)
|
def initialize(input, asm, binformat='elf')
|
||||||
# XXX for development only!
|
# XXX for development only!
|
||||||
@indent = 0 # for pretty printing
|
@indent = 0 # for pretty printing
|
||||||
|
|
||||||
|
# The only binary format our assembler knows right now is ELF.
|
||||||
|
unless binformat == 'elf'
|
||||||
|
raise "Only ELF is supported. Unsupported binary format: #{binformat}."
|
||||||
|
end
|
||||||
|
|
||||||
@look = '' # Next lookahead char.
|
@look = '' # Next lookahead char.
|
||||||
@token = nil # Type of last read token.
|
@token = nil # Type of last read token.
|
||||||
@value = nil # Value of last read token.
|
@value = nil # Value of last read token.
|
||||||
@input = input # Stream to read from.
|
@input = input # Stream to read from.
|
||||||
@data = '' # Data section.
|
|
||||||
@bss = '' # BSS section.
|
|
||||||
@code = '' # Code section.
|
|
||||||
@binary = [] # Byte array of machine code.
|
|
||||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
|
||||||
@num_labels = 0 # Used to generate unique labels.
|
|
||||||
@num_labels_with_suffix = Hash.new(0)
|
|
||||||
|
|
||||||
@header_size = 0x100 # ELF, Linux, x86
|
|
||||||
@text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86).
|
|
||||||
@text_size = 0x02be00 # Size of text section.
|
|
||||||
@data_offset = @text_offset + @text_size # Offset of data section.
|
|
||||||
@data_size = 0x4e00 # Size of data section.
|
|
||||||
@bss_offset = @data_offset + @data_size # Offset of bss section.
|
|
||||||
@bss_size = 0 # Size of bss section.
|
|
||||||
|
|
||||||
# Labels for the assembler. Maps names to locations.
|
|
||||||
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
|
|
||||||
|
|
||||||
@asm = asm
|
@asm = asm
|
||||||
|
|
||||||
|
|
@ -68,10 +49,6 @@ class Compiler
|
||||||
get_char
|
get_char
|
||||||
end
|
end
|
||||||
|
|
||||||
def asm
|
|
||||||
@asm
|
|
||||||
end
|
|
||||||
|
|
||||||
def compile
|
def compile
|
||||||
block
|
block
|
||||||
expected(:'end of file') unless eof?
|
expected(:'end of file') unless eof?
|
||||||
|
|
@ -107,10 +84,10 @@ class Compiler
|
||||||
match('(')
|
match('(')
|
||||||
# TODO arg list
|
# TODO arg list
|
||||||
match(')')
|
match(')')
|
||||||
x86_call(name)
|
asm.call(name)
|
||||||
else
|
else
|
||||||
# variable access
|
# variable access
|
||||||
x86_mov(:eax, "dword [#{name}]")
|
asm.mov(:eax, "dword [#{name}]")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -123,7 +100,7 @@ class Compiler
|
||||||
elsif alpha?(@look)
|
elsif alpha?(@look)
|
||||||
identifier # or call
|
identifier # or call
|
||||||
elsif digit?(@look)
|
elsif digit?(@look)
|
||||||
x86_mov(:eax, get_number.to_i)
|
asm.mov(:eax, get_number.to_i)
|
||||||
else
|
else
|
||||||
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
|
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
|
||||||
end
|
end
|
||||||
|
|
@ -134,7 +111,7 @@ class Compiler
|
||||||
sign = @look
|
sign = @look
|
||||||
match(sign) if op?(:unary, sign)
|
match(sign) if op?(:unary, sign)
|
||||||
factor
|
factor
|
||||||
x86_neg(:eax) if sign == '-'
|
asm.neg(:eax) if sign == '-'
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse and translate a single term (factor or mulop). Result is in
|
# Parse and translate a single term (factor or mulop). Result is in
|
||||||
|
|
@ -172,7 +149,7 @@ class Compiler
|
||||||
def add
|
def add
|
||||||
match('+')
|
match('+')
|
||||||
term # Result is in eax.
|
term # Result is in eax.
|
||||||
x86_add(:eax, '[esp]') # Add a to b.
|
asm.add(:eax, '[esp]') # Add a to b.
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a subtraction operator and the 2nd term (b). The result is
|
# Parse a subtraction operator and the 2nd term (b). The result is
|
||||||
|
|
@ -180,8 +157,8 @@ class Compiler
|
||||||
def subtract
|
def subtract
|
||||||
match('-')
|
match('-')
|
||||||
term # Result, b, is in eax.
|
term # Result, b, is in eax.
|
||||||
x86_neg(:eax) # Fake the subtraction. a - b == a + -b
|
asm.neg(:eax) # Fake the subtraction. a - b == a + -b
|
||||||
x86_add(:eax, '[esp]') # Add a and -b.
|
asm.add(:eax, '[esp]') # Add a and -b.
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse an addition operator and the 2nd term (b). The result is
|
# Parse an addition operator and the 2nd term (b). The result is
|
||||||
|
|
@ -189,7 +166,7 @@ class Compiler
|
||||||
def multiply
|
def multiply
|
||||||
match('*')
|
match('*')
|
||||||
signed_factor # Result is in eax.
|
signed_factor # Result is in eax.
|
||||||
x86_imul('dword [esp]') # Multiply a by b.
|
asm.imul('dword [esp]') # Multiply a by b.
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a division operator and the divisor (b). The result is
|
# Parse a division operator and the divisor (b). The result is
|
||||||
|
|
@ -197,14 +174,14 @@ class Compiler
|
||||||
def divide
|
def divide
|
||||||
match('/')
|
match('/')
|
||||||
signed_factor # Result is in eax.
|
signed_factor # Result is in eax.
|
||||||
x86_xchg(:eax, '[esp]') # Swap the divisor and dividend into
|
asm.xchg(:eax, '[esp]') # Swap the divisor and dividend into
|
||||||
# the correct places.
|
# the correct places.
|
||||||
|
|
||||||
# idiv uses edx:eax as the dividend so we need to ensure that edx
|
# idiv uses edx:eax as the dividend so we need to ensure that edx
|
||||||
# is correctly sign-extended w.r.t. eax.
|
# is correctly sign-extended w.r.t. eax.
|
||||||
emit('cdq') # Sign-extend eax into edx (Convert Double to
|
asm.cdq # Sign-extend eax into edx (Convert Double to
|
||||||
# Quad).
|
# Quad).
|
||||||
x86_idiv('dword [esp]') # Divide a (eax) by b ([esp]).
|
asm.idiv('dword [esp]') # Divide a (eax) by b ([esp]).
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -215,19 +192,19 @@ class Compiler
|
||||||
def bitor_expr
|
def bitor_expr
|
||||||
match('|')
|
match('|')
|
||||||
term
|
term
|
||||||
x86_or(:eax, '[esp]')
|
asm.or(:eax, '[esp]')
|
||||||
end
|
end
|
||||||
|
|
||||||
def bitand_expr
|
def bitand_expr
|
||||||
match('&')
|
match('&')
|
||||||
signed_factor
|
signed_factor
|
||||||
x86_and(:eax, '[esp]')
|
asm.and_(:eax, '[esp]')
|
||||||
end
|
end
|
||||||
|
|
||||||
def xor_expr
|
def xor_expr
|
||||||
match('^')
|
match('^')
|
||||||
term
|
term
|
||||||
x86_xor(:eax, '[esp]')
|
asm.xor(:eax, '[esp]')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -240,6 +217,7 @@ class Compiler
|
||||||
while @look == '|'
|
while @look == '|'
|
||||||
op '||' do
|
op '||' do
|
||||||
boolean_term
|
boolean_term
|
||||||
|
# !!! this method has moved, IMPLEMENT THIS!
|
||||||
emit("<logical or>")
|
emit("<logical or>")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
@ -250,6 +228,7 @@ class Compiler
|
||||||
while @look == '&'
|
while @look == '&'
|
||||||
op '&&' do
|
op '&&' do
|
||||||
not_factor
|
not_factor
|
||||||
|
# !!! this method has moved, IMPLEMENT THIS!
|
||||||
emit("<logical and>")
|
emit("<logical and>")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
@ -258,9 +237,9 @@ class Compiler
|
||||||
def boolean_factor
|
def boolean_factor
|
||||||
if boolean?(@look)
|
if boolean?(@look)
|
||||||
if get_boolean == 'true'
|
if get_boolean == 'true'
|
||||||
x86_mov(:eax, -1)
|
asm.mov(:eax, -1)
|
||||||
else
|
else
|
||||||
x86_xor(:eax, :eax)
|
asm.xor(:eax, :eax)
|
||||||
end
|
end
|
||||||
scan
|
scan
|
||||||
else
|
else
|
||||||
|
|
@ -273,7 +252,7 @@ class Compiler
|
||||||
match('!')
|
match('!')
|
||||||
boolean_factor
|
boolean_factor
|
||||||
make_boolean(:eax) # ensure it is -1 or 0...
|
make_boolean(:eax) # ensure it is -1 or 0...
|
||||||
x86_not(:eax) # so that not is also boolean not
|
asm.not(:eax) # so that not is also boolean not
|
||||||
else
|
else
|
||||||
boolean_factor
|
boolean_factor
|
||||||
end
|
end
|
||||||
|
|
@ -282,11 +261,11 @@ class Compiler
|
||||||
# Convert any identifier to a boolean (-1 or 0). This is
|
# Convert any identifier to a boolean (-1 or 0). This is
|
||||||
# semantically equivalent to !!reg in C or Ruby.
|
# semantically equivalent to !!reg in C or Ruby.
|
||||||
def make_boolean(reg=:eax)
|
def make_boolean(reg=:eax)
|
||||||
end_label = unique_label(:endmakebool)
|
end_label = asm.label(:endmakebool)
|
||||||
x86_cmp(reg, 0) # if false do nothing
|
asm.cmp(reg, 0) # if false do nothing
|
||||||
x86_jz(end_label)
|
asm.jz(end_label)
|
||||||
x86_mov(reg, -1) # truthy, make it true
|
asm.mov(reg, -1) # truthy, make it true
|
||||||
emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
def relation
|
def relation
|
||||||
|
|
@ -314,14 +293,14 @@ class Compiler
|
||||||
# and make_boolean will leave -1 (true) for us in eax.
|
# and make_boolean will leave -1 (true) for us in eax.
|
||||||
def neq_relation
|
def neq_relation
|
||||||
expression
|
expression
|
||||||
x86_sub(:eax, '[esp]')
|
asm.sub(:eax, '[esp]')
|
||||||
make_boolean
|
make_boolean
|
||||||
end
|
end
|
||||||
|
|
||||||
# Invert the != test for equal.
|
# Invert the != test for equal.
|
||||||
def eq_relation
|
def eq_relation
|
||||||
neq_relation
|
neq_relation
|
||||||
x86_not(:eax)
|
asm.not(:eax)
|
||||||
end
|
end
|
||||||
|
|
||||||
# > and < are both implemented in terms of jl (jump if less than).
|
# > and < are both implemented in terms of jl (jump if less than).
|
||||||
|
|
@ -337,20 +316,20 @@ class Compiler
|
||||||
# Invert the sense of the test?
|
# Invert the sense of the test?
|
||||||
invert = options[:invert]
|
invert = options[:invert]
|
||||||
|
|
||||||
true_label = unique_label(:cmp)
|
true_label = asm.label(:cmp)
|
||||||
end_label = unique_label(:endcmp)
|
end_label = asm.label(:endcmp)
|
||||||
x86_cmp(a, b)
|
asm.cmp(a, b)
|
||||||
x86_jl(true_label)
|
asm.jl(true_label)
|
||||||
|
|
||||||
x86_xor(:eax, :eax) # return false
|
asm.xor(:eax, :eax) # return false
|
||||||
x86_not(:eax) if invert # (or true if inverted)
|
asm.not(:eax) if invert # (or true if inverted)
|
||||||
x86_jmp(end_label)
|
asm.jmp(end_label)
|
||||||
|
|
||||||
emit_label(true_label)
|
asm.emit_label(true_label)
|
||||||
x86_xor(:eax, :eax) # return true
|
asm.xor(:eax, :eax) # return true
|
||||||
x86_not(:eax) unless invert # (or false if inverted)
|
asm.not(:eax) unless invert # (or false if inverted)
|
||||||
|
|
||||||
emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
# a: [esp]
|
# a: [esp]
|
||||||
|
|
@ -401,8 +380,8 @@ class Compiler
|
||||||
name = @value
|
name = @value
|
||||||
match('=')
|
match('=')
|
||||||
boolean_expression
|
boolean_expression
|
||||||
defvar(name) unless var?(name)
|
asm.defvar(name) unless asm.var?(name)
|
||||||
x86_mov("dword [#{name}]", :eax)
|
asm.mov("dword [#{name}]", :eax)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a code block.
|
# Parse a code block.
|
||||||
|
|
@ -439,26 +418,26 @@ class Compiler
|
||||||
|
|
||||||
# Parse an if-else statement.
|
# Parse an if-else statement.
|
||||||
def if_else_stmt(label)
|
def if_else_stmt(label)
|
||||||
else_label = unique_label(:end_or_else)
|
else_label = asm.label(:end_or_else)
|
||||||
end_label = else_label # only generated if else clause
|
end_label = else_label # only generated if else clause
|
||||||
# present
|
# present
|
||||||
condition
|
condition
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
x86_jz(else_label)
|
asm.jz(else_label)
|
||||||
@indent += 1
|
@indent += 1
|
||||||
block(label)
|
block(label)
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
if @token == :keyword && @value == 'else'
|
if @token == :keyword && @value == 'else'
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
end_label = unique_label(:endif) # now we need the 2nd label
|
end_label = asm.label(:endif) # now we need the 2nd label
|
||||||
x86_jmp(end_label)
|
asm.jmp(end_label)
|
||||||
emit_label(else_label)
|
asm.emit_label(else_label)
|
||||||
@indent += 1
|
@indent += 1
|
||||||
block(label)
|
block(label)
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
end
|
end
|
||||||
match_word('end')
|
match_word('end')
|
||||||
emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Used to implement the Two-Label-Loops (while, until, repeat).
|
# Used to implement the Two-Label-Loops (while, until, repeat).
|
||||||
|
|
@ -467,9 +446,9 @@ class Compiler
|
||||||
# block: Code to execute at the start of each iteration. (e.g. a
|
# block: Code to execute at the start of each iteration. (e.g. a
|
||||||
# condition)
|
# condition)
|
||||||
def simple_loop(name)
|
def simple_loop(name)
|
||||||
start_label = unique_label(:"loop_#{name}")
|
start_label = asm.label(:"loop_#{name}")
|
||||||
end_label = unique_label(:"end_#{name}")
|
end_label = asm.label(:"end_#{name}")
|
||||||
emit_label(start_label)
|
asm.emit_label(start_label)
|
||||||
|
|
||||||
yield(end_label)
|
yield(end_label)
|
||||||
|
|
||||||
|
|
@ -477,15 +456,15 @@ class Compiler
|
||||||
block(end_label)
|
block(end_label)
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
match_word('end')
|
match_word('end')
|
||||||
x86_jmp(start_label)
|
asm.jmp(start_label)
|
||||||
emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
def while_stmt
|
def while_stmt
|
||||||
simple_loop('while') do |end_label|
|
simple_loop('while') do |end_label|
|
||||||
condition
|
condition
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
x86_jz(end_label)
|
asm.jz(end_label)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -493,7 +472,7 @@ class Compiler
|
||||||
simple_loop('until') do |end_label|
|
simple_loop('until') do |end_label|
|
||||||
condition
|
condition
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
x86_jnz(end_label)
|
asm.jnz(end_label)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -511,24 +490,24 @@ class Compiler
|
||||||
counter = "[#{get_name}]"
|
counter = "[#{get_name}]"
|
||||||
match('=')
|
match('=')
|
||||||
boolean_expression # initial value
|
boolean_expression # initial value
|
||||||
x86_sub(:eax, 1) # pre-decrement because of the
|
asm.sub(:eax, 1) # pre-decrement because of the
|
||||||
# following pre-increment
|
# following pre-increment
|
||||||
x86_mov(counter, :eax) # stash the counter in memory
|
asm.mov(counter, :eax) # stash the counter in memory
|
||||||
match_word('to', :scan => true)
|
match_word('to', :scan => true)
|
||||||
boolean_expression # final value
|
boolean_expression # final value
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
x86_push(:eax) # stash final value on stack
|
asm.push(:eax) # stash final value on stack
|
||||||
final = '[esp]'
|
final = '[esp]'
|
||||||
|
|
||||||
simple_loop('for') do |end_label|
|
simple_loop('for') do |end_label|
|
||||||
x86_mov(:ecx, counter) # get the counter
|
asm.mov(:ecx, counter) # get the counter
|
||||||
x86_add(:ecx, 1) # increment
|
asm.add(:ecx, 1) # increment
|
||||||
x86_mov(counter, :ecx) # store the counter
|
asm.mov(counter, :ecx) # store the counter
|
||||||
x86_cmp(final, :ecx) # check if we're done
|
asm.cmp(final, :ecx) # check if we're done
|
||||||
x86_jz(end_label) # if so jump to the end
|
asm.jz(end_label) # if so jump to the end
|
||||||
end
|
end
|
||||||
|
|
||||||
x86_add(:esp, 4) # clean up the stack
|
asm.add(:esp, 4) # clean up the stack
|
||||||
end
|
end
|
||||||
|
|
||||||
# do 5
|
# do 5
|
||||||
|
|
@ -538,39 +517,38 @@ class Compiler
|
||||||
|
|
||||||
boolean_expression
|
boolean_expression
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
x86_mov(:ecx, :eax)
|
asm.mov(:ecx, :eax)
|
||||||
x86_push(:ecx)
|
|
||||||
|
|
||||||
start_label = unique_label(:do)
|
start_label = asm.label(:do)
|
||||||
end_label = unique_label(:enddo)
|
end_label = asm.label(:enddo)
|
||||||
emit_label(start_label)
|
asm.emit_label(start_label)
|
||||||
|
|
||||||
x86_push(:ecx)
|
asm.push(:ecx)
|
||||||
|
|
||||||
@indent += 1
|
@indent += 1
|
||||||
block(end_label)
|
block(end_label)
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
|
|
||||||
x86_pop(:ecx)
|
asm.pop(:ecx)
|
||||||
|
|
||||||
match_word('end')
|
match_word('end')
|
||||||
x86_loop(start_label)
|
asm.loop_(start_label)
|
||||||
|
|
||||||
# Phony push! break needs to clean up the stack, but since we
|
# Phony push! break needs to clean up the stack, but since we
|
||||||
# don't know if there is a break at this point we fake a push and
|
# don't know if there is a break at this point we fake a push and
|
||||||
# always clean up the stack after.
|
# always clean up the stack after.
|
||||||
x86_sub(:esp, 4)
|
asm.sub(:esp, 4)
|
||||||
|
|
||||||
emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
|
|
||||||
# If there was a break we have to clean up the stack here. If
|
# If there was a break we have to clean up the stack here. If
|
||||||
# there was no break we clean up the phony push above.
|
# there was no break we clean up the phony push above.
|
||||||
x86_add(:esp, 4)
|
asm.add(:esp, 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
def break_stmt(label)
|
def break_stmt(label)
|
||||||
if label
|
if label
|
||||||
x86_jmp(label)
|
asm.jmp(label)
|
||||||
else
|
else
|
||||||
expected(:'break to be somewhere useful',
|
expected(:'break to be somewhere useful',
|
||||||
:got => :'a break outside a loop')
|
:got => :'a break outside a loop')
|
||||||
|
|
@ -581,51 +559,57 @@ class Compiler
|
||||||
def condition
|
def condition
|
||||||
boolean_expression
|
boolean_expression
|
||||||
skip_whitespace
|
skip_whitespace
|
||||||
x86_cmp(:eax, 0) # 0 is false, anything else is true
|
asm.cmp(:eax, 0) # 0 is false, anything else is true
|
||||||
end
|
end
|
||||||
|
|
||||||
# print eax in hex format
|
# print eax in hex format
|
||||||
def print_stmt
|
def print_stmt
|
||||||
# define a lookup table of digits
|
asm.block do
|
||||||
unless var?('DIGITS')
|
# define a lookup table of digits
|
||||||
defvar('DIGITS', 4)
|
unless var?('DIGITS')
|
||||||
x86_mov('dword [DIGITS]', 0x33323130)
|
defvar('DIGITS', 4)
|
||||||
x86_mov('dword [DIGITS+4]', 0x37363534)
|
mov('dword [DIGITS]', 0x33323130)
|
||||||
x86_mov('dword [DIGITS+8]', 0x62613938)
|
mov('dword [DIGITS+4]', 0x37363534)
|
||||||
x86_mov('dword [DIGITS+12]', 0x66656463)
|
mov('dword [DIGITS+8]', 0x62613938)
|
||||||
|
mov('dword [DIGITS+12]', 0x66656463)
|
||||||
|
end
|
||||||
|
# 3 dwords == 12 chars
|
||||||
|
defvar('HEX', 3) unless var?('HEX')
|
||||||
|
# TODO check sign and prepend '-' if negative
|
||||||
|
mov('word [HEX]', 0x7830) # "0x" == [48, 120]
|
||||||
|
mov('word [HEX+10]', 0xa) # newline + null terminator
|
||||||
end
|
end
|
||||||
# 3 dwords == 12 chars
|
|
||||||
defvar('HEX', 3) unless var?('HEX')
|
|
||||||
# TODO check sign and prepend '-' if negative
|
|
||||||
x86_mov('word [HEX]', 0x7830) # "0x" == [48, 120]
|
|
||||||
x86_mov('word [HEX+10]', 0xa) # newline + null terminator
|
|
||||||
boolean_expression
|
boolean_expression
|
||||||
# convert eax to a hex string
|
asm.block do
|
||||||
x86_lea(:esi, '[DIGITS]')
|
# convert eax to a hex string
|
||||||
x86_lea(:edi, '[HEX+9]')
|
lea(:esi, '[DIGITS]')
|
||||||
# build the string backwards (right to left), byte by byte
|
lea(:edi, '[HEX+9]')
|
||||||
x86_mov(:ecx, 4)
|
# build the string backwards (right to left), byte by byte
|
||||||
emit_label(loop_label=unique_label)
|
mov(:ecx, 4)
|
||||||
# low nybble of nth byte
|
end
|
||||||
x86_movzx(:ebx, :al)
|
asm.emit_label(loop_label=asm.label)
|
||||||
x86_and(:bl, 0x0f) # isolate low nybble
|
asm.block do
|
||||||
x86_movzx(:edx, 'byte [esi+ebx]')
|
# low nybble of nth byte
|
||||||
x86_mov('byte [edi]', :dl)
|
movzx(:ebx, :al)
|
||||||
x86_dec(:edi)
|
and_(:bl, 0x0f) # isolate low nybble
|
||||||
# high nybble of nth byte
|
movzx(:edx, 'byte [esi+ebx]')
|
||||||
x86_movzx(:ebx, :al)
|
mov('byte [edi]', :dl)
|
||||||
x86_and(:bl, 0xf0) # isolate high nybble
|
dec(:edi)
|
||||||
x86_shr(:bl, 4)
|
# high nybble of nth byte
|
||||||
x86_mov(:dl, 'byte [esi+ebx]')
|
movzx(:ebx, :al)
|
||||||
x86_mov('byte [edi]', :dl)
|
and_(:bl, 0xf0) # isolate high nybble
|
||||||
x86_dec(:edi)
|
shr(:bl, 4)
|
||||||
x86_shr(:eax, 8)
|
mov(:dl, 'byte [esi+ebx]')
|
||||||
x86_loop(loop_label)
|
mov('byte [edi]', :dl)
|
||||||
x86_mov(:eax, 4) # SYS_write
|
dec(:edi)
|
||||||
x86_mov(:ebx, 1) # STDOUT
|
shr(:eax, 8)
|
||||||
x86_lea(:ecx, '[HEX]')
|
loop_(loop_label)
|
||||||
x86_mov(:edx, 11) # excluding term, max # of chars to print
|
mov(:eax, 4) # SYS_write
|
||||||
x86_int(0x80)
|
mov(:ebx, 1) # STDOUT
|
||||||
|
lea(:ecx, '[HEX]')
|
||||||
|
mov(:edx, 11) # excluding term, max # of chars to print
|
||||||
|
int(0x80)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -802,67 +786,7 @@ class Compiler
|
||||||
get_char while any_whitespace?(@look)
|
get_char while any_whitespace?(@look)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# Define a constant in the .data section.
|
|
||||||
def equ(name, value)
|
|
||||||
@data << "#{name}\tequ #{value}"
|
|
||||||
end
|
|
||||||
|
|
||||||
# Define a variable with the given name and size (in dwords).
|
|
||||||
def defvar(name, dwords=1)
|
|
||||||
unless var?(name)
|
|
||||||
@bss << "#{name}: resd #{dwords}\n"
|
|
||||||
@vars[name] = @bss_size
|
|
||||||
@bss_size += dwords
|
|
||||||
else
|
|
||||||
STDERR.puts "[warning] attempted to redefine #{name}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def var?(name)
|
|
||||||
@vars[name]
|
|
||||||
end
|
|
||||||
|
|
||||||
def var(name)
|
|
||||||
@vars[name]
|
|
||||||
end
|
|
||||||
|
|
||||||
# Emit a line of code wrapped between a tab and a newline. Required
|
|
||||||
# by Assembler::Text.
|
|
||||||
def emit(code, options={})
|
|
||||||
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
|
||||||
@code << "#{tab}#{code}\n"
|
|
||||||
end
|
|
||||||
|
|
||||||
# emit_byte and bytes_written are required by Assembler::Binary.
|
|
||||||
def emit_byte(byte)
|
|
||||||
@binary << byte
|
|
||||||
end
|
|
||||||
def bytes_written
|
|
||||||
@binary.size
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
def emit_label(name=unique_label)
|
|
||||||
emit("#{name}:", :tab => nil)
|
|
||||||
|
|
||||||
@labels[name] = @binary.length
|
|
||||||
end
|
|
||||||
|
|
||||||
def resolve_label(label)
|
|
||||||
@labels[label]
|
|
||||||
end
|
|
||||||
|
|
||||||
# Generate a unique label.
|
|
||||||
def unique_label(suffix=nil)
|
|
||||||
@num_labels += 1
|
|
||||||
if suffix
|
|
||||||
@num_labels_with_suffix[suffix] += 1
|
|
||||||
suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
|
|
||||||
end
|
|
||||||
"L#{sprintf "%06d", @num_labels}#{suffix}"
|
|
||||||
end
|
|
||||||
|
|
||||||
def indent
|
def indent
|
||||||
real_indent = if @value == 'else' || @value == 'end'
|
real_indent = if @value == 'else' || @value == 'end'
|
||||||
@indent - 1
|
@indent - 1
|
||||||
|
|
@ -872,16 +796,10 @@ class Compiler
|
||||||
' ' * (real_indent * 4)
|
' ' * (real_indent * 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Pack the array into a byte string.
|
|
||||||
def binary
|
|
||||||
@binary.pack('c*')
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
def pushing(reg)
|
def pushing(reg)
|
||||||
x86_push(reg)
|
asm.push(reg)
|
||||||
yield
|
yield
|
||||||
x86_add(:esp, 4)
|
asm.add(:esp, 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
def op(name)
|
def op(name)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
mov ebx, eax
|
|
||||||
mov eax, 1
|
|
||||||
int 0x80
|
|
||||||
6
min.darwin.asm
Normal file
6
min.darwin.asm
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
BITS 32
|
||||||
|
SECTION .text
|
||||||
|
GLOBAL _main
|
||||||
|
_main:
|
||||||
|
mov eax, 0
|
||||||
|
ret
|
||||||
4
min2.asm
4
min2.asm
|
|
@ -1,4 +0,0 @@
|
||||||
BITS 32
|
|
||||||
mov ebx,eax
|
|
||||||
mov eax,1
|
|
||||||
int 0x80
|
|
||||||
25
opcode.rb
25
opcode.rb
|
|
@ -1,25 +0,0 @@
|
||||||
class OpCode
|
|
||||||
Attrs = [:prefix, :op, :modrm, :sib, :extra]
|
|
||||||
attr_accessor *Attrs
|
|
||||||
|
|
||||||
def initialize(attrs)
|
|
||||||
Attrs.each do |attr|
|
|
||||||
send("#{attr}=", attrs[attr])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def size
|
|
||||||
Attrs.inject(0) {|sum, attr|
|
|
||||||
iv = instance_variable_get("@#{attr}")
|
|
||||||
if iv.is_a?(Enumerable)
|
|
||||||
sum + iv.size
|
|
||||||
else
|
|
||||||
sum + 1
|
|
||||||
end
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
def binary
|
|
||||||
Attrs.map {|attr| send(attr)}.flatten.pack('c*')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
BITS 32
|
|
||||||
GLOBAL _start
|
|
||||||
SECTION .text
|
|
||||||
_start:
|
|
||||||
11
template.darwin.asm
Normal file
11
template.darwin.asm
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
BITS 32
|
||||||
|
GLOBAL _main
|
||||||
|
SECTION .data
|
||||||
|
{data}
|
||||||
|
SECTION .bss
|
||||||
|
{bss}
|
||||||
|
SECTION .text
|
||||||
|
_main:
|
||||||
|
{code}
|
||||||
|
;; The result in eax is the exit code, just return.
|
||||||
|
ret
|
||||||
47
test.rb
47
test.rb
|
|
@ -1,47 +0,0 @@
|
||||||
require 'compiler'
|
|
||||||
require 'stringio'
|
|
||||||
|
|
||||||
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
|
|
||||||
0xb8, 1, 0, 0, 0, # mov eax, 1
|
|
||||||
0xcd, 0x80 # int 0x80
|
|
||||||
].pack('c*')
|
|
||||||
|
|
||||||
def error(msg) STDERR.puts(msg) end
|
|
||||||
|
|
||||||
def parse(input)
|
|
||||||
compiler = Compiler.new(input)
|
|
||||||
compiler.parse # tuple of [data, bss, code, binary]
|
|
||||||
|
|
||||||
rescue ParseError => e
|
|
||||||
error("[error] #{e.message}")
|
|
||||||
error("[context] #{e.context}")
|
|
||||||
# error("Aborting!")
|
|
||||||
error(e.caller)
|
|
||||||
exit(1)
|
|
||||||
end
|
|
||||||
|
|
||||||
def interpolate(template, data)
|
|
||||||
data.inject(template) do |template, mapping|
|
|
||||||
token, replacement = *mapping
|
|
||||||
template.sub("{#{token}}", replacement)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def main(arg)
|
|
||||||
input = if File.readable?(arg)
|
|
||||||
File.open(arg)
|
|
||||||
else
|
|
||||||
# StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n")
|
|
||||||
StringIO.new("abc=999\nabc-888\n")
|
|
||||||
end
|
|
||||||
data, bss, code, binary = *parse(input)
|
|
||||||
template = File.read("template.asm")
|
|
||||||
asm = interpolate(template, :data => data, :bss => bss, :code => code)
|
|
||||||
File.open("test.asm", "w") { |f| f.puts(asm) }
|
|
||||||
File.open("test.bin", "wb") { |f|
|
|
||||||
f.write(binary)
|
|
||||||
f.write(X86_exit)
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
main(ARGV[0].to_s)
|
|
||||||
|
|
@ -1,50 +1,63 @@
|
||||||
|
PLATFORM=$(shell uname -s)
|
||||||
|
BINFORMAT="bin"
|
||||||
|
ifeq ($(PLATFORM), Darwin)
|
||||||
|
BINFORMAT="macho"
|
||||||
|
endif
|
||||||
|
ifeq ($(PLATFORM), Linux)
|
||||||
|
BINFORMAT="elf"
|
||||||
|
endif
|
||||||
|
ifeq ($(BINFORMAT), bin)
|
||||||
|
@echo "WARNING: binary format is 'bin', this is probably not what you want!"
|
||||||
|
@echo " Your platform, " $(PLATFORM) ", is unsupported."
|
||||||
|
endif
|
||||||
|
|
||||||
all: lt gt ge le eq neq if while until repeat for do break print
|
all: lt gt ge le eq neq if while until repeat for do break print
|
||||||
@echo -n
|
@true
|
||||||
|
|
||||||
lt: test.rb test_lt.code
|
lt: test.rb test_lt.code
|
||||||
@./test.rb lt
|
@./test.rb lt $(BINFORMAT)
|
||||||
|
|
||||||
gt: test.rb test_gt.code
|
gt: test.rb test_gt.code
|
||||||
@./test.rb gt
|
@./test.rb gt $(BINFORMAT)
|
||||||
|
|
||||||
ge: test.rb test_ge.code
|
ge: test.rb test_ge.code
|
||||||
@./test.rb ge
|
@./test.rb ge $(BINFORMAT)
|
||||||
|
|
||||||
le: test.rb test_le.code
|
le: test.rb test_le.code
|
||||||
@./test.rb le
|
@./test.rb le $(BINFORMAT)
|
||||||
|
|
||||||
eq: test.rb test_eq.code
|
eq: test.rb test_eq.code
|
||||||
@./test.rb eq
|
@./test.rb eq $(BINFORMAT)
|
||||||
|
|
||||||
neq: test.rb test_neq.code
|
neq: test.rb test_neq.code
|
||||||
@./test.rb neq
|
@./test.rb neq $(BINFORMAT)
|
||||||
|
|
||||||
if: test.rb test_if.code
|
if: test.rb test_if.code
|
||||||
@./test.rb if
|
@./test.rb if $(BINFORMAT)
|
||||||
|
|
||||||
while: test.rb test_while.code
|
while: test.rb test_while.code
|
||||||
@./test.rb while
|
@./test.rb while $(BINFORMAT)
|
||||||
|
|
||||||
until: test.rb test_until.code
|
until: test.rb test_until.code
|
||||||
@./test.rb until
|
@./test.rb until $(BINFORMAT)
|
||||||
|
|
||||||
repeat: test.rb test_repeat.code
|
repeat: test.rb test_repeat.code
|
||||||
@./test.rb repeat
|
@./test.rb repeat $(BINFORMAT)
|
||||||
|
|
||||||
for: test.rb test_for.code
|
for: test.rb test_for.code
|
||||||
@./test.rb for
|
@./test.rb for $(BINFORMAT)
|
||||||
|
|
||||||
do: test.rb test_do.code
|
do: test.rb test_do.code
|
||||||
@./test.rb do
|
@./test.rb do $(BINFORMAT)
|
||||||
|
|
||||||
break: test.rb test_break.code
|
break: test.rb test_break.code
|
||||||
@./test.rb break
|
@./test.rb break $(BINFORMAT)
|
||||||
|
|
||||||
print: test.rb test_print.code
|
print: test.rb test_print.code
|
||||||
@./test.rb print
|
@./test.rb print $(BINFORMAT)
|
||||||
|
|
||||||
big_test: test.rb big_test.code
|
big_test: test.rb big_test.code
|
||||||
@./test.rb big
|
@./test.rb big $(BINFORMAT)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f test*.asm test*.o
|
@rm -f test*.asm test*.o
|
||||||
|
|
|
||||||
13
test/test.rb
13
test/test.rb
|
|
@ -5,12 +5,21 @@ $LOAD_PATH << ROOT
|
||||||
|
|
||||||
require 'build'
|
require 'build'
|
||||||
|
|
||||||
|
# usage: build.rb <func> [binformat]
|
||||||
|
#
|
||||||
|
# ([format] will go before [binformat])
|
||||||
|
|
||||||
def main
|
def main
|
||||||
func = ARGV[0].to_s
|
func = ARGV[0].to_s
|
||||||
|
format = 'asm' # 'bin' only assembles one or two
|
||||||
|
# instructions right now, but support
|
||||||
|
# is in place
|
||||||
|
binformat = (ARGV[1] ? ARGV[1] : 'elf').downcase
|
||||||
|
platform = `uname -s`.chomp.downcase
|
||||||
print "testing #{func} ... "
|
print "testing #{func} ... "
|
||||||
success = run( build("test_#{func}.code") )
|
success = run( build("test_#{func}.code", platform, format, binformat) )
|
||||||
puts success == 0 ? "pass" : "FAIL! (#{success})"
|
puts success == 0 ? "pass" : "FAIL! (#{success})"
|
||||||
exit(success)
|
exit(success.to_i)
|
||||||
end
|
end
|
||||||
|
|
||||||
main if $0 == __FILE__
|
main if $0 == __FILE__
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue