mirror of
https://github.com/samsonjs/compiler.git
synced 2026-03-25 08:45:52 +00:00
[NEW] Binary assembler outputs working machine code and Mach-O object files that can be linked into working executables.
This commit is contained in:
parent
47ce9043e4
commit
bc6a3d4d3b
25 changed files with 2082 additions and 894 deletions
13
Makefile
13
Makefile
|
|
@ -1,17 +1,4 @@
|
|||
test:
|
||||
cd test && make all
|
||||
|
||||
elfwriter: elfwriter.c
|
||||
gcc -o elfwriter elfwriter.c -lelf
|
||||
|
||||
test_elf: elfwriter build
|
||||
./elfwriter test.bin 4 test_elf.o
|
||||
ld -o test_elf test_elf.o
|
||||
./test_elf
|
||||
|
||||
clean:
|
||||
@rm -f elfwriter
|
||||
@rm -f test_elf.o
|
||||
@rm -f test_elf
|
||||
|
||||
.PHONY: test
|
||||
|
|
|
|||
31
asm/asm.rb
31
asm/asm.rb
|
|
@ -5,6 +5,8 @@
|
|||
# sjs
|
||||
# may 2009
|
||||
|
||||
require 'asm/registers'
|
||||
|
||||
module Assembler
|
||||
|
||||
# Abstract class for common functionality between different code
|
||||
|
|
@ -14,41 +16,14 @@ module Assembler
|
|||
|
||||
attr_reader :platform
|
||||
|
||||
def initialize(platform='linux', *args)
|
||||
def initialize(platform)
|
||||
@platform = platform
|
||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
||||
@num_labels = 0 # Used to generate unique labels.
|
||||
@num_labels_with_suffix = Hash.new(0)
|
||||
|
||||
# Maps names to locations.
|
||||
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
|
||||
|
||||
end
|
||||
|
||||
def block(*args, &blk)
|
||||
instance_eval(&blk)
|
||||
end
|
||||
|
||||
def output
|
||||
raise "#{self.class} is supposed to implement this method!"
|
||||
end
|
||||
|
||||
def var(name)
|
||||
@vars[name]
|
||||
end
|
||||
alias_method :var?, :var
|
||||
|
||||
# Generate a unique label.
|
||||
def label(suffix=nil)
|
||||
@num_labels += 1
|
||||
if suffix
|
||||
@num_labels_with_suffix[suffix] += 1
|
||||
suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
|
||||
end
|
||||
name = "L#{sprintf "%06d", @num_labels}#{suffix}"
|
||||
return name
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
|||
908
asm/binary.rb
908
asm/binary.rb
File diff suppressed because it is too large
Load diff
320
asm/cstruct.rb
Normal file
320
asm/cstruct.rb
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
# Struct does some trickery with custom allocators so we can't subclass it without writing C.
|
||||
# Instead we define a CStruct class that does something similar enough for our purpose. It is
|
||||
# subclassed just like any other class. A nice side-effect of this syntax is that it is always
|
||||
# clear that a CStruct is just a class and instances of the struct are objects.
|
||||
#
|
||||
# Some light metaprogramming is used to make the following syntax possible:
|
||||
#
|
||||
# class MachHeader < CStruct
|
||||
# uint :magic
|
||||
# int :cputype
|
||||
# int :cpusubtype
|
||||
# ...
|
||||
# int :flags
|
||||
# end
|
||||
#
|
||||
# Inheritance works as you would expect.
|
||||
#
|
||||
# class LoadCommand < CStruct
|
||||
# uint32 :cmd
|
||||
# uint32 :cmdsize
|
||||
# end
|
||||
#
|
||||
# # inherits cmd and cmdsize as the first 2 fields
|
||||
# class SegmentCommand < LoadCommand
|
||||
# string :segname, 16
|
||||
# uint32 :vmaddr
|
||||
# uint32 :vmsize
|
||||
# end
|
||||
#
|
||||
# Nothing tricky or confusing there. Members of a CStruct class are declared in the
|
||||
# class definition. A different definition using a more static approach probably wouldn't
|
||||
# be very hard... if performance is critical ... but then why are you using Ruby? ;-)
|
||||
|
||||
class CStruct
|
||||
|
||||
|
||||
###################
|
||||
# Class Constants #
|
||||
###################
|
||||
|
||||
# Size in bytes.
|
||||
SizeMap = {
|
||||
:int8 => 1,
|
||||
:uint8 => 1,
|
||||
:int16 => 2,
|
||||
:uint16 => 2,
|
||||
:int32 => 4,
|
||||
:uint32 => 4,
|
||||
:string => lambda { |*opts| opts.first }, # first opt is size
|
||||
# the last 3 are to make the language more C-like
|
||||
:int => 4,
|
||||
:uint => 4,
|
||||
:char => 1
|
||||
}
|
||||
|
||||
# 32-bit
|
||||
PackMap = {
|
||||
:int8 => 'c',
|
||||
:uint8 => 'C',
|
||||
:int16 => 's',
|
||||
:uint16 => 'S',
|
||||
:int32 => 'i',
|
||||
:uint32 => 'I',
|
||||
:string => lambda do |str, *opts|
|
||||
len = opts.first
|
||||
str.ljust(len, "\0")[0, len]
|
||||
end,
|
||||
# a few C-like names
|
||||
:int => 'i',
|
||||
:uint => 'I',
|
||||
:char => 'C'
|
||||
}
|
||||
|
||||
# Only needed when unpacking is different from packing, i.e. strings w/ lambdas in PackMap.
|
||||
UnpackMap = {
  # Strings are fixed-width fields padded with null bytes. The unpacker is
  # called with the remaining input and the member's options (the first
  # option is the field width in bytes). It consumes exactly that many
  # bytes from the front of the input, so whatever member follows starts at
  # the right place, and returns the field with its trailing null padding
  # removed.
  :string => lambda do |str, *opts|
    len = opts.first
    field = str.slice!(0, len) || ''
    # \z rather than $ so an embedded newline can't end the match early.
    field.sub(/\0*\z/, '')
  end
}
|
||||
|
||||
##########################
|
||||
# Class Instance Methods #
|
||||
##########################
|
||||
|
||||
# Note: const_get and const_set are used so the constants are bound at runtime, to the
|
||||
# real class that has subclassed CStruct. I figured Ruby would do this but I haven't
|
||||
# looked at the implementation of constants so it might be tricky.
|
||||
#
|
||||
# All of this could probably be avoided with Ruby 1.9 and private class variables.
|
||||
# That is definitely something to experiment with.
|
||||
|
||||
class <<self
|
||||
|
||||
def inherited(subclass)
|
||||
subclass.instance_eval do
|
||||
|
||||
# These "constants" are only constant references. Structs can be modified.
|
||||
# After the struct is defined it is still open, but good practice would be not
|
||||
# to change a struct after it has been defined.
|
||||
#
|
||||
# To support inheritance properly we try to get these constants from the enclosing
|
||||
# scope (and clone them before modifying them!), and default to empty, er, defaults.
|
||||
|
||||
members = const_get(:Members).clone rescue []
|
||||
member_index = const_get(:MemberIndex).clone rescue {}
|
||||
member_sizes = const_get(:MemberSizes).clone rescue {}
|
||||
member_opts = const_get(:MemberOptions).clone rescue {}
|
||||
|
||||
const_set(:Members, members)
|
||||
const_set(:MemberIndex, member_index)
|
||||
const_set(:MemberSizes, member_sizes)
|
||||
const_set(:MemberOptions, member_opts)
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Define a method for each size name, and when that method is called it updates
|
||||
# the struct class accordingly.
|
||||
SizeMap.keys.each do |type|
|
||||
|
||||
define_method(type) do |name, *args|
|
||||
name = name.to_sym
|
||||
const_get(:MemberIndex)[name] = const_get(:Members).size
|
||||
const_get(:MemberSizes)[name] = type
|
||||
const_get(:MemberOptions)[name] = args
|
||||
const_get(:Members) << name
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
# Return the number of members.
|
||||
def size
|
||||
const_get(:Members).size
|
||||
end
|
||||
alias_method :length, :size
|
||||
|
||||
# Return the number of bytes occupied in memory or on disk.
|
||||
def bytesize
|
||||
const_get(:Members).inject(0) { |size, name| size + sizeof(name) }
|
||||
end
|
||||
|
||||
def sizeof(name)
|
||||
value = SizeMap[const_get(:MemberSizes)[name]]
|
||||
value.respond_to?(:call) ? value.call(*const_get(:MemberOptions)[name]) : value
|
||||
end
|
||||
|
||||
def new_from_bin(bin)
|
||||
new_struct = new
|
||||
new_struct.unserialize(bin)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
####################
|
||||
# Instance Methods #
|
||||
####################
|
||||
|
||||
attr_reader :values
|
||||
|
||||
def initialize(*args)
|
||||
@values = args
|
||||
end
|
||||
|
||||
# Pack every member's value into one binary string, in declaration order.
# Members whose pack pattern is a String go through Array#pack; the rest
# (lambdas, e.g. for strings) are called with the value and the member's
# options.
def serialize
  members.zip(pack_pattern, @values).map do |name, packer, value|
    if packer.is_a?(String)
      [value].pack(packer)
    else
      packer.call(value, *member_options[name])
    end
  end.join
end
|
||||
|
||||
def unserialize(bin)
|
||||
bin = bin.clone
|
||||
@values = []
|
||||
membs = members.clone
|
||||
unpack_pattern.each do |patt|
|
||||
name = membs.shift
|
||||
if patt.is_a?(String)
|
||||
@values += bin.unpack(patt)
|
||||
bin.slice!(0, sizeof(name))
|
||||
else
|
||||
@values << patt.call(bin, *member_options[name])
|
||||
end
|
||||
end
|
||||
self
|
||||
end
|
||||
|
||||
def pack_pattern
|
||||
members.map { |name| PackMap[member_sizes[name]] }
|
||||
end
|
||||
|
||||
def unpack_pattern
|
||||
members.map { |name| UnpackMap[member_sizes[name]] || PackMap[member_sizes[name]] }
|
||||
end
|
||||
|
||||
def [](name_or_idx)
|
||||
case name_or_idx
|
||||
|
||||
when Numeric
|
||||
idx = name_or_idx
|
||||
@values[idx]
|
||||
|
||||
when String, Symbol
|
||||
name = name_or_idx.to_sym
|
||||
@values[member_index[name]]
|
||||
|
||||
else
|
||||
raise ArgumentError, "expected name or index, got #{name_or_idx.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
def []=(name_or_idx, value)
|
||||
case name_or_idx
|
||||
|
||||
when Numeric
|
||||
idx = name_or_idx
|
||||
@values[idx] = value
|
||||
|
||||
when String, Symbol
|
||||
name = name_or_idx.to_sym
|
||||
@values[member_index[name]] = value
|
||||
|
||||
else
|
||||
raise ArgumentError, "expected name or index, got #{name_or_idx.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
# Two structs are equal when the other object is an instance of this
# struct's class (or a subclass) and holds the same values. The class check
# comes first so that comparing against an arbitrary object returns false
# instead of raising.
def ==(other)
  other.is_a?(self.class) && other.values == @values
end
|
||||
|
||||
# Some of these are just to quack like Ruby's built-in Struct. YAGNI, but can't hurt either.
|
||||
|
||||
def each(&block)
|
||||
@values.each(&block)
|
||||
end
|
||||
|
||||
def each_pair(&block)
|
||||
members.zip(@values).each(&block)
|
||||
end
|
||||
|
||||
def size
|
||||
members.size
|
||||
end
|
||||
alias_method :length, :size
|
||||
|
||||
def sizeof(name)
|
||||
self.class.sizeof(name)
|
||||
end
|
||||
|
||||
def bytesize
|
||||
self.class.bytesize
|
||||
end
|
||||
|
||||
alias_method :to_a, :values
|
||||
|
||||
|
||||
# A few convenience methods.
|
||||
|
||||
def members
|
||||
self.class::Members
|
||||
end
|
||||
|
||||
def member_index
|
||||
self.class::MemberIndex
|
||||
end
|
||||
|
||||
def member_sizes
|
||||
self.class::MemberSizes
|
||||
end
|
||||
|
||||
def member_options
|
||||
self.class::MemberOptions
|
||||
end
|
||||
|
||||
# The last expression is returned, so return self instead of junk.
|
||||
self
|
||||
end
|
||||
|
||||
|
||||
# a small test
|
||||
if $0 == __FILE__
|
||||
class MachHeader < CStruct
|
||||
uint :magic
|
||||
int :cputype
|
||||
int :cpusubtype
|
||||
string :segname, 16
|
||||
end
|
||||
puts MachHeader::Members.inspect
|
||||
puts MachHeader::MemberIndex.inspect
|
||||
puts MachHeader::MemberSizes.inspect
|
||||
puts "# of MachHeader members: " + MachHeader.size.to_s + ", size in bytes: " + MachHeader.bytesize.to_s
|
||||
mh = MachHeader.new(0xfeedface, 7, 3, "foobar")
|
||||
puts "magic(#{MachHeader.sizeof(:magic)}): " + mh[:magic].inspect
|
||||
puts "cputype(#{MachHeader.sizeof(:cputype)}): " + mh[:cputype].inspect
|
||||
puts "cpusubtype(#{MachHeader.sizeof(:cpusubtype)}): " + mh[:cpusubtype].inspect
|
||||
puts "segname(#{MachHeader.sizeof(:segname)}): " + mh[:segname].inspect
|
||||
puts mh.pack_pattern.inspect
|
||||
binstr = mh.serialize
|
||||
puts "values: " + mh.values.inspect
|
||||
newmh = MachHeader.new_from_bin(binstr)
|
||||
puts "new values: " + newmh.values.inspect
|
||||
newbinstr = newmh.serialize
|
||||
puts "serialized: " + binstr.inspect
|
||||
puts "unserialized: " + newbinstr.inspect
|
||||
puts "new == old ? " + (newbinstr == binstr).to_s
|
||||
end
|
||||
7
asm/elfsymtab.rb
Normal file
7
asm/elfsymtab.rb
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
module Assembler
|
||||
|
||||
class ELFSymtab < Symtab
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
9
asm/elfwriter.rb
Normal file
9
asm/elfwriter.rb
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
module Assembler
|
||||
|
||||
class ELFWriter < ObjWriter
|
||||
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
164
asm/macho.rb
Normal file
164
asm/macho.rb
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
require 'asm/cstruct'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h on
|
||||
# Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h> and <mach-o/nlist.h>.
|
||||
|
||||
module MachO
|
||||
|
||||
|
||||
###############
|
||||
# Mach header #
|
||||
###############
|
||||
|
||||
# Appears at the beginning of every Mach object file.
|
||||
class MachHeader < CStruct
|
||||
uint32 :magic
|
||||
int32 :cputype
|
||||
int32 :cpusubtype
|
||||
uint32 :filetype
|
||||
uint32 :ncmds
|
||||
uint32 :sizeofcmds
|
||||
uint32 :flags
|
||||
end
|
||||
|
||||
# Values for the magic field.
|
||||
MH_MAGIC = 0xfeedface # Mach magic number.
|
||||
MH_CIGAM = 0xcefaedfe # In the reverse byte-order.
|
||||
|
||||
# Values for the filetype field.
|
||||
MH_OBJECT = 0x1
|
||||
MH_EXECUTE = 0x2
|
||||
MH_FVMLIB = 0x3
|
||||
MH_CORE = 0x4
|
||||
MH_PRELOAD = 0x5
|
||||
MH_DYLIB = 0x6
|
||||
MH_DYLINKER = 0x7
|
||||
MH_BUNDLE = 0x8
|
||||
MH_DYLIB_STUB = 0x9
|
||||
MH_DSYM = 0xa
|
||||
|
||||
# CPU types and subtypes (only Intel for now).
|
||||
CPU_TYPE_X86 = 7
|
||||
CPU_TYPE_I386 = CPU_TYPE_X86
|
||||
CPU_SUBTYPE_X86_ALL = 3
|
||||
|
||||
|
||||
############################
|
||||
# Load commands / segments #
|
||||
############################
|
||||
|
||||
class LoadCommand < CStruct
|
||||
uint32 :cmd
|
||||
uint32 :cmdsize
|
||||
end
|
||||
|
||||
# Values for the cmd member of LoadCommand CStructs (incomplete!).
|
||||
LC_SEGMENT = 0x1
|
||||
LC_SYMTAB = 0x2
|
||||
LC_SYMSEG = 0x3
|
||||
LC_THREAD = 0x4
|
||||
LC_UNIXTHREAD = 0x5
|
||||
|
||||
class SegmentCommand < LoadCommand
|
||||
string :segname, 16
|
||||
uint32 :vmaddr
|
||||
uint32 :vmsize
|
||||
uint32 :fileoff
|
||||
uint32 :filesize
|
||||
int32 :maxprot
|
||||
int32 :initprot
|
||||
uint32 :nsects
|
||||
uint32 :flags
|
||||
end
|
||||
|
||||
|
||||
# Values for protection fields, maxprot and initprot.
|
||||
VM_PROT_NONE = 0x00
|
||||
VM_PROT_READ = 0x01
|
||||
VM_PROT_WRITE = 0x02
|
||||
VM_PROT_EXECUTE = 0x04
|
||||
VM_PROT_NO_CHANGE = 0x08
|
||||
VM_PROT_COPY = 0x10
|
||||
|
||||
|
||||
class SymtabCommand < LoadCommand
|
||||
uint32 :symoff # Points to an array of Nlist structs.
|
||||
uint32 :nsyms # Number of entries in said array.
|
||||
uint32 :stroff # Offset of the string table.
|
||||
uint32 :strsize # Size of the string table in bytes.
|
||||
end
|
||||
|
||||
|
||||
LoadCommandStructMap = {
|
||||
LC_SEGMENT => SegmentCommand,
|
||||
LC_SYMTAB => SymtabCommand
|
||||
}
|
||||
|
||||
|
||||
############
|
||||
# Sections #
|
||||
############
|
||||
|
||||
class Section < CStruct
|
||||
string :sectname, 16
|
||||
string :segname, 16
|
||||
uint32 :addr
|
||||
uint32 :size
|
||||
uint32 :offset
|
||||
uint32 :align
|
||||
uint32 :reloff
|
||||
uint32 :nreloc
|
||||
uint32 :flags
|
||||
uint32 :reserved1
|
||||
uint32 :reserved2
|
||||
end
|
||||
|
||||
# Values for the type bitfield (mask 0x000000ff) of the flags field.
|
||||
# (incomplete!)
|
||||
S_REGULAR = 0x0
|
||||
S_ZEROFILL = 0x1
|
||||
S_CSTRING_LITERALS = 0x2
|
||||
|
||||
|
||||
|
||||
########################
|
||||
# Symbol table support #
|
||||
########################
|
||||
|
||||
|
||||
# Nlist is used to describe symbols.
|
||||
class Nlist < CStruct
|
||||
uint32 :n_strx # Index into string table. Index of zero is the empty string.
|
||||
uint8 :n_type # Type flag (see below).
|
||||
uint8 :n_sect # Section number (from 1) or NO_SECT.
|
||||
uint16 :n_desc # TODO See <mach-o/stab.h>.
|
||||
uint32 :n_value # The symbol's value (or stab offset).
|
||||
end
|
||||
|
||||
# Type flag (see <mach-o/nlist.h> for more details)
|
||||
# ---------
|
||||
#
|
||||
# This field consists of four bitfields:
|
||||
#
|
||||
# uchar N_STAB : 3
|
||||
# uchar N_PEXT : 1
|
||||
# uchar N_TYPE : 3
|
||||
# uchar N_EXT : 1
|
||||
#
|
||||
N_STAB = 0xe0 # if any bits set => symbolic debugging info
|
||||
N_PEXT = 0x10 # private external symbol bit
|
||||
N_TYPE = 0x0e # mask for the type bits
|
||||
N_EXT = 0x01 # external symbol bit, set for external symbols (e.g. globals)
|
||||
|
||||
# Values for N_TYPE. (incomplete!)
|
||||
N_UNDF = 0x0 # undefined, n_sect == NO_SECT
|
||||
N_ABS = 0x2 # absolute, n_sect == NO_SECT
|
||||
N_SECT = 0xe # defined in section number n_sect
|
||||
|
||||
NO_SECT = 0
|
||||
MAX_SECT = 255
|
||||
|
||||
end
|
||||
364
asm/machofile.rb
Normal file
364
asm/machofile.rb
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
require 'asm/macho'
|
||||
|
||||
module Assembler
|
||||
|
||||
class MachOFile
|
||||
|
||||
include MachO
|
||||
|
||||
attr_accessor :header, :load_commands, :sections, :data
|
||||
attr_accessor :current_segment
|
||||
attr_accessor :text_offset
|
||||
|
||||
def initialize(filetype=MH_OBJECT)
|
||||
@header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0)
|
||||
@load_commands = [] # All defined segments.
|
||||
@sections = {} # Map of segment names to lists of segments.
|
||||
@section_disk_size = Hash.new(0) # Sections store their VM size so we need their sizes on disk.
|
||||
@data = [] # Blobs of data that appear at the end of the file.
|
||||
# (text, data, symtab, ...)
|
||||
@current_segment = nil # An alias for the last defined segment.
|
||||
|
||||
# Leave room for __PAGEZERO, a single 0x1000 (4kb) page at 0x0. The
|
||||
# __TEXT segment starts at 0x1000 and contains mach headers and load
|
||||
# commands.
|
||||
@text_offset = 0x1000
|
||||
end
|
||||
|
||||
|
||||
# Define a LoadCommand in this file. The header's ncmds and sizeofcmds
|
||||
# fields are updated automatically to keep things in sync. If a block is
|
||||
# given it is passed the new LoadCommand struct after all other
|
||||
# initialization has been done.
|
||||
#
|
||||
# Other methods that create any type of load command should use this
|
||||
# method to do so. Right now the only types supported are LC_SEGMENT
|
||||
# and LC_SYMTAB. Modify asm/macho.rb to add structs for other types, and
|
||||
# add them to LoadCommandStructMap.
|
||||
|
||||
def load_command(cmdtype)
|
||||
struct = LoadCommandStructMap[cmdtype]
|
||||
unless struct
|
||||
raise "unsupported load command type: #{cmdtype.inspect}," +
|
||||
" supported types: #{LoadCommandStructMap.keys.sort.inspect}"
|
||||
end
|
||||
|
||||
# Fill in all the unknown fields with 0, this is nonsense for
|
||||
# string fields but that doesn't really matter.
|
||||
dummy_vals = [0] * (struct::Members.size - 2)
|
||||
|
||||
# cmd cmdsize ...
|
||||
command = struct.new(cmdtype, struct.bytesize, *dummy_vals)
|
||||
|
||||
@load_commands << command
|
||||
|
||||
@header[:ncmds] += 1
|
||||
@header[:sizeofcmds] += command.bytesize
|
||||
|
||||
yield(command) if block_given?
|
||||
|
||||
return command
|
||||
end
|
||||
|
||||
|
||||
# Define a segment in this file. If a block is given it is passed
|
||||
# the new segment. You can chain calls to segment, it returns self.
|
||||
#
|
||||
# Mach object files should only contain one anonymous segment. This
|
||||
# is not checked but should be kept in mind when crafting files.
|
||||
def segment(name, &block)
|
||||
@current_segment = load_command(LC_SEGMENT) do |seg|
|
||||
seg[:segname] = name
|
||||
block.call(seg) if block
|
||||
end
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Define a section under the given segment. nsects and cmdsize are
|
||||
# updated automatically. segname can't be derived from the segment
|
||||
# that this section is defined under, as they can differ.
|
||||
#
|
||||
# Mach object files have the __text, __data, and other common
|
||||
# sections all defined under one anonymous segment, but their segment
|
||||
# names reflect their final positions after linking. The linker plonks
|
||||
# them in the segment that they name.
|
||||
def section(name, segname, data='', vmsize=data.size,
            segment=@current_segment, type=S_REGULAR)

  unless segment
    raise "cannot define section #{name.inspect}: no segment has been defined"
  end

  # Create the new section. Section members, in order, are: sectname,
  # segname, addr, size, offset, align, reloff, nreloc, flags, reserved1,
  # reserved2. The section type is a bitfield of flags, so that is where
  # it is stored.
  sect = Section.new(name, segname, 0, vmsize, 0, 0, 0, 0, type, 0, 0)

  # Add this section to the map of segment names to sections.
  (@sections[segment[:segname]] ||= []) << sect

  # Sections store their VM size, so remember the size on disk separately.
  @section_disk_size[name] = data.size
  @data << data if data.size > 0

  # Update the header.
  @header[:sizeofcmds] += sect.bytesize

  # Update the segment.
  segment[:nsects] += 1
  segment[:cmdsize] += sect.bytesize

  yield(sect) if block_given?

  return sect
end
|
||||
|
||||
|
||||
|
||||
# Define a standard text section under the current segment (if present).
|
||||
#
|
||||
# If there is no current segment then we act according to the file's type
|
||||
# (specified in the header). Segments are created if they do not exist.
|
||||
#
|
||||
# When it is MH_OBJECT the text section is defined under a single,
|
||||
# nameless segment, but the section's segment name is set to the name
|
||||
# given here.
|
||||
#
|
||||
# For MH_EXECUTE files the text section goes under the segment with the
|
||||
# name given (__TEXT).
|
||||
|
||||
def text(data, sectname='__text', segname='__TEXT')
|
||||
unless @current_segment
|
||||
segment(segname_based_on_filetype(segname)) do |seg|
|
||||
seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
|
||||
seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
|
||||
end
|
||||
end
|
||||
|
||||
section(sectname, segname, data) do |sect|
|
||||
sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS
|
||||
end
|
||||
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Define a standard data section under the current segment (if present).
|
||||
# This behaves similarly to the text method.
|
||||
#
|
||||
def data(data, sectname='__data', segname='__DATA')
|
||||
unless @current_segment
|
||||
segment(segname_based_on_filetype(segname)) do |seg|
|
||||
seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||
seg[:initprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||
end
|
||||
end
|
||||
|
||||
section(sectname, segname, data)
|
||||
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Define a standard const section under the current segment (if present).
|
||||
# This behaves similarly to the data method.
|
||||
#
|
||||
def const(data, sectname='__const', segname='__DATA')
|
||||
unless @current_segment
|
||||
segment(segname_based_on_filetype(segname)) do |seg|
|
||||
seg[:maxprot] = VM_PROT_READ
|
||||
seg[:initprot] = VM_PROT_READ
|
||||
end
|
||||
end
|
||||
|
||||
section(sectname, segname, data)
|
||||
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Define a standard BSS section under the current segment (if present).
|
||||
# This behaves similarly to the data method but accepts a VM size instead
|
||||
# of a blob, and no data is written to file since this section is for
|
||||
# uninitialized data.
|
||||
#
|
||||
def bss(vmsize, sectname='__bss', segname='__DATA')
|
||||
unless @current_segment
|
||||
segment(segname_based_on_filetype(segname)) do |seg|
|
||||
seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||
seg[:initprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||
end
|
||||
end
|
||||
|
||||
section(sectname, segname, '', vmsize)
|
||||
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Define a symbol table. This should usually be placed at the end of the
|
||||
# file.
|
||||
#
|
||||
# This function is overloaded to accept either an array of Nlist structs
|
||||
# packed into a byte string (i.e. a C array) and a string table, or a
|
||||
# single parameter: any type of Symtab.
|
||||
|
||||
def symtab(nlist_ary_or_symtab, stab=nil)
|
||||
if stab.nil?
|
||||
symtab = nlist_ary_or_symtab
|
||||
stab = symtab.stab
|
||||
nlist_ary = symtab.nlist_ary
|
||||
else
|
||||
nlist_ary = nlist_ary_or_symtab
|
||||
end
|
||||
|
||||
load_command(LC_SYMTAB) do |st|
|
||||
st[:nsyms] = nlist_ary.size
|
||||
st[:strsize] = stab.size
|
||||
# symoff and stroff are filled in when offsets are recalculated.
|
||||
end
|
||||
|
||||
# puts ">>> Defining symbol table:"
|
||||
# puts ">>> #{nlist_ary.size} symbols"
|
||||
# puts ">>> stab = #{stab.inspect}"
|
||||
# puts ">>> nlist_ary = #{nlist_ary.inspect}"
|
||||
# puts ">>> (serialized) = #{nlist_ary.map{|n|n.serialize}.join.inspect}"
|
||||
|
||||
@data << nlist_ary.map {|n| n.serialize}.join
|
||||
@data << stab
|
||||
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Serialize the entire MachO file into a byte string. This is simple
|
||||
# thanks to CStruct#serialize.
|
||||
|
||||
def serialize
  # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_commands.size)
  # ... perhaps an option to recalculate such data as well.

  recalculate_offsets

  # |------------------|
  # |   Mach Header    |
  # |------------------|
  # |   Segment 1      |
  # |     Section 1    | ---
  # |     Section 2    | --|--
  # |     ...          |   | |
  # |   Segment 2      |   | |
  # |     Section 4    |   | |
  # |     Section 5    |   | |
  # |     ...          |   | |
  # |   ...            |   | |
  # |   [Symtab cmd]   |   | |
  # |------------------|   | |
  # |   Section data 1 | <-- |
  # |   Section data 2 | <----
  # |   ...            |
  # |   [Symtab data]  |
  # |------------------|

  # dump the mach header
  obj = @header.serialize

  # dump each load command (which include the section headers under them)
  obj += @load_commands.map do |cmd|
    # Only segment commands own section headers, and a segment may have
    # been defined without any sections at all.
    sects = cmd[:cmd] == LC_SEGMENT ? (@sections[cmd[:segname]] || []) : []
    sects.inject(cmd.serialize) do |data, sect|
      data + sect.serialize
    end
  end.join

  # and finally dump the blobs at the end
  obj += @data.join

  return obj
end
|
||||
|
||||
|
||||
# Update the file offsets in SegmentCommand, SymtabCommand, and Section structs.
|
||||
|
||||
def recalculate_offsets

  # Maintain the offset into the file. This is used to update the various
  # structures. It starts just past the Mach header.
  offset = @header.bytesize

  # First pass over load commands. Most sizes are filled in here.
  @load_commands.each do |cmd|
    case cmd[:cmd]

    when LC_SEGMENT
      seg = cmd
      # A segment may have been defined without any sections.
      sections = @sections[seg[:segname]] || []
      section_size = sections.size * Section.bytesize
      section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] }
      section_disk_size = sections.inject(0) do |total, sect|
        total + @section_disk_size[sect[:sectname]]
      end

      ### TODO this should be redundant. try commenting it out one day.
      seg[:nsects] = sections.size
      seg[:cmdsize] = seg.bytesize + section_size
      ###

      seg[:vmsize] = section_vm_size
      seg[:filesize] = section_disk_size

    when LC_SYMTAB
      # nop

    else
      raise "unsupported load command: #{cmd.inspect}"
    end

    offset += cmd[:cmdsize]
  end

  # offset now points to the end of the Mach-O headers, or the beginning
  # of the binary blobs of section data at the end.

  # Second pass over load commands. Fill in file offsets.
  @load_commands.each do |cmd|
    case cmd[:cmd]

    when LC_SEGMENT
      seg = cmd
      sections = @sections[seg[:segname]] || []
      seg[:fileoff] = offset
      sections.each do |sect|
        sect[:offset] = offset
        offset += @section_disk_size[sect[:sectname]]
      end

    when LC_SYMTAB
      # The Nlist array comes first, immediately followed by the string table.
      st = cmd
      st[:symoff] = offset
      offset += st[:nsyms] * Nlist.bytesize
      st[:stroff] = offset
      offset += st[:strsize]

    # No else clause is necessary, the first iteration should have caught them.

    end

  end # @load_commands.each

end # def recalculate_offsets
|
||||
|
||||
|
||||
#######
|
||||
private
|
||||
#######
|
||||
|
||||
# Pick the name of the segment that sections are defined under, based on
# the file type in the header: object files use a single nameless segment,
# executables use the segment name given. Any other file type raises.
def segname_based_on_filetype(segname)
  case @header[:filetype]
  # `then` rather than the 1.8-only `when X: value` form, which no longer
  # parses on Ruby 1.9 and later.
  when MH_OBJECT  then ''
  when MH_EXECUTE then segname
  else
    raise "unsupported MachO file type! #{@header.inspect}"
  end
end
|
||||
|
||||
|
||||
end # class MachOFile
|
||||
|
||||
end # module Assembler
|
||||
29
asm/machosym.rb
Normal file
29
asm/machosym.rb
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
require 'asm/macho'
|
||||
|
||||
module Assembler

  # A symbol destined for a Mach-O symbol table: a name plus the type,
  # segment number, descriptor and value that go into its Nlist entry.
  class MachOSym

    attr_accessor :name, :type, :segnum, :desc, :value

    def initialize(name, type, segnum, desc, value)
      @name, @type, @segnum, @desc, @value = name, type, segnum, desc, value
    end

    # Build the Nlist struct for this symbol. strx is the index of the
    # symbol's name in the string table.
    def to_nlist(strx)
      fields = [strx, @type, @segnum, @desc, @value]
      MachO::Nlist.new(*fields)
    end

    # A symbol prints as its name.
    def to_s
      @name
    end

  end

end
|
||||
77
asm/machosymtab.rb
Normal file
77
asm/machosymtab.rb
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
require 'asm/macho'
|
||||
require 'asm/machosym'
|
||||
require 'asm/symtab'
|
||||
|
||||
module Assembler
|
||||
|
||||
class MachOSymtab < Symtab
|
||||
|
||||
include MachO
|
||||
|
||||
def const_offset
|
||||
return 0x2000
|
||||
end
|
||||
|
||||
def bss_offset
|
||||
# TODO figure out how to calculate these, or how to let the linker do it!
|
||||
# ... relocation tables perhaps?
|
||||
return 0x2800
|
||||
end
|
||||
|
||||
def all_symbols
|
||||
symbols = []
|
||||
|
||||
# Functions (section #1, __text)
|
||||
#
|
||||
# All labels are exported. This should be changed and only functions exported!
|
||||
# TODO fixme ...
|
||||
#
|
||||
# Note: Sorting a Ruby hash gives an alist, e.g. [[<key>, <value>], ...]
|
||||
# We can use map on it as if it were a hash so it works nicely.
|
||||
#
|
||||
symbols +=
|
||||
@labels.sort { |a,b| a[1] <=> b[1] }.
|
||||
map do |name,addr|
|
||||
MachOSym.new(name, N_SECT | N_EXT, 1, 0, addr)
|
||||
end
|
||||
|
||||
# Constants (section #2, __const)
|
||||
symbols += @consts.sort { |a,b| a[1] <=> b[1] }.
|
||||
map do |name, addr|
|
||||
MachOSym.new(name, N_SECT, 2, 0, addr)
|
||||
end
|
||||
|
||||
# Variables (section #3, __bss)
|
||||
#
|
||||
# TODO FIXME the last var exported ends up after main somewhere... WTF?!
|
||||
symbols += @vars.sort { |a,b| a[1] <=> b[1] }.
|
||||
map do |name, addr|
|
||||
MachOSym.new(name, N_SECT, 3, 0, addr)
|
||||
end
|
||||
|
||||
return symbols
|
||||
end
|
||||
|
||||
def nlist_ary
|
||||
symbols = {}
|
||||
strx = 1
|
||||
ary = []
|
||||
all_symbols.each do |sym|
|
||||
key = sym.name.to_sym
|
||||
unless symbols.has_key?(key)
|
||||
symbols[key] = strx
|
||||
strx += sym.name.length + 1 # +1 for the null byte
|
||||
end
|
||||
ary << sym.to_nlist(symbols[key])
|
||||
end
|
||||
return ary
|
||||
end
|
||||
|
||||
# Build the string table: every distinct symbol name, in order of first
# appearance, each terminated by a null byte.
def stab
  # nlist_ary hands out one string index per distinct name, so a name that
  # occurs more than once must be stored only once here or every index
  # after it would point at the wrong string.
  names = all_symbols.map { |sym| sym.to_s }.uniq

  # The empty strings result in a string that begins and ends with a null
  # byte. The leading one keeps string index 0 as the empty string.
  ([''] + names + ['']).join("\0")
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
26
asm/machowriter.rb
Normal file
26
asm/machowriter.rb
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
### XXX development hack!
|
||||
# Define a hard-coded symbol table containing a single external symbol,
# _main, at address 0 of section 1, and hand it to symtab.
def stub_symtab!
  text_segnum = 1
  symtab_stub = {
    :functions => [
      # name      type              segnum        addr
      ['_main',   N_SECT | N_EXT,   text_segnum,  0x0]
    ]
  }

  nlist_ary = []
  stab = "\0".dup   # index 0 of the string table is the empty string
  strx = 1          # index of the next name within the string table

  symtab_stub[:functions].each do |name, type, segnum, addr|
    nlist_ary << MachO::Nlist.new(strx, type, segnum, 0, addr)
    stab << "#{name}\0"
    strx += name.length + 1   # +1 for the null byte
  end
  symtab(nlist_ary, stab)
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
26
asm/objwriter.rb
Normal file
26
asm/objwriter.rb
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
module Assembler

  # Raised when an abstract ObjWriter method is called without having been
  # overridden. The message is the name of the missing method.
  class UnimplementedMethodError < RuntimeError; end


  # Abstract base class.
  class ObjWriter

    # Write the result of serialize to the named file, opened in binary
    # mode. The file is closed when the block returns.
    def write!(filename)
      File.open(filename, 'wb') do |file|
        file.print(serialize)
      end
    end

    # Raise UnimplementedMethodError for the method called name. Note that
    # this replaces Kernel#fail for instances of this class.
    def fail(name)
      raise UnimplementedMethodError, name
    end

    # These methods must be defined for most uses of the library.
    #
    # The stubs accept (and ignore) any arguments. Concrete writers take
    # arguments for most of these, and a stub that accepted none would raise
    # ArgumentError before it could report the missing method.
    %w[header segment section text data bss symtab serialize].each do |name|
      define_method(name) { |*args| fail(name) }
    end

  end

end
|
||||
32
asm/registers.rb
Normal file
32
asm/registers.rb
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
require 'asm/regproxy'
|
||||
|
||||
module Assembler

  module Registers

    # Table of x86 registers of every supported width.  A register's
    # number is the position of its row in the outer array.  Within a row
    # the names are ordered by width, so the size of a register in bytes
    # is 2 ** (its position within the row).
    Registers = [
      [:al, :ax, :eax], # 0
      [:cl, :cx, :ecx], # 1
      [:dl, :dx, :edx], # 2
      [:bl, :bx, :ebx], # 3
      [:ah, :sp, :esp], # 4
      [:ch, :bp, :ebp], # 5
      [:dh, :si, :esi], # 6
      [:bh, :di, :edi]  # 7
    ]

    # Define one constant per register (AL, AX, EAX, ...).  Each is a
    # RegisterProxy built from the register's symbol, its width in bits
    # and its register number.  The proxies are used in effective address
    # calculations and as plain symbols representing registers.
    Registers.each_with_index do |family, number|
      family.each_with_index do |reg_name, width_index|
        bits = 8 << width_index # 8, 16 or 32
        const_set(reg_name.to_s.upcase, RegisterProxy.new(reg_name, bits, number))
      end
    end

  end

end
|
||||
67
asm/regproxy.rb
Normal file
67
asm/regproxy.rb
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
module Assembler

  # Acts like a register and can be used as the base or index in an
  # effective address.
  #
  # e.g. [EAX] or [ESI+EBX] or [EAX + 0xff] or [(EAX + EDX) * 2]
  #
  # The parentheses in the last form are required: Ruby evaluates * before
  # +, so EAX + EDX * 2 would call * on EDX, which has no index yet, and
  # raise "index must come first".
  class RegisterProxy

    attr_reader :name, :size, :regnum
    attr_reader :base, :index, :scale


    # name:   symbol naming the register (e.g. :eax)
    # size:   size of the register, as supplied by the caller
    # regnum: number of the register
    def initialize(name, size, regnum)
      @name = name    # attrs are read-only so sharing is ok
      @size = size
      @regnum = regnum
      @base = self
      @index = nil
      @scale = nil
    end


    # Return a copy of this register with +index+ attached.  The receiver
    # is left untouched.  Raises if an index is already present.
    def +(index)
      raise "index already specified" if @index
      new_reg = self.clone
      new_reg.instance_variable_set('@index', index)
      new_reg
    end


    # Return a copy of this register with +scale+ (1, 2, 4 or 8) attached.
    # The receiver is left untouched, mirroring +.
    #
    # Raises if there is no index yet, if a scale is already present, or
    # if the scale is not one of 1, 2, 4, 8.
    def *(scale)
      raise "index must come first" unless @index
      # Check the stored scale, not the argument (which is always given).
      raise "scale already specified" if @scale
      raise "unsupported scale: #{scale}" unless scale.to_s.match(/\A[1248]\z/)
      new_reg = self.clone
      new_reg.instance_variable_set('@scale', scale)
      new_reg
    end


    # The scale if one has been given, otherwise nil.
    def scale?
      @scale
    end


    # The index if one has been given, otherwise nil.
    def index?
      @index
    end


    # True when this is a plain register, with neither index nor scale.
    def register?
      @scale.nil? && @index.nil?
    end


    # Render as name, then "+index" and "*scale" when present.
    def to_s
      @name.to_s +
        (@index ? "+#{@index}" : '') +
        (@scale ? "*#{@scale}" : '')
    end


    def inspect
      to_s
    end

  end

end
|
||||
89
asm/symtab.rb
Normal file
89
asm/symtab.rb
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
module Assembler

  # Symbol table tracking variables (bss), constants, functions and
  # labels, along with the initial constant data and section sizes.
  class Symtab

    # Pack directives for constants, keyed by size in bytes.  4 keeps the
    # directive this class has always used.
    CONST_PACK_FORMATS = { 1 => 'c', 2 => 's', 4 => 'i', 8 => 'q' }

    attr_reader :const_data, :bss_size

    def initialize
      @vars = {}                # Map of variable names to addresses. (bss vars)
      @consts = {}              # Map of constant names to addresses.
      @funcs = {}               # Map of function names to addresses.

      # Initial data to load into memory (data for __DATA segment).
      @const_data = String.new

      @const_size = 0           # Size of const section.
      @bss_size = 0             # Size of bss section.

      # Map names to locations.
      @labels = Hash.new {|h, key| raise "undefined label: #{key}"}
      @num_labels = 0           # Used to generate unique labels.
      @num_labels_with_suffix = Hash.new(0)
    end


    ####
    ## NB: Concrete subclasses must define methods named:
    ##       bss_offset, and const_offset
    ####


    # Generate a unique label of the form L000042, or L000042_suffix_N
    # when a suffix is given (N counts the labels made with that suffix).
    def unique_label(suffix=nil)
      @num_labels += 1
      if suffix
        @num_labels_with_suffix[suffix] += 1
        suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
      end
      "L#{sprintf "%06d", @num_labels}#{suffix}"
    end

    # Record the address of a label.  Returns the label's name.
    def deflabel(name, addr)
      @labels[name] = addr
      name
    end


    # Address of a label.  Raises "undefined label: ..." if unknown.
    def lookup_label(name)
      @labels[name]
    end


    # Reserve +bytes+ bytes of bss for +name+, placed at the current end
    # of the bss section.  Returns the new bss size.
    def defvar(name, bytes)
      @vars[name] = @bss_size
      @bss_size += bytes
    end


    # Append a constant of +bytes+ bytes holding +value+ to the constant
    # data and record its offset.  Returns the constant data so far.
    #
    # The data written is exactly +bytes+ long so that the recorded
    # offsets of later constants match their position in const_data.
    # Raises ArgumentError for sizes other than 1, 2, 4 or 8.
    def defconst(name, value, bytes)
      format = CONST_PACK_FORMATS[bytes]
      raise ArgumentError, "unsupported constant size: #{bytes}" unless format
      @consts[name] = @const_size
      @const_size += bytes
      @const_data << [value].pack(format)
    end


    # Record the address of a function.
    def defun(name, addr)
      @funcs[name] = addr
    end


    # Address of a variable: bss_offset plus its offset within bss.
    # Raises "undefined variable: ..." if unknown, like lookup_label.
    def var(name)
      raise "undefined variable: #{name}" unless @vars.has_key?(name)
      bss_offset + @vars[name]
    end

    # Offset of a variable within bss, or nil if it is not defined.
    def var?(name)
      @vars[name]
    end

    # Address of a constant: const_offset plus its offset within the
    # constant data.  Raises "undefined constant: ..." if unknown.
    def const(name)
      raise "undefined constant: #{name}" unless @consts.has_key?(name)
      const_offset + @consts[name]
    end

    # Offset of a constant within the constant data, or nil if undefined.
    def const?(name)
      @consts[name]
    end

  end

end
|
||||
21
asm/text.rb
21
asm/text.rb
|
|
@ -3,9 +3,6 @@
|
|||
# sjs
|
||||
# may 2009
|
||||
|
||||
ROOT = __FILE__.sub(/\/asm\/text\.rb$/, '') unless defined? ROOT
|
||||
$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT)
|
||||
|
||||
require 'asm/asm'
|
||||
|
||||
module Assembler
|
||||
|
|
@ -15,8 +12,9 @@ module Assembler
|
|||
# correct machine code, which isn't trivial.
|
||||
class Text < AssemblerBase
|
||||
|
||||
def initialize(platform='linux')
|
||||
def initialize(platform)
|
||||
super
|
||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
||||
@data = ''
|
||||
@bss = ''
|
||||
@code = ''
|
||||
|
|
@ -39,6 +37,13 @@ module Assembler
|
|||
end
|
||||
end
|
||||
|
||||
|
||||
def var(name)
|
||||
@vars[name]
|
||||
end
|
||||
alias_method :var?, :var
|
||||
|
||||
|
||||
# Emit a line of code wrapped between a tab and a newline.
|
||||
def emit(code, options={})
|
||||
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
||||
|
|
@ -106,6 +111,10 @@ module Assembler
|
|||
emit("call #{label}")
|
||||
end
|
||||
|
||||
def leave
|
||||
emit("leave")
|
||||
end
|
||||
|
||||
def neg(reg)
|
||||
emit("neg #{reg}")
|
||||
end
|
||||
|
|
@ -166,5 +175,9 @@ module Assembler
|
|||
emit("int 0x#{num.to_s(16)}")
|
||||
end
|
||||
|
||||
def cdq
|
||||
emit("cdq")
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
|
|
|||
76
build.rb
76
build.rb
|
|
@ -3,10 +3,21 @@
|
|||
require 'compiler'
|
||||
require 'asm/text'
|
||||
require 'asm/binary'
|
||||
require 'asm/machosymtab'
|
||||
require 'asm/machofile'
|
||||
|
||||
# usage: build.rb <filename> [elf | macho ] [asm | bin]
|
||||
|
||||
def main
|
||||
filename = ARGV[0].to_s
|
||||
raise "can't read #{filename}" unless File.readable?(filename)
|
||||
binformat = ARGV[1] ? ARGV[1].downcase : 'elf'
|
||||
format = ARGV[2] ? ARGV[2].downcase : 'asm'
|
||||
platform = `uname -s`.chomp.downcase
|
||||
puts "Building #{format} from #{filename} for #{platform}, binformat is #{binformat} ..."
|
||||
outfile = build(filename, platform, format, binformat)
|
||||
puts outfile
|
||||
exit
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -18,22 +29,17 @@ def base(filename)
|
|||
end
|
||||
|
||||
|
||||
# filename: input filename
|
||||
# infile: input filename
|
||||
# outfile: output filename
|
||||
# asm: assembler to use
|
||||
# returns: output filename
|
||||
def compile(filename, asm)
|
||||
def compile(infile, outfile, asm)
|
||||
|
||||
File.open(filename, 'r') do |input|
|
||||
compiler = Compiler.new(input, asm)
|
||||
compiler.compile
|
||||
end
|
||||
|
||||
ext = asm.class.name.split('::').last[0,3].downcase == 'bin' ? 'bin' : 'asm'
|
||||
outfile = "#{base(filename)}.#{ext}"
|
||||
File.open(infile, 'r') do |input|
|
||||
File.open(outfile, 'wb') do |out|
|
||||
out.puts(asm.output)
|
||||
compiler = Compiler.new(input, asm)
|
||||
out.print(compiler.compile)
|
||||
end
|
||||
end
|
||||
return outfile
|
||||
|
||||
rescue ParseError => e
|
||||
error("[error] #{e.message}")
|
||||
|
|
@ -44,12 +50,13 @@ rescue ParseError => e
|
|||
end
|
||||
|
||||
# assemble using nasm, return resulting filename.
|
||||
def asm(filename, binformat='elf')
|
||||
def assemble(filename, binformat='elf')
|
||||
f = base(filename)
|
||||
outfile = "#{f}.o"
|
||||
output = `nasm -f #{binformat} -g -o #{outfile} #{filename}`
|
||||
output = `nasm -f #{binformat} -g -o #{outfile} #{filename} 2>&1`
|
||||
if $?.exitstatus != 0
|
||||
puts output
|
||||
puts
|
||||
print output
|
||||
raise "nasm failed: #{$?.exitstatus}"
|
||||
end
|
||||
return outfile
|
||||
|
|
@ -64,32 +71,41 @@ def link(filename, platform='linux')
|
|||
else
|
||||
raise "unsupported platform: #{platform}"
|
||||
end
|
||||
output = `#{cmd} #{args} -o #{f} #{filename}`
|
||||
output = `#{cmd} #{args} -o #{f} #{filename} 2>&1`
|
||||
if $?.exitstatus != 0
|
||||
puts output
|
||||
puts
|
||||
print output
|
||||
raise "ld failed: #{$?.exitstatus}"
|
||||
end
|
||||
`chmod +x #{f}`
|
||||
`chmod u+x #{f}`
|
||||
return f
|
||||
end
|
||||
|
||||
# TODO Use a dependency injection framework for the assembler, and
|
||||
# other parts as things become more modular.
|
||||
def build(filename, platform='linux', format='asm', binformat='elf')
|
||||
bin = if format == 'asm'
|
||||
code = compile(filename, Assembler::Text.new(platform))
|
||||
obj = asm( code, binformat )
|
||||
link( obj, platform )
|
||||
else # binary
|
||||
obj = compile(filename, Assembler::Binary.new(platform))
|
||||
link( obj, platform )
|
||||
def build(filename, platform='linux', binformat='elf')
|
||||
objfile = base(filename) + '.o'
|
||||
symtab, objwriter =
|
||||
case binformat
|
||||
when 'elf': [Assembler::ELFSymtab.new, Assembler::ELFFile.new]
|
||||
when 'macho': [Assembler::MachOSymtab.new, Assembler::MachOFile.new]
|
||||
else
|
||||
raise "unsupported binary format: #{binformat}"
|
||||
end
|
||||
return bin
|
||||
compile(filename, objfile, Assembler::Binary.new(platform, symtab, objwriter))
|
||||
exefile = link(objfile, platform)
|
||||
return exefile
|
||||
end
|
||||
|
||||
def build_asm(filename, platform='linux', binformat='elf')
|
||||
asmfile = base(filename) + '.asm'
|
||||
compile(filename, asmfile, Assembler::Text.new(platform))
|
||||
objfile = assemble(asmfile, binformat)
|
||||
exefile = link(objfile, platform)
|
||||
return exefile
|
||||
end
|
||||
|
||||
def run(filename)
|
||||
filename = "./#{filename}" unless filename.include?('/')
|
||||
system(filename)
|
||||
`#{filename}`
|
||||
return $?.exitstatus
|
||||
end
|
||||
|
||||
|
|
|
|||
237
compiler.rb
237
compiler.rb
|
|
@ -12,6 +12,8 @@
|
|||
# require 'rubygems'
|
||||
# require 'unroller'
|
||||
|
||||
require 'asm/registers'
|
||||
|
||||
class ParseError < StandardError
|
||||
attr_reader :caller, :context
|
||||
def initialize(caller, context=nil)
|
||||
|
|
@ -22,6 +24,8 @@ end
|
|||
|
||||
class Compiler
|
||||
|
||||
include Assembler::Registers
|
||||
|
||||
Keywords = %w[
|
||||
if else end while until repeat for to do break
|
||||
print
|
||||
|
|
@ -82,7 +86,7 @@ class Compiler
|
|||
asm.call(name)
|
||||
else
|
||||
# variable access
|
||||
asm.mov(:eax, "dword [#{name}]")
|
||||
asm.mov(EAX, [asm.var(name)])
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -95,7 +99,7 @@ class Compiler
|
|||
elsif alpha?(@look)
|
||||
identifier # or call
|
||||
elsif digit?(@look)
|
||||
asm.mov(:eax, get_number.to_i)
|
||||
asm.mov(EAX, get_number.to_i)
|
||||
else
|
||||
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
|
||||
end
|
||||
|
|
@ -106,7 +110,7 @@ class Compiler
|
|||
sign = @look
|
||||
match(sign) if op?(:unary, sign)
|
||||
factor
|
||||
asm.neg(:eax) if sign == '-'
|
||||
asm.neg(EAX) if sign == '-'
|
||||
end
|
||||
|
||||
# Parse and translate a single term (factor or mulop). Result is in
|
||||
|
|
@ -115,14 +119,13 @@ class Compiler
|
|||
signed_factor # Result in eax.
|
||||
|
||||
while op?(:mul, @look)
|
||||
pushing(:eax) do
|
||||
asm.push(EAX)
|
||||
case @look
|
||||
when '*': multiply
|
||||
when '/': divide
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Parse and translate a general expression of terms. Result is
|
||||
# in eax.
|
||||
|
|
@ -130,21 +133,21 @@ class Compiler
|
|||
term # Result is in eax.
|
||||
|
||||
while op_char?(@look, :add)
|
||||
pushing(:eax) do
|
||||
asm.push(EAX)
|
||||
case @look
|
||||
when '+': add
|
||||
when '-': subtract
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Parse an addition operator and the 2nd term (b). The result is
|
||||
# left in eax. The 1st term (a) is expected on the stack.
|
||||
def add
|
||||
match('+')
|
||||
term # Result is in eax.
|
||||
asm.add(:eax, '[esp]') # Add a to b.
|
||||
asm.pop(EBX)
|
||||
asm.add(EAX, EBX) # Add a to b.
|
||||
end
|
||||
|
||||
# Parse a subtraction operator and the 2nd term (b). The result is
|
||||
|
|
@ -152,8 +155,9 @@ class Compiler
|
|||
def subtract
|
||||
match('-')
|
||||
term # Result, b, is in eax.
|
||||
asm.neg(:eax) # Fake the subtraction. a - b == a + -b
|
||||
asm.add(:eax, '[esp]') # Add a and -b.
|
||||
asm.pop(EBX)
|
||||
asm.neg(EAX) # Fake the subtraction. a - b == a + -b
|
||||
asm.add(EAX, EBX) # Add a(ebx) to -b(eax).
|
||||
end
|
||||
|
||||
# Parse an addition operator and the 2nd term (b). The result is
|
||||
|
|
@ -161,7 +165,8 @@ class Compiler
|
|||
def multiply
|
||||
match('*')
|
||||
signed_factor # Result is in eax.
|
||||
asm.imul('dword [esp]') # Multiply a by b.
|
||||
asm.pop(EBX)
|
||||
asm.imul(EBX) # Multiply a by b.
|
||||
end
|
||||
|
||||
# Parse a division operator and the divisor (b). The result is
|
||||
|
|
@ -169,14 +174,15 @@ class Compiler
|
|||
def divide
|
||||
match('/')
|
||||
signed_factor # Result is in eax.
|
||||
asm.xchg(:eax, '[esp]') # Swap the divisor and dividend into
|
||||
asm.pop(EBX)
|
||||
asm.xchg(EAX, EBX) # Swap the divisor and dividend into
|
||||
# the correct places.
|
||||
|
||||
# idiv uses edx:eax as the dividend so we need to ensure that edx
|
||||
# is correctly sign-extended w.r.t. eax.
|
||||
asm.cdq # Sign-extend eax into edx (Convert Double to
|
||||
# Quad).
|
||||
asm.idiv('dword [esp]') # Divide a (eax) by b ([esp]).
|
||||
asm.idiv(EBX) # Divide a (eax) by b (ebx).
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -187,19 +193,22 @@ class Compiler
|
|||
def bitor_expr
|
||||
match('|')
|
||||
term
|
||||
asm.or(:eax, '[esp]')
|
||||
asm.pop(EBX)
|
||||
asm.or_(EAX, EBX)
|
||||
end
|
||||
|
||||
def bitand_expr
|
||||
match('&')
|
||||
signed_factor
|
||||
asm.and_(:eax, '[esp]')
|
||||
asm.pop(EBX)
|
||||
asm.and_(EAX, EBX)
|
||||
end
|
||||
|
||||
def xor_expr
|
||||
match('^')
|
||||
term
|
||||
asm.xor(:eax, '[esp]')
|
||||
asm.pop(EBX)
|
||||
asm.xor(EAX, EBX)
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -232,9 +241,9 @@ class Compiler
|
|||
def boolean_factor
|
||||
if boolean?(@look)
|
||||
if get_boolean == 'true'
|
||||
asm.mov(:eax, -1)
|
||||
asm.mov(EAX, -1)
|
||||
else
|
||||
asm.xor(:eax, :eax)
|
||||
asm.xor(EAX, EAX)
|
||||
end
|
||||
scan
|
||||
else
|
||||
|
|
@ -246,8 +255,8 @@ class Compiler
|
|||
if @look == '!'
|
||||
match('!')
|
||||
boolean_factor
|
||||
make_boolean(:eax) # ensure it is -1 or 0...
|
||||
asm.not(:eax) # so that not is also boolean not
|
||||
make_boolean(EAX) # ensure it is -1 or 0...
|
||||
asm.not_(EAX) # so that 1's complement NOT is also boolean not
|
||||
else
|
||||
boolean_factor
|
||||
end
|
||||
|
|
@ -255,8 +264,8 @@ class Compiler
|
|||
|
||||
# Convert any identifier to a boolean (-1 or 0). This is
|
||||
# semantically equivalent to !!reg in C or Ruby.
|
||||
def make_boolean(reg=:eax)
|
||||
end_label = asm.label(:endmakebool)
|
||||
def make_boolean(reg=EAX)
|
||||
end_label = asm.mklabel(:endmakebool)
|
||||
asm.cmp(reg, 0) # if false do nothing
|
||||
asm.jz(end_label)
|
||||
asm.mov(reg, -1) # truthy, make it true
|
||||
|
|
@ -267,7 +276,7 @@ class Compiler
|
|||
expression
|
||||
if op_char?(@look, :rel)
|
||||
scan
|
||||
pushing(:eax) do
|
||||
asm.push(EAX)
|
||||
case @value
|
||||
when '==': eq_relation
|
||||
when '!=': neq_relation
|
||||
|
|
@ -278,9 +287,8 @@ class Compiler
|
|||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# a: [esp]
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# If b - a is zero then a = b, and make_boolean will leave the zero
|
||||
|
|
@ -288,14 +296,15 @@ class Compiler
|
|||
# and make_boolean will leave -1 (true) for us in eax.
|
||||
def neq_relation
|
||||
expression
|
||||
asm.sub(:eax, '[esp]')
|
||||
asm.pop(EBX)
|
||||
asm.sub(EAX, EBX)
|
||||
make_boolean
|
||||
end
|
||||
|
||||
# Invert the != test for equal.
|
||||
def eq_relation
|
||||
neq_relation
|
||||
asm.not(:eax)
|
||||
asm.not_(EAX)
|
||||
end
|
||||
|
||||
# > and < are both implemented in terms of jl (jump if less than).
|
||||
|
|
@ -303,6 +312,12 @@ class Compiler
|
|||
# and order the terms appropriately for each function. As for >=
|
||||
# and <=, they in turn are implemented in terms of > and <. a is
|
||||
# greater than or equal to b if and only if a is *not* less than b.
|
||||
#
|
||||
# Note: This was done to minimize the number of instructions that
|
||||
# the assembler needed to implement, but since the Jcc
|
||||
# instructions are very cheap to implement this is no longer
|
||||
# a concern.
|
||||
|
||||
|
||||
# The next 4 relations all compare 2 values a and b, then return
|
||||
# true (-1) if the difference was below zero and false (0)
|
||||
|
|
@ -311,58 +326,62 @@ class Compiler
|
|||
# Invert the sense of the test?
|
||||
invert = options[:invert]
|
||||
|
||||
true_label = asm.label(:cmp)
|
||||
end_label = asm.label(:endcmp)
|
||||
true_label = asm.mklabel(:cmp)
|
||||
end_label = asm.mklabel(:endcmp)
|
||||
asm.cmp(a, b)
|
||||
asm.jl(true_label)
|
||||
|
||||
asm.xor(:eax, :eax) # return false
|
||||
asm.not(:eax) if invert # (or true if inverted)
|
||||
asm.xor(EAX, EAX) # return false
|
||||
asm.not_(EAX) if invert # (or true if inverted)
|
||||
asm.jmp(end_label)
|
||||
|
||||
asm.emit_label(true_label)
|
||||
asm.xor(:eax, :eax) # return true
|
||||
asm.not(:eax) unless invert # (or false if inverted)
|
||||
asm.xor(EAX, EAX) # return true
|
||||
asm.not_(EAX) unless invert # (or false if inverted)
|
||||
|
||||
asm.emit_label(end_label)
|
||||
end
|
||||
|
||||
# a: [esp]
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a > b then b - a < 0
|
||||
def gt_relation
|
||||
expression
|
||||
cmp_relation(:eax, '[esp]') # b - a
|
||||
asm.pop(EBX)
|
||||
cmp_relation(EAX, EBX) # b - a
|
||||
end
|
||||
|
||||
# a: [esp]
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a < b then a - b < 0
|
||||
def lt_relation
|
||||
expression
|
||||
cmp_relation('[esp]', :eax) # a - b
|
||||
asm.pop(EBX)
|
||||
cmp_relation(EBX, EAX) # a - b
|
||||
end
|
||||
|
||||
# a: [esp]
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a >= b then !(a < b)
|
||||
def ge_relation
|
||||
expression
|
||||
asm.pop(EBX)
|
||||
# Compare them as in less than but invert the result.
|
||||
cmp_relation('[esp]', :eax, :invert => true)
|
||||
cmp_relation(EBX, EAX, :invert => true)
|
||||
end
|
||||
|
||||
# a: [esp]
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a <= b then !(a > b)
|
||||
def le_relation
|
||||
expression
|
||||
asm.pop(EBX)
|
||||
# Compare them as in greater than but invert the result.
|
||||
cmp_relation(:eax, '[esp]', :invert => true)
|
||||
cmp_relation(EAX, EBX, :invert => true)
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -376,7 +395,7 @@ class Compiler
|
|||
match('=')
|
||||
boolean_expression
|
||||
asm.defvar(name) unless asm.var?(name)
|
||||
asm.mov("dword [#{name}]", :eax)
|
||||
asm.mov([asm.var(name)], EAX)
|
||||
end
|
||||
|
||||
# Parse a code block.
|
||||
|
|
@ -413,7 +432,7 @@ class Compiler
|
|||
|
||||
# Parse an if-else statement.
|
||||
def if_else_stmt(label)
|
||||
else_label = asm.label(:end_or_else)
|
||||
else_label = asm.mklabel(:end_or_else)
|
||||
end_label = else_label # only generated if else clause
|
||||
# present
|
||||
condition
|
||||
|
|
@ -424,7 +443,7 @@ class Compiler
|
|||
@indent -= 1
|
||||
if @token == :keyword && @value == 'else'
|
||||
skip_any_whitespace
|
||||
end_label = asm.label(:endif) # now we need the 2nd label
|
||||
end_label = asm.mklabel(:endif) # now we need the 2nd label
|
||||
asm.jmp(end_label)
|
||||
asm.emit_label(else_label)
|
||||
@indent += 1
|
||||
|
|
@ -441,8 +460,8 @@ class Compiler
|
|||
# block: Code to execute at the start of each iteration. (e.g. a
|
||||
# condition)
|
||||
def simple_loop(name)
|
||||
start_label = asm.label(:"loop_#{name}")
|
||||
end_label = asm.label(:"end_#{name}")
|
||||
start_label = asm.mklabel(:"#{name}_loop")
|
||||
end_label = asm.mklabel(:"end_#{name}")
|
||||
asm.emit_label(start_label)
|
||||
|
||||
yield(end_label)
|
||||
|
|
@ -482,27 +501,29 @@ class Compiler
|
|||
# s = s + x
|
||||
# e
|
||||
def for_stmt
|
||||
counter = "[#{get_name}]"
|
||||
counter = get_name
|
||||
asm.defvar(counter)
|
||||
match('=')
|
||||
boolean_expression # initial value
|
||||
asm.sub(:eax, 1) # pre-decrement because of the
|
||||
asm.sub(EAX, 1) # pre-decrement because of the
|
||||
# following pre-increment
|
||||
asm.mov(counter, :eax) # stash the counter in memory
|
||||
asm.mov([asm.var(counter)], EAX) # stash the counter in memory
|
||||
match_word('to', :scan => true)
|
||||
boolean_expression # final value
|
||||
skip_any_whitespace
|
||||
asm.push(:eax) # stash final value on stack
|
||||
final = '[esp]'
|
||||
asm.push(EAX) # stash final value on stack
|
||||
asm.mov(EDX, ESP)
|
||||
final = [EDX]
|
||||
|
||||
simple_loop('for') do |end_label|
|
||||
asm.mov(:ecx, counter) # get the counter
|
||||
asm.add(:ecx, 1) # increment
|
||||
asm.mov(counter, :ecx) # store the counter
|
||||
asm.cmp(final, :ecx) # check if we're done
|
||||
asm.mov(ECX, [asm.var(counter)]) # get the counter
|
||||
asm.add(ECX, 1) # increment
|
||||
asm.mov([asm.var(counter)], ECX) # store the counter
|
||||
asm.cmp(final, ECX) # check if we're done
|
||||
asm.jz(end_label) # if so jump to the end
|
||||
end
|
||||
|
||||
asm.add(:esp, 4) # clean up the stack
|
||||
asm.add(ESP, 4) # clean up the stack
|
||||
end
|
||||
|
||||
# do 5
|
||||
|
|
@ -512,19 +533,19 @@ class Compiler
|
|||
|
||||
boolean_expression
|
||||
skip_any_whitespace
|
||||
asm.mov(:ecx, :eax)
|
||||
asm.mov(ECX, EAX)
|
||||
|
||||
start_label = asm.label(:do)
|
||||
end_label = asm.label(:enddo)
|
||||
start_label = asm.mklabel(:do)
|
||||
end_label = asm.mklabel(:enddo)
|
||||
asm.emit_label(start_label)
|
||||
|
||||
asm.push(:ecx)
|
||||
asm.push(ECX)
|
||||
|
||||
@indent += 1
|
||||
block(end_label)
|
||||
@indent -= 1
|
||||
|
||||
asm.pop(:ecx)
|
||||
asm.pop(ECX)
|
||||
|
||||
match_word('end')
|
||||
asm.loop_(start_label)
|
||||
|
|
@ -532,13 +553,13 @@ class Compiler
|
|||
# Phony push! break needs to clean up the stack, but since we
|
||||
# don't know if there is a break at this point we fake a push and
|
||||
# always clean up the stack after.
|
||||
asm.sub(:esp, 4)
|
||||
asm.sub(ESP, 4)
|
||||
|
||||
asm.emit_label(end_label)
|
||||
|
||||
# If there was a break we have to clean up the stack here. If
|
||||
# there was no break we clean up the phony push above.
|
||||
asm.add(:esp, 4)
|
||||
asm.add(ESP, 4)
|
||||
end
|
||||
|
||||
def break_stmt(label)
|
||||
|
|
@ -554,79 +575,83 @@ class Compiler
|
|||
def condition
|
||||
boolean_expression
|
||||
skip_whitespace
|
||||
asm.cmp(:eax, 0) # 0 is false, anything else is true
|
||||
asm.cmp(EAX, 0) # 0 is false, anything else is true
|
||||
end
|
||||
|
||||
# print eax in hex format
|
||||
def print_stmt
|
||||
# variable names
|
||||
d = 'DIGITS'
|
||||
h = 'HEX'
|
||||
|
||||
asm.block do
|
||||
# define a lookup table of digits
|
||||
unless var?('DIGITS')
|
||||
defvar('DIGITS', 4)
|
||||
mov('dword [DIGITS]', 0x33323130)
|
||||
mov('dword [DIGITS+4]', 0x37363534)
|
||||
mov('dword [DIGITS+8]', 0x62613938)
|
||||
mov('dword [DIGITS+12]', 0x66656463)
|
||||
unless var?(d)
|
||||
defvar(d, 4)
|
||||
mov([var(d)], 0x33323130)
|
||||
mov([var(d)+4], 0x37363534)
|
||||
mov([var(d)+8], 0x62613938)
|
||||
mov([var(d)+12], 0x66656463)
|
||||
end
|
||||
# 3 dwords == 12 chars
|
||||
defvar('HEX', 3) unless var?('HEX')
|
||||
defvar(h, 3) unless var?(h)
|
||||
# TODO check sign and prepend '-' if negative
|
||||
mov('word [HEX]', 0x7830) # "0x" == [48, 120]
|
||||
mov('word [HEX+10]', 0xa) # newline + null terminator
|
||||
mov([var(h)], 0x7830) # "0x" == [48, 120]
|
||||
mov([var(h)+10], 0xa) # newline + null terminator
|
||||
end
|
||||
boolean_expression
|
||||
asm.block do
|
||||
# convert eax to a hex string
|
||||
lea(:esi, '[DIGITS]')
|
||||
lea(:edi, '[HEX+9]')
|
||||
lea(ESI, [var(d)])
|
||||
lea(EDI, [var(h)+9])
|
||||
# build the string backwards (right to left), byte by byte
|
||||
mov(:ecx, 4)
|
||||
mov(ECX, 4)
|
||||
end
|
||||
asm.emit_label(loop_label=asm.label)
|
||||
asm.emit_label(loop_label=asm.mklabel)
|
||||
asm.block do
|
||||
# low nybble of nth byte
|
||||
movzx(:ebx, :al)
|
||||
and_(:bl, 0x0f) # isolate low nybble
|
||||
movzx(:edx, 'byte [esi+ebx]')
|
||||
mov('byte [edi]', :dl)
|
||||
dec(:edi)
|
||||
movzx(EBX, AL)
|
||||
and_(BL, 0x0f) # isolate low nybble
|
||||
movzx(EDX, [:byte, ESI+EBX])
|
||||
mov([EDI], DL)
|
||||
dec(EDI)
|
||||
# high nybble of nth byte
|
||||
movzx(:ebx, :al)
|
||||
and_(:bl, 0xf0) # isolate high nybble
|
||||
shr(:bl, 4)
|
||||
mov(:dl, 'byte [esi+ebx]')
|
||||
mov('byte [edi]', :dl)
|
||||
dec(:edi)
|
||||
shr(:eax, 8)
|
||||
movzx(EBX, AL)
|
||||
and_(BL, 0xf0) # isolate high nybble
|
||||
shr(BL, 4)
|
||||
mov(DL, [ESI+EBX])
|
||||
mov([EDI], DL)
|
||||
dec(EDI)
|
||||
shr(EAX, 8)
|
||||
loop_(loop_label)
|
||||
# write(int fd, char *s, int n)
|
||||
mov(:eax, 4) # SYS_write
|
||||
lea(:ecx, '[HEX]') # ecx = &s
|
||||
mov(EAX, 4) # SYS_write
|
||||
lea(ECX, [var(h)]) # ecx = &s
|
||||
args = [1, # fd = 1 (STDOUT)
|
||||
:ecx, # s = &s
|
||||
ECX, # s = &s
|
||||
11] # n = 11 (excluding term, max # of chars to print)
|
||||
case platform
|
||||
when 'darwin' # on the stack, right to left (right @ highest addr)
|
||||
####
|
||||
# setup bogus stack frame
|
||||
push(:ebp)
|
||||
mov(:ebp, :esp)
|
||||
sub(:esp, 36)
|
||||
push(EBP)
|
||||
mov(EBP, ESP)
|
||||
sub(ESP, 36)
|
||||
####
|
||||
args.reverse.each { |a| push(a) }
|
||||
push(:eax)
|
||||
push(EAX)
|
||||
int(0x80)
|
||||
####
|
||||
# teardown bogus stack frame
|
||||
xor(:eax, :eax)
|
||||
add(:esp, 36)
|
||||
pop(:ebx)
|
||||
emit("leave")
|
||||
xor(EAX, EAX)
|
||||
add(ESP, 36)
|
||||
pop(EBX)
|
||||
leave
|
||||
####
|
||||
when 'linux'
|
||||
mov(:ebx, args[0])
|
||||
mov(:ecx, args[1])
|
||||
mov(:edx, args[2])
|
||||
mov(EBX, args[0])
|
||||
mov(ECX, args[1])
|
||||
mov(EDX, args[2])
|
||||
int(0x80)
|
||||
end
|
||||
end
|
||||
|
|
@ -819,15 +844,15 @@ class Compiler
|
|||
def pushing(reg)
|
||||
asm.push(reg)
|
||||
yield
|
||||
asm.add(:esp, 4)
|
||||
asm.add(ESP, 4)
|
||||
end
|
||||
|
||||
def op(name)
|
||||
pushing(:eax) do
|
||||
asm.push(EAX)
|
||||
get_op
|
||||
expected(name) unless match_word(name)
|
||||
yield
|
||||
end
|
||||
asm.add(ESP, 4)
|
||||
end
|
||||
|
||||
|
||||
|
|
|
|||
288
elfwriter.c
288
elfwriter.c
|
|
@ -1,288 +0,0 @@
|
|||
#include <libelf.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
/* _exit(0) */
|
||||
/* uint8_t shell_code[] = { */
|
||||
/* 0xbb, 0, 0, 0, 0, /\* mov ebx, 0 *\/ */
|
||||
/* 0xb8, 1, 0, 0, 0, /\* mov eax, 1 *\/ */
|
||||
/* 0xcd, 0x80 /\* int 0x80 *\/ */
|
||||
/* }; */
|
||||
|
||||
/* uint32_t hash_words[] = { */
|
||||
/* 0x12345678, */
|
||||
/* 0xdeadc0de, */
|
||||
/* 0x1234abcd */
|
||||
/* }; */
|
||||
|
||||
#define header_size 0x100
|
||||
#define text_addr 0x8048000 + header_size
|
||||
#define text_size 0x02be00
|
||||
#define data_addr text_addr + text_size
|
||||
#define data_size 0x4e00
|
||||
#define bss_addr data_addr + data_size
|
||||
size_t bss_size = 0;
|
||||
|
||||
char string_table[] = {
|
||||
/* Offset 0 */ '\0',
|
||||
/* Offset 1 */ '.', 't', 'e', 'x', 't', '\0' ,
|
||||
/* Offset 7 */ '.', 'b', 's', 's', '\0',
|
||||
/* Offset 12 */ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', '\0'
|
||||
};
|
||||
|
||||
|
||||
/* Write a static 32-bit x86 ELF binary to filename. The file is
|
||||
* clobbered without confirmation!
|
||||
*/
|
||||
int
|
||||
elf_write(const char *filename, uint8_t *code, size_t code_size)
|
||||
{
|
||||
int fd;
|
||||
size_t shstrndx;
|
||||
Elf *elf;
|
||||
Elf_Scn *scn;
|
||||
Elf_Data *data;
|
||||
Elf32_Ehdr *ehdr;
|
||||
Elf32_Phdr *phdr;
|
||||
Elf32_Shdr *shdr;
|
||||
|
||||
if (elf_version(EV_CURRENT) == EV_NONE) {
|
||||
printf("Failed to initialize ELF library!\n");
|
||||
return -1;
|
||||
}
|
||||
if ((fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, 0666)) < 0) {
|
||||
printf("Can't open %s for writing.\n", filename);
|
||||
perror("[elf_write]");
|
||||
return -2;
|
||||
}
|
||||
if ((elf = elf_begin(fd, ELF_C_WRITE, (Elf *)0)) == 0) {
|
||||
printf("elf_begin failed!\n");
|
||||
return -3;
|
||||
}
|
||||
|
||||
|
||||
/**************
|
||||
* ELF Header *
|
||||
**************/
|
||||
|
||||
if ((ehdr = elf32_newehdr(elf)) == NULL) {
|
||||
printf("elf32_newehdr failed!\n");
|
||||
return -4;
|
||||
}
|
||||
ehdr->e_ident[EI_DATA] = ELFDATA2LSB; /* 2's complement, little endian */
|
||||
ehdr->e_type = ET_EXEC;
|
||||
ehdr->e_machine = EM_386; /* x86 */
|
||||
|
||||
/* Image starts at 0x8048000, x86 32-bit abi. We need a bit
|
||||
* of room for headers and such. TODO figure out how much
|
||||
* room is needed!
|
||||
*
|
||||
* Current entry point is .text section.
|
||||
*/
|
||||
ehdr->e_entry = text_addr;
|
||||
|
||||
|
||||
/*******************
|
||||
* Program Headers *
|
||||
*******************/
|
||||
|
||||
/* 3 segments => 3 program headers (text, data, bss) */
|
||||
if ((phdr = elf32_newphdr(elf, 3)) == NULL) {
|
||||
printf("elf32_newphdr failed!\n");
|
||||
return -5;
|
||||
}
|
||||
|
||||
|
||||
/*****************
|
||||
* .text section *
|
||||
*****************/
|
||||
|
||||
if ((scn = elf_newscn(elf)) == NULL) {
|
||||
printf("elf_newscn failed!\n");
|
||||
return -6;
|
||||
}
|
||||
if ((data = elf_newdata(scn)) == NULL) {
|
||||
printf("elf_newdata failed!\n");
|
||||
return -7;
|
||||
}
|
||||
data->d_align = 16;
|
||||
data->d_buf = code;
|
||||
data->d_off = 0LL;
|
||||
data->d_type = ELF_T_BYTE;
|
||||
data->d_size = code_size;
|
||||
data->d_version = EV_CURRENT;
|
||||
|
||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
||||
printf("elf32_getshdr failed!\n");
|
||||
return -8;
|
||||
}
|
||||
shdr->sh_name = 1;
|
||||
shdr->sh_type = SHT_PROGBITS;
|
||||
shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
|
||||
shdr->sh_addr = text_addr;
|
||||
|
||||
|
||||
/****************
|
||||
* .bss section *
|
||||
****************/
|
||||
|
||||
if ((scn = elf_newscn(elf)) == NULL) {
|
||||
printf("elf_newscn failed!\n");
|
||||
return -6;
|
||||
}
|
||||
if ((data = elf_newdata(scn)) == NULL) {
|
||||
printf("elf_newdata failed!\n");
|
||||
return -7;
|
||||
}
|
||||
data->d_align = 4;
|
||||
data->d_off = 0LL;
|
||||
data->d_type = ELF_T_BYTE;
|
||||
data->d_size = bss_size;
|
||||
data->d_version = EV_CURRENT;
|
||||
|
||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
||||
printf("elf32_getshdr failed!\n");
|
||||
return -8;
|
||||
}
|
||||
shdr->sh_name = 7;
|
||||
shdr->sh_type = SHT_NOBITS;
|
||||
shdr->sh_flags = SHF_WRITE | SHF_ALLOC;
|
||||
shdr->sh_addr = bss_addr;
|
||||
|
||||
|
||||
/*******************************
|
||||
* section header string table *
|
||||
*******************************/
|
||||
|
||||
if ((scn = elf_newscn(elf)) == NULL) {
|
||||
printf("elf_newscn failed!\n");
|
||||
return -9;
|
||||
}
|
||||
if ((data = elf_newdata(scn)) == NULL) {
|
||||
printf("elf_newdata failed!\n");
|
||||
return -10;
|
||||
}
|
||||
data->d_align = 1;
|
||||
data->d_buf = string_table;
|
||||
data->d_off = 0LL;
|
||||
data->d_type = ELF_T_BYTE;
|
||||
data->d_size = sizeof(string_table);
|
||||
data->d_version = EV_CURRENT;
|
||||
|
||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
||||
printf("elf32_getshdr failed!\n");
|
||||
return -11;
|
||||
}
|
||||
shdr->sh_name = 12;
|
||||
shdr->sh_type = SHT_STRTAB;
|
||||
shdr->sh_flags = SHF_STRINGS | SHF_ALLOC;
|
||||
shdr->sh_entsize = 0;
|
||||
|
||||
|
||||
/* int elf_setshstrndx(Elf *e, Elf32_Ehdr *eh, size_t shstrndx) */
|
||||
shstrndx = elf_ndxscn(scn);
|
||||
if (shstrndx >= SHN_LORESERVE) {
|
||||
if ((scn = elf_getscn(elf, 0)) == NULL) {
|
||||
printf("elf_getscn failed!\n");
|
||||
return -12;
|
||||
}
|
||||
/* assert(scn->s_ndx == SHN_UNDEF); */
|
||||
/* scn->s_shdr.s_shdr32.sh_link = shstrndx; */
|
||||
elf_flagshdr(scn, ELF_C_SET, ELF_F_DIRTY);
|
||||
shstrndx = SHN_XINDEX;
|
||||
}
|
||||
ehdr->e_shstrndx = shstrndx;
|
||||
|
||||
if (elf_update(elf, ELF_C_NULL) < 0) {
|
||||
printf("elf_update failed!\n");
|
||||
return -12;
|
||||
}
|
||||
|
||||
/* phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; */
|
||||
/* phdr->p_type = PT_PHDR; */
|
||||
/* phdr->p_offset = ehdr->e_phoff; */
|
||||
/* phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); */
|
||||
|
||||
/* text segment */
|
||||
phdr->p_vaddr = text_addr;
|
||||
phdr->p_type = PT_LOAD;
|
||||
phdr->p_offset = header_size;
|
||||
phdr->p_filesz = text_size;
|
||||
phdr->p_memsz = text_size;
|
||||
phdr->p_flags = PF_R | PF_X;
|
||||
phdr->p_align = 0x1000;
|
||||
|
||||
/* data segment */
|
||||
phdr++;
|
||||
phdr->p_vaddr = data_addr;
|
||||
phdr->p_type = PT_LOAD;
|
||||
phdr->p_offset = header_size + text_size;
|
||||
phdr->p_filesz = data_size;
|
||||
phdr->p_memsz = data_size + 0x1024; /* XXX unsure why the abi specifies + 0x1024 */
|
||||
phdr->p_flags = PF_R | PF_W | PF_X;
|
||||
phdr->p_align = 0x1000;
|
||||
|
||||
/* bss segment */
|
||||
phdr++;
|
||||
phdr->p_vaddr = bss_addr;
|
||||
phdr->p_type = PT_LOAD;
|
||||
phdr->p_offset = header_size + text_size + data_size;
|
||||
phdr->p_filesz = bss_size;
|
||||
phdr->p_memsz = bss_size;
|
||||
phdr->p_flags = PF_R | PF_W;
|
||||
phdr->p_align = 0x1000;
|
||||
|
||||
elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY);
|
||||
|
||||
if (elf_update(elf, ELF_C_WRITE) < 0) {
|
||||
printf("elf_update failed!\n");
|
||||
return -13;
|
||||
}
|
||||
|
||||
elf_end(elf);
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char *argv[])
|
||||
{
|
||||
int result;
|
||||
pid_t pid;
|
||||
FILE *fd;
|
||||
uint8_t *code = NULL;
|
||||
size_t code_size = 0, chunk_size = 1024, bytes_read;
|
||||
|
||||
if (argc < 4) {
|
||||
printf("usage: %s <input> <bss_size> <output>\n", argv[0]);
|
||||
printf(" Wraps the input file in an ELF binary.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bss_size = strtoul(argv[2], 0, 10);
|
||||
|
||||
if ((fd = fopen(argv[1], "r")) < 0) {
|
||||
printf("[error] can't open %s for reading.\n", argv[1]);
|
||||
perror("[main]");
|
||||
return 2;
|
||||
}
|
||||
while (!feof(fd) && !ferror(fd)) {
|
||||
code = realloc(code, code_size + chunk_size);
|
||||
bytes_read = fread(code+code_size, 1, chunk_size, fd);
|
||||
code_size += bytes_read;
|
||||
}
|
||||
fclose(fd);
|
||||
|
||||
printf("Writing x86 ELF binary to %s...\n", argv[3]);
|
||||
result = elf_write(argv[3], code, code_size);
|
||||
if (result < 0) {
|
||||
printf("[error] elf_write failed.\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
12
lea.asm
12
lea.asm
|
|
@ -1,12 +0,0 @@
|
|||
BITS 32
|
||||
|
||||
lea eax, [ebx+ecx*4]
|
||||
lea ebx, [eax+ecx*4]
|
||||
lea eax, [ecx+ebx*4]
|
||||
lea eax, [ecx+ebx*8]
|
||||
lea eax, [ecx+ebx]
|
||||
lea eax, [0x1000+10*4]
|
||||
lea eax, [eax]
|
||||
lea eax, [ecx]
|
||||
lea ecx, [eax]
|
||||
lea eax, [0xdeadbeef]
|
||||
89
mov.asm
89
mov.asm
|
|
@ -1,89 +0,0 @@
|
|||
BITS 32
|
||||
|
||||
;;; 00000000 b8 78 56 34 12 b9 78 56 34 12 ba 78 56 34 12 bb |.xV4..xV4..xV4..|
|
||||
;;; 00000010 78 56 34 12 89 c0 89 c8 89 d0 89 d8 89 c1 89 c9 |xV4.............|
|
||||
;;; 00000020 89 d1 89 d9 89 c2 89 ca 89 d2 89 da 89 c3 89 cb |................|
|
||||
;;; 00000030 89 d3 89 db a1 ef be ad de 8b 0d ef be ad de 8b |................|
|
||||
;;; 00000040 15 ef be ad de 8b 1d ef be ad de a3 ef be ad de |................|
|
||||
;;; 00000050 89 0d ef be ad de 89 15 ef be ad de 89 1d ef be |................|
|
||||
;;; 00000060 ad de 8b 00 8b 01 8b 02 8b 03 8b 08 8b 09 8b 0a |................|
|
||||
;;; 00000070 8b 0b 8b 10 8b 11 8b 12 8b 13 8b 18 8b 19 8b 1a |................|
|
||||
;;; 00000080 8b 1b 89 00 89 01 89 02 89 03 89 08 89 09 89 0a |................|
|
||||
;;; 00000090 89 0b 89 10 89 11 89 12 89 13 89 18 89 19 89 1a |................|
|
||||
;;; 000000a0 89 1b |..|
|
||||
;;; 000000a2
|
||||
|
||||
mov eax, 0x12345678 ; b8 78 56 34 12
|
||||
mov ecx, 0x12345678 ; b9 78 56 34 12
|
||||
mov edx, 0x12345678 ; ba 78 56 34 12
|
||||
mov ebx, 0x12345678 ; bb 78 56 34 12
|
||||
|
||||
mov eax, eax ; 89 c0
|
||||
mov eax, ecx ; 89 c8
|
||||
mov eax, edx ; 89 d0
|
||||
mov eax, ebx ; 89 d8
|
||||
|
||||
mov ecx, eax ; 89 c1
|
||||
mov ecx, ecx ; 89 c9
|
||||
mov ecx, edx ; 89 d1
|
||||
mov ecx, ebx ; 89 d9
|
||||
|
||||
mov edx, eax ; 89 c2
|
||||
mov edx, ecx ; 89 ca
|
||||
mov edx, edx ; 89 d2
|
||||
mov edx, ebx ; 89 da
|
||||
|
||||
mov ebx, eax ; 89 c3
|
||||
mov ebx, ecx ; 89 cb
|
||||
mov ebx, edx ; 89 d3
|
||||
mov ebx, ebx ; 89 db
|
||||
|
||||
mov eax, dword [0xdeadbeef] ; a1 ef be ad de
|
||||
mov ecx, dword [0xdeadbeef] ; 8b 0d ef be ad de
|
||||
mov edx, dword [0xdeadbeef] ; 8b 15 ef be ad de
|
||||
mov ebx, dword [0xdeadbeef] ; 8b 1d ef be ad de
|
||||
|
||||
mov [0xdeadbeef], eax ; a3 ef be ad de
|
||||
mov [0xdeadbeef], ecx ; 89 0d ef be ad de
|
||||
mov [0xdeadbeef], edx ; 89 15 ef be ad de
|
||||
mov [0xdeadbeef], ebx ; 89 1d ef be ad de
|
||||
|
||||
mov eax, dword [eax] ; 8b 00
|
||||
mov eax, dword [ecx] ; 8b 01
|
||||
mov eax, dword [edx] ; 8b 02
|
||||
mov eax, dword [ebx] ; 8b 03
|
||||
|
||||
mov ecx, dword [eax] ; 8b 08
|
||||
mov ecx, dword [ecx] ; 8b 09
|
||||
mov ecx, dword [edx] ; 8b 0a
|
||||
mov ecx, dword [ebx] ; 8b 0b
|
||||
|
||||
mov edx, dword [eax] ; 8b 10
|
||||
mov edx, dword [ecx] ; 8b 11
|
||||
mov edx, dword [edx] ; 8b 12
|
||||
mov edx, dword [ebx] ; 8b 13
|
||||
|
||||
mov ebx, dword [eax] ; 8b 18
|
||||
mov ebx, dword [ecx] ; 8b 19
|
||||
mov ebx, dword [edx] ; 8b 1a
|
||||
mov ebx, dword [ebx] ; 8b 1b
|
||||
|
||||
mov [eax], eax ; 89 00
|
||||
mov [ecx], eax ; 89 01
|
||||
mov [edx], eax ; 89 02
|
||||
mov [ebx], eax ; 89 03
|
||||
|
||||
mov [eax], ecx ; 89 08
|
||||
mov [ecx], ecx ; 89 09
|
||||
mov [edx], ecx ; 89 0a
|
||||
mov [ebx], ecx ; 89 0b
|
||||
|
||||
mov [eax], edx ; 89 10
|
||||
mov [ecx], edx ; 89 11
|
||||
mov [edx], edx ; 89 12
|
||||
mov [ebx], edx ; 89 13
|
||||
|
||||
mov [eax], ebx ; 89 18
|
||||
mov [ecx], ebx ; 89 19
|
||||
mov [edx], ebx ; 89 1a
|
||||
mov [ebx], ebx ; 89 1b
|
||||
|
|
@ -56,7 +56,7 @@ break: test.rb test_break.code
|
|||
print: test.rb test_print.code
|
||||
@./test.rb print $(BINFORMAT)
|
||||
|
||||
big_test: test.rb big_test.code
|
||||
big_test: test.rb test_big.code
|
||||
@./test.rb big $(BINFORMAT)
|
||||
|
||||
clean:
|
||||
|
|
|
|||
18
test/test.rb
18
test/test.rb
|
|
@ -5,20 +5,20 @@ $LOAD_PATH << ROOT
|
|||
|
||||
require 'build'
|
||||
|
||||
# usage: build.rb <func> [binformat]
|
||||
#
|
||||
# ([format] will go before [binformat])
|
||||
# usage: test.rb <func> [binformat] [format]
|
||||
|
||||
def main
|
||||
func = ARGV[0].to_s
|
||||
format = 'asm' # 'bin' only assembles one or two
|
||||
# instructions right now, but support
|
||||
# is in place
|
||||
binformat = (ARGV[1] ? ARGV[1] : 'elf').downcase
|
||||
binformat = ARGV[1] ? ARGV[1].downcase : 'elf'
|
||||
format = ARGV[2] ? ARGV[2].downcase : 'asm'
|
||||
platform = `uname -s`.chomp.downcase
|
||||
print "testing #{func} ... "
|
||||
success = run( build("test_#{func}.code", platform, format, binformat) )
|
||||
puts success == 0 ? "pass" : "FAIL! (#{success})"
|
||||
success = run( build("test_#{func}.code", platform, binformat) )
|
||||
if success == 0
|
||||
puts "pass"
|
||||
else
|
||||
puts "FAIL! (#{success})"
|
||||
end
|
||||
exit(success.to_i)
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
i=0
|
||||
a=10
|
||||
for i = 0 to 10
|
||||
a=a-1
|
||||
end
|
||||
a=a
|
||||
|
||||
11
x86.txt
11
x86.txt
|
|
@ -1,11 +0,0 @@
|
|||
mov (0x66) {
|
||||
reg32, reg32 (0x89) {
|
||||
op2 - src
|
||||
|
||||
eax ecx edx ebx
|
||||
op1 eax c0 c8 d0 d8
|
||||
dest ecx c1 c9 d1 d9
|
||||
edx c2 ca d2 da
|
||||
ebx c3 cb d3 db
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue