mirror of
https://github.com/samsonjs/compiler.git
synced 2026-04-27 14:57:45 +00:00
[NEW] Binary assembler outputs working machine code and Mach-O object files that can be linked into working executables.
This commit is contained in:
parent
47ce9043e4
commit
bc6a3d4d3b
25 changed files with 2082 additions and 894 deletions
13
Makefile
13
Makefile
|
|
@ -1,17 +1,4 @@
|
||||||
test:
|
test:
|
||||||
cd test && make all
|
cd test && make all
|
||||||
|
|
||||||
elfwriter: elfwriter.c
|
|
||||||
gcc -o elfwriter elfwriter.c -lelf
|
|
||||||
|
|
||||||
test_elf: elfwriter build
|
|
||||||
./elfwriter test.bin 4 test_elf.o
|
|
||||||
ld -o test_elf test_elf.o
|
|
||||||
./test_elf
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f elfwriter
|
|
||||||
@rm -f test_elf.o
|
|
||||||
@rm -f test_elf
|
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
|
|
|
||||||
31
asm/asm.rb
31
asm/asm.rb
|
|
@ -5,6 +5,8 @@
|
||||||
# sjs
|
# sjs
|
||||||
# may 2009
|
# may 2009
|
||||||
|
|
||||||
|
require 'asm/registers'
|
||||||
|
|
||||||
module Assembler
|
module Assembler
|
||||||
|
|
||||||
# Abstract class for common functionality between different code
|
# Abstract class for common functionality between different code
|
||||||
|
|
@ -14,41 +16,14 @@ module Assembler
|
||||||
|
|
||||||
attr_reader :platform
|
attr_reader :platform
|
||||||
|
|
||||||
def initialize(platform='linux', *args)
|
def initialize(platform)
|
||||||
@platform = platform
|
@platform = platform
|
||||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
|
||||||
@num_labels = 0 # Used to generate unique labels.
|
|
||||||
@num_labels_with_suffix = Hash.new(0)
|
|
||||||
|
|
||||||
# Maps names to locations.
|
|
||||||
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def block(*args, &blk)
|
def block(*args, &blk)
|
||||||
instance_eval(&blk)
|
instance_eval(&blk)
|
||||||
end
|
end
|
||||||
|
|
||||||
def output
|
|
||||||
raise "#{self.class} is supposed to implement this method!"
|
|
||||||
end
|
|
||||||
|
|
||||||
def var(name)
|
|
||||||
@vars[name]
|
|
||||||
end
|
|
||||||
alias_method :var?, :var
|
|
||||||
|
|
||||||
# Generate a unique label.
|
|
||||||
def label(suffix=nil)
|
|
||||||
@num_labels += 1
|
|
||||||
if suffix
|
|
||||||
@num_labels_with_suffix[suffix] += 1
|
|
||||||
suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
|
|
||||||
end
|
|
||||||
name = "L#{sprintf "%06d", @num_labels}#{suffix}"
|
|
||||||
return name
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
||||||
908
asm/binary.rb
908
asm/binary.rb
File diff suppressed because it is too large
Load diff
320
asm/cstruct.rb
Normal file
320
asm/cstruct.rb
Normal file
|
|
@ -0,0 +1,320 @@
|
||||||
|
# Struct does some trickery with custom allocators so we can't subclass it without writing C.
|
||||||
|
# Instead we define a CStruct class that does something similar enough for our purpose. It is
|
||||||
|
# subclassed just like any other class. A nice side-effect of this syntax is that it is always
|
||||||
|
# clear that a CStruct is just a class and instances of the struct are objects.
|
||||||
|
#
|
||||||
|
# Some light metaprogramming is used to make the following syntax possible:
|
||||||
|
#
|
||||||
|
# class MachHeader < CStruct
|
||||||
|
# uint :magic
|
||||||
|
# int :cputype
|
||||||
|
# int :cpusubtype
|
||||||
|
# ...
|
||||||
|
# int :flags
|
||||||
|
# end
|
||||||
|
#
|
||||||
|
# Inheritance works as you would expect.
|
||||||
|
#
|
||||||
|
# class LoadCommand < CStruct
|
||||||
|
# uint32 :cmd
|
||||||
|
# uint32 :cmdsize
|
||||||
|
# end
|
||||||
|
#
|
||||||
|
# # inherits cmd and cmdsize as the first 2 fields
|
||||||
|
# class SegmentCommand < LoadCommand
|
||||||
|
# string :segname, 16
|
||||||
|
# uint32 :vmaddr
|
||||||
|
#     uint32 :vmsize
|
||||||
|
# end
|
||||||
|
#
|
||||||
|
# Nothing tricky or confusing there. Members of a CStruct class are declared in the
|
||||||
|
# class definition. A different definition using a more static approach probably wouldn't
|
||||||
|
# be very hard... if performance is critical ... but then why are you using Ruby? ;-)
|
||||||
|
|
||||||
|
class CStruct

  ###################
  # Class Constants #
  ###################

  # Size in bytes of each member type. The :string entry is a lambda that is
  # called with the member's declaration options; the first option is the size.
  SizeMap = {
    :int8   => 1,
    :uint8  => 1,
    :int16  => 2,
    :uint16 => 2,
    :int32  => 4,
    :uint32 => 4,
    :string => lambda { |*opts| opts.first }, # first opt is size
    # the last 3 are to make the language more C-like
    :int    => 4,
    :uint   => 4,
    :char   => 1
  }

  # Array#pack directives (32-bit) for each member type. The :string entry is
  # a lambda that pads with NULs / truncates the value to the declared size.
  PackMap = {
    :int8   => 'c',
    :uint8  => 'C',
    :int16  => 's',
    :uint16 => 'S',
    :int32  => 'i',
    :uint32 => 'I',
    :string => lambda do |str, *opts|
      len = opts.first
      str.ljust(len, "\0")[0, len]
    end,
    # a few C-like names
    :int    => 'i',
    :uint   => 'I',
    :char   => 'C'
  }

  # Only needed when unpacking is different from packing, i.e. strings w/ lambdas in PackMap.
  UnpackMap = {
    :string => lambda do |str, *opts|
      len = opts.first
      # Consume exactly the declared field width from the FRONT of the buffer
      # so that members following the string are decoded from the right
      # position, then drop the NUL padding added when packing.
      field = str.slice!(0, len)
      field.sub(/\0*\z/, '')
    end
  }

  ##########################
  # Class Instance Methods #
  ##########################

  # Note: const_get and const_set are used so the constants are bound at runtime, to the
  # real class that has subclassed CStruct.
  #
  # All of this could probably be avoided with Ruby 1.9 and private class variables.
  # That is definitely something to experiment with.

  class <<self

    # Hook run whenever CStruct (or one of its descendants) is subclassed.
    # Gives the new class its own Members, MemberIndex, MemberSizes and
    # MemberOptions constants, seeded with clones of the parent's so that
    # declaring members on the subclass does not modify the parent.
    #
    # These "constants" are only constant references. Structs can be modified
    # after definition, but good practice is not to do so.
    def inherited(subclass)
      super
      parent = self
      defaults = {
        :Members       => [],
        :MemberIndex   => {},
        :MemberSizes   => {},
        :MemberOptions => {}
      }
      defaults.each do |const, default|
        value = begin
          parent.const_get(const).clone
        rescue NameError
          # CStruct itself defines none of these; start from empty.
          default
        end
        subclass.const_set(const, value)
      end
    end

    # Define a class-level declaration method for each type name (uint32,
    # string, ...). Calling it inside a subclass body appends a member of that
    # type; any extra arguments are stored as the member's options.
    SizeMap.keys.each do |type|
      define_method(type) do |name, *args|
        name = name.to_sym
        const_get(:MemberIndex)[name]   = const_get(:Members).size
        const_get(:MemberSizes)[name]   = type
        const_get(:MemberOptions)[name] = args
        const_get(:Members) << name
      end
    end

    # Return the number of members.
    def size
      const_get(:Members).size
    end
    alias_method :length, :size

    # Return the number of bytes occupied in memory or on disk.
    def bytesize
      const_get(:Members).inject(0) { |size, name| size + sizeof(name) }
    end

    # Return the size in bytes of the member called +name+.
    def sizeof(name)
      value = SizeMap[const_get(:MemberSizes)[name]]
      value.respond_to?(:call) ? value.call(*const_get(:MemberOptions)[name]) : value
    end

    # Build a new struct whose values are decoded from the byte string +bin+.
    def new_from_bin(bin)
      new.unserialize(bin)
    end

  end

  ####################
  # Instance Methods #
  ####################

  attr_reader :values

  # +args+ are the member values, in declaration order.
  def initialize(*args)
    @values = args
  end

  # Pack every member, in declaration order, into a single byte string.
  def serialize
    members.zip(pack_pattern, @values).map do |name, patt, value|
      if patt.is_a?(String)
        [value].pack(patt)
      else
        patt.call(value, *member_options[name])
      end
    end.join
  end

  # Replace this struct's values with those decoded from the byte string
  # +bin+. +bin+ itself is not modified. Returns self.
  def unserialize(bin)
    # dup (not clone) so a frozen argument still yields a mutable buffer, and
    # treat the buffer as raw bytes so slicing counts bytes, not characters.
    bin = bin.dup
    bin.force_encoding('BINARY') if bin.respond_to?(:force_encoding)
    @values = []
    members.zip(unpack_pattern).each do |name, patt|
      if patt.is_a?(String)
        @values += bin.unpack(patt)
        bin.slice!(0, sizeof(name))
      else
        # Lambdas consume their own bytes from the front of the buffer.
        @values << patt.call(bin, *member_options[name])
      end
    end
    self
  end

  # One PackMap entry (directive string or lambda) per member.
  def pack_pattern
    members.map { |name| PackMap[member_sizes[name]] }
  end

  # One entry per member; UnpackMap takes precedence over PackMap.
  def unpack_pattern
    members.map { |name| UnpackMap[member_sizes[name]] || PackMap[member_sizes[name]] }
  end

  # Read a value by position (Numeric) or by member name (String / Symbol).
  def [](name_or_idx)
    case name_or_idx
    when Numeric
      @values[name_or_idx]
    when String, Symbol
      @values[member_index[name_or_idx.to_sym]]
    else
      raise ArgumentError, "expected name or index, got #{name_or_idx.inspect}"
    end
  end

  # Write a value by position (Numeric) or by member name (String / Symbol).
  def []=(name_or_idx, value)
    case name_or_idx
    when Numeric
      @values[name_or_idx] = value
    when String, Symbol
      @values[member_index[name_or_idx.to_sym]] = value
    else
      raise ArgumentError, "expected name or index, got #{name_or_idx.inspect}"
    end
  end

  # Structs are equal when +other+ is an instance of this struct's class (or
  # a subclass) holding equal values. The type check comes first so comparing
  # against an unrelated object returns false instead of raising.
  def ==(other)
    other.is_a?(self.class) && other.values == @values
  end

  # Some of these are just to quack like Ruby's built-in Struct. YAGNI, but can't hurt either.

  def each(&block)
    @values.each(&block)
  end

  def each_pair(&block)
    members.zip(@values).each(&block)
  end

  # Number of members.
  def size
    members.size
  end
  alias_method :length, :size

  def sizeof(name)
    self.class.sizeof(name)
  end

  def bytesize
    self.class.bytesize
  end

  alias_method :to_a, :values

  # A few convenience methods that expose the per-class member tables.

  def members
    self.class::Members
  end

  def member_index
    self.class::MemberIndex
  end

  def member_sizes
    self.class::MemberSizes
  end

  def member_options
    self.class::MemberOptions
  end

  # The last expression is returned, so return self instead of junk.
  self
end
|
||||||
|
|
||||||
|
|
||||||
|
# a small test
|
||||||
|
# A small smoke test, run only when this file is executed directly: declare a
# struct, print its layout, then serialize / unserialize it and compare.
if $0 == __FILE__
  class MachHeader < CStruct
    uint   :magic
    int    :cputype
    int    :cpusubtype
    string :segname, 16
  end

  [MachHeader::Members, MachHeader::MemberIndex, MachHeader::MemberSizes].each do |table|
    puts table.inspect
  end
  puts "# of MachHeader members: #{MachHeader.size}, size in bytes: #{MachHeader.bytesize}"

  mh = MachHeader.new(0xfeedface, 7, 3, "foobar")
  [:magic, :cputype, :cpusubtype, :segname].each do |field|
    puts "#{field}(#{MachHeader.sizeof(field)}): #{mh[field].inspect}"
  end
  puts mh.pack_pattern.inspect

  # Round trip: struct -> bytes -> struct -> bytes.
  binstr = mh.serialize
  puts "values: #{mh.values.inspect}"
  newmh = MachHeader.new_from_bin(binstr)
  puts "new values: #{newmh.values.inspect}"
  newbinstr = newmh.serialize
  puts "serialized: #{binstr.inspect}"
  puts "unserialized: #{newbinstr.inspect}"
  puts "new == old ? #{newbinstr == binstr}"
end
|
||||||
7
asm/elfsymtab.rb
Normal file
7
asm/elfsymtab.rb
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
module Assembler

  # Placeholder for an ELF symbol table; it currently adds nothing to Symtab.
  class ELFSymtab < Symtab
  end

end
|
||||||
9
asm/elfwriter.rb
Normal file
9
asm/elfwriter.rb
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
module Assembler

  # Placeholder for an ELF object writer; it currently adds nothing to ObjWriter.
  class ELFWriter < ObjWriter
  end

end
|
||||||
164
asm/macho.rb
Normal file
164
asm/macho.rb
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
require 'asm/cstruct'
|
||||||
|
|
||||||
|
# The MachO module contains constants and structures related to the
|
||||||
|
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||||
|
#
|
||||||
|
# Constants and structures as defined in /usr/include/mach-o/loader.h on
|
||||||
|
# Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h> and <mach-o/nlist.h>.
|
||||||
|
|
||||||
|
module MachO

  ###############
  # Mach header #
  ###############

  # First structure in every Mach object file.
  class MachHeader < CStruct
    uint32 :magic
    int32  :cputype
    int32  :cpusubtype
    uint32 :filetype
    uint32 :ncmds
    uint32 :sizeofcmds
    uint32 :flags
  end

  # magic
  MH_MAGIC = 0xfeedface         # Mach magic number.
  MH_CIGAM = 0xcefaedfe         # In the reverse byte-order.

  # filetype
  MH_OBJECT     = 0x1
  MH_EXECUTE    = 0x2
  MH_FVMLIB     = 0x3
  MH_CORE       = 0x4
  MH_PRELOAD    = 0x5
  MH_DYLIB      = 0x6
  MH_DYLINKER   = 0x7
  MH_BUNDLE     = 0x8
  MH_DYLIB_STUB = 0x9
  MH_DSYM       = 0xa

  # cputype / cpusubtype (only Intel for now)
  CPU_TYPE_X86        = 7
  CPU_TYPE_I386       = CPU_TYPE_X86
  CPU_SUBTYPE_X86_ALL = 3


  ############################
  # Load commands / segments #
  ############################

  # Common prefix of every load command; the structs below inherit these two
  # members as their first fields.
  class LoadCommand < CStruct
    uint32 :cmd
    uint32 :cmdsize
  end

  # cmd values for LoadCommand structs (incomplete!)
  LC_SEGMENT    = 0x1
  LC_SYMTAB     = 0x2
  LC_SYMSEG     = 0x3
  LC_THREAD     = 0x4
  LC_UNIXTHREAD = 0x5

  class SegmentCommand < LoadCommand
    string :segname, 16
    uint32 :vmaddr
    uint32 :vmsize
    uint32 :fileoff
    uint32 :filesize
    int32  :maxprot
    int32  :initprot
    uint32 :nsects
    uint32 :flags
  end

  # maxprot / initprot values
  VM_PROT_NONE      = 0x00
  VM_PROT_READ      = 0x01
  VM_PROT_WRITE     = 0x02
  VM_PROT_EXECUTE   = 0x04
  VM_PROT_NO_CHANGE = 0x08
  VM_PROT_COPY      = 0x10

  class SymtabCommand < LoadCommand
    uint32 :symoff              # Points to an array of Nlist structs.
    uint32 :nsyms               # Number of entries in said array.
    uint32 :stroff              # Offset of the string table.
    uint32 :strsize             # Size of the string table in bytes.
  end

  # Which struct class represents each supported load command type.
  LoadCommandStructMap = {
    LC_SEGMENT => SegmentCommand,
    LC_SYMTAB  => SymtabCommand
  }


  ############
  # Sections #
  ############

  class Section < CStruct
    string :sectname, 16
    string :segname, 16
    uint32 :addr
    uint32 :size
    uint32 :offset
    uint32 :align
    uint32 :reloff
    uint32 :nreloc
    uint32 :flags
    uint32 :reserved1
    uint32 :reserved2
  end

  # Values for the type bitfield (mask 0x000000ff) of the flags field.
  # (incomplete!)
  S_REGULAR          = 0x0
  S_ZEROFILL         = 0x1
  S_CSTRING_LITERALS = 0x2


  ########################
  # Symbol table support #
  ########################

  # Nlist is used to describe symbols.
  class Nlist < CStruct
    uint32 :n_strx              # Index into string table. Index of zero is the empty string.
    uint8  :n_type              # Type flag (see below).
    uint8  :n_sect              # Section number (from 1) or NO_SECT.
    uint16 :n_desc              # TODO See <mach-o/stab.h>.
    uint32 :n_value             # The symbol's value (or stab offset).
  end

  # Type flag (see <mach-o/nlist.h> for more details)
  # ---------
  #
  # This field consists of four bitfields:
  #
  #     uchar N_STAB : 3
  #     uchar N_PEXT : 1
  #     uchar N_TYPE : 3
  #     uchar N_EXT  : 1
  #
  N_STAB = 0xe0                 # if any bits set => symbolic debugging info
  N_PEXT = 0x10                 # private external symbol bit
  N_TYPE = 0x0e                 # mask for the type bits
  N_EXT  = 0x01                 # external symbol bit, set for external symbols (e.g. globals)

  # Values for N_TYPE. (incomplete!)
  N_UNDF = 0x0                  # undefined, n_sect == NO_SECT
  N_ABS  = 0x2                  # absolute, n_sect == NO_SECT
  N_SECT = 0xe                  # defined in section number n_sect

  NO_SECT  = 0
  MAX_SECT = 255

end
|
||||||
364
asm/machofile.rb
Normal file
364
asm/machofile.rb
Normal file
|
|
@ -0,0 +1,364 @@
|
||||||
|
require 'asm/macho'
|
||||||
|
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
class MachOFile
|
||||||
|
|
||||||
|
include MachO
|
||||||
|
|
||||||
|
attr_accessor :header, :load_commands, :sections, :data
|
||||||
|
attr_accessor :current_segment
|
||||||
|
attr_accessor :text_offset
|
||||||
|
|
||||||
|
def initialize(filetype=MH_OBJECT)
|
||||||
|
@header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0)
|
||||||
|
@load_commands = [] # All defined segments.
|
||||||
|
@sections = {} # Map of segment names to lists of segments.
|
||||||
|
@section_disk_size = Hash.new(0) # Sections store their VM size so we need their sizes on disk.
|
||||||
|
@data = [] # Blobs of data that appear at the end of the file.
|
||||||
|
# (text, data, symtab, ...)
|
||||||
|
@current_segment = nil # An alias for the last defined segment.
|
||||||
|
|
||||||
|
# Leave room for __PAGEZERO, a single 0x1000 (4kb) page at 0x0. The
|
||||||
|
# __TEXT segment starts at 0x1000 and contains mach headers and load
|
||||||
|
# commands.
|
||||||
|
@text_offset = 0x1000
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a LoadCommand in this file. The header's ncmds and sizeofcmds
|
||||||
|
# fields are updated automatically to keep things in sync. If a block is
|
||||||
|
# given it is passed the new LoadCommand struct after all other
|
||||||
|
# initialization has been done.
|
||||||
|
#
|
||||||
|
# Other methods that create any type of load command should use this
|
||||||
|
# method to do so. Right now the only types supported are LC_SEGMENT
|
||||||
|
# and LC_SYMTAB. Modify asm/macho.rb to add structs for other types, and
|
||||||
|
# add them to LoadCommandStructMap.
|
||||||
|
|
||||||
|
def load_command(cmdtype)
|
||||||
|
struct = LoadCommandStructMap[cmdtype]
|
||||||
|
unless struct
|
||||||
|
raise "unsupported load command type: #{cmdtype.inspect}," +
|
||||||
|
" supported types: #{LoadCommandStructMap.keys.sort.inspect}"
|
||||||
|
end
|
||||||
|
|
||||||
|
# Fill in all the unknown fields with 0, this is nonsense for
|
||||||
|
# string fields but that doesn't really matter.
|
||||||
|
dummy_vals = [0] * (struct::Members.size - 2)
|
||||||
|
|
||||||
|
# cmd cmdsize ...
|
||||||
|
command = struct.new(cmdtype, struct.bytesize, *dummy_vals)
|
||||||
|
|
||||||
|
@load_commands << command
|
||||||
|
|
||||||
|
@header[:ncmds] += 1
|
||||||
|
@header[:sizeofcmds] += command.bytesize
|
||||||
|
|
||||||
|
yield(command) if block_given?
|
||||||
|
|
||||||
|
return command
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a segment in this file. If a block is given it is passed
|
||||||
|
# the new segment. You can chain calls to segment, it returns self.
|
||||||
|
#
|
||||||
|
# Mach object files should only contain one anonymous segment. This
|
||||||
|
# is not checked but should be kept in mind when crafting files.
|
||||||
|
def segment(name, &block)
|
||||||
|
@current_segment = load_command(LC_SEGMENT) do |seg|
|
||||||
|
seg[:segname] = name
|
||||||
|
block.call(seg) if block
|
||||||
|
end
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a section under the given segment. nsects and cmdsize are
|
||||||
|
# updated automatically. segname can't be derived from the segment
|
||||||
|
# that this section is defined under, as they can differ.
|
||||||
|
#
|
||||||
|
# Mach object files have the __text, __data, and other common
|
||||||
|
# sections all defined under one anonymous segment, but their segment
|
||||||
|
# names reflect their final positions after linking. The linker plonks
|
||||||
|
# them in the segment that they name.
|
||||||
|
def section(name, segname, data='', vmsize=data.size,
|
||||||
|
segment=@current_segment, type=S_REGULAR)
|
||||||
|
|
||||||
|
# Create the new section.
|
||||||
|
section = Section.new(name, segname, 0, vmsize, 0, 0, 0, 0, 0, 0, type)
|
||||||
|
|
||||||
|
# Add this section to the map of segment names to sections.
|
||||||
|
(@sections[segment[:segname]] ||= []) << section
|
||||||
|
@section_disk_size[name] = data.size
|
||||||
|
@data << data if data.size > 0
|
||||||
|
|
||||||
|
# Update the header.
|
||||||
|
@header[:sizeofcmds] += section.bytesize
|
||||||
|
|
||||||
|
# Update the segment.
|
||||||
|
segment[:nsects] += 1
|
||||||
|
segment[:cmdsize] += section.bytesize
|
||||||
|
|
||||||
|
yield(section) if block_given?
|
||||||
|
|
||||||
|
return section
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Define a standard text section under the current segment (if present).
|
||||||
|
#
|
||||||
|
# If there is no current segment then we act according to the file's type
|
||||||
|
# (specified in the header). Segments are created if they do not exist.
|
||||||
|
#
|
||||||
|
# When it is MH_OBJECT the text section is defined under a single,
|
||||||
|
# nameless segment, but the section's segment name is set to the name
|
||||||
|
# given here.
|
||||||
|
#
|
||||||
|
# For MH_EXECUTE files the text section goes under the segment with the
|
||||||
|
# name given (__TEXT).
|
||||||
|
|
||||||
|
def text(data, sectname='__text', segname='__TEXT')
|
||||||
|
unless @current_segment
|
||||||
|
segment(segname_based_on_filetype(segname)) do |seg|
|
||||||
|
seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
|
||||||
|
seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
section(sectname, segname, data) do |sect|
|
||||||
|
sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS
|
||||||
|
end
|
||||||
|
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a standard data section under the current segment (if present).
|
||||||
|
# This behaves similarly to the text method.
|
||||||
|
#
|
||||||
|
def data(data, sectname='__data', segname='__DATA')
|
||||||
|
unless @current_segment
|
||||||
|
segment(segname_based_on_filetype(segname)) do |seg|
|
||||||
|
seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||||
|
seg[:initprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
section(sectname, segname, data)
|
||||||
|
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a standard const section under the current segment (if present).
|
||||||
|
# This behaves similarly to the data method.
|
||||||
|
#
|
||||||
|
def const(data, sectname='__const', segname='__DATA')
|
||||||
|
unless @current_segment
|
||||||
|
segment(segname_based_on_filetype(segname)) do |seg|
|
||||||
|
seg[:maxprot] = VM_PROT_READ
|
||||||
|
seg[:initprot] = VM_PROT_READ
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
section(sectname, segname, data)
|
||||||
|
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a standard BSS section under the current segment (if present).
|
||||||
|
# This behaves similarly to the data method but accepts a VM size instead
|
||||||
|
# of a blob, and no data is written to file since this section is for
|
||||||
|
# uninitialized data.
|
||||||
|
#
|
||||||
|
def bss(vmsize, sectname='__bss', segname='__DATA')
|
||||||
|
unless @current_segment
|
||||||
|
segment(segname_based_on_filetype(segname)) do |seg|
|
||||||
|
seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||||
|
seg[:initprot] = VM_PROT_READ | VM_PROT_WRITE
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
section(sectname, segname, '', vmsize)
|
||||||
|
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Define a symbol table. This should usually be placed at the end of the
|
||||||
|
# file.
|
||||||
|
#
|
||||||
|
# This function is overloaded to accept either an array of Nlist structs
|
||||||
|
# packed into a byte string (i.e. a C array) and a string table, or a
|
||||||
|
# single parameter: any type of Symtab.
|
||||||
|
|
||||||
|
def symtab(nlist_ary_or_symtab, stab=nil)
|
||||||
|
if stab.nil?
|
||||||
|
symtab = nlist_ary_or_symtab
|
||||||
|
stab = symtab.stab
|
||||||
|
nlist_ary = symtab.nlist_ary
|
||||||
|
else
|
||||||
|
nlist_ary = nlist_ary_or_symtab
|
||||||
|
end
|
||||||
|
|
||||||
|
load_command(LC_SYMTAB) do |st|
|
||||||
|
st[:nsyms] = nlist_ary.size
|
||||||
|
st[:strsize] = stab.size
|
||||||
|
# symoff and stroff are filled in when offsets are recalculated.
|
||||||
|
end
|
||||||
|
|
||||||
|
# puts ">>> Defining symbol table:"
|
||||||
|
# puts ">>> #{nlist_ary.size} symbols"
|
||||||
|
# puts ">>> stab = #{stab.inspect}"
|
||||||
|
# puts ">>> nlist_ary = #{nlist_ary.inspect}"
|
||||||
|
# puts ">>> (serialized) = #{nlist_ary.map{|n|n.serialize}.join.inspect}"
|
||||||
|
|
||||||
|
@data << nlist_ary.map {|n| n.serialize}.join
|
||||||
|
@data << stab
|
||||||
|
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Serialize the entire MachO file into a byte string. This is simple
|
||||||
|
# thanks to CStruct#serialize.
|
||||||
|
|
||||||
|
def serialize
|
||||||
|
# TODO sanity checks, e.g. assert(@header[:ncmds] == @load_command.size)
|
||||||
|
# ... perhaps an option to recalculate such data as well.
|
||||||
|
|
||||||
|
recalculate_offsets
|
||||||
|
|
||||||
|
# |------------------|
|
||||||
|
# | Mach Header |
|
||||||
|
# |------------------|
|
||||||
|
# | Segment 1 |
|
||||||
|
# | Section 1 | ---
|
||||||
|
# | Section 2 | --|--
|
||||||
|
# | ... | | |
|
||||||
|
# | Segment 2 | | |
|
||||||
|
# | Section 4 | | |
|
||||||
|
# | Section 5 | | |
|
||||||
|
# | ... | | |
|
||||||
|
# | ... | | |
|
||||||
|
# | [Symtab cmd] | | |
|
||||||
|
# |------------------| | |
|
||||||
|
# | Section data 1 | <-- |
|
||||||
|
# | Section data 2 | <----
|
||||||
|
# | ... |
|
||||||
|
# | [Symtab data] |
|
||||||
|
# |------------------|
|
||||||
|
|
||||||
|
|
||||||
|
# dump the mach header
|
||||||
|
obj = @header.serialize
|
||||||
|
|
||||||
|
# dump each load command (which include the section headers under them)
|
||||||
|
obj += @load_commands.map do |cmd|
|
||||||
|
sects = @sections[cmd[:segname]] rescue []
|
||||||
|
sects.inject(cmd.serialize) do |data, sect|
|
||||||
|
data + sect.serialize
|
||||||
|
end
|
||||||
|
end.join
|
||||||
|
|
||||||
|
# and finally dump the blobs at the end
|
||||||
|
obj += @data.join
|
||||||
|
|
||||||
|
return obj
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Update the file offsets in SegmentCommand, SymtabCommand, and Section structs.
|
||||||
|
|
||||||
|
def recalculate_offsets
|
||||||
|
|
||||||
|
# Maintain the offset into the the file. This is used to update
|
||||||
|
# the various structures.
|
||||||
|
offset = @header.bytesize
|
||||||
|
|
||||||
|
# First pass over load commands. Most sizes are filled in here.
|
||||||
|
@load_commands.each do |cmd|
|
||||||
|
case cmd[:cmd]
|
||||||
|
|
||||||
|
when LC_SEGMENT
|
||||||
|
seg = cmd
|
||||||
|
sections = @sections[seg[:segname]]
|
||||||
|
section_size = sections.size * Section.bytesize
|
||||||
|
section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] }
|
||||||
|
section_disk_size = sections.inject(0) do |total, sect|
|
||||||
|
total + @section_disk_size[sect[:sectname]]
|
||||||
|
end
|
||||||
|
|
||||||
|
### TODO this should be redundant. try commenting it out one day.
|
||||||
|
seg[:nsects] = sections.size
|
||||||
|
seg[:cmdsize] = seg.bytesize + section_size
|
||||||
|
###
|
||||||
|
|
||||||
|
seg[:vmsize] = section_vm_size
|
||||||
|
seg[:filesize] = section_disk_size
|
||||||
|
|
||||||
|
when LC_SYMTAB
|
||||||
|
# nop
|
||||||
|
|
||||||
|
else
|
||||||
|
raise "unsupported load command: #{cmd.inspect}"
|
||||||
|
end
|
||||||
|
|
||||||
|
offset += cmd[:cmdsize]
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# offset now points to the end of the Mach-O headers, or the beginning
|
||||||
|
# of the binary blobs of section data at the end.
|
||||||
|
|
||||||
|
# Second pass over load commands. Fill in file offsets.
|
||||||
|
@load_commands.each do |cmd|
|
||||||
|
case cmd[:cmd]\
|
||||||
|
|
||||||
|
when LC_SEGMENT
|
||||||
|
seg = cmd
|
||||||
|
sections = @sections[seg[:segname]]
|
||||||
|
seg[:fileoff] = offset
|
||||||
|
sections.each do |sect|
|
||||||
|
sect[:offset] = offset
|
||||||
|
offset += @section_disk_size[sect[:sectname]]
|
||||||
|
end
|
||||||
|
|
||||||
|
when LC_SYMTAB
|
||||||
|
st = cmd
|
||||||
|
st[:symoff] = offset
|
||||||
|
offset += st[:nsyms] * Nlist.bytesize
|
||||||
|
st[:stroff] = offset
|
||||||
|
offset += st[:strsize]
|
||||||
|
|
||||||
|
|
||||||
|
# No else clause is necessary, the first iteration should have caught them.
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
end # @load_commands.each
|
||||||
|
|
||||||
|
end # def recalculate_offsets
|
||||||
|
|
||||||
|
|
||||||
|
#######
|
||||||
|
private
|
||||||
|
#######
|
||||||
|
|
||||||
|
# Pick the segment name to use for the kind of Mach-O file being written.
#
# segname - the segment name requested by the caller.
#
# Returns '' when @header[:filetype] is MH_OBJECT, and segname unchanged
# when it is MH_EXECUTE.
# Raises RuntimeError for any other file type.
def segname_based_on_filetype(segname)
  # `when X then y` is used instead of `when X: y`; the colon form was
  # removed in Ruby 1.9 and no longer parses.
  case @header[:filetype]
  when MH_OBJECT then ''
  when MH_EXECUTE then segname
  else
    raise "unsupported MachO file type! #{@header.inspect}"
  end
end
|
||||||
|
|
||||||
|
|
||||||
|
end # class MachOFile
|
||||||
|
|
||||||
|
end # module Assembler
|
||||||
29
asm/machosym.rb
Normal file
29
asm/machosym.rb
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
require 'asm/macho'
|
||||||
|
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
# One symbol destined for a Mach-O symbol table: a name plus the type,
# section number, description and value that are handed to MachO::Nlist.
class MachOSym

  attr_accessor :name, :type, :segnum, :desc, :value

  def initialize(name, type, segnum, desc, value)
    @name, @type, @segnum, @desc, @value = name, type, segnum, desc, value
  end

  # Build the MachO::Nlist entry for this symbol.
  #
  # strx - passed through as the first Nlist field (the caller supplies
  #        the position of this symbol's name in the string table).
  def to_nlist(strx)
    MachO::Nlist.new(strx, type, segnum, desc, value)
  end

  # A symbol prints as its name.
  def to_s
    name
  end

end
|
||||||
|
|
||||||
|
end
|
||||||
77
asm/machosymtab.rb
Normal file
77
asm/machosymtab.rb
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
require 'asm/macho'
|
||||||
|
require 'asm/machosym'
|
||||||
|
require 'asm/symtab'
|
||||||
|
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
# Symbol table for Mach-O output.  Supplies the const/bss base offsets that
# Symtab expects its subclasses to define, and renders the collected labels,
# constants and variables as nlist entries plus a string table.
class MachOSymtab < Symtab

  include MachO

  # Base value added to a constant's offset by Symtab#const.
  def const_offset
    return 0x2000
  end

  # Base value added to a variable's offset by Symtab#var.
  def bss_offset
    # TODO figure out how to calculate these, or how to let the linker do it!
    #      ... relocation tables perhaps?
    return 0x2800
  end

  # Returns an Array of MachOSym covering every label, constant and
  # variable, each group ordered by its stored address/offset.
  def all_symbols
    symbols = []

    # Functions (section #1, __text)
    #
    # All labels are exported. This should be changed and only functions exported!
    # TODO fixme ...
    #
    # Note: Sorting a Ruby hash gives an alist, e.g. [[<key>, <value>], ...]
    #       We can use map on it as if it were a hash so it works nicely.
    #
    symbols +=
      @labels.sort { |a,b| a[1] <=> b[1] }.
        map do |name,addr|
          MachOSym.new(name, N_SECT | N_EXT, 1, 0, addr)
        end

    # Constants (section #2, __const)
    symbols += @consts.sort { |a,b| a[1] <=> b[1] }.
      map do |name, addr|
        MachOSym.new(name, N_SECT, 2, 0, addr)
      end

    # Variables (section #3, __bss)
    #
    # TODO FIXME the last var exported ends up after main somewhere... WTF?!
    symbols += @vars.sort { |a,b| a[1] <=> b[1] }.
      map do |name, addr|
        MachOSym.new(name, N_SECT, 3, 0, addr)
      end

    return symbols
  end

  # Returns an Array with one nlist entry per symbol from all_symbols.
  #
  # Each entry's string index is the offset of the symbol's name inside the
  # string returned by #stab.  Symbols that share a name share one index.
  def nlist_ary
    symbols = {}   # name (as a Symbol) => string table offset
    strx = 1       # offset 0 is the leading null byte of the string table
    ary = []
    all_symbols.each do |sym|
      key = sym.name.to_sym
      unless symbols.has_key?(key)
        symbols[key] = strx
        strx += sym.name.length + 1  # +1 for the null byte
      end
      ary << sym.to_nlist(symbols[key])
    end
    return ary
  end

  # Returns the string table: every distinct symbol name, null-terminated,
  # preceded by a single null byte.
  def stab
    # Names are de-duplicated (first occurrence wins) because nlist_ary hands
    # out one offset per distinct name; emitting a repeated name twice would
    # shift every later name away from the offset recorded for it.
    names = all_symbols.map { |sym| sym.name.to_s }.uniq

    # The empty strings result in a string that begins and ends with a
    # null byte.
    ['', names, ''].flatten.join("\0")
  end

end
|
||||||
|
|
||||||
|
end
|
||||||
26
asm/machowriter.rb
Normal file
26
asm/machowriter.rb
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
### XXX development hack!
|
||||||
|
# Development stub: registers a hard-coded symbol table containing only an
# exported `_main` at address 0 in section 1.
#
# Builds one MachO::Nlist per stub entry plus the matching string table and
# hands both to #symtab.  Returns whatever #symtab returns.
def stub_symtab!
  text_segnum = 1
  symtab_stub = {
    :functions => [
      # name      type            segnum       addr
      ['_main', N_SECT | N_EXT, text_segnum, 0x0]
    ]
  }

  nlist_ary = []
  stab = "\0".dup   # string table starts with a single null byte
  strx = 1          # offset of the next name within stab

  symtab_stub[:functions].each do |name, type, segnum, addr|
    nlist_ary << MachO::Nlist.new(strx, type, segnum, 0, addr)
    stab << "#{name}\0"
    # Advance past the name just appended and its terminating null so the
    # next entry's index points at the start of its own name.
    strx += name.length + 1
  end
  symtab(nlist_ary, stab)
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
26
asm/objwriter.rb
Normal file
26
asm/objwriter.rb
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
class UnimplementedMethodError < RuntimeError; end
|
||||||
|
|
||||||
|
|
||||||
|
# Abstract base class for object file writers.
#
# Subclasses override the methods listed at the bottom; the versions defined
# here only raise UnimplementedMethodError carrying the method's name.
class ObjWriter

  # Serialize the object file and write the result to filename in binary
  # mode.
  def write!(filename)
    # Serialize before opening the file so that a failing #serialize does
    # not leave a truncated file behind.
    data = serialize
    File.open(filename, 'wb') do |file|
      file.write(data)
    end
  end

  # Raise UnimplementedMethodError with name as its message.
  #
  # NOTE: this shadows Kernel#fail for ObjWriter instances.
  def fail(name)
    raise UnimplementedMethodError, name
  end

  # These methods must be defined for most uses of the library.
  #
  # The stubs accept (and ignore) any arguments so that calling an
  # unimplemented method always reports UnimplementedMethodError rather than
  # an ArgumentError about the stub's arity.
  %w[header segment section text data bss symtab serialize].each do |name|
    define_method(name) { |*_args| fail(name) }
  end

end
|
||||||
|
|
||||||
|
end
|
||||||
32
asm/registers.rb
Normal file
32
asm/registers.rb
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
require 'asm/regproxy'
|
||||||
|
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
module Registers

  # Table of x86 registers of every size.  A register's number is the
  # index of the row it appears in; its position within the row selects
  # the size: 2 ** position bytes (so 8, 16 and 32 bits).
  Registers = [ [:al, :ax, :eax],  # 0
                [:cl, :cx, :ecx],  # 1
                [:dl, :dx, :edx],  # 2
                [:bl, :bx, :ebx],  # 3
                [:ah, :sp, :esp],  # 4
                [:ch, :bp, :ebp],  # 5
                [:dh, :si, :esi],  # 6
                [:bh, :di, :edi]   # 7
              ]

  # Define one constant per register (AL, AX, EAX, ...), each holding a
  # RegisterProxy built from the register's symbol, its size in bits and
  # its register number.
  Registers.each_with_index do |row, number|
    row.each_with_index do |sym, position|
      bits = 8 << position
      const_set(sym.to_s.upcase, RegisterProxy.new(sym, bits, number))
    end
  end

end
|
||||||
|
|
||||||
|
end
|
||||||
67
asm/regproxy.rb
Normal file
67
asm/regproxy.rb
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
# Acts like a register and can be used as the base or index in an
# effective address.
#
# e.g. [EAX] or [ESI+EBX] or [EAX + 0xff] or [(EAX + EDX) * 2]
#
# Note that the scale applies to the result of `+`: Ruby evaluates
# `EAX + EDX * 2` as `EAX + (EDX * 2)`, and `*` raises when the receiver
# has no index yet, so the sum has to be parenthesized.
class RegisterProxy

  attr_reader :name, :size, :regnum
  attr_reader :base, :index, :scale

  # name   - the register's name (e.g. :eax).
  # size   - the register's size as given by the caller.
  # regnum - the register's number.
  def initialize(name, size, regnum)
    @name = name    # attrs are read-only so sharing is ok
    @size = size
    @regnum = regnum
    @base = self
  end

  # Returns a copy of this proxy with index recorded as its index; the
  # receiver is left untouched.
  # Raises RuntimeError if an index has already been given.
  def +(index)
    raise "index already specified" if @index
    new_reg = self.clone
    new_reg.instance_variable_set('@index', index)
    new_reg
  end

  # Returns a copy of this proxy with scale recorded as its scale; the
  # receiver is left untouched, matching the behaviour of `+`.
  # Raises RuntimeError if there is no index yet, if a scale has already
  # been given, or if scale is not one of 1, 2, 4 or 8.
  def *(scale)
    raise "index must come first" unless @index
    # Check the stored scale, not the argument: testing the argument made
    # every call raise.
    raise "scale already specified" if @scale
    # \A and \z anchor the whole string; ^ and $ would also accept input
    # with extra lines.
    raise "unsupported scale: #{scale}" unless scale.to_s.match(/\A[1248]\z/)
    new_reg = self.clone
    new_reg.instance_variable_set('@scale', scale)
    new_reg
  end

  # Returns the scale (truthy) if one has been given, nil otherwise.
  def scale?
    @scale
  end

  # Returns the index (truthy) if one has been given, nil otherwise.
  def index?
    @index
  end

  # True when this proxy is a bare register: no index and no scale.
  def register?
    @scale.nil? && @index.nil?
  end

  # Renders as name, then "+index" and "*scale" when present.
  def to_s
    @name.to_s +
      (@index ? "+#{@index}" : '') +
      (@scale ? "*#{@scale}" : '')
  end

  def inspect
    to_s
  end

end
|
||||||
|
|
||||||
|
end
|
||||||
89
asm/symtab.rb
Normal file
89
asm/symtab.rb
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
module Assembler
|
||||||
|
|
||||||
|
# Base symbol table: tracks labels, bss variables, constants and functions,
# along with the running sizes of the const and bss sections.
class Symtab

  attr_reader :const_data, :bss_size

  def initialize
    # Name => address/offset tables.
    @vars   = {}   # bss variables
    @consts = {}   # constants
    @funcs  = {}   # functions

    # Initial data to load into memory (data for __DATA segment).
    @const_data = ''

    # Running section sizes.
    @const_size = 0
    @bss_size   = 0

    # Label name => location.  Looking up an unknown label is an error.
    @labels = Hash.new {|h, key| raise "undefined label: #{key}"}

    # Counters used to generate unique labels.
    @num_labels = 0
    @num_labels_with_suffix = Hash.new(0)
  end

  ####
  ## NB: Concrete subclasses must define methods named:
  ##     bss_offset, and const_offset
  ####

  # Generate a unique label of the form L<6-digit counter>, followed by
  # _<suffix>_<per-suffix counter> when a suffix is given.
  def unique_label(suffix=nil)
    @num_labels += 1
    tail = suffix
    if suffix
      count = (@num_labels_with_suffix[suffix] += 1)
      tail = "_#{suffix}_#{count}"
    end
    "L#{format('%06d', @num_labels)}#{tail}"
  end

  # Record addr as the location of label name.  Returns name.
  def deflabel(name, addr)
    @labels[name] = addr
    name
  end

  # Location of label name; raises if the label was never defined.
  def lookup_label(name)
    @labels[name]
  end

  # Reserve bytes for variable name at the current end of bss.
  # Returns the new bss size.
  def defvar(name, bytes)
    offset = @bss_size
    @vars[name] = offset
    @bss_size = offset + bytes
  end

  # Record constant name at the current end of the const section and append
  # value to the const data.  Returns the const data.
  #
  # NOTE(review): value is always packed with 'i' regardless of bytes, so
  # the recorded size and the data length only agree when bytes matches the
  # packed width -- confirm callers only pass that width.
  def defconst(name, value, bytes)
    offset = @const_size
    @consts[name] = offset
    @const_size = offset + bytes
    @const_data.concat([value].pack('i'))
  end

  # Record addr as the address of function name.
  def defun(name, addr)
    @funcs[name] = addr
  end

  # Variable name's offset plus the subclass-provided bss_offset.
  def var(name)
    bss_offset + @vars[name]
  end

  # The stored offset of variable name, or nil when it is not defined.
  def var?(name)
    @vars[name]
  end

  # Constant name's offset plus the subclass-provided const_offset.
  def const(name)
    const_offset + @consts[name]
  end

  # The stored offset of constant name, or nil when it is not defined.
  def const?(name)
    @consts[name]
  end

end
|
||||||
|
|
||||||
|
end
|
||||||
21
asm/text.rb
21
asm/text.rb
|
|
@ -3,9 +3,6 @@
|
||||||
# sjs
|
# sjs
|
||||||
# may 2009
|
# may 2009
|
||||||
|
|
||||||
ROOT = __FILE__.sub(/\/asm\/text\.rb$/, '') unless defined? ROOT
|
|
||||||
$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT)
|
|
||||||
|
|
||||||
require 'asm/asm'
|
require 'asm/asm'
|
||||||
|
|
||||||
module Assembler
|
module Assembler
|
||||||
|
|
@ -15,8 +12,9 @@ module Assembler
|
||||||
# correct machine code, which isn't trivial.
|
# correct machine code, which isn't trivial.
|
||||||
class Text < AssemblerBase
|
class Text < AssemblerBase
|
||||||
|
|
||||||
def initialize(platform='linux')
|
def initialize(platform)
|
||||||
super
|
super
|
||||||
|
@vars = {} # Symbol table, maps names to locations in BSS.
|
||||||
@data = ''
|
@data = ''
|
||||||
@bss = ''
|
@bss = ''
|
||||||
@code = ''
|
@code = ''
|
||||||
|
|
@ -39,6 +37,13 @@ module Assembler
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
def var(name)
|
||||||
|
@vars[name]
|
||||||
|
end
|
||||||
|
alias_method :var?, :var
|
||||||
|
|
||||||
|
|
||||||
# Emit a line of code wrapped between a tab and a newline.
|
# Emit a line of code wrapped between a tab and a newline.
|
||||||
def emit(code, options={})
|
def emit(code, options={})
|
||||||
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
||||||
|
|
@ -106,6 +111,10 @@ module Assembler
|
||||||
emit("call #{label}")
|
emit("call #{label}")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def leave
|
||||||
|
emit("leave")
|
||||||
|
end
|
||||||
|
|
||||||
def neg(reg)
|
def neg(reg)
|
||||||
emit("neg #{reg}")
|
emit("neg #{reg}")
|
||||||
end
|
end
|
||||||
|
|
@ -166,5 +175,9 @@ module Assembler
|
||||||
emit("int 0x#{num.to_s(16)}")
|
emit("int 0x#{num.to_s(16)}")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def cdq
|
||||||
|
emit("cdq")
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
76
build.rb
76
build.rb
|
|
@ -3,10 +3,21 @@
|
||||||
require 'compiler'
|
require 'compiler'
|
||||||
require 'asm/text'
|
require 'asm/text'
|
||||||
require 'asm/binary'
|
require 'asm/binary'
|
||||||
|
require 'asm/machosymtab'
|
||||||
|
require 'asm/machofile'
|
||||||
|
|
||||||
|
# usage: build.rb <filename> [elf | macho ] [asm | bin]
|
||||||
|
|
||||||
# Command-line entry point.
#
# Reads the input filename, binary format (default 'elf') and output format
# (default 'asm') from ARGV, detects the platform with `uname -s`, builds the
# program, prints the resulting filename and exits.
#
# Raises RuntimeError when the input file is not readable.
def main
  filename = ARGV[0].to_s
  raise "can't read #{filename}" unless File.readable?(filename)
  binformat = ARGV[1] ? ARGV[1].downcase : 'elf'
  format = ARGV[2] ? ARGV[2].downcase : 'asm'
  platform = `uname -s`.chomp.downcase
  puts "Building #{format} from #{filename} for #{platform}, binformat is #{binformat} ..."
  # build accepts (filename, platform, binformat) and always uses the binary
  # assembler; the text-assembly path is build_asm.  Passing format as a
  # fourth argument to build raised ArgumentError, so dispatch on it here.
  outfile =
    if format == 'asm'
      build_asm(filename, platform, binformat)
    else
      build(filename, platform, binformat)
    end
  puts outfile
  exit
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -18,22 +29,17 @@ def base(filename)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# filename: input filename
|
# infile: input filename
|
||||||
|
# outfile: output filename
|
||||||
# asm: assembler to use
|
# asm: assembler to use
|
||||||
# returns: output filename
|
def compile(infile, outfile, asm)
|
||||||
def compile(filename, asm)
|
|
||||||
|
|
||||||
File.open(filename, 'r') do |input|
|
File.open(infile, 'r') do |input|
|
||||||
compiler = Compiler.new(input, asm)
|
|
||||||
compiler.compile
|
|
||||||
end
|
|
||||||
|
|
||||||
ext = asm.class.name.split('::').last[0,3].downcase == 'bin' ? 'bin' : 'asm'
|
|
||||||
outfile = "#{base(filename)}.#{ext}"
|
|
||||||
File.open(outfile, 'wb') do |out|
|
File.open(outfile, 'wb') do |out|
|
||||||
out.puts(asm.output)
|
compiler = Compiler.new(input, asm)
|
||||||
|
out.print(compiler.compile)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
return outfile
|
|
||||||
|
|
||||||
rescue ParseError => e
|
rescue ParseError => e
|
||||||
error("[error] #{e.message}")
|
error("[error] #{e.message}")
|
||||||
|
|
@ -44,12 +50,13 @@ rescue ParseError => e
|
||||||
end
|
end
|
||||||
|
|
||||||
# assemble using nasm, return resulting filename.
|
# assemble using nasm, return resulting filename.
|
||||||
def asm(filename, binformat='elf')
|
def assemble(filename, binformat='elf')
|
||||||
f = base(filename)
|
f = base(filename)
|
||||||
outfile = "#{f}.o"
|
outfile = "#{f}.o"
|
||||||
output = `nasm -f #{binformat} -g -o #{outfile} #{filename}`
|
output = `nasm -f #{binformat} -g -o #{outfile} #{filename} 2>&1`
|
||||||
if $?.exitstatus != 0
|
if $?.exitstatus != 0
|
||||||
puts output
|
puts
|
||||||
|
print output
|
||||||
raise "nasm failed: #{$?.exitstatus}"
|
raise "nasm failed: #{$?.exitstatus}"
|
||||||
end
|
end
|
||||||
return outfile
|
return outfile
|
||||||
|
|
@ -64,32 +71,41 @@ def link(filename, platform='linux')
|
||||||
else
|
else
|
||||||
raise "unsupported platform: #{platform}"
|
raise "unsupported platform: #{platform}"
|
||||||
end
|
end
|
||||||
output = `#{cmd} #{args} -o #{f} #{filename}`
|
output = `#{cmd} #{args} -o #{f} #{filename} 2>&1`
|
||||||
if $?.exitstatus != 0
|
if $?.exitstatus != 0
|
||||||
puts output
|
puts
|
||||||
|
print output
|
||||||
raise "ld failed: #{$?.exitstatus}"
|
raise "ld failed: #{$?.exitstatus}"
|
||||||
end
|
end
|
||||||
`chmod +x #{f}`
|
`chmod u+x #{f}`
|
||||||
return f
|
return f
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO Use a dependency injection framework for the assembler, and
|
def build(filename, platform='linux', binformat='elf')
|
||||||
# other parts as things become more modular.
|
objfile = base(filename) + '.o'
|
||||||
def build(filename, platform='linux', format='asm', binformat='elf')
|
symtab, objwriter =
|
||||||
bin = if format == 'asm'
|
case binformat
|
||||||
code = compile(filename, Assembler::Text.new(platform))
|
when 'elf': [Assembler::ELFSymtab.new, Assembler::ELFFile.new]
|
||||||
obj = asm( code, binformat )
|
when 'macho': [Assembler::MachOSymtab.new, Assembler::MachOFile.new]
|
||||||
link( obj, platform )
|
else
|
||||||
else # binary
|
raise "unsupported binary format: #{binformat}"
|
||||||
obj = compile(filename, Assembler::Binary.new(platform))
|
|
||||||
link( obj, platform )
|
|
||||||
end
|
end
|
||||||
return bin
|
compile(filename, objfile, Assembler::Binary.new(platform, symtab, objwriter))
|
||||||
|
exefile = link(objfile, platform)
|
||||||
|
return exefile
|
||||||
|
end
|
||||||
|
|
||||||
|
def build_asm(filename, platform='linux', binformat='elf')
|
||||||
|
asmfile = base(filename) + '.asm'
|
||||||
|
compile(filename, asmfile, Assembler::Text.new(platform))
|
||||||
|
objfile = assemble(asmfile, binformat)
|
||||||
|
exefile = link(objfile, platform)
|
||||||
|
return exefile
|
||||||
end
|
end
|
||||||
|
|
||||||
def run(filename)
|
def run(filename)
|
||||||
filename = "./#{filename}" unless filename.include?('/')
|
filename = "./#{filename}" unless filename.include?('/')
|
||||||
system(filename)
|
`#{filename}`
|
||||||
return $?.exitstatus
|
return $?.exitstatus
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
237
compiler.rb
237
compiler.rb
|
|
@ -12,6 +12,8 @@
|
||||||
# require 'rubygems'
|
# require 'rubygems'
|
||||||
# require 'unroller'
|
# require 'unroller'
|
||||||
|
|
||||||
|
require 'asm/registers'
|
||||||
|
|
||||||
class ParseError < StandardError
|
class ParseError < StandardError
|
||||||
attr_reader :caller, :context
|
attr_reader :caller, :context
|
||||||
def initialize(caller, context=nil)
|
def initialize(caller, context=nil)
|
||||||
|
|
@ -22,6 +24,8 @@ end
|
||||||
|
|
||||||
class Compiler
|
class Compiler
|
||||||
|
|
||||||
|
include Assembler::Registers
|
||||||
|
|
||||||
Keywords = %w[
|
Keywords = %w[
|
||||||
if else end while until repeat for to do break
|
if else end while until repeat for to do break
|
||||||
print
|
print
|
||||||
|
|
@ -82,7 +86,7 @@ class Compiler
|
||||||
asm.call(name)
|
asm.call(name)
|
||||||
else
|
else
|
||||||
# variable access
|
# variable access
|
||||||
asm.mov(:eax, "dword [#{name}]")
|
asm.mov(EAX, [asm.var(name)])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -95,7 +99,7 @@ class Compiler
|
||||||
elsif alpha?(@look)
|
elsif alpha?(@look)
|
||||||
identifier # or call
|
identifier # or call
|
||||||
elsif digit?(@look)
|
elsif digit?(@look)
|
||||||
asm.mov(:eax, get_number.to_i)
|
asm.mov(EAX, get_number.to_i)
|
||||||
else
|
else
|
||||||
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
|
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
|
||||||
end
|
end
|
||||||
|
|
@ -106,7 +110,7 @@ class Compiler
|
||||||
sign = @look
|
sign = @look
|
||||||
match(sign) if op?(:unary, sign)
|
match(sign) if op?(:unary, sign)
|
||||||
factor
|
factor
|
||||||
asm.neg(:eax) if sign == '-'
|
asm.neg(EAX) if sign == '-'
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse and translate a single term (factor or mulop). Result is in
|
# Parse and translate a single term (factor or mulop). Result is in
|
||||||
|
|
@ -115,14 +119,13 @@ class Compiler
|
||||||
signed_factor # Result in eax.
|
signed_factor # Result in eax.
|
||||||
|
|
||||||
while op?(:mul, @look)
|
while op?(:mul, @look)
|
||||||
pushing(:eax) do
|
asm.push(EAX)
|
||||||
case @look
|
case @look
|
||||||
when '*': multiply
|
when '*': multiply
|
||||||
when '/': divide
|
when '/': divide
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
# Parse and translate a general expression of terms. Result is
|
# Parse and translate a general expression of terms. Result is
|
||||||
# in eax.
|
# in eax.
|
||||||
|
|
@ -130,21 +133,21 @@ class Compiler
|
||||||
term # Result is in eax.
|
term # Result is in eax.
|
||||||
|
|
||||||
while op_char?(@look, :add)
|
while op_char?(@look, :add)
|
||||||
pushing(:eax) do
|
asm.push(EAX)
|
||||||
case @look
|
case @look
|
||||||
when '+': add
|
when '+': add
|
||||||
when '-': subtract
|
when '-': subtract
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
# Parse an addition operator and the 2nd term (b). The result is
|
# Parse an addition operator and the 2nd term (b). The result is
|
||||||
# left in eax. The 1st term (a) is expected on the stack.
|
# left in eax. The 1st term (a) is expected on the stack.
|
||||||
def add
|
def add
|
||||||
match('+')
|
match('+')
|
||||||
term # Result is in eax.
|
term # Result is in eax.
|
||||||
asm.add(:eax, '[esp]') # Add a to b.
|
asm.pop(EBX)
|
||||||
|
asm.add(EAX, EBX) # Add a to b.
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a subtraction operator and the 2nd term (b). The result is
|
# Parse a subtraction operator and the 2nd term (b). The result is
|
||||||
|
|
@ -152,8 +155,9 @@ class Compiler
|
||||||
def subtract
|
def subtract
|
||||||
match('-')
|
match('-')
|
||||||
term # Result, b, is in eax.
|
term # Result, b, is in eax.
|
||||||
asm.neg(:eax) # Fake the subtraction. a - b == a + -b
|
asm.pop(EBX)
|
||||||
asm.add(:eax, '[esp]') # Add a and -b.
|
asm.neg(EAX) # Fake the subtraction. a - b == a + -b
|
||||||
|
asm.add(EAX, EBX) # Add a(ebx) to -b(eax).
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse an addition operator and the 2nd term (b). The result is
|
# Parse an addition operator and the 2nd term (b). The result is
|
||||||
|
|
@ -161,7 +165,8 @@ class Compiler
|
||||||
def multiply
|
def multiply
|
||||||
match('*')
|
match('*')
|
||||||
signed_factor # Result is in eax.
|
signed_factor # Result is in eax.
|
||||||
asm.imul('dword [esp]') # Multiply a by b.
|
asm.pop(EBX)
|
||||||
|
asm.imul(EBX) # Multiply a by b.
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a division operator and the divisor (b). The result is
|
# Parse a division operator and the divisor (b). The result is
|
||||||
|
|
@ -169,14 +174,15 @@ class Compiler
|
||||||
def divide
|
def divide
|
||||||
match('/')
|
match('/')
|
||||||
signed_factor # Result is in eax.
|
signed_factor # Result is in eax.
|
||||||
asm.xchg(:eax, '[esp]') # Swap the divisor and dividend into
|
asm.pop(EBX)
|
||||||
|
asm.xchg(EAX, EBX) # Swap the divisor and dividend into
|
||||||
# the correct places.
|
# the correct places.
|
||||||
|
|
||||||
# idiv uses edx:eax as the dividend so we need to ensure that edx
|
# idiv uses edx:eax as the dividend so we need to ensure that edx
|
||||||
# is correctly sign-extended w.r.t. eax.
|
# is correctly sign-extended w.r.t. eax.
|
||||||
asm.cdq # Sign-extend eax into edx (Convert Double to
|
asm.cdq # Sign-extend eax into edx (Convert Double to
|
||||||
# Quad).
|
# Quad).
|
||||||
asm.idiv('dword [esp]') # Divide a (eax) by b ([esp]).
|
asm.idiv(EBX) # Divide a (eax) by b (ebx).
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -187,19 +193,22 @@ class Compiler
|
||||||
def bitor_expr
|
def bitor_expr
|
||||||
match('|')
|
match('|')
|
||||||
term
|
term
|
||||||
asm.or(:eax, '[esp]')
|
asm.pop(EBX)
|
||||||
|
asm.or_(EAX, EBX)
|
||||||
end
|
end
|
||||||
|
|
||||||
def bitand_expr
|
def bitand_expr
|
||||||
match('&')
|
match('&')
|
||||||
signed_factor
|
signed_factor
|
||||||
asm.and_(:eax, '[esp]')
|
asm.pop(EBX)
|
||||||
|
asm.and_(EAX, EBX)
|
||||||
end
|
end
|
||||||
|
|
||||||
def xor_expr
|
def xor_expr
|
||||||
match('^')
|
match('^')
|
||||||
term
|
term
|
||||||
asm.xor(:eax, '[esp]')
|
asm.pop(EBX)
|
||||||
|
asm.xor(EAX, EBX)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -232,9 +241,9 @@ class Compiler
|
||||||
def boolean_factor
|
def boolean_factor
|
||||||
if boolean?(@look)
|
if boolean?(@look)
|
||||||
if get_boolean == 'true'
|
if get_boolean == 'true'
|
||||||
asm.mov(:eax, -1)
|
asm.mov(EAX, -1)
|
||||||
else
|
else
|
||||||
asm.xor(:eax, :eax)
|
asm.xor(EAX, EAX)
|
||||||
end
|
end
|
||||||
scan
|
scan
|
||||||
else
|
else
|
||||||
|
|
@ -246,8 +255,8 @@ class Compiler
|
||||||
if @look == '!'
|
if @look == '!'
|
||||||
match('!')
|
match('!')
|
||||||
boolean_factor
|
boolean_factor
|
||||||
make_boolean(:eax) # ensure it is -1 or 0...
|
make_boolean(EAX) # ensure it is -1 or 0...
|
||||||
asm.not(:eax) # so that not is also boolean not
|
asm.not_(EAX) # so that 1's complement NOT is also boolean not
|
||||||
else
|
else
|
||||||
boolean_factor
|
boolean_factor
|
||||||
end
|
end
|
||||||
|
|
@ -255,8 +264,8 @@ class Compiler
|
||||||
|
|
||||||
# Convert any identifier to a boolean (-1 or 0). This is
|
# Convert any identifier to a boolean (-1 or 0). This is
|
||||||
# semantically equivalent to !!reg in C or Ruby.
|
# semantically equivalent to !!reg in C or Ruby.
|
||||||
def make_boolean(reg=:eax)
|
def make_boolean(reg=EAX)
|
||||||
end_label = asm.label(:endmakebool)
|
end_label = asm.mklabel(:endmakebool)
|
||||||
asm.cmp(reg, 0) # if false do nothing
|
asm.cmp(reg, 0) # if false do nothing
|
||||||
asm.jz(end_label)
|
asm.jz(end_label)
|
||||||
asm.mov(reg, -1) # truthy, make it true
|
asm.mov(reg, -1) # truthy, make it true
|
||||||
|
|
@ -267,7 +276,7 @@ class Compiler
|
||||||
expression
|
expression
|
||||||
if op_char?(@look, :rel)
|
if op_char?(@look, :rel)
|
||||||
scan
|
scan
|
||||||
pushing(:eax) do
|
asm.push(EAX)
|
||||||
case @value
|
case @value
|
||||||
when '==': eq_relation
|
when '==': eq_relation
|
||||||
when '!=': neq_relation
|
when '!=': neq_relation
|
||||||
|
|
@ -278,9 +287,8 @@ class Compiler
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
# a: [esp]
|
# a: <on the stack>
|
||||||
# b: eax
|
# b: eax
|
||||||
#
|
#
|
||||||
# If b - a is zero then a = b, and make_boolean will leave the zero
|
# If b - a is zero then a = b, and make_boolean will leave the zero
|
||||||
|
|
@ -288,14 +296,15 @@ class Compiler
|
||||||
# and make_boolean will leave -1 (true) for us in eax.
|
# and make_boolean will leave -1 (true) for us in eax.
|
||||||
def neq_relation
|
def neq_relation
|
||||||
expression
|
expression
|
||||||
asm.sub(:eax, '[esp]')
|
asm.pop(EBX)
|
||||||
|
asm.sub(EAX, EBX)
|
||||||
make_boolean
|
make_boolean
|
||||||
end
|
end
|
||||||
|
|
||||||
# Invert the != test for equal.
|
# Invert the != test for equal.
|
||||||
def eq_relation
|
def eq_relation
|
||||||
neq_relation
|
neq_relation
|
||||||
asm.not(:eax)
|
asm.not_(EAX)
|
||||||
end
|
end
|
||||||
|
|
||||||
# > and < are both implemented in terms of jl (jump if less than).
|
# > and < are both implemented in terms of jl (jump if less than).
|
||||||
|
|
@ -303,6 +312,12 @@ class Compiler
|
||||||
# and order the terms appropriately for each function. As for >=
|
# and order the terms appropriately for each function. As for >=
|
||||||
# and <=, they in turn are implemented in terms of > and <. a is
|
# and <=, they in turn are implemented in terms of > and <. a is
|
||||||
# greater than or equal to b if and only if a is *not* less than b.
|
# greater than or equal to b if and only if a is *not* less than b.
|
||||||
|
#
|
||||||
|
# Note: This was done to minimize the number of instructions that
|
||||||
|
# the assembler needed to implement, but since the Jcc
|
||||||
|
# instructions are very cheap to implement this is no longer
|
||||||
|
# a concern.
|
||||||
|
|
||||||
|
|
||||||
# The next 4 relations all compare 2 values a and b, then return
|
# The next 4 relations all compare 2 values a and b, then return
|
||||||
# true (-1) if the difference was below zero and false (0)
|
# true (-1) if the difference was below zero and false (0)
|
||||||
|
|
@ -311,58 +326,62 @@ class Compiler
|
||||||
# Invert the sense of the test?
|
# Invert the sense of the test?
|
||||||
invert = options[:invert]
|
invert = options[:invert]
|
||||||
|
|
||||||
true_label = asm.label(:cmp)
|
true_label = asm.mklabel(:cmp)
|
||||||
end_label = asm.label(:endcmp)
|
end_label = asm.mklabel(:endcmp)
|
||||||
asm.cmp(a, b)
|
asm.cmp(a, b)
|
||||||
asm.jl(true_label)
|
asm.jl(true_label)
|
||||||
|
|
||||||
asm.xor(:eax, :eax) # return false
|
asm.xor(EAX, EAX) # return false
|
||||||
asm.not(:eax) if invert # (or true if inverted)
|
asm.not_(EAX) if invert # (or true if inverted)
|
||||||
asm.jmp(end_label)
|
asm.jmp(end_label)
|
||||||
|
|
||||||
asm.emit_label(true_label)
|
asm.emit_label(true_label)
|
||||||
asm.xor(:eax, :eax) # return true
|
asm.xor(EAX, EAX) # return true
|
||||||
asm.not(:eax) unless invert # (or false if inverted)
|
asm.not_(EAX) unless invert # (or false if inverted)
|
||||||
|
|
||||||
asm.emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
# a: [esp]
|
# a: <on the stack>
|
||||||
# b: eax
|
# b: eax
|
||||||
#
|
#
|
||||||
# if a > b then b - a < 0
|
# if a > b then b - a < 0
|
||||||
def gt_relation
|
def gt_relation
|
||||||
expression
|
expression
|
||||||
cmp_relation(:eax, '[esp]') # b - a
|
asm.pop(EBX)
|
||||||
|
cmp_relation(EAX, EBX) # b - a
|
||||||
end
|
end
|
||||||
|
|
||||||
# a: [esp]
|
# a: <on the stack>
|
||||||
# b: eax
|
# b: eax
|
||||||
#
|
#
|
||||||
# if a < b then a - b < 0
|
# if a < b then a - b < 0
|
||||||
def lt_relation
|
def lt_relation
|
||||||
expression
|
expression
|
||||||
cmp_relation('[esp]', :eax) # a - b
|
asm.pop(EBX)
|
||||||
|
cmp_relation(EBX, EAX) # a - b
|
||||||
end
|
end
|
||||||
|
|
||||||
# a: [esp]
|
# a: <on the stack>
|
||||||
# b: eax
|
# b: eax
|
||||||
#
|
#
|
||||||
# if a >= b then !(a < b)
|
# if a >= b then !(a < b)
|
||||||
def ge_relation
|
def ge_relation
|
||||||
expression
|
expression
|
||||||
|
asm.pop(EBX)
|
||||||
# Compare them as in less than but invert the result.
|
# Compare them as in less than but invert the result.
|
||||||
cmp_relation('[esp]', :eax, :invert => true)
|
cmp_relation(EBX, EAX, :invert => true)
|
||||||
end
|
end
|
||||||
|
|
||||||
# a: [esp]
|
# a: <on the stack>
|
||||||
# b: eax
|
# b: eax
|
||||||
#
|
#
|
||||||
# if a <= b then !(a > b)
|
# if a <= b then !(a > b)
|
||||||
def le_relation
|
def le_relation
|
||||||
expression
|
expression
|
||||||
|
asm.pop(EBX)
|
||||||
# Compare them as in greater than but invert the result.
|
# Compare them as in greater than but invert the result.
|
||||||
cmp_relation(:eax, '[esp]', :invert => true)
|
cmp_relation(EAX, EBX, :invert => true)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -376,7 +395,7 @@ class Compiler
|
||||||
match('=')
|
match('=')
|
||||||
boolean_expression
|
boolean_expression
|
||||||
asm.defvar(name) unless asm.var?(name)
|
asm.defvar(name) unless asm.var?(name)
|
||||||
asm.mov("dword [#{name}]", :eax)
|
asm.mov([asm.var(name)], EAX)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a code block.
|
# Parse a code block.
|
||||||
|
|
@ -413,7 +432,7 @@ class Compiler
|
||||||
|
|
||||||
# Parse an if-else statement.
|
# Parse an if-else statement.
|
||||||
def if_else_stmt(label)
|
def if_else_stmt(label)
|
||||||
else_label = asm.label(:end_or_else)
|
else_label = asm.mklabel(:end_or_else)
|
||||||
end_label = else_label # only generated if else clause
|
end_label = else_label # only generated if else clause
|
||||||
# present
|
# present
|
||||||
condition
|
condition
|
||||||
|
|
@ -424,7 +443,7 @@ class Compiler
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
if @token == :keyword && @value == 'else'
|
if @token == :keyword && @value == 'else'
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
end_label = asm.label(:endif) # now we need the 2nd label
|
end_label = asm.mklabel(:endif) # now we need the 2nd label
|
||||||
asm.jmp(end_label)
|
asm.jmp(end_label)
|
||||||
asm.emit_label(else_label)
|
asm.emit_label(else_label)
|
||||||
@indent += 1
|
@indent += 1
|
||||||
|
|
@ -441,8 +460,8 @@ class Compiler
|
||||||
# block: Code to execute at the start of each iteration. (e.g. a
|
# block: Code to execute at the start of each iteration. (e.g. a
|
||||||
# condition)
|
# condition)
|
||||||
def simple_loop(name)
|
def simple_loop(name)
|
||||||
start_label = asm.label(:"loop_#{name}")
|
start_label = asm.mklabel(:"#{name}_loop")
|
||||||
end_label = asm.label(:"end_#{name}")
|
end_label = asm.mklabel(:"end_#{name}")
|
||||||
asm.emit_label(start_label)
|
asm.emit_label(start_label)
|
||||||
|
|
||||||
yield(end_label)
|
yield(end_label)
|
||||||
|
|
@ -482,27 +501,29 @@ class Compiler
|
||||||
# s = s + x
|
# s = s + x
|
||||||
# e
|
# e
|
||||||
def for_stmt
|
def for_stmt
|
||||||
counter = "[#{get_name}]"
|
counter = get_name
|
||||||
|
asm.defvar(counter)
|
||||||
match('=')
|
match('=')
|
||||||
boolean_expression # initial value
|
boolean_expression # initial value
|
||||||
asm.sub(:eax, 1) # pre-decrement because of the
|
asm.sub(EAX, 1) # pre-decrement because of the
|
||||||
# following pre-increment
|
# following pre-increment
|
||||||
asm.mov(counter, :eax) # stash the counter in memory
|
asm.mov([asm.var(counter)], EAX) # stash the counter in memory
|
||||||
match_word('to', :scan => true)
|
match_word('to', :scan => true)
|
||||||
boolean_expression # final value
|
boolean_expression # final value
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
asm.push(:eax) # stash final value on stack
|
asm.push(EAX) # stash final value on stack
|
||||||
final = '[esp]'
|
asm.mov(EDX, ESP)
|
||||||
|
final = [EDX]
|
||||||
|
|
||||||
simple_loop('for') do |end_label|
|
simple_loop('for') do |end_label|
|
||||||
asm.mov(:ecx, counter) # get the counter
|
asm.mov(ECX, [asm.var(counter)]) # get the counter
|
||||||
asm.add(:ecx, 1) # increment
|
asm.add(ECX, 1) # increment
|
||||||
asm.mov(counter, :ecx) # store the counter
|
asm.mov([asm.var(counter)], ECX) # store the counter
|
||||||
asm.cmp(final, :ecx) # check if we're done
|
asm.cmp(final, ECX) # check if we're done
|
||||||
asm.jz(end_label) # if so jump to the end
|
asm.jz(end_label) # if so jump to the end
|
||||||
end
|
end
|
||||||
|
|
||||||
asm.add(:esp, 4) # clean up the stack
|
asm.add(ESP, 4) # clean up the stack
|
||||||
end
|
end
|
||||||
|
|
||||||
# do 5
|
# do 5
|
||||||
|
|
@ -512,19 +533,19 @@ class Compiler
|
||||||
|
|
||||||
boolean_expression
|
boolean_expression
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
asm.mov(:ecx, :eax)
|
asm.mov(ECX, EAX)
|
||||||
|
|
||||||
start_label = asm.label(:do)
|
start_label = asm.mklabel(:do)
|
||||||
end_label = asm.label(:enddo)
|
end_label = asm.mklabel(:enddo)
|
||||||
asm.emit_label(start_label)
|
asm.emit_label(start_label)
|
||||||
|
|
||||||
asm.push(:ecx)
|
asm.push(ECX)
|
||||||
|
|
||||||
@indent += 1
|
@indent += 1
|
||||||
block(end_label)
|
block(end_label)
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
|
|
||||||
asm.pop(:ecx)
|
asm.pop(ECX)
|
||||||
|
|
||||||
match_word('end')
|
match_word('end')
|
||||||
asm.loop_(start_label)
|
asm.loop_(start_label)
|
||||||
|
|
@ -532,13 +553,13 @@ class Compiler
|
||||||
# Phony push! break needs to clean up the stack, but since we
|
# Phony push! break needs to clean up the stack, but since we
|
||||||
# don't know if there is a break at this point we fake a push and
|
# don't know if there is a break at this point we fake a push and
|
||||||
# always clean up the stack after.
|
# always clean up the stack after.
|
||||||
asm.sub(:esp, 4)
|
asm.sub(ESP, 4)
|
||||||
|
|
||||||
asm.emit_label(end_label)
|
asm.emit_label(end_label)
|
||||||
|
|
||||||
# If there was a break we have to clean up the stack here. If
|
# If there was a break we have to clean up the stack here. If
|
||||||
# there was no break we clean up the phony push above.
|
# there was no break we clean up the phony push above.
|
||||||
asm.add(:esp, 4)
|
asm.add(ESP, 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
def break_stmt(label)
|
def break_stmt(label)
|
||||||
|
|
@ -554,79 +575,83 @@ class Compiler
|
||||||
def condition
|
def condition
|
||||||
boolean_expression
|
boolean_expression
|
||||||
skip_whitespace
|
skip_whitespace
|
||||||
asm.cmp(:eax, 0) # 0 is false, anything else is true
|
asm.cmp(EAX, 0) # 0 is false, anything else is true
|
||||||
end
|
end
|
||||||
|
|
||||||
# print eax in hex format
|
# print eax in hex format
|
||||||
def print_stmt
|
def print_stmt
|
||||||
|
# variable names
|
||||||
|
d = 'DIGITS'
|
||||||
|
h = 'HEX'
|
||||||
|
|
||||||
asm.block do
|
asm.block do
|
||||||
# define a lookup table of digits
|
# define a lookup table of digits
|
||||||
unless var?('DIGITS')
|
unless var?(d)
|
||||||
defvar('DIGITS', 4)
|
defvar(d, 4)
|
||||||
mov('dword [DIGITS]', 0x33323130)
|
mov([var(d)], 0x33323130)
|
||||||
mov('dword [DIGITS+4]', 0x37363534)
|
mov([var(d)+4], 0x37363534)
|
||||||
mov('dword [DIGITS+8]', 0x62613938)
|
mov([var(d)+8], 0x62613938)
|
||||||
mov('dword [DIGITS+12]', 0x66656463)
|
mov([var(d)+12], 0x66656463)
|
||||||
end
|
end
|
||||||
# 3 dwords == 12 chars
|
# 3 dwords == 12 chars
|
||||||
defvar('HEX', 3) unless var?('HEX')
|
defvar(h, 3) unless var?(h)
|
||||||
# TODO check sign and prepend '-' if negative
|
# TODO check sign and prepend '-' if negative
|
||||||
mov('word [HEX]', 0x7830) # "0x" == [48, 120]
|
mov([var(h)], 0x7830) # "0x" == [48, 120]
|
||||||
mov('word [HEX+10]', 0xa) # newline + null terminator
|
mov([var(h)+10], 0xa) # newline + null terminator
|
||||||
end
|
end
|
||||||
boolean_expression
|
boolean_expression
|
||||||
asm.block do
|
asm.block do
|
||||||
# convert eax to a hex string
|
# convert eax to a hex string
|
||||||
lea(:esi, '[DIGITS]')
|
lea(ESI, [var(d)])
|
||||||
lea(:edi, '[HEX+9]')
|
lea(EDI, [var(h)+9])
|
||||||
# build the string backwards (right to left), byte by byte
|
# build the string backwards (right to left), byte by byte
|
||||||
mov(:ecx, 4)
|
mov(ECX, 4)
|
||||||
end
|
end
|
||||||
asm.emit_label(loop_label=asm.label)
|
asm.emit_label(loop_label=asm.mklabel)
|
||||||
asm.block do
|
asm.block do
|
||||||
# low nybble of nth byte
|
# low nybble of nth byte
|
||||||
movzx(:ebx, :al)
|
movzx(EBX, AL)
|
||||||
and_(:bl, 0x0f) # isolate low nybble
|
and_(BL, 0x0f) # isolate low nybble
|
||||||
movzx(:edx, 'byte [esi+ebx]')
|
movzx(EDX, [:byte, ESI+EBX])
|
||||||
mov('byte [edi]', :dl)
|
mov([EDI], DL)
|
||||||
dec(:edi)
|
dec(EDI)
|
||||||
# high nybble of nth byte
|
# high nybble of nth byte
|
||||||
movzx(:ebx, :al)
|
movzx(EBX, AL)
|
||||||
and_(:bl, 0xf0) # isolate high nybble
|
and_(BL, 0xf0) # isolate high nybble
|
||||||
shr(:bl, 4)
|
shr(BL, 4)
|
||||||
mov(:dl, 'byte [esi+ebx]')
|
mov(DL, [ESI+EBX])
|
||||||
mov('byte [edi]', :dl)
|
mov([EDI], DL)
|
||||||
dec(:edi)
|
dec(EDI)
|
||||||
shr(:eax, 8)
|
shr(EAX, 8)
|
||||||
loop_(loop_label)
|
loop_(loop_label)
|
||||||
# write(int fd, char *s, int n)
|
# write(int fd, char *s, int n)
|
||||||
mov(:eax, 4) # SYS_write
|
mov(EAX, 4) # SYS_write
|
||||||
lea(:ecx, '[HEX]') # ecx = &s
|
lea(ECX, [var(h)]) # ecx = &s
|
||||||
args = [1, # fd = 1 (STDOUT)
|
args = [1, # fd = 1 (STDOUT)
|
||||||
:ecx, # s = &s
|
ECX, # s = &s
|
||||||
11] # n = 11 (excluding term, max # of chars to print)
|
11] # n = 11 (excluding term, max # of chars to print)
|
||||||
case platform
|
case platform
|
||||||
when 'darwin' # on the stack, right to left (right @ highest addr)
|
when 'darwin' # on the stack, right to left (right @ highest addr)
|
||||||
####
|
####
|
||||||
# setup bogus stack frame
|
# setup bogus stack frame
|
||||||
push(:ebp)
|
push(EBP)
|
||||||
mov(:ebp, :esp)
|
mov(EBP, ESP)
|
||||||
sub(:esp, 36)
|
sub(ESP, 36)
|
||||||
####
|
####
|
||||||
args.reverse.each { |a| push(a) }
|
args.reverse.each { |a| push(a) }
|
||||||
push(:eax)
|
push(EAX)
|
||||||
int(0x80)
|
int(0x80)
|
||||||
####
|
####
|
||||||
# teardown bogus stack frame
|
# teardown bogus stack frame
|
||||||
xor(:eax, :eax)
|
xor(EAX, EAX)
|
||||||
add(:esp, 36)
|
add(ESP, 36)
|
||||||
pop(:ebx)
|
pop(EBX)
|
||||||
emit("leave")
|
leave
|
||||||
####
|
####
|
||||||
when 'linux'
|
when 'linux'
|
||||||
mov(:ebx, args[0])
|
mov(EBX, args[0])
|
||||||
mov(:ecx, args[1])
|
mov(ECX, args[1])
|
||||||
mov(:edx, args[2])
|
mov(EDX, args[2])
|
||||||
int(0x80)
|
int(0x80)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
@ -819,15 +844,15 @@ class Compiler
|
||||||
def pushing(reg)
|
def pushing(reg)
|
||||||
asm.push(reg)
|
asm.push(reg)
|
||||||
yield
|
yield
|
||||||
asm.add(:esp, 4)
|
asm.add(ESP, 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
def op(name)
|
def op(name)
|
||||||
pushing(:eax) do
|
asm.push(EAX)
|
||||||
get_op
|
get_op
|
||||||
expected(name) unless match_word(name)
|
expected(name) unless match_word(name)
|
||||||
yield
|
yield
|
||||||
end
|
asm.add(ESP, 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
288
elfwriter.c
288
elfwriter.c
|
|
@ -1,288 +0,0 @@
|
||||||
#include <libelf.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
|
|
||||||
/* _exit(0) */
|
|
||||||
/* uint8_t shell_code[] = { */
|
|
||||||
/* 0xbb, 0, 0, 0, 0, /\* mov ebx, 0 *\/ */
|
|
||||||
/* 0xb8, 1, 0, 0, 0, /\* mov eax, 1 *\/ */
|
|
||||||
/* 0xcd, 0x80 /\* int 0x80 *\/ */
|
|
||||||
/* }; */
|
|
||||||
|
|
||||||
/* uint32_t hash_words[] = { */
|
|
||||||
/* 0x12345678, */
|
|
||||||
/* 0xdeadc0de, */
|
|
||||||
/* 0x1234abcd */
|
|
||||||
/* }; */
|
|
||||||
|
|
||||||
#define header_size 0x100
|
|
||||||
#define text_addr 0x8048000 + header_size
|
|
||||||
#define text_size 0x02be00
|
|
||||||
#define data_addr text_addr + text_size
|
|
||||||
#define data_size 0x4e00
|
|
||||||
#define bss_addr data_addr + data_size
|
|
||||||
size_t bss_size = 0;
|
|
||||||
|
|
||||||
char string_table[] = {
|
|
||||||
/* Offset 0 */ '\0',
|
|
||||||
/* Offset 1 */ '.', 't', 'e', 'x', 't', '\0' ,
|
|
||||||
/* Offset 7 */ '.', 'b', 's', 's', '\0',
|
|
||||||
/* Offset 12 */ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', '\0'
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* Write a static 32-bit x86 ELF binary to filename. The file is
|
|
||||||
* clobbered without confirmation!
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
elf_write(const char *filename, uint8_t *code, size_t code_size)
|
|
||||||
{
|
|
||||||
int fd;
|
|
||||||
size_t shstrndx;
|
|
||||||
Elf *elf;
|
|
||||||
Elf_Scn *scn;
|
|
||||||
Elf_Data *data;
|
|
||||||
Elf32_Ehdr *ehdr;
|
|
||||||
Elf32_Phdr *phdr;
|
|
||||||
Elf32_Shdr *shdr;
|
|
||||||
|
|
||||||
if (elf_version(EV_CURRENT) == EV_NONE) {
|
|
||||||
printf("Failed to initialize ELF library!\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if ((fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, 0666)) < 0) {
|
|
||||||
printf("Can't open %s for writing.\n", filename);
|
|
||||||
perror("[elf_write]");
|
|
||||||
return -2;
|
|
||||||
}
|
|
||||||
if ((elf = elf_begin(fd, ELF_C_WRITE, (Elf *)0)) == 0) {
|
|
||||||
printf("elf_begin failed!\n");
|
|
||||||
return -3;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**************
|
|
||||||
* ELF Header *
|
|
||||||
**************/
|
|
||||||
|
|
||||||
if ((ehdr = elf32_newehdr(elf)) == NULL) {
|
|
||||||
printf("elf32_newehdr failed!\n");
|
|
||||||
return -4;
|
|
||||||
}
|
|
||||||
ehdr->e_ident[EI_DATA] = ELFDATA2LSB; /* 2's complement, little endian */
|
|
||||||
ehdr->e_type = ET_EXEC;
|
|
||||||
ehdr->e_machine = EM_386; /* x86 */
|
|
||||||
|
|
||||||
/* Image starts at 0x8048000, x86 32-bit abi. We need a bit
|
|
||||||
* of room for headers and such. TODO figure out how much
|
|
||||||
* room is needed!
|
|
||||||
*
|
|
||||||
* Current entry point is .text section.
|
|
||||||
*/
|
|
||||||
ehdr->e_entry = text_addr;
|
|
||||||
|
|
||||||
|
|
||||||
/*******************
|
|
||||||
* Program Headers *
|
|
||||||
*******************/
|
|
||||||
|
|
||||||
/* 3 segments => 3 program headers (text, data, bss) */
|
|
||||||
if ((phdr = elf32_newphdr(elf, 3)) == NULL) {
|
|
||||||
printf("elf32_newphdr failed!\n");
|
|
||||||
return -5;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*****************
|
|
||||||
* .text section *
|
|
||||||
*****************/
|
|
||||||
|
|
||||||
if ((scn = elf_newscn(elf)) == NULL) {
|
|
||||||
printf("elf_newscn failed!\n");
|
|
||||||
return -6;
|
|
||||||
}
|
|
||||||
if ((data = elf_newdata(scn)) == NULL) {
|
|
||||||
printf("elf_newdata failed!\n");
|
|
||||||
return -7;
|
|
||||||
}
|
|
||||||
data->d_align = 16;
|
|
||||||
data->d_buf = code;
|
|
||||||
data->d_off = 0LL;
|
|
||||||
data->d_type = ELF_T_BYTE;
|
|
||||||
data->d_size = code_size;
|
|
||||||
data->d_version = EV_CURRENT;
|
|
||||||
|
|
||||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
|
||||||
printf("elf32_getshdr failed!\n");
|
|
||||||
return -8;
|
|
||||||
}
|
|
||||||
shdr->sh_name = 1;
|
|
||||||
shdr->sh_type = SHT_PROGBITS;
|
|
||||||
shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
|
|
||||||
shdr->sh_addr = text_addr;
|
|
||||||
|
|
||||||
|
|
||||||
/****************
|
|
||||||
* .bss section *
|
|
||||||
****************/
|
|
||||||
|
|
||||||
if ((scn = elf_newscn(elf)) == NULL) {
|
|
||||||
printf("elf_newscn failed!\n");
|
|
||||||
return -6;
|
|
||||||
}
|
|
||||||
if ((data = elf_newdata(scn)) == NULL) {
|
|
||||||
printf("elf_newdata failed!\n");
|
|
||||||
return -7;
|
|
||||||
}
|
|
||||||
data->d_align = 4;
|
|
||||||
data->d_off = 0LL;
|
|
||||||
data->d_type = ELF_T_BYTE;
|
|
||||||
data->d_size = bss_size;
|
|
||||||
data->d_version = EV_CURRENT;
|
|
||||||
|
|
||||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
|
||||||
printf("elf32_getshdr failed!\n");
|
|
||||||
return -8;
|
|
||||||
}
|
|
||||||
shdr->sh_name = 7;
|
|
||||||
shdr->sh_type = SHT_NOBITS;
|
|
||||||
shdr->sh_flags = SHF_WRITE | SHF_ALLOC;
|
|
||||||
shdr->sh_addr = bss_addr;
|
|
||||||
|
|
||||||
|
|
||||||
/*******************************
|
|
||||||
* section header string table *
|
|
||||||
*******************************/
|
|
||||||
|
|
||||||
if ((scn = elf_newscn(elf)) == NULL) {
|
|
||||||
printf("elf_newscn failed!\n");
|
|
||||||
return -9;
|
|
||||||
}
|
|
||||||
if ((data = elf_newdata(scn)) == NULL) {
|
|
||||||
printf("elf_newdata failed!\n");
|
|
||||||
return -10;
|
|
||||||
}
|
|
||||||
data->d_align = 1;
|
|
||||||
data->d_buf = string_table;
|
|
||||||
data->d_off = 0LL;
|
|
||||||
data->d_type = ELF_T_BYTE;
|
|
||||||
data->d_size = sizeof(string_table);
|
|
||||||
data->d_version = EV_CURRENT;
|
|
||||||
|
|
||||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
|
||||||
printf("elf32_getshdr failed!\n");
|
|
||||||
return -11;
|
|
||||||
}
|
|
||||||
shdr->sh_name = 12;
|
|
||||||
shdr->sh_type = SHT_STRTAB;
|
|
||||||
shdr->sh_flags = SHF_STRINGS | SHF_ALLOC;
|
|
||||||
shdr->sh_entsize = 0;
|
|
||||||
|
|
||||||
|
|
||||||
/* int elf_setshstrndx(Elf *e, Elf32_Ehdr *eh, size_t shstrndx) */
|
|
||||||
shstrndx = elf_ndxscn(scn);
|
|
||||||
if (shstrndx >= SHN_LORESERVE) {
|
|
||||||
if ((scn = elf_getscn(elf, 0)) == NULL) {
|
|
||||||
printf("elf_getscn failed!\n");
|
|
||||||
return -12;
|
|
||||||
}
|
|
||||||
/* assert(scn->s_ndx == SHN_UNDEF); */
|
|
||||||
/* scn->s_shdr.s_shdr32.sh_link = shstrndx; */
|
|
||||||
elf_flagshdr(scn, ELF_C_SET, ELF_F_DIRTY);
|
|
||||||
shstrndx = SHN_XINDEX;
|
|
||||||
}
|
|
||||||
ehdr->e_shstrndx = shstrndx;
|
|
||||||
|
|
||||||
if (elf_update(elf, ELF_C_NULL) < 0) {
|
|
||||||
printf("elf_update failed!\n");
|
|
||||||
return -12;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; */
|
|
||||||
/* phdr->p_type = PT_PHDR; */
|
|
||||||
/* phdr->p_offset = ehdr->e_phoff; */
|
|
||||||
/* phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); */
|
|
||||||
|
|
||||||
/* text segment */
|
|
||||||
phdr->p_vaddr = text_addr;
|
|
||||||
phdr->p_type = PT_LOAD;
|
|
||||||
phdr->p_offset = header_size;
|
|
||||||
phdr->p_filesz = text_size;
|
|
||||||
phdr->p_memsz = text_size;
|
|
||||||
phdr->p_flags = PF_R | PF_X;
|
|
||||||
phdr->p_align = 0x1000;
|
|
||||||
|
|
||||||
/* data segment */
|
|
||||||
phdr++;
|
|
||||||
phdr->p_vaddr = data_addr;
|
|
||||||
phdr->p_type = PT_LOAD;
|
|
||||||
phdr->p_offset = header_size + text_size;
|
|
||||||
phdr->p_filesz = data_size;
|
|
||||||
phdr->p_memsz = data_size + 0x1024; /* XXX unsure why the abi specifies + 0x1024 */
|
|
||||||
phdr->p_flags = PF_R | PF_W | PF_X;
|
|
||||||
phdr->p_align = 0x1000;
|
|
||||||
|
|
||||||
/* bss segment */
|
|
||||||
phdr++;
|
|
||||||
phdr->p_vaddr = bss_addr;
|
|
||||||
phdr->p_type = PT_LOAD;
|
|
||||||
phdr->p_offset = header_size + text_size + data_size;
|
|
||||||
phdr->p_filesz = bss_size;
|
|
||||||
phdr->p_memsz = bss_size;
|
|
||||||
phdr->p_flags = PF_R | PF_W;
|
|
||||||
phdr->p_align = 0x1000;
|
|
||||||
|
|
||||||
elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY);
|
|
||||||
|
|
||||||
if (elf_update(elf, ELF_C_WRITE) < 0) {
|
|
||||||
printf("elf_update failed!\n");
|
|
||||||
return -13;
|
|
||||||
}
|
|
||||||
|
|
||||||
elf_end(elf);
|
|
||||||
close(fd);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
main(int argc, const char *argv[])
|
|
||||||
{
|
|
||||||
int result;
|
|
||||||
pid_t pid;
|
|
||||||
FILE *fd;
|
|
||||||
uint8_t *code = NULL;
|
|
||||||
size_t code_size = 0, chunk_size = 1024, bytes_read;
|
|
||||||
|
|
||||||
if (argc < 4) {
|
|
||||||
printf("usage: %s <input> <bss_size> <output>\n", argv[0]);
|
|
||||||
printf(" Wraps the input file in an ELF binary.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
bss_size = strtoul(argv[2], 0, 10);
|
|
||||||
|
|
||||||
if ((fd = fopen(argv[1], "r")) < 0) {
|
|
||||||
printf("[error] can't open %s for reading.\n", argv[1]);
|
|
||||||
perror("[main]");
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
while (!feof(fd) && !ferror(fd)) {
|
|
||||||
code = realloc(code, code_size + chunk_size);
|
|
||||||
bytes_read = fread(code+code_size, 1, chunk_size, fd);
|
|
||||||
code_size += bytes_read;
|
|
||||||
}
|
|
||||||
fclose(fd);
|
|
||||||
|
|
||||||
printf("Writing x86 ELF binary to %s...\n", argv[3]);
|
|
||||||
result = elf_write(argv[3], code, code_size);
|
|
||||||
if (result < 0) {
|
|
||||||
printf("[error] elf_write failed.\n");
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
12
lea.asm
12
lea.asm
|
|
@ -1,12 +0,0 @@
|
||||||
BITS 32
|
|
||||||
|
|
||||||
lea eax, [ebx+ecx*4]
|
|
||||||
lea ebx, [eax+ecx*4]
|
|
||||||
lea eax, [ecx+ebx*4]
|
|
||||||
lea eax, [ecx+ebx*8]
|
|
||||||
lea eax, [ecx+ebx]
|
|
||||||
lea eax, [0x1000+10*4]
|
|
||||||
lea eax, [eax]
|
|
||||||
lea eax, [ecx]
|
|
||||||
lea ecx, [eax]
|
|
||||||
lea eax, [0xdeadbeef]
|
|
||||||
89
mov.asm
89
mov.asm
|
|
@ -1,89 +0,0 @@
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;;; 00000000 b8 78 56 34 12 b9 78 56 34 12 ba 78 56 34 12 bb |.xV4..xV4..xV4..|
|
|
||||||
;;; 00000010 78 56 34 12 89 c0 89 c8 89 d0 89 d8 89 c1 89 c9 |xV4.............|
|
|
||||||
;;; 00000020 89 d1 89 d9 89 c2 89 ca 89 d2 89 da 89 c3 89 cb |................|
|
|
||||||
;;; 00000030 89 d3 89 db a1 ef be ad de 8b 0d ef be ad de 8b |................|
|
|
||||||
;;; 00000040 15 ef be ad de 8b 1d ef be ad de a3 ef be ad de |................|
|
|
||||||
;;; 00000050 89 0d ef be ad de 89 15 ef be ad de 89 1d ef be |................|
|
|
||||||
;;; 00000060 ad de 8b 00 8b 01 8b 02 8b 03 8b 08 8b 09 8b 0a |................|
|
|
||||||
;;; 00000070 8b 0b 8b 10 8b 11 8b 12 8b 13 8b 18 8b 19 8b 1a |................|
|
|
||||||
;;; 00000080 8b 1b 89 00 89 01 89 02 89 03 89 08 89 09 89 0a |................|
|
|
||||||
;;; 00000090 89 0b 89 10 89 11 89 12 89 13 89 18 89 19 89 1a |................|
|
|
||||||
;;; 000000a0 89 1b |..|
|
|
||||||
;;; 000000a2
|
|
||||||
|
|
||||||
mov eax, 0x12345678 ; b8 78 56 34 12
|
|
||||||
mov ecx, 0x12345678 ; b9 78 56 34 12
|
|
||||||
mov edx, 0x12345678 ; ba 78 56 34 12
|
|
||||||
mov ebx, 0x12345678 ; bb 78 56 34 12
|
|
||||||
|
|
||||||
mov eax, eax ; 89 c0
|
|
||||||
mov eax, ecx ; 89 c8
|
|
||||||
mov eax, edx ; 89 d0
|
|
||||||
mov eax, ebx ; 89 d8
|
|
||||||
|
|
||||||
mov ecx, eax ; 89 c1
|
|
||||||
mov ecx, ecx ; 89 c9
|
|
||||||
mov ecx, edx ; 89 d1
|
|
||||||
mov ecx, ebx ; 89 d9
|
|
||||||
|
|
||||||
mov edx, eax ; 89 c2
|
|
||||||
mov edx, ecx ; 89 ca
|
|
||||||
mov edx, edx ; 89 d2
|
|
||||||
mov edx, ebx ; 89 da
|
|
||||||
|
|
||||||
mov ebx, eax ; 89 c3
|
|
||||||
mov ebx, ecx ; 89 cb
|
|
||||||
mov ebx, edx ; 89 d3
|
|
||||||
mov ebx, ebx ; 89 db
|
|
||||||
|
|
||||||
mov eax, dword [0xdeadbeef] ; a1 ef be ad de
|
|
||||||
mov ecx, dword [0xdeadbeef] ; 8b 0d ef be ad de
|
|
||||||
mov edx, dword [0xdeadbeef] ; 8b 15 ef be ad de
|
|
||||||
mov ebx, dword [0xdeadbeef] ; 8b 1d ef be ad de
|
|
||||||
|
|
||||||
mov [0xdeadbeef], eax ; a3 ef be ad de
|
|
||||||
mov [0xdeadbeef], ecx ; 89 0d ef be ad de
|
|
||||||
mov [0xdeadbeef], edx ; 89 15 ef be ad de
|
|
||||||
mov [0xdeadbeef], ebx ; 89 1d ef be ad de
|
|
||||||
|
|
||||||
mov eax, dword [eax] ; 8b 00
|
|
||||||
mov eax, dword [ecx] ; 8b 01
|
|
||||||
mov eax, dword [edx] ; 8b 02
|
|
||||||
mov eax, dword [ebx] ; 8b 03
|
|
||||||
|
|
||||||
mov ecx, dword [eax] ; 8b 08
|
|
||||||
mov ecx, dword [ecx] ; 8b 09
|
|
||||||
mov ecx, dword [edx] ; 8b 0a
|
|
||||||
mov ecx, dword [ebx] ; 8b 0b
|
|
||||||
|
|
||||||
mov edx, dword [eax] ; 8b 10
|
|
||||||
mov edx, dword [ecx] ; 8b 11
|
|
||||||
mov edx, dword [edx] ; 8b 12
|
|
||||||
mov edx, dword [ebx] ; 8b 13
|
|
||||||
|
|
||||||
mov ebx, dword [eax] ; 8b 18
|
|
||||||
mov ebx, dword [ecx] ; 8b 19
|
|
||||||
mov ebx, dword [edx] ; 8b 1a
|
|
||||||
mov ebx, dword [ebx] ; 8b 1b
|
|
||||||
|
|
||||||
mov [eax], eax ; 89 00
|
|
||||||
mov [ecx], eax ; 89 01
|
|
||||||
mov [edx], eax ; 89 02
|
|
||||||
mov [ebx], eax ; 89 03
|
|
||||||
|
|
||||||
mov [eax], ecx ; 89 08
|
|
||||||
mov [ecx], ecx ; 89 09
|
|
||||||
mov [edx], ecx ; 89 0a
|
|
||||||
mov [ebx], ecx ; 89 0b
|
|
||||||
|
|
||||||
mov [eax], edx ; 89 10
|
|
||||||
mov [ecx], edx ; 89 11
|
|
||||||
mov [edx], edx ; 89 12
|
|
||||||
mov [ebx], edx ; 89 13
|
|
||||||
|
|
||||||
mov [eax], ebx ; 89 18
|
|
||||||
mov [ecx], ebx ; 89 19
|
|
||||||
mov [edx], ebx ; 89 1a
|
|
||||||
mov [ebx], ebx ; 89 1b
|
|
||||||
|
|
@ -56,7 +56,7 @@ break: test.rb test_break.code
|
||||||
print: test.rb test_print.code
|
print: test.rb test_print.code
|
||||||
@./test.rb print $(BINFORMAT)
|
@./test.rb print $(BINFORMAT)
|
||||||
|
|
||||||
big_test: test.rb big_test.code
|
big_test: test.rb test_big.code
|
||||||
@./test.rb big $(BINFORMAT)
|
@./test.rb big $(BINFORMAT)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
|
|
||||||
18
test/test.rb
18
test/test.rb
|
|
@ -5,20 +5,20 @@ $LOAD_PATH << ROOT
|
||||||
|
|
||||||
require 'build'
|
require 'build'
|
||||||
|
|
||||||
# usage: build.rb <func> [binformat]
|
# usage: test.rb <func> [binformat] [format]
|
||||||
#
|
|
||||||
# ([format] will go before [binformat])
|
|
||||||
|
|
||||||
def main
|
def main
|
||||||
func = ARGV[0].to_s
|
func = ARGV[0].to_s
|
||||||
format = 'asm' # 'bin' only assembles one or two
|
binformat = ARGV[1] ? ARGV[1].downcase : 'elf'
|
||||||
# instructions right now, but support
|
format = ARGV[2] ? ARGV[2].downcase : 'asm'
|
||||||
# is in place
|
|
||||||
binformat = (ARGV[1] ? ARGV[1] : 'elf').downcase
|
|
||||||
platform = `uname -s`.chomp.downcase
|
platform = `uname -s`.chomp.downcase
|
||||||
print "testing #{func} ... "
|
print "testing #{func} ... "
|
||||||
success = run( build("test_#{func}.code", platform, format, binformat) )
|
success = run( build("test_#{func}.code", platform, binformat) )
|
||||||
puts success == 0 ? "pass" : "FAIL! (#{success})"
|
if success == 0
|
||||||
|
puts "pass"
|
||||||
|
else
|
||||||
|
puts "FAIL! (#{success})"
|
||||||
|
end
|
||||||
exit(success.to_i)
|
exit(success.to_i)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
i=0
|
|
||||||
a=10
|
a=10
|
||||||
for i = 0 to 10
|
for i = 0 to 10
|
||||||
a=a-1
|
a=a-1
|
||||||
end
|
end
|
||||||
a=a
|
a=a
|
||||||
|
|
||||||
11
x86.txt
11
x86.txt
|
|
@ -1,11 +0,0 @@
|
||||||
mov (0x66) {
|
|
||||||
reg32, reg32 (0x89) {
|
|
||||||
op2 - src
|
|
||||||
|
|
||||||
eax ecx edx ebx
|
|
||||||
op1 eax c0 c8 d0 d8
|
|
||||||
dest ecx c1 c9 d1 d9
|
|
||||||
edx c2 ca d2 da
|
|
||||||
ebx c3 cb d3 db
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in a new issue