compiler/asm/machofile.rb

374 lines
12 KiB
Ruby

require 'asm/macho'
module Assembler
class MachOFile
# Mix in the MachO module; presumably this is where the MH_*, LC_*, VM_PROT_*
# constants and the struct classes used below come from -- confirm in asm/macho.
include MachO
# header        - the MachHeader struct built in initialize.
# load_commands - array of load command structs, in the order they were defined.
# sections      - hash mapping a segment name to the array of sections defined
#                 under that segment.
# data          - array of blobs appended after the load commands on serialize.
#                 NOTE: the reader generated here is replaced by the
#                 data(data, sectname, segname) method defined later in this
#                 class, so only the data= writer survives.
attr_accessor :header, :load_commands, :sections, :data
# The segment created by the most recent call to #segment (nil until then);
# #section uses it as the default segment for new sections.
attr_accessor :current_segment
# NOTE(review): nothing in this class reads or assigns @text_offset;
# presumably it is maintained by callers -- confirm before relying on it.
attr_accessor :text_offset
# Create an empty Mach-O file description.
#
# filetype - value stored in the header's filetype field (MH_OBJECT by
#            default); it later decides how segments are named, see
#            segname_based_on_filetype.
def initialize(filetype=MH_OBJECT)
  # Nothing has been defined yet.
  @current_segment = nil # Alias for the most recently defined segment.
  @load_commands = []    # Every load command (segments, symtab), in order.
  @sections = {}         # Map of segment names to lists of sections.
  @data = []             # Blobs that follow the load commands in the file
                         # (text, data, symtab, ...).

  # Sections store their VM size, so the number of bytes each one occupies
  # on disk is tracked separately, keyed by section name (0 when unknown).
  @section_disk_size = Hash.new(0)

  # The trailing header fields start at zero; ncmds and sizeofcmds are
  # bumped as load commands and sections are defined.
  @header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL,
                           filetype, 0, 0, 0)
end
# Append a new load command of the given type to this file and return it.
#
# The header's ncmds and sizeofcmds fields are bumped so they stay in sync
# with the list of commands. When a block is given it receives the new
# command struct once that bookkeeping is done.
#
# Every method that creates a load command should go through this one.
# Only the types present in LoadCommandStructMap are accepted (currently
# LC_SEGMENT and LC_SYMTAB); to support more, add the struct to asm/macho.rb
# and register it in LoadCommandStructMap.
#
# cmdtype - load command type used to look up the struct class.
#
# Raises RuntimeError when cmdtype has no registered struct.
def load_command(cmdtype)
  klass = LoadCommandStructMap[cmdtype]
  unless klass
    raise "unsupported load command type: #{cmdtype.inspect}, " \
          "supported types: #{LoadCommandStructMap.keys.sort.inspect}"
  end

  # The first two members are cmd and cmdsize; every other member starts
  # out as 0. That is nonsense for string fields, but they are expected to
  # be overwritten by the caller.
  placeholders = [0] * (klass::Members.size - 2)
  command = klass.new(cmdtype, klass.bytesize, *placeholders)

  @load_commands << command
  @header[:ncmds] += 1
  @header[:sizeofcmds] += command.bytesize

  yield command if block_given?
  command
end
# Add an LC_SEGMENT load command called +name+ and make it the current
# segment. When a block is given it is handed the new segment after its
# segname has been set. Returns self so calls can be chained.
#
# Mach object files should only contain one anonymous segment. Nothing
# here enforces that; keep it in mind when crafting files.
def segment(name, &block)
  created = load_command(LC_SEGMENT) do |command|
    command[:segname] = name
    block.call(command) unless block.nil?
  end
  @current_segment = created
  self
end
# Define a section under the given segment and return the new Section.
# The header's sizeofcmds and the segment's nsects and cmdsize are updated
# to account for it. If a block is given it receives the new section.
#
# segname can't be derived from the segment that this section is defined
# under, as they can differ: Mach object files have the __text, __data, and
# other common sections all defined under one anonymous segment, but their
# segment names reflect their final positions after linking. The linker
# plonks them in the segment that they name.
#
# name    - section name; also the key under which the on-disk size is kept.
# segname - segment name recorded in the section itself.
# data    - byte string holding the section's contents ('' for none).
# vmsize  - size recorded in the section; defaults to the byte length of data.
# segment - segment struct that owns the section (the current one by default).
# type    - passed as the last positional argument to Section.new.
#
# Sizes are measured with String#bytesize rather than String#size: the
# latter counts characters, which under-reports any blob that contains
# multibyte characters and would throw off every file offset after it.
#
# NOTE: @section_disk_size is keyed by section name only, so two sections
# with the same name (even under different segments) share one entry.
# NOTE(review): confirm that the final positional argument of Section.new
# is the field meant to receive +type+.
def section(name, segname, data='', vmsize=data.bytesize,
            segment=@current_segment, type=S_REGULAR)
  disk_size = data.bytesize

  # Create the new section.
  sect = Section.new(name, segname, 0, vmsize, 0, 0, 0, 0, 0, 0, type)

  # Add this section to the map of segment names to sections.
  (@sections[segment[:segname]] ||= []) << sect
  @section_disk_size[name] = disk_size
  # Only sections with contents contribute a blob to the end of the file.
  @data << data if disk_size > 0

  # Update the header.
  @header[:sizeofcmds] += sect.bytesize

  # Update the segment.
  segment[:nsects] += 1
  segment[:cmdsize] += sect.bytesize

  yield(sect) if block_given?
  sect
end
# Add a text section holding +data+ under the current segment.
#
# When no segment has been defined yet, one is created first, and its name
# depends on the file type stored in the header:
#
#   MH_OBJECT  - a single nameless segment is used; the section still
#                records +segname+ as its segment name.
#   MH_EXECUTE - the segment is named +segname+ (__TEXT by default).
#
# A segment created here may be at most read/write/execute and starts out
# read/execute. Returns self.
def text(data, sectname='__text', segname='__TEXT')
  unless @current_segment
    owner = segname_based_on_filetype(segname)
    segment(owner) do |command|
      command[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
      command[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
    end
  end

  # 0x400 is S_ATTR_SOME_INSTRUCTIONS.
  section(sectname, segname, data) { |header| header[:flags] = 0x400 }
  self
end
# Add a data section holding +data+ under the current segment.
#
# Works like the text method: when no segment exists yet, one is created
# (named according to the file type) that is readable and writable, both
# initially and at most. Returns self.
def data(data, sectname='__data', segname='__DATA')
  unless @current_segment
    read_write = VM_PROT_READ | VM_PROT_WRITE
    owner = segname_based_on_filetype(segname)
    segment(owner) do |command|
      command[:maxprot] = read_write
      command[:initprot] = read_write
    end
  end

  section(sectname, segname, data)
  self
end
# Add a const section holding +data+ under the current segment.
#
# Works like the data method, except that a segment created here (only
# when none exists yet) is read-only, both initially and at most.
# Returns self.
def const(data, sectname='__const', segname='__DATA')
  unless @current_segment
    owner = segname_based_on_filetype(segname)
    segment(owner) do |command|
      command[:maxprot] = VM_PROT_READ
      command[:initprot] = VM_PROT_READ
    end
  end

  section(sectname, segname, data)
  self
end
# Add a BSS section of +vmsize+ bytes under the current segment.
#
# Works like the data method, but takes a VM size instead of a blob: the
# section is for uninitialized data, so it is defined with empty contents
# and nothing is written to the file for it. Returns self.
def bss(vmsize, sectname='__bss', segname='__DATA')
  unless @current_segment
    read_write = VM_PROT_READ | VM_PROT_WRITE
    owner = segname_based_on_filetype(segname)
    segment(owner) do |command|
      command[:maxprot] = read_write
      command[:initprot] = read_write
    end
  end

  section(sectname, segname, '', vmsize)
  self
end
# Define a symbol table (an LC_SYMTAB load command plus its data). This
# should usually be the last thing added to the file, because its blobs
# are appended to the end of the file data.
#
# Two call forms are accepted:
#
#   symtab(nlist_ary, stab) - an array of symbol entries, each responding
#                             to #serialize, and the string table.
#   symtab(symtab)          - any object responding to #nlist_ary and #stab.
#
# nsyms and strsize are recorded here; symoff and stroff are filled in when
# offsets are recalculated. Returns self.
#
# NOTE(review): strsize is taken from stab.size; if stab is a String that
# can contain multibyte characters this counts characters rather than
# bytes -- confirm what type stab is.
def symtab(nlist_ary_or_symtab, stab=nil)
  entries = nlist_ary_or_symtab
  if stab.nil?
    # Single-argument form: unpack the symbol table object.
    table = nlist_ary_or_symtab
    stab = table.stab
    entries = table.nlist_ary
  end

  load_command(LC_SYMTAB) do |command|
    command[:nsyms] = entries.size
    command[:strsize] = stab.size
  end

  # The serialized entries go first, immediately followed by the strings.
  @data.push(entries.map { |entry| entry.serialize }.join, stab)
  self
end
# Serialize the entire Mach-O file into a byte string and return it.
# File offsets are recalculated first, then the pieces are concatenated in
# this order:
#
#   |------------------|
#   | Mach Header      |  Part 1
#   |------------------|
#   | Segment 1        |  Part 2
#   |   Section 1      |  ---
#   |   Section 2      |  --|--
#   |   ...            |    | |
#   | Segment 2        |    | |
#   |   Section 4      |    | |
#   |   Section 5      |    | |
#   |   ...            |    | |
#   | ...              |    | |
#   | [Symtab cmd]     |    | |
#   |------------------|    | |
#   | Section data 1   | <--  |  Part 3
#   | Section data 2   | <----
#   | ...              |
#   | [Symtab data]    |
#   |------------------|
#
# Part 3 is @data joined in the order the blobs were added, whereas offsets
# are assigned in load command order; the two agree as long as sections and
# the symbol table are defined in the order their commands appear.
#
# TODO sanity checks, e.g. assert(@header[:ncmds] == @load_commands.size)
#      ... perhaps an option to recalculate such data as well.
def serialize
  # Now that every piece of the file is defined, the file offsets of
  # segments and sections can be calculated.
  recalculate_offsets

  # Part 2: each load command, immediately followed by the headers of the
  # sections defined under it. Only segments own sections; a segment that
  # has none simply contributes its own bytes.
  commands = @load_commands.map do |cmd|
    sects = cmd[:cmd] == LC_SEGMENT ? (@sections[cmd[:segname]] || []) : []
    sects.inject(cmd.serialize) { |bytes, sect| bytes + sect.serialize }
  end.join

  # Part 1 + Part 2 + Part 3.
  @header.serialize + commands + @data.join
end

# Update the sizes and file offsets stored in segments, sections and the
# symbol table command so that they describe the layout produced by
# serialize.
#
# Raises RuntimeError for any load command other than LC_SEGMENT and
# LC_SYMTAB.
def recalculate_offsets
  # Running offset into the file on disk, used to update the various
  # structures. The load commands start right after the header.
  offset = @header.bytesize

  # First pass over load commands. Most sizes are filled in here.
  @load_commands.each do |cmd|
    case cmd[:cmd]
    when LC_SEGMENT
      # A segment may legitimately have no sections yet.
      sections = @sections[cmd[:segname]] || []

      ### TODO this should be redundant. try commenting it out one day.
      cmd[:nsects] = sections.size
      cmd[:cmdsize] = cmd.bytesize + sections.size * Section.bytesize
      ###

      cmd[:vmsize] = sections.inject(0) { |total, sect| total + sect[:size] }
      cmd[:filesize] = sections.inject(0) do |total, sect|
        total + @section_disk_size[sect[:sectname]]
      end
    when LC_SYMTAB
      # Nothing to do: nsyms and strsize were set when it was defined.
    else
      raise "unsupported load command: #{cmd.inspect}"
    end
    offset += cmd[:cmdsize]
  end

  # offset now points to the end of the Mach-O headers, i.e. the start of
  # the binary blobs of section data at the end of the file.

  # Second pass over load commands. Fill in file offsets.
  @load_commands.each do |cmd|
    case cmd[:cmd]
    when LC_SEGMENT
      cmd[:fileoff] = offset
      (@sections[cmd[:segname]] || []).each do |sect|
        sect[:offset] = offset
        offset += @section_disk_size[sect[:sectname]]
      end
    when LC_SYMTAB
      # The symbol entries come first, directly followed by the strings.
      cmd[:symoff] = offset
      offset += cmd[:nsyms] * Nlist.bytesize
      cmd[:stroff] = offset
      offset += cmd[:strsize]
    # No else clause is necessary, the first pass has already rejected
    # anything unsupported.
    end
  end
end
#######
private
#######
# Pick the name of the segment that should own a section whose segment
# name is +segname+, based on the file type stored in the header:
#
#   MH_OBJECT  - '' (object files keep everything in one nameless segment)
#   MH_EXECUTE - +segname+ itself
#
# Raises RuntimeError for any other file type.
#
# Uses `when ... then`; the older `when X: value` form is a syntax error
# on Ruby 1.9 and later.
def segname_based_on_filetype(segname)
  case @header[:filetype]
  when MH_OBJECT then ''
  when MH_EXECUTE then segname
  else
    raise "unsupported MachO file type! #{@header.inspect}"
  end
end
end # class MachOFile
end # module Assembler