require 'asm/macho' module Assembler class MachOFile include MachO attr_accessor :header, :load_commands, :sections, :data attr_accessor :current_segment attr_accessor :text_offset def initialize(filetype=MH_OBJECT) @header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0) @load_commands = [] # All defined segments. @sections = {} # Map of segment names to lists of segments. @section_disk_size = Hash.new(0) # Sections store their VM size so we need their sizes on disk. @data = [] # Blobs of data that appear at the end of the file. # (text, data, symtab, ...) @current_segment = nil # An alias for the last defined segment. end # Define a LoadCommand in this file. The header's ncmds and sizeofcmds # fields are updated automatically to keep things in sync. If a block is # given it is passed the new LoadCommand struct after all other # initialization has been done. # # Other methods that create any type of load command should use this # method to do so. Right now the only types supported are LC_SEGMENT # and LC_SYMTAB. Modify asm/macho.rb to add structs for other types, and # add them to LoadCommandStructMap. def load_command(cmdtype) struct = LoadCommandStructMap[cmdtype] unless struct raise "unsupported load command type: #{cmdtype.inspect}," + " supported types: #{LoadCommandStructMap.keys.sort.inspect}" end # Fill in all the unknown fields with 0, this is nonsense for # string fields but that doesn't really matter. dummy_vals = [0] * (struct::Members.size - 2) # cmd cmdsize ... command = struct.new(cmdtype, struct.bytesize, *dummy_vals) @load_commands << command @header[:ncmds] += 1 @header[:sizeofcmds] += command.bytesize yield(command) if block_given? return command end # Define a segment in this file. If a block is given it is passed # the new segment. You can chain calls to segment, it returns self. # # Mach object files should only contain one anonymous segment. This # is not checked but should be kept in mind when crafting files. def segment(name, &block) @current_segment = load_command(LC_SEGMENT) do |seg| seg[:segname] = name block.call(seg) if block end return self end # Define a section under the given segment. nsects and cmdsize are # updated automatically. segname can't be derived from the segment # that this section is defined under, as they can differ. # # Mach object files have the __text, __data, and other common # sections all defined under one anonymous segment, but their segment # names reflect their final positions after linking. The linker plonks # them in the segment that they name. def section(name, segname, data='', vmsize=data.size, segment=@current_segment, type=S_REGULAR) # Create the new section. section = Section.new(name, segname, 0, vmsize, 0, 0, 0, 0, 0, 0, type) # Add this section to the map of segment names to sections. (@sections[segment[:segname]] ||= []) << section @section_disk_size[name] = data.size @data << data if data.size > 0 # Update the header. @header[:sizeofcmds] += section.bytesize # Update the segment. segment[:nsects] += 1 segment[:cmdsize] += section.bytesize yield(section) if block_given? return section end # Define a standard text section under the current segment (if present). # # If there is no current segment then we act according to the file's type # (specified in the header). Segments are created if they do not exist. # # When it is MH_OBJECT the text section is defined under a single, # nameless segment, but the section's segment name is set to the name # given here. # # For MH_EXECUTE files the text section goes under the segment with the # name given (__TEXT). def text(data, sectname='__text', segname='__TEXT') unless @current_segment segment(segname_based_on_filetype(segname)) do |seg| seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE end end section(sectname, segname, data) do |sect| sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS end return self end # Define a standard data section under the current segment (if present). # This behaves similarly to the text method. # def data(data, sectname='__data', segname='__DATA') unless @current_segment segment(segname_based_on_filetype(segname)) do |seg| seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE seg[:initprot] = VM_PROT_READ | VM_PROT_WRITE end end section(sectname, segname, data) return self end # Define a standard const section under the current segment (if present). # This behaves similarly to the data method. # def const(data, sectname='__const', segname='__DATA') unless @current_segment segment(segname_based_on_filetype(segname)) do |seg| seg[:maxprot] = VM_PROT_READ seg[:initprot] = VM_PROT_READ end end section(sectname, segname, data) return self end # Define a standard BSS section under the current segment (if present). # This behaves similarly to the data method but accepts a VM size instead # of a blob, and no data is written to file since this section is for # uninitialized data. # def bss(vmsize, sectname='__bss', segname='__DATA') unless @current_segment segment(segname_based_on_filetype(segname)) do |seg| seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE seg[:initprot] = VM_PROT_READ | VM_PROT_WRITE end end section(sectname, segname, '', vmsize) return self end # Define a symbol table. This should usually be placed at the end of the # file. # # This function is overloaded to accept either an array of Nlist structs # packed into a byte string (i.e. a C array) and a string table, or a # single parameter: any type of Symtab. def symtab(nlist_ary_or_symtab, stab=nil) if stab.nil? symtab = nlist_ary_or_symtab stab = symtab.stab nlist_ary = symtab.nlist_ary else nlist_ary = nlist_ary_or_symtab end load_command(LC_SYMTAB) do |st| st[:nsyms] = nlist_ary.size st[:strsize] = stab.size # symoff and stroff are filled in when offsets are recalculated. end # puts ">>> Defining symbol table:" # puts ">>> #{nlist_ary.size} symbols" # puts ">>> stab = #{stab.inspect}" # puts ">>> nlist_ary = #{nlist_ary.inspect}" # puts ">>> (serialized) = #{nlist_ary.map{|n|n.serialize}.join.inspect}" @data << nlist_ary.map {|n| n.serialize}.join @data << stab return self end # Serialize the entire MachO file into a byte string. This is simple # thanks to CStruct#serialize. def serialize # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_command.size) # ... perhaps an option to recalculate such data as well. # Now that we have all the pieces of the file defined we can calculate # the file offsets of segments and sections. recalculate_offsets # |------------------| # | Mach Header | Part 1 # |------------------| # | Segment 1 | Part 2 # | Section 1 | --- # | Section 2 | --|-- # | ... | | | # | Segment 2 | | | # | Section 4 | | | # | Section 5 | | | # | ... | | | # | ... | | | # | [Symtab cmd] | | | # |------------------| | | # | Section data 1 | <-- | Part 3 # | Section data 2 | <---- # | ... | # | [Symtab data] | # |------------------| ################################### # Mach-O file Part 1: Mach Header # ################################### obj = @header.serialize ##################################### # Mach-O file Part 2: Load Commands # ##################################### # dump each load command (which include the section headers under them) obj += @load_commands.map do |cmd| sects = @sections[cmd[:segname]] rescue [] sects.inject(cmd.serialize) do |data, sect| data + sect.serialize end end.join ################################### # Mach-O file Part 3: Binary data # ################################### obj += @data.join return obj end # Update the file offsets in segments and sections. def recalculate_offsets # Maintain the offset into the the file on disk. This is used # to update the various structures. offset = @header.bytesize # First pass over load commands. Most sizes are filled in here. @load_commands.each do |cmd| case cmd[:cmd] when LC_SEGMENT seg = cmd sections = @sections[seg[:segname]] section_size = sections.size * Section.bytesize section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] } section_disk_size = sections.inject(0) do |total, sect| total + @section_disk_size[sect[:sectname]] end ### TODO this should be redundant. try commenting it out one day. seg[:nsects] = sections.size seg[:cmdsize] = seg.bytesize + section_size ### seg[:vmsize] = section_vm_size seg[:filesize] = section_disk_size when LC_SYMTAB # nop else raise "unsupported load command: #{cmd.inspect}" end offset += cmd[:cmdsize] end # offset now points to the end of the Mach-O headers, or the beginning # of the binary blobs of section data at the end. # Second pass over load commands. Fill in file offsets. @load_commands.each do |cmd| case cmd[:cmd]\ when LC_SEGMENT seg = cmd sections = @sections[seg[:segname]] seg[:fileoff] = offset sections.each do |sect| sect[:offset] = offset offset += @section_disk_size[sect[:sectname]] end when LC_SYMTAB st = cmd st[:symoff] = offset offset += st[:nsyms] * Nlist.bytesize st[:stroff] = offset offset += st[:strsize] # No else clause is necessary, the first iteration should have caught them. end end # @load_commands.each end # def recalculate_offsets ####### private ####### def segname_based_on_filetype(segname) case @header[:filetype] when MH_OBJECT: '' when MH_EXECUTE: segname else raise "unsupported MachO file type! #{@header.inspect}" end end end # class MachOFile end # module Assembler