require 'asm/macho'

module Assembler

  # Incrementally builds a Mach-O file: a Mach header, a list of load
  # commands (with section headers filed under their segments) and a list
  # of trailing data blobs, all of which #serialize concatenates into one
  # byte string.
  #
  # The trailing blobs are written in the order they were defined, and
  # #calculate_offsets assumes that order is: section data, then relocation
  # info (if any), then the symbol table followed by the string table.
  #
  # NOTE: attr_accessor defines a `data` reader, but the #data method
  # defined further down (which defines a __data section) replaces it, so
  # only the `data=` writer is effectively available from the accessor.
  class MachOFile

    include MachO

    attr_accessor :header, :load_commands, :sections, :data
    attr_accessor :current_segment

    # Create an empty file of the given type (MH_OBJECT by default).
    def initialize(filetype=MH_OBJECT)
      @header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0)
      @load_commands = []               # All defined segments.
      @sections = {}                    # Map of segment names to lists of sections.
      @section_disk_size = Hash.new(0)  # Sections store their VM size so we need their sizes on disk.
                                        # (Keyed by section name only.)
      @section_offset = 0               # Offset of the next section's data, in bytes.
      @data = []                        # Blobs of data that appear at the end of the file.
                                        #   (text, data, relocation info, symtab, ...)
      @current_segment = nil            # An alias for the last defined segment.
      @text_segname = nil               # Name of the segment the __text section is filed under
      @text_sect_index = nil            # Index of __text section
      @text_data_index = nil            # Index into @data of __text section data
      @reloc_info = nil                 # Copy of relocation info array
      @reloc_disk_size = nil            # Size in bytes of the serialized relocation info
    end


    # Define a LoadCommand in this file.  The header's ncmds and sizeofcmds
    # fields are updated automatically to keep things in sync.  If a block is
    # given it is passed the new LoadCommand struct after all other
    # initialization has been done.
    #
    # Other methods that create any type of load command should use this
    # method to do so.  Right now the only types supported are LC_SEGMENT
    # and LC_SYMTAB.  Modify asm/macho.rb to add structs for other types, and
    # add them to LoadCommandStructMap.
    #
    # Raises a RuntimeError if cmdtype has no entry in LoadCommandStructMap.
    # Returns the new load command.
    def load_command(cmdtype)
      struct = LoadCommandStructMap[cmdtype]
      unless struct
        raise "unsupported load command type: #{cmdtype.inspect}," +
              " supported types: #{LoadCommandStructMap.keys.sort.inspect}"
      end

      # Fill in all the unknown fields with 0, this is nonsense for
      # string fields but that doesn't really matter.
      dummy_vals = [0] * (struct::Members.size - 2)

      #                    cmd      cmdsize          ...
      command = struct.new(cmdtype, struct.bytesize, *dummy_vals)

      @load_commands << command

      @header[:ncmds] += 1
      @header[:sizeofcmds] += command.bytesize

      yield(command) if block_given?

      return command
    end


    # Define a segment in this file.  If a block is given it is passed
    # the new segment.  You can chain calls to segment, it returns self.
    #
    # The new segment becomes the current segment once the block has run.
    #
    # Mach object files should only contain one anonymous segment.  This
    # is not checked but should be kept in mind when crafting files.
    def segment(name, &block)
      @current_segment = load_command(LC_SEGMENT) do |seg|
        seg[:segname] = name
        block.call(seg) if block
      end
      return self
    end


    # Define a section under the given segment.  nsects and cmdsize are
    # updated automatically.  segname can't be derived from the segment
    # that this section is defined under, as they can differ.
    #
    # Mach object files have the __text, __data, and other common
    # sections all defined under one anonymous segment, but their segment
    # names reflect their final positions after linking.  The linker plonks
    # them in the segment that they name.
    #
    # data is the section's on-disk contents; nothing is appended to the
    # file when it is empty.  vmsize defaults to the size of data.  If a
    # block is given it is passed the new section.
    #
    # Raises a RuntimeError if there is no segment to attach the section to.
    # Returns the new section.
    def section(name, segname, data='', vmsize=data.size,
                segment=@current_segment, type=S_REGULAR)

      raise "no segment defined for section #{name.inspect}" unless segment

      # Create the new section.
      section = Section.new(name, segname, @section_offset, vmsize, 0, 0, 0, 0, 0, 0, type)

      # Add this section to the map of segment names to sections.
      (@sections[segment[:segname]] ||= []) << section
      @section_disk_size[name] = data.size
      @section_offset += data.size
      @data << data if data.size > 0

      # Update the header.
      @header[:sizeofcmds] += section.bytesize

      # Update the segment.
      segment[:nsects] += 1
      segment[:cmdsize] += section.bytesize

      yield(section) if block_given?

      return section
    end


    # Define a standard text section under the current segment (if present).
    #
    # If there is no current segment then we act according to the file's type
    # (specified in the header).  Segments are created if they do not exist.
    #
    # When it is MH_OBJECT the text section is defined under a single,
    # nameless segment, but the section's segment name is set to the name
    # given here.
    #
    # For MH_EXECUTE files the text section goes under the segment with the
    # name given (__TEXT).
    #
    # Returns self.
    def text(data, sectname='__text', segname='__TEXT')
      unless @current_segment
        segment(segname_based_on_filetype(segname)) do |seg|
          seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
          seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
        end
      end

      section(sectname, segname, data) do |sect|
        # reloff and nreloc are calculated later (in calculate_offsets)
        sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS
      end

      # Remember where section and data are so we can update them later.
      # The section is filed under the *current* segment's name (see
      # #section), which is not necessarily segname when a segment already
      # existed before this call.
      @text_segname = @current_segment[:segname]
      @text_sect_index = @sections[@text_segname].length - 1
      # #section appends no blob for empty data, so there is nothing for
      # update_text to replace in that case.
      @text_data_index = data.size > 0 ? @data.length - 1 : nil

      return self
    end


    # Replace the blob of the text section defined by #text.
    #
    # NOTE(review): only the blob is swapped; the sizes recorded when the
    # section was defined are not recalculated, so this presumably expects
    # replacement data of the same length -- confirm against callers.
    #
    # Raises a RuntimeError if no text blob has been recorded.
    def update_text(data)
      raise 'no __text segment defined yet' unless @text_data_index
      @data[@text_data_index] = data
    end


    # Basis for #data, #const, and #bss methods.
    #
    # If there is no current segment, one is created (named according to the
    # file type) with read permission, plus write permission when
    # options[:writable] is truthy.  The given block, if any, is then called.
    # Returns self.
    def segment_based_on_filetype(segname, options={})
      unless @current_segment
        permissions = VM_PROT_READ
        permissions |= VM_PROT_WRITE if options[:writable]
        segment(segname_based_on_filetype(segname)) do |seg|
          seg[:initprot] = seg[:maxprot] = permissions
        end
      end
      yield if block_given?
      return self
    end


    # Define a standard data section under the current segment (if present).
    # This behaves similarly to the text method.
    #
    def data(data, sectname='__data', segname='__DATA')
      segment_based_on_filetype(segname, :writable => true) do
        section(sectname, segname, data)
      end
    end


    # Define a standard const section under the current segment (if present).
    # This behaves similarly to the data method.
    #
    def const(data, sectname='__const', segname='__DATA')
      segment_based_on_filetype(segname) do
        section(sectname, segname, data)
      end
    end


    # Define a standard BSS section under the current segment (if present).
    # This behaves similarly to the data method but accepts a VM size instead
    # of a blob, and no data is written to file since this section is for
    # uninitialized data.
    #
    def bss(vmsize, sectname='__bss', segname='__DATA')
      segment_based_on_filetype(segname, :writable => true) do
        section(sectname, segname, '', vmsize)
      end
    end


    # Define a relocation table.  Usually between segments and the
    # symbol table.
    #
    # Accepts an array of relocation info structs.  Anything that does not
    # respond to #join is appended to the file as-is (i.e. treated as already
    # serialized); in that case the number of entries is unknown here, so
    # #calculate_offsets only fills in the text section's reloff and leaves
    # nreloc untouched.
    #
    # Returns self.
    def reloc(reloc_info)
      if reloc_info.respond_to?(:join)
        blob = reloc_info.map { |r| r.serialize }.join
        @reloc_info = reloc_info.map { |r| r.clone }
      else
        blob = reloc_info
        @reloc_info = nil
      end
      @data << blob
      @reloc_disk_size = blob.bytesize
      return self
    end


    # Define a symbol table.  This should usually be placed at the end of the
    # file.
    #
    # This function is overloaded to accept either an array of Nlist structs
    # and a string table, or a single parameter: any type of Symtab (an
    # object responding to #nlist_ary and #stab).
    #
    # Returns self.
    def symtab(nlist_ary_or_symtab, stab=nil)
      if stab.nil?
        symtab = nlist_ary_or_symtab
        stab = symtab.stab
        nlist_ary = symtab.nlist_ary
      else
        nlist_ary = nlist_ary_or_symtab
      end

      load_command(LC_SYMTAB) do |st|
        st[:nsyms] = nlist_ary.size
        st[:strsize] = stab.size
        # symoff and stroff are filled in when offsets are recalculated.
      end

      @data << nlist_ary.map { |n| n.serialize }.join
      @data << stab

      return self
    end


    # Serialize the entire MachO file into a byte string.  This is simple
    # thanks to CStruct#serialize.
    def serialize
      # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_command.size)
      # ... perhaps an option to recalculate such data as well.

      # Now that we have all the pieces of the file defined we can calculate
      # the file offsets of segments and sections.
      calculate_offsets

      ###################################
      # Mach-O file Part 1: Mach Header #
      ###################################
      header = @header.serialize

      #####################################
      # Mach-O file Part 2: Load Commands #
      #####################################
      # dump each load command (which include the section headers under them)
      commands = @load_commands.map do |cmd|
        # Only segments have sections filed under them; a segment without
        # any sections has no entry in @sections.
        sects = cmd[:cmd] == LC_SEGMENT ? (@sections[cmd[:segname]] || []) : []
        sects.inject(cmd.serialize) do |bytes, sect|
          bytes + sect.serialize
        end
      end.join

      ###################################
      # Mach-O file Part 3: Binary data #
      ###################################
      header + commands + @data.join
    end


    # Update the file offsets in segments and sections.
    #
    # Raises a RuntimeError for load commands other than LC_SEGMENT and
    # LC_SYMTAB, and when relocation info was defined without a text section.
    def calculate_offsets

      # Maintain the offset into the file on disk.  This is used
      # to update the various structures.
      offset = @header.bytesize

      # First pass over load commands.  Most sizes are filled in here.
      @load_commands.each do |cmd|
        case cmd[:cmd]

        when LC_SEGMENT
          seg = cmd
          sections = @sections[seg[:segname]] || []
          section_size = sections.size * Section.bytesize
          section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] }
          section_disk_size = sections.inject(0) do |total, sect|
            total + @section_disk_size[sect[:sectname]]
          end

          ### TODO this should be redundant. try commenting it out one day.
          seg[:nsects] = sections.size
          seg[:cmdsize] = seg.bytesize + section_size
          ###

          seg[:vmsize] = section_vm_size
          seg[:filesize] = section_disk_size

        when LC_SYMTAB
          # nop

        else
          raise "unsupported load command: #{cmd.inspect}"
        end

        offset += cmd[:cmdsize]
      end

      # offset now points to the end of the Mach-O headers, or the beginning
      # of the binary blobs of section data at the end.

      # Second pass over load commands.  Fill in file offsets.
      @load_commands.each do |cmd|
        case cmd[:cmd]

        when LC_SEGMENT
          seg = cmd
          sections = @sections[seg[:segname]] || []
          seg[:fileoff] = offset
          sections.each do |sect|
            sect[:offset] = offset
            offset += @section_disk_size[sect[:sectname]]
          end

        when LC_SYMTAB
          if @reloc_disk_size
            unless @text_sect_index
              raise 'relocation info defined without a __text section'
            end
            # update text section with relocation info
            __text = @sections[@text_segname][@text_sect_index]
            __text[:reloff] = offset
            # The entry count is only known when an array was given to #reloc.
            __text[:nreloc] = @reloc_info.length if @reloc_info
            offset += @reloc_disk_size
          end
          st = cmd
          st[:symoff] = offset
          offset += st[:nsyms] * Nlist.bytesize
          st[:stroff] = offset
          offset += st[:strsize]

        # No else clause is necessary, the first iteration should have caught them.

        end

      end # @load_commands.each

    end # def calculate_offsets


    #######
    private
    #######

    # Name of the segment that sections are filed under for this file type:
    # the anonymous segment ('') for MH_OBJECT, segname itself for MH_EXECUTE.
    # Raises a RuntimeError for any other file type.
    def segname_based_on_filetype(segname)
      case @header[:filetype]
      when MH_OBJECT
        ''
      when MH_EXECUTE
        segname
      else
        raise "unsupported MachO file type: #{@header.inspect}"
      end
    end

  end # class MachOFile

end # module Assembler