# compiler/asm/machofile.rb

require 'asm/macho'

module Assembler

  class MachOFile

    # Mix in MachO.  The unqualified constants and struct classes used below
    # (MH_*, LC_*, VM_PROT_*, MachHeader, Section, Nlist, ...) are presumably
    # defined there -- confirm against asm/macho.rb.
    include MachO

    # NOTE(review): the #data reader generated here is replaced by the
    # #data(data, sectname, segname) method defined further down in this
    # class, so of the generated pair only the #data= writer stays usable.
    attr_accessor :header, :load_commands, :sections, :data
    # The most recently defined segment (assigned by #segment).
    attr_accessor :current_segment

    # Create an empty Mach-O file description of the given file type
    # (MH_OBJECT unless stated otherwise).
    def initialize(filetype=MH_OBJECT)
      # Header for an x86 file of the requested type.  The remaining header
      # fields start at zero; ncmds and sizeofcmds are bumped as load
      # commands get defined.
      @header = MachHeader.new(
        MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0
      )

      # All defined load commands (segments, symtab), in definition order.
      @load_commands = []

      # Map of segment names to lists of sections.
      @sections = {}

      # Sections store their VM size, so their sizes on disk are kept here,
      # keyed by section name.  Unknown sections count as zero bytes.
      @section_disk_size = Hash.new(0)

      # Offset of the next section's data, in bytes.
      @section_offset = 0

      # Blobs of data that appear at the end of the file
      # (text, data, relocation info, symtab, ...).
      @data = []

      # An alias for the last defined segment.
      @current_segment = nil

      # Bookkeeping for the __text section, filled in by #text:
      @text_segname = nil     # name of the segment holding the __text section
      @text_sect_index = nil  # index of the __text section within that segment
      @text_data_index = nil  # index into @data of the __text section data

      # Copy of the relocation info array, filled in by #reloc.
      @reloc_info = nil
    end


    # Append a new load command of type +cmdtype+ to this file and return it.
    #
    # The struct class is looked up in LoadCommandStructMap; an unknown type
    # raises a RuntimeError listing the supported types.  The header's ncmds
    # and sizeofcmds fields are bumped here so they stay in sync with the list
    # of commands.  When a block is given it receives the new command after
    # all of that bookkeeping is done.
    #
    # Every method that creates a load command should go through this one.
    # Right now the only types supported are LC_SEGMENT and LC_SYMTAB.  Modify
    # asm/macho.rb to add structs for other types, and add them to
    # LoadCommandStructMap.
    def load_command(cmdtype)
      klass = LoadCommandStructMap[cmdtype]
      if !klass
        supported = LoadCommandStructMap.keys.sort.inspect
        raise "unsupported load command type: #{cmdtype.inspect}, supported types: #{supported}"
      end

      # Everything after cmd and cmdsize is still unknown, so it starts out
      # as 0.  That is nonsense for string fields but does no harm.
      placeholders = [0] * (klass::Members.size - 2)
      fields = [cmdtype, klass.bytesize] + placeholders
      command = klass.new(*fields)

      @load_commands.push(command)

      @header[:ncmds] += 1
      @header[:sizeofcmds] += command.bytesize

      yield(command) if block_given?

      command
    end


    # Add an LC_SEGMENT load command named +name+ and make it the current
    # segment.  An optional block receives the new segment once its name has
    # been set.  Returns self, so calls can be chained.
    #
    # Mach object files should only contain one anonymous segment.  This
    # is not checked but should be kept in mind when crafting files.
    def segment(name, &block)
      new_segment = load_command(LC_SEGMENT) do |command|
        command[:segname] = name
        block.call(command) unless block.nil?
      end
      @current_segment = new_segment
      self
    end


    # Add a section header (and its data blob, if any) under +segment+,
    # which defaults to the current segment.  The header's sizeofcmds and the
    # segment's nsects and cmdsize are kept up to date.  An optional block
    # receives the new section; the section is also the return value.
    #
    # +segname+ cannot be derived from the owning segment because the two can
    # differ: Mach object files define __text, __data, and the other common
    # sections under one anonymous segment, while each section's segment name
    # says where the linker should place it in the final image.
    def section(name, segname, data='', vmsize=data.size,
                segment=@current_segment, type=S_REGULAR)

      disk_size = data.size
      sect = Section.new(name, segname, @section_offset, vmsize, 0, 0, 0, 0, 0, 0, type)

      # File the section under the name of the segment that owns it.
      owner = segment[:segname]
      @sections[owner] ||= []
      @sections[owner] << sect

      # Remember the on-disk size separately (the struct holds the VM size)
      # and advance the running data offset.  Empty blobs are not stored.
      @section_disk_size[name] = disk_size
      @section_offset += disk_size
      @data.push(data) if disk_size > 0

      # The section header takes up room in the header and in its segment.
      header_bytes = sect.bytesize
      @header[:sizeofcmds] += header_bytes
      segment[:nsects] += 1
      segment[:cmdsize] += header_bytes

      yield(sect) if block_given?

      sect
    end


    # Define a standard text section under the current segment (if present).
    #
    # If there is no current segment then we act according to the file's type
    # (specified in the header) and create one:
    #
    # * MH_OBJECT: the text section is defined under a single, nameless
    #   segment, but the section's own segment name is set to +segname+.
    # * MH_EXECUTE: the text section goes under the segment with the name
    #   given (__TEXT).
    #
    # data::     the section's contents
    # sectname:: section name, '__text' by default
    # segname::  segment name recorded in the section, '__TEXT' by default
    #
    # Returns self.  Raises (via segname_based_on_filetype) for file types
    # other than MH_OBJECT and MH_EXECUTE when a segment has to be created.
    def text(data, sectname='__text', segname='__TEXT')
      unless @current_segment
        segment(segname_based_on_filetype(segname)) do |seg|
          seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
          seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
        end
      end

      section(sectname, segname, data) do |sect|
        # reloff and nreloc are calculated later (in calculate_offsets)
        sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS
      end

      # #section does not store empty blobs.  Store a placeholder ourselves so
      # that @text_data_index refers to this section's own slot rather than
      # to whatever blob happened to be defined before it.
      @data << data if data.size == 0

      # Remember where section and data are so we can update them later.
      # The section was filed under the name of the segment that was current
      # when it was defined.  That is not necessarily +segname+ (or a segment
      # created above), e.g. when another section was defined first.
      @text_segname = @current_segment[:segname]
      @text_sect_index = @sections[@text_segname].length - 1
      @text_data_index = @data.length - 1

      self
    end

    # Replace the blob stored for the text section with +data+ and return
    # +data+.  Raises a RuntimeError if #text has not recorded a blob yet.
    #
    # NOTE: only the blob in @data is swapped.  The sizes recorded when the
    # section was defined are left alone.
    def update_text(data)
      index = @text_data_index
      if index
        @data[index] = data
      else
        raise 'no __text segment defined yet'
      end
    end

    # Basis for #data, #const, and #bss methods.
    #
    # Makes sure a segment exists, then yields (if a block is given) so the
    # caller can define its section.  When there is no current segment yet,
    # one is created whose name depends on the file type (see
    # segname_based_on_filetype) and whose initial and maximum protections
    # are both VM_PROT_READ, plus VM_PROT_WRITE when options[:writable] is
    # truthy.  An existing current segment is left untouched.
    #
    # The options hash is only read, never modified.  Returns self.
    def segment_based_on_filetype(segname, options={})
      unless @current_segment
        permissions = VM_PROT_READ
        permissions |= VM_PROT_WRITE if options[:writable]
        segment(segname_based_on_filetype(segname)) do |seg|
          seg[:initprot] = seg[:maxprot] = permissions
        end
      end
      yield if block_given?
      self
    end

    # Define a standard data section holding +data+ under the current
    # segment.  If there is no current segment yet, a writable one is
    # requested first (see segment_based_on_filetype).  Returns whatever
    # segment_based_on_filetype returns.
    def data(data, sectname='__data', segname='__DATA')
      segment_options = { :writable => true }
      segment_based_on_filetype(segname, segment_options) { section(sectname, segname, data) }
    end

    # Define a standard const section holding +data+ under the current
    # segment.  Works like #data, except that no writable segment is
    # requested when one has to be created.
    def const(data, sectname='__const', segname='__DATA')
      segment_based_on_filetype(segname) { section(sectname, segname, data) }
    end

    # Define a standard BSS section under the current segment.  Works like
    # #data, but takes the section's VM size instead of a blob: the section
    # is for uninitialized data, so it is defined with an empty blob and
    # nothing is written to the file for it.
    def bss(vmsize, sectname='__bss', segname='__DATA')
      segment_options = { :writable => true }
      segment_based_on_filetype(segname, segment_options) { section(sectname, segname, '', vmsize) }
    end

    # Define a relocation table.  Usually between segments and the
    # symbol table.
    #
    # reloc_info:: an array of relocation info structs.  Each entry is
    #              serialized into the data blob, and a copy of each is kept
    #              so calculate_offsets can work out the table's size and
    #              entry count.
    #
    # Returns self.  Raises ArgumentError for anything that is not
    # array-like.  A pre-serialized blob carries no entry count or entry
    # size, so the table's offsets could not be computed for it.  It is
    # rejected before any state is touched.
    def reloc(reloc_info)
      unless reloc_info.respond_to?(:join)
        raise ArgumentError,
              "relocation info must be an array of structs, got #{reloc_info.class}"
      end

      @data << reloc_info.map { |r| r.serialize }.join
      @reloc_info = reloc_info.map { |r| r.clone }
      self
    end

    # Define a symbol table.  This should usually be placed at the end of the
    # file.
    #
    # Two call forms are accepted:
    #
    #   symtab(nlist_ary, stab)  # a list of nlist entries plus a string table
    #   symtab(table)            # any object answering #nlist_ary and #stab
    #
    # An LC_SYMTAB load command is added with nsyms and strsize filled in;
    # symoff and stroff are filled in when offsets are recalculated.  The
    # serialized entries and then the string table are appended to the data
    # blobs.  Returns self.
    def symtab(nlist_ary_or_symtab, stab=nil)
      entries = nlist_ary_or_symtab
      if stab.nil?
        # Single-argument form: unpack the pieces from the table object.
        table = nlist_ary_or_symtab
        stab = table.stab
        entries = table.nlist_ary
      end

      load_command(LC_SYMTAB) do |command|
        command[:nsyms] = entries.size
        command[:strsize] = stab.size
      end

      serialized = entries.map { |nlist| nlist.serialize }
      @data.push(serialized.join, stab)
      self
    end


    # Serialize the entire MachO file into a byte string and return it.
    #
    # File offsets are recalculated first (see calculate_offsets), then the
    # three parts of the file are concatenated in order:
    #
    # 1. the Mach header,
    # 2. every load command, each segment followed by its section headers,
    # 3. the binary data blobs.
    def serialize
      # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_commands.size)
      # ... perhaps an option to recalculate such data as well.

      # Now that we have all the pieces of the file defined we can calculate
      # the file offsets of segments and sections.
      calculate_offsets

      header_bytes = @header.serialize

      command_bytes = @load_commands.map do |cmd|
        # Only segments carry section headers, and a segment may have none.
        sects = cmd[:cmd] == LC_SEGMENT ? (@sections[cmd[:segname]] || []) : []
        sects.inject(cmd.serialize) { |bytes, sect| bytes + sect.serialize }
      end.join

      header_bytes + command_bytes + @data.join
    end


    # Update the sizes and file offsets stored in segments, sections and the
    # symbol table so they match the layout #serialize produces: header, load
    # commands, section data, relocation info, symbols, string table.
    #
    # Segments without any sections and an empty relocation table are both
    # handled.  Raises a RuntimeError for load commands other than LC_SEGMENT
    # and LC_SYMTAB.
    def calculate_offsets

      # Maintain the offset into the file on disk.  This is used
      # to update the various structures.
      offset = @header.bytesize

      # First pass over load commands.  Most sizes are filled in here.
      @load_commands.each do |cmd|
        case cmd[:cmd]

        when LC_SEGMENT
          seg = cmd
          # A segment that never received a section has no entry in the map.
          sections = @sections[seg[:segname]] || []
          section_size = sections.size * Section.bytesize
          section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] }
          section_disk_size = sections.inject(0) do |total, sect|
            total + @section_disk_size[sect[:sectname]]
          end

          ### TODO this should be redundant. try commenting it out one day.
          seg[:nsects] = sections.size
          seg[:cmdsize] = seg.bytesize + section_size
          ###

          seg[:vmsize] = section_vm_size
          seg[:filesize] = section_disk_size

        when LC_SYMTAB
          # nop

        else
          raise "unsupported load command: #{cmd.inspect}"
        end

        offset += cmd[:cmdsize]
      end

      # offset now points to the end of the Mach-O headers, or the beginning
      # of the binary blobs of section data at the end.

      # Second pass over load commands.  Fill in file offsets.
      @load_commands.each do |cmd|
        case cmd[:cmd]

        when LC_SEGMENT
          seg = cmd
          sections = @sections[seg[:segname]] || []
          seg[:fileoff] = offset
          sections.each do |sect|
            sect[:offset] = offset
            offset += @section_disk_size[sect[:sectname]]
          end

        when LC_SYMTAB
          # Relocation info sits between the section data and the symbols.
          # An empty table takes no room and leaves the text section as is.
          if @reloc_info && !@reloc_info.empty?
            text_sect = @sections[@text_segname][@text_sect_index]
            text_sect[:reloff] = offset
            text_sect[:nreloc] = @reloc_info.length
            offset += @reloc_info.first.bytesize * @reloc_info.length
          end

          st = cmd
          st[:symoff] = offset
          offset += st[:nsyms] * Nlist.bytesize
          st[:stroff] = offset
          offset += st[:strsize]

        # No else clause is necessary, the first pass has already rejected
        # any other command type.

        end

      end # @load_commands.each

    end # def calculate_offsets


    #######
    private
    #######

    # Pick the name of the segment to create for a section that asks for
    # +segname+, going by the file type in the header: object files use one
    # nameless segment, executables use the requested name.  Any other file
    # type raises a RuntimeError.
    def segname_based_on_filetype(segname)
      filetype = @header[:filetype]
      return '' if MH_OBJECT == filetype
      return segname if MH_EXECUTE == filetype

      raise "unsupported MachO file type: #{@header.inspect}"
    end


  end # class MachOFile

end # module Assembler