WIP: re-organize into lib/ dir

2026-06-24 04:49:09 +00:00 · 2026-06-18 06:42:12 -07:00 · 2026-06-18 06:42:12 -07:00 · a12bdafde4
commit a12bdafde4
parent 5da06f938c
34 changed files with 4280 additions and 0 deletions
--- a/bin/compile
+++ b/bin/compile
@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+
+# Put the project's lib/ directory on the load path. The path is resolved
+# relative to this script (not the current working directory) so the
+# compiler can be launched from any directory.
+$LOAD_PATH.unshift(File.expand_path('../lib', File.dirname(__FILE__)))
+
+require 'compiler'
+
+# NOTE(review): placeholder left by this WIP commit -- the driver code that
+# builds a Compiler and feeds it input has not been written yet.
+???
--- a/lib/compiler.rb
+++ b/lib/compiler.rb
@ -0,0 +1,116 @@
+# Directory holding this file (the lib/ directory).
+lib_root = File.dirname(__FILE__)
+
+# Run from the project root, i.e. the parent of lib/.
+Dir.chdir(File.expand_path('..', lib_root))
+
+# Make lib/ requirable, without adding a duplicate load path entry.
+$LOAD_PATH.include?(lib_root) || $LOAD_PATH.unshift(lib_root)
+
+require 'compiler/parser'
+
+# Front end that wires together the pieces needed for one compilation
+# target (architecture, assembler flavour and, for binary output, the
+# object file format) and then drives the parser.
+class Compiler
+
+  attr_reader :platform, :arch_name, :format, :binformat
+
+  # Collaborators selected by #wire. The two factories are only set when
+  # format is "bin".
+  attr_reader :arch, :asm, :symbol_table_factory, :object_file_factory
+
+  # platform  [String] "linux" or "darwin"
+  # arch_name [String] "x86" or "arm"
+  # format    [String] "text" or "bin"
+  # binformat [String, nil] "elf" or "macho", only used when format is "bin"
+  #
+  # Raises RuntimeError when the binary format, the architecture or the
+  # output format is not supported.
+  def initialize(platform, arch_name, format, binformat = nil)
+    @platform = platform
+    @arch_name = arch_name
+    @format = format
+    @binformat = binformat
+    wire
+  end
+
+  # Parse +input+ and compile it with the assembler chosen at
+  # construction time. Returns whatever Parser#compile returns.
+  def compile(input)
+    parser = Parser.new(input, asm)
+    parser.parse
+    parser.compile
+  end
+
+
+  #######
+  private
+  #######
+
+  # Select the object file format (binary output only) and then the
+  # architecture. The format is wired first because the binary assemblers
+  # ask this object for the symbol table factory while being constructed.
+  def wire
+    if format == 'bin'
+      case binformat
+
+      when 'elf'
+        wire_elf
+
+      when 'macho'
+        wire_macho
+
+      else
+        raise "unsupported binary format: #{binformat}"
+      end
+    end
+
+    case arch_name
+
+    when 'x86'
+      wire_x86
+
+    when 'arm'
+      wire_arm
+
+    else
+      # Report the requested name; @arch has not been assigned at this point.
+      raise "unsupported arch: #{arch_name}"
+    end
+  end
+
+  # The requires below are deliberately lazy so that only the code for the
+  # selected target is loaded.
+
+  def wire_elf
+    require 'compiler/asm/elf/object_file'
+    require 'compiler/asm/elf/symbol_table'
+
+    @symbol_table_factory = ASM::ELF::SymbolTable
+    @object_file_factory = ASM::ELF::ObjectFile
+  end
+
+  def wire_macho
+    require 'compiler/asm/macho/object_file'
+    require 'compiler/asm/macho/symbol_table'
+
+    @symbol_table_factory = ASM::MachO::SymbolTable
+    @object_file_factory = ASM::MachO::ObjectFile
+  end
+
+  def wire_arm
+    require 'compiler/asm/arm/binary_assembler'
+    require 'compiler/asm/arm/text_assembler'
+
+    @arch = ASM::ARM::Arch.instance
+    @asm = assembler_for(ASM::ARM)
+  end
+
+  def wire_x86
+    require 'compiler/asm/x86/binary_assembler'
+    require 'compiler/asm/x86/text_assembler'
+
+    @arch = ASM::X86::Arch.instance
+    @asm = assembler_for(ASM::X86)
+  end
+
+  # Build the assembler matching the requested output format from the
+  # given architecture namespace (e.g. ASM::X86). This object is passed
+  # to the assembler as its delegate.
+  def assembler_for(namespace)
+    case format
+    when 'text'
+      namespace::TextAssembler.new(self)
+
+    when 'bin'
+      namespace::BinaryAssembler.new(self)
+
+    else
+      raise "unsupported output format: #{format}"
+    end
+  end
+
+end
--- a/lib/compiler/asm/arch.rb
+++ b/lib/compiler/asm/arch.rb
@ -0,0 +1,67 @@
+class Compiler
+  module ASM
+
+    # Describes a target architecture: register and word widths, byte
+    # order, and the preamble/postamble used for binary output.
+    class Arch
+
+      attr_reader :bits, :word_bits
+      attr_reader :preamble, :postamble
+      attr_reader :endianness
+
+      # config is a Hash with String keys:
+      #   - bits: native register / pointer size
+      #   - word_bits: number of bits in a word
+      #   - endianness: "big" or "little"
+      #   - preamble: binary preamble
+      #   - postamble: binary postamble
+      def initialize(config)
+        @bits = config['bits']
+        @word_bits = config['word_bits']
+        @endianness = config['endianness']
+        @preamble = config['preamble']
+        @postamble = config['postamble']
+      end
+
+      # Native register size in bytes.
+      def bytes
+        bits / 8
+      end
+
+      # Word size in bytes.
+      def word_bytes
+        word_bits / 8
+      end
+
+      def big_endian?
+        endianness == 'big'
+      end
+
+      def little_endian?
+        endianness == 'little'
+      end
+
+      # Size of an address in bytes; the same as the native register size.
+      def pointer_bytes
+        bytes
+      end
+
+      # Synonym for #pointer_bytes. The binary assembler asks for the
+      # pointer size under both names. Defined as a method rather than an
+      # alias so that it follows a subclass override of #pointer_bytes.
+      def pointer_size
+        pointer_bytes
+      end
+
+      # Smallest value of a signed native integer.
+      def min_signed
+        -(2 ** (bits - 1))
+      end
+
+      # Largest value of a signed native integer.
+      def max_signed
+        2 ** (bits - 1) - 1
+      end
+
+      def min_unsigned
+        0
+      end
+
+      # Largest value of an unsigned native integer.
+      def max_unsigned
+        2 ** bits - 1
+      end
+
+      # Range of all signed native integers (memoized).
+      def signed_int
+        @signed_int ||= min_signed..max_signed
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/assembler.rb
+++ b/lib/compiler/asm/assembler.rb
@ -0,0 +1,82 @@
+# sjs
+# may 2009
+
+class Compiler
+  module ASM
+
+    # Abstract class for common functionality between different code
+    # generators. Also defines somewhat of an interface that must be
+    # implemented to be useful: every instruction method below is an
+    # empty stub here.
+    class Assembler
+
+      # The object that created this assembler. It is asked for the
+      # target architecture (see #arch).
+      attr_reader :delegate
+
+      def initialize(delegate)
+        @delegate = delegate
+      end
+
+      # The architecture description supplied by the delegate.
+      def arch
+        delegate.arch
+      end
+
+      # Evaluate the given block with this assembler as self, so that
+      # instruction methods can be called without a receiver. The
+      # positional arguments are accepted and ignored.
+      def block(*args, &block)
+        instance_eval(&block)
+      end
+
+      def load(n)
+      end
+
+      def load_var(name)
+      end
+
+      def store_var(name, reg)
+      end
+
+      def neg(reg)
+      end
+
+      def stack_add(reg)
+      end
+
+      def stack_sub(reg)
+      end
+
+      def stack_mul_signed(reg)
+      end
+
+      def stack_div(reg)
+      end
+
+      def stack_or(reg)
+      end
+
+      def stack_xor(reg)
+      end
+
+      def stack_and(reg)
+      end
+
+      # Named not_ because `not` is a keyword; the alias below still makes
+      # it callable as `not` with an explicit receiver or via send.
+      def not_(reg)
+      end
+      alias_method :not, :not_
+
+      def compare(reg, n)
+      end
+
+      def je(label)
+      end
+
+      def jne(label)
+      end
+
+      def jmp(label)
+      end
+
+      def mov_reg_imm(reg, n)
+      end
+
+      def call(label)
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/binary_assembler.rb
+++ b/lib/compiler/asm/binary_assembler.rb
@ -0,0 +1,322 @@
+require 'compiler/asm/assembler'
+require 'compiler/asm/constant_proxy'
+require 'compiler/asm/variable_proxy'
+
+class Compiler
+  module ASM
+
+    # Assembler that produces machine code in two passes. Instructions are
+    # first recorded in an intermediate representation (bytes mixed with
+    # symbolic addresses); #output then resolves the addresses and hands
+    # the result to the object file writer supplied by the delegate.
+    class BinaryAssembler < Assembler
+
+      DEBUG_OUTPUT = false
+
+      # Number of bytes emitted so far.
+      attr_reader :ip
+
+      def initialize(delegate)
+        super(delegate)
+
+        @symtab = delegate.symbol_table_factory.new
+
+        # Almost a byte array, except for addresses.
+        #
+        # Addresses take the form [:<type>, <name>]
+        # where <type> is one of: var, const, or label
+        #
+        # NOTE the type is redundant because of VariableProxy#const?
+        #      and labels are just strings.
+        #
+        #      however, we could accept strings for variable names
+        #      if we keep the type tag. something to think about.
+        @ir = []
+
+        # Our instruction pointer, or the number of bytes written.
+        @ip = 0
+
+        # Map locations in the byte array to var proxies so we can
+        # resolve address operations on the 2nd pass.
+        @proxies = {}
+
+        emit_entry_point
+        emit_preamble
+      end
+
+      # register for return values
+      def return_reg
+        raise 'subclasses must override #return_reg'
+      end
+
+      # Hook for subclasses; emits nothing by default.
+      def emit_entry_point
+      end
+
+      # Emit the architecture's preamble bytes for the delegate's platform.
+      def emit_preamble
+        arch.preamble[@delegate.platform].each { |byte| emit_byte(byte) }
+      end
+
+      # Emit the architecture's postamble bytes for the delegate's platform.
+      def emit_postamble
+        arch.postamble[@delegate.platform].each { |byte| emit_byte(byte) }
+      end
+
+      # Finish the program and return the serialized object file.
+      #
+      # The first pass (#resolve_labels) turns the IR into bytes with
+      # placeholder addresses. Once the size of the text is known the
+      # symbol table can lay out the const and bss areas, and the second
+      # pass overwrites each placeholder with the real address.
+      def output
+        emit_postamble
+
+        byte_array = resolve_labels
+
+        #puts "1st pass: " + byte_array.inspect if DEBUG_OUTPUT
+
+        binary = package(byte_array)
+
+        @symtab.calculate_offsets(binary.length)
+        if DEBUG_OUTPUT
+          puts ">>> text offset:  0x#{@symtab.text_offset.to_s(16)}"
+          puts ">>> const offset: 0x#{@symtab.const_offset.to_s(16)}"
+          puts ">>> bss offset:   0x#{@symtab.bss_offset.to_s(16)}"
+        end
+
+        # Now that we know where everything lies do the 2nd pass
+        # calculating and filling in final var and const addresses:
+        # resolve all variable proxies in @proxies, replacing the
+        # placeholder bytes (0xff) with the real address.
+        bss_offset = @symtab.bss_offset
+        const_offset = @symtab.const_offset
+        @proxies.each do |i, proxy|
+          #puts ">>> Resolving #{proxy.name}" if DEBUG_OUTPUT
+          base_addr = if proxy.const?
+                        const_offset + @symtab.const(proxy.name)
+                      else
+                        bss_offset + @symtab.var(proxy.name)
+                      end
+          byte_array[i, arch.pointer_bytes] = addr_to_bytes(proxy.resolve(base_addr))
+        end
+
+        binary = package(byte_array)
+
+        #puts "2nd pass: " + byte_array.inspect if DEBUG_OUTPUT
+
+        objwriter = @delegate.object_file_factory.new
+        objwriter.text(binary)
+        objwriter.const(@symtab.const_data) if @symtab.const_size > 0
+        objwriter.bss(@symtab.bss_size) if @symtab.bss_size > 0
+        objwriter.reloc(@symtab.reloc_info)
+        objwriter.symtab(@symtab)
+        objwriter.serialize
+      end
+
+      # First pass: flatten the IR into an array of bytes.
+      #
+      # Labels are resolved immediately to relative addresses. Variable
+      # and constant addresses are replaced by 0xff placeholder bytes;
+      # their positions are remembered in @proxies and registered with
+      # the symbol table as relocations.
+      #
+      # Raises RuntimeError on an IR entry of an unknown kind.
+      def resolve_labels
+        bytes_read = 0
+        bytes = []
+        @ir.each do |x|
+          if x.is_a?(Numeric)
+            bytes << x
+            bytes_read += 1
+
+          elsif addr?(x)
+            # remember this so we can replace the bogus addr later
+            @proxies[bytes_read] = x[1]
+
+            # add a relocation entry for this address
+            @symtab.reloc(bytes_read)
+
+            # fill in said bogus addr
+            bogus_addr = [0xff] * arch.pointer_bytes
+            bytes += bogus_addr
+            bytes_read += bogus_addr.length
+
+          # TODO find out if we should calculate addrs as offsets rather than
+          #      absolute as they are done now. (ok for Mach-O, maybe not ELF)
+          elsif label?(x)
+            # the actual eip points to the next instruction already, so should we.
+            real_ip = bytes_read + arch.bytes
+            name = x[1]
+            addr = @symtab.lookup_label(name) - real_ip # dest - src to get relative addr
+
+            addr_bytes = addr_to_bytes(addr)
+            bytes += addr_bytes
+            bytes_read += addr_bytes.length
+
+          else
+            raise "unknown value in the IR at #{bytes_read} - #{x.inspect}"
+          end
+        end
+
+        bytes
+      end
+
+      # Pack an array of byte values into a binary string.
+      def package(bytes)
+        bytes.pack('c*')
+      end
+
+      # Silly semantics, but labels don't count as an address since they
+      # don't need to be deferred.
+      def addr?(x)
+        x.is_a?(Array) && [:var, :const].include?(x[0])
+      end
+
+      def label?(x)
+        x.is_a?(Array) && x[0] == :label
+      end
+
+      # Define a constant and return a proxy for it.
+      # XXX this should probably evaluate the value somehow
+      def define_const(name, bytes, value)
+        @symtab.define_const(name, bytes, value)
+        const(name)
+      end
+
+      # Define a variable with the given name and size in bytes and return
+      # a proxy for it. Redefinition only prints a warning.
+      def define_var(name, bytes = arch.word_bytes)
+        if @symtab.var?(name)
+          STDERR.puts "[warning] attempted to redefine #{name}"
+        else
+          @symtab.define_var(name, bytes)
+        end
+        var(name)
+      end
+
+      # Proxy for the named variable. An unknown name is reported on
+      # STDERR but a proxy is returned regardless.
+      def var(name)
+        STDERR.puts "[error] undefined variable #{name}" unless var?(name)
+        VariableProxy.new(name)
+      end
+
+      # Proxy for the named constant. An unknown name is reported on
+      # STDERR but a proxy is returned regardless.
+      def const(name)
+        STDERR.puts "[error] undefined constant #{name}" unless const?(name)
+        ConstantProxy.new(name)
+      end
+
+      def var?(name)
+        @symtab.var?(name)
+      end
+
+      def const?(name)
+        @symtab.const?(name)
+      end
+
+      # Define a variable unless it exists.
+      def var!(name, bytes = arch.word_bytes)
+        if var?(name)
+          var(name)
+        else
+          define_var(name, bytes)
+        end
+      end
+
+      # Run the given block and return the number of bytes it emitted.
+      def asm
+        # stash the current number of bytes written
+        instruction_offset = @ip
+
+        print "0x#{@ip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT
+
+        yield
+
+        puts if DEBUG_OUTPUT
+
+        # The byte count has to be the last expression so it is the
+        # value returned to the caller.
+        @ip - instruction_offset
+      end
+
+
+      # Append one byte to the IR. Accepts -128..255; negative values are
+      # stored as their two's complement byte. Raises RuntimeError for
+      # anything outside that range.
+      def emit_byte(byte)
+
+        ##### The joke's on me! Array#pack('c*') already does this. It is nice to see
+        #     in the debugging output though, so it stays for now.
+        #
+        # Convert negative native ints into signed bytes.
+        #
+        # Calculate the signed byte as the difference between -1 (0xff) and some
+        # number, X. When byte == -1 we want X == 0, so X == -byte - 1.
+        # Since -byte == ~byte + 1, then -byte - 1 == ~byte + 1 - 1 == ~byte,
+        # and X == ~byte. We want the *signed byte* -1, so we use 0xff,
+        # *not* -1. Ruby sees our signed bytes as positive ints 0-255.
+        #
+        byte = 0xff - ~byte if byte < 0 && byte >= -128
+
+        # make sure it's a byte
+        raise "not a byte: #{byte.inspect}" unless byte == (byte & 0xff)
+
+        byte = byte & 0xff
+        ###  end of pointless code
+
+        print byte.to_s(16).rjust(2, '0') + ' ' if DEBUG_OUTPUT
+
+        @ir << byte
+        @ip += 1
+      end
+
+      # addresses are emited as arrays of bytes, prefixed with :var, :const, or :label
+      def emit_addr(type, name)
+        placeholder = [type, name]
+        puts placeholder.inspect if DEBUG_OUTPUT
+        @ir << placeholder
+
+        # addresses are a constant size
+        @ip += arch.pointer_bytes
+      end
+
+      # Emit the address of a variable, given its name or a proxy for it.
+      def emit_var(name_or_proxy)
+        proxy = name_or_proxy.is_a?(VariableProxy) ? name_or_proxy : var(name_or_proxy)
+        emit_addr(:var, proxy)
+      end
+
+      # Emit the address of a constant, given its name or a proxy for it.
+      def emit_const(name)
+        proxy = name.is_a?(VariableProxy) ? name : const(name)
+        emit_addr(:const, proxy)
+      end
+
+      def emit_label(name)
+        print "<#{name}> " if DEBUG_OUTPUT
+        emit_addr(:label, name)
+      end
+
+      def make_label(suffix = nil)
+        @symtab.unique_label(suffix)
+      end
+
+      # Bind the label to the current instruction pointer.
+      def define_label(name)
+        puts "\n#{name} (0x#{@ip.to_s(16)}):" if DEBUG_OUTPUT
+        @symtab.define_label(name, @ip)
+      end
+
+      # Convert an address to pointer-sized bytes in the byte order of the
+      # target architecture. Raises RuntimeError when the architecture
+      # reports neither big nor little endian.
+      def addr_to_bytes(addr)
+        if arch.big_endian?
+          num_to_big_endian(addr)
+        elsif arch.little_endian?
+          num_to_little_endian(addr)
+        else
+          raise "unknown endianness: #{arch.endianness.inspect}"
+        end
+      end
+
+      # Convert a number to an array of pointer-sized bytes, most
+      # significant byte first, discarding excess bits.
+      def num_to_big_endian(num)
+        num_to_little_endian(num).reverse
+      end
+
+      # Convert a number to an array of pointer-sized bytes, least
+      # significant byte first, discarding excess bits. Negative numbers
+      # come out in two's complement.
+      def num_to_little_endian(num)
+        (0...arch.pointer_bytes).map { |i| (num >> (8 * i)) & 0xff }
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/constant_proxy.rb
+++ b/lib/compiler/asm/constant_proxy.rb
@ -0,0 +1,13 @@
+# The superclass must be loaded before this file is evaluated; requiring
+# it here makes this file safe to load on its own and in any order.
+require 'compiler/asm/variable_proxy'
+
+class Compiler
+  module ASM
+
+    # A VariableProxy that identifies itself as referring to a constant.
+    class ConstantProxy < VariableProxy
+
+      def const?
+        true
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/cstruct.rb
+++ b/lib/compiler/asm/cstruct.rb
@ -0,0 +1,342 @@
+# Struct does some trickery with custom allocators so we can't
+# subclass it without writing C. Instead we define a CStruct class
+# that does something similar enough for our purpose. It is
+# subclassed just like any other class. A nice side-effect of this
+# syntax is that it is always clear that a CStruct is just a class and
+# instances of the struct are objects.
+#
+# Some light metaprogramming is used to make the following syntax possible:
+#
+# class MachHeader < CStruct
+#   uint :magic
+#   int  :cputype
+#   int  :cpusubtype
+#    ...
+#   int  :flags
+# end
+#
+# Inheritance works as you would expect.
+#
+# class LoadCommand < CStruct
+#   uint32 :cmd
+#   uint32 :cmdsize
+# end
+#
+# # inherits cmd and cmdsize as the first 2 fields
+# class SegmentCommand < LoadCommand
+#   string :segname, 16
+#   uint32 :vmaddr
+#   uint32 :vmsize
+# end
+#
+# Nothing tricky or confusing there. Members of a CStruct class are
+# declared in the class definition. A different definition using a
+# more static approach probably wouldn't be very hard... if
+# performance is critical ... but then why are you using Ruby? ;-)
+#
+#
+# TODO support bit fields
+#
+# Bit fields should be supported by passing the number of bits a field
+# should occupy. Perhaps we could use the size 'pack' for the rest of
+# the field.
+#
+# class RelocationInfo < CStruct
+#   int32  :address
+#   uint32 :symbolnum, 24
+#   pack   :pcrel,      1
+#   pack   :length,     2
+#   pack   :extern,     1
+#   pack   :type,       4
+# end
+
+# Base class for C-like structs. Subclasses declare their members with
+# the type methods generated from SIZE_MAP (uint32, string, ...) and get
+# positional values plus binary (un)serialization via Array#pack.
+class CStruct
+
+
+  ###################
+  # Class Constants #
+  ###################
+
+  # Size in bytes.
+  SIZE_MAP = {
+    :int8   => 1,
+    :uint8  => 1,
+    :int16  => 2,
+    :uint16 => 2,
+    :int32  => 4,
+    :uint32 => 4,
+    :string => lambda { |*opts| opts.first }, # first opt is size
+    # the last 3 are to make the language more C-like
+    :int    => 4,
+    :uint   => 4,
+    :char   => 1
+  }
+
+  # 32-bit
+  PACK_MAP = {
+    :int8   => 'c',
+    :uint8  => 'C',
+    :int16  => 's',
+    :uint16 => 'S',
+    :int32  => 'i',
+    :uint32 => 'I',
+    # Strings are NUL-padded or truncated to exactly their declared size.
+    :string => lambda do |str, *opts|
+                        len = opts.first
+                        str.ljust(len, "\0")[0, len]
+                      end,
+    # a few C-like names
+    :int    => 'i',
+    :uint   => 'I',
+    :char   => 'C'
+  }
+
+  # Only needed when unpacking is different from packing, i.e. strings w/ lambdas in PACK_MAP.
+  UNPACK_MAP = {
+    # Consume exactly the declared number of bytes from the front of the
+    # buffer, leaving the following fields in place, and strip the NUL
+    # padding. A string that fills the whole field keeps every character.
+    :string => lambda do |str, *opts|
+                        len = opts.first
+                        field = str.slice!(0, len)
+                        field.sub(/\0*\z/, '')
+                      end
+  }
+
+  ##########################
+  # Class Instance Methods #
+  ##########################
+
+  # Note: const_get and const_set are used so the constants are bound
+  #       at runtime, to the real class that has subclassed CStruct.
+  #       I figured Ruby would do this but I haven't looked at the
+  #       implementation of constants so it might be tricky.
+  #
+  #       All of this could probably be avoided with Ruby 1.9 and
+  #       private class variables. That is definitely something to
+  #       experiment with.
+
+  class <<self
+
+    # Give every subclass its own member tables.
+    #
+    # These "constants" are only constant references. Structs can
+    # be modified. After the struct is defined it is still open,
+    # but good practice would be not to change a struct after it
+    # has been defined.
+    #
+    # To support inheritance properly each table starts out as a copy
+    # of the parent's table, or empty when the parent has none.
+    def inherited(subclass)
+      super
+      subclass.const_set(:Members, inherited_table(subclass, :Members, []))
+      subclass.const_set(:MemberIndex, inherited_table(subclass, :MemberIndex, {}))
+      subclass.const_set(:MemberSizes, inherited_table(subclass, :MemberSizes, {}))
+      subclass.const_set(:MemberOptions, inherited_table(subclass, :MemberOptions, {}))
+    end
+
+    # Copy of the named table as visible from the subclass (i.e. the
+    # parent's), or the default when no such constant exists. Only the
+    # missing-constant case is rescued; other errors propagate.
+    def inherited_table(subclass, name, default)
+      subclass.const_get(name).clone
+    rescue NameError
+      default
+    end
+    private :inherited_table
+
+
+    # Define a method for each size name, and when that method is called it updates
+    # the struct class accordingly.
+    SIZE_MAP.keys.each do |type|
+
+      define_method(type) do |name, *args|
+        name = name.to_sym
+        const_get(:MemberIndex)[name] = const_get(:Members).size
+        const_get(:MemberSizes)[name] = type
+        const_get(:MemberOptions)[name] = args
+        const_get(:Members) << name
+      end
+
+    end
+
+
+    # Return the number of members.
+    def size
+      const_get(:Members).size
+    end
+    alias_method :length, :size
+
+    # Return the number of bytes occupied in memory or on disk.
+    def bytesize
+      const_get(:Members).inject(0) { |size, name| size + sizeof(name) }
+    end
+
+    # Size in bytes of the named member.
+    def sizeof(name)
+      value = SIZE_MAP[const_get(:MemberSizes)[name]]
+      value.respond_to?(:call) ? value.call(*const_get(:MemberOptions)[name]) : value
+    end
+
+    # Build a struct from its serialized form.
+    def new_from_bin(bin)
+      new_struct = new
+      new_struct.unserialize(bin)
+    end
+
+  end
+
+
+  ####################
+  # Instance Methods #
+  ####################
+
+  attr_reader :values
+
+  # Values are given positionally, in member declaration order.
+  def initialize(*args)
+    @values = args
+  end
+
+  # Return the binary string for this struct: each value packed with the
+  # pattern of its member, in declaration order.
+  def serialize
+    vals = @values.clone
+    membs = members.clone
+    pack_pattern.map do |patt|
+      name = membs.shift
+      if patt.is_a?(String)
+        [vals.shift].pack(patt)
+      else
+        patt.call(vals.shift, *member_options[name])
+      end
+    end.join
+  end
+
+  # Replace the values of this struct with those decoded from +bin+ and
+  # return self. +bin+ itself is not modified; a copy is consumed from
+  # the front, one member at a time.
+  def unserialize(bin)
+    bin = bin.clone
+    @values = []
+    membs = members.clone
+    unpack_pattern.each do |patt|
+      name = membs.shift
+      if patt.is_a?(String)
+        @values += bin.unpack(patt)
+        bin.slice!(0, sizeof(name))
+      else
+        # Callable patterns consume their own bytes from the buffer.
+        @values << patt.call(bin, *member_options[name])
+      end
+    end
+    self
+  end
+
+  def pack_pattern
+    members.map { |name| PACK_MAP[member_sizes[name]] }
+  end
+
+  def unpack_pattern
+    members.map { |name| UNPACK_MAP[member_sizes[name]] || PACK_MAP[member_sizes[name]] }
+  end
+
+  # Read a value by member name (String or Symbol) or by index.
+  # Raises ArgumentError for any other kind of key.
+  def [](name_or_idx)
+    case name_or_idx
+
+    when Numeric
+      idx = name_or_idx
+      @values[idx]
+
+    when String, Symbol
+      name = name_or_idx.to_sym
+      @values[member_index[name]]
+
+    else
+      raise ArgumentError.new("expected name or index, got #{name_or_idx.inspect}")
+    end
+  end
+
+  # Write a value by member name (String or Symbol) or by index.
+  # Raises ArgumentError for any other kind of key.
+  def []=(name_or_idx, value)
+    case name_or_idx
+
+    when Numeric
+      idx = name_or_idx
+      @values[idx] = value
+
+    when String, Symbol
+      name = name_or_idx.to_sym
+      @values[member_index[name]] = value
+
+    else
+      raise ArgumentError.new("expected name or index, got #{name_or_idx.inspect}")
+    end
+  end
+
+  # Structs are equal when the other object is of the same struct class
+  # (or a subclass) and holds the same values. The class is checked
+  # first so that comparing with an unrelated object yields false.
+  def ==(other)
+    other.is_a?(self.class) && other.values == @values
+  end
+
+  # Some of these are just to quack like Ruby's built-in Struct. YAGNI, but can't hurt either.
+
+  def each(&block)
+    @values.each(&block)
+  end
+
+  def each_pair(&block)
+    members.zip(@values).each(&block)
+  end
+
+  def size
+    members.size
+  end
+  alias_method :length, :size
+
+  def sizeof(name)
+    self.class.sizeof(name)
+  end
+
+  def bytesize
+    self.class.bytesize
+  end
+
+  alias_method :to_a, :values
+
+
+  # A few convenience methods.
+
+  def members
+    self.class::Members
+  end
+
+  def member_index
+    self.class::MemberIndex
+  end
+
+  def member_sizes
+    self.class::MemberSizes
+  end
+
+  def member_options
+    self.class::MemberOptions
+  end
+
+  # The last expression is returned, so return self instead of junk.
+  self
+end
+
+
+# a small test
+# Runs only when this file is executed directly: declares a sample struct,
+# prints its layout, and round-trips one instance through serialization.
+if __FILE__ == $0
+  class MachHeader < CStruct
+    uint   :magic
+    int    :cputype
+    int    :cpusubtype
+    string :segname, 16
+  end
+
+  # Member tables of the sample struct.
+  [MachHeader::Members, MachHeader::MemberIndex, MachHeader::MemberSizes].each do |table|
+    puts table.inspect
+  end
+  puts "# of MachHeader members: #{MachHeader.size}, size in bytes: #{MachHeader.bytesize}"
+
+  # Size and value of every field of one instance.
+  sample = MachHeader.new(0xfeedface, 7, 3, "foobar")
+  [:magic, :cputype, :cpusubtype, :segname].each do |field|
+    puts "#{field}(#{MachHeader.sizeof(field)}):      #{sample[field].inspect}"
+  end
+  puts sample.pack_pattern.inspect
+
+  # Serialize, rebuild from the binary, serialize again and compare.
+  first_pass = sample.serialize
+  puts "values: #{sample.values.inspect}"
+  rebuilt = MachHeader.new_from_bin(first_pass)
+  puts "new values: #{rebuilt.values.inspect}"
+  second_pass = rebuilt.serialize
+  puts "serialized:   #{first_pass.inspect}"
+  puts "unserialized: #{second_pass.inspect}"
+  puts "new == old ? #{second_pass == first_pass}"
+end
--- a/lib/compiler/asm/elf.rb
+++ b/lib/compiler/asm/elf.rb
@ -0,0 +1,10 @@
+require 'compiler/asm/elf/structs'
+
+class Compiler
+  module ASM
+
+    # Namespace for ELF support. Nothing is defined directly in it here;
+    # this declaration only makes sure the module exists.
+    module ELF
+    end
+
+  end
+end
--- a/lib/compiler/asm/elf/elfsymtab.rb
+++ b/lib/compiler/asm/elf/elfsymtab.rb
@ -0,0 +1,7 @@
+# NOTE(review): this is a top-level ASM module, whereas lib/compiler/asm/elf.rb
+# declares Compiler::ASM::ELF -- confirm which namespace is intended.
+module ASM
+
+  # Placeholder for an ELF-specific symbol table; adds nothing to
+  # SymbolTable yet.
+  # NOTE(review): SymbolTable is not required by this file, so it has to be
+  # loaded beforehand -- confirm the load order.
+  class ELFSymbolTable < SymbolTable
+
+  end
+
+end
--- a/lib/compiler/asm/elf/elfwriter.rb
+++ b/lib/compiler/asm/elf/elfwriter.rb
@ -0,0 +1,9 @@
+# NOTE(review): this is a top-level ASM module, whereas lib/compiler/asm/elf.rb
+# declares Compiler::ASM::ELF -- confirm which namespace is intended.
+module ASM
+
+  # Placeholder for an ELF object file writer; adds nothing to ObjWriter yet.
+  # NOTE(review): ObjWriter is not required by this file, so it has to be
+  # loaded beforehand -- confirm the load order.
+  class ELFWriter < ObjWriter
+
+
+
+  end
+
+end
--- a/lib/compiler/asm/macho.rb
+++ b/lib/compiler/asm/macho.rb
@ -0,0 +1,10 @@
+require 'compiler/asm/macho/structs'
+
+class Compiler
+  module ASM
+
+    # Namespace for Mach-O support. Nothing is defined directly in it
+    # here; this declaration only makes sure the module exists.
+    module MachO
+    end
+
+  end
+end
--- a/lib/compiler/asm/macho/load_commands.rb
+++ b/lib/compiler/asm/macho/load_commands.rb
@ -0,0 +1,61 @@
+require 'compiler/cstruct'
+
+# The MachO module contains constants and structures related to the
+# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
+#
+# Constants and structures as defined in /usr/include/mach-o/loader.h
+# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
+# <mach-o/nlist.h>, and <mach-o/reloc.h>.
+
+class Compiler
+  # NOTE(review): declared as Compiler::MachO, whereas lib/compiler/asm/macho.rb
+  # declares Compiler::ASM::MachO -- confirm which namespace is intended.
+  module MachO
+
+    # Base struct for load commands. Subclasses inherit cmd and cmdsize
+    # as their first two fields.
+    class LoadCommand < CStruct
+      uint32 :cmd
+      uint32 :cmdsize
+    end
+
+    # Values for the cmd member of LoadCommand CStructs (incomplete!).
+    LC_SEGMENT        = 0x1
+    LC_SYMTAB         = 0x2
+    LC_SYMSEG         = 0x3
+    LC_THREAD         = 0x4
+    LC_UNIXTHREAD	    = 0x5
+
+    # Load command for a segment; the struct used for LC_SEGMENT (see
+    # LOAD_COMMAND_STRUCT_MAP below).
+    class SegmentCommand < LoadCommand
+      string :segname, 16
+      uint32 :vmaddr
+      uint32 :vmsize
+      uint32 :fileoff
+      uint32 :filesize
+      int32  :maxprot
+      int32  :initprot
+      uint32 :nsects
+      uint32 :flags
+    end
+
+
+    # Values for protection fields, maxprot and initprot.
+    VM_PROT_NONE       = 0x00
+    VM_PROT_READ       = 0x01
+    VM_PROT_WRITE      = 0x02
+    VM_PROT_EXECUTE    = 0x04
+    VM_PROT_NO_CHANGE  = 0x08
+    VM_PROT_COPY       = 0x10
+
+
+    # Load command locating the symbol table; the struct used for
+    # LC_SYMTAB (see LOAD_COMMAND_STRUCT_MAP below).
+    class SymbolTableCommand < LoadCommand
+      uint32 :symoff     # Points to an array of Nlist structs.
+      uint32 :nsyms      # Number of entries in said array.
+      uint32 :stroff     # Offset of the string table.
+      uint32 :strsize    # Size of the string table in bytes.
+    end
+
+
+    # Maps a load command type (a cmd value) to the struct class that
+    # represents it. Only the types listed here have a struct.
+    LOAD_COMMAND_STRUCT_MAP = {
+      LC_SEGMENT => SegmentCommand,
+      LC_SYMTAB  => SymbolTableCommand
+    }
+
+  end
+end
--- a/lib/compiler/asm/macho/mach_header.rb
+++ b/lib/compiler/asm/macho/mach_header.rb
@ -0,0 +1,46 @@
+require 'compiler/cstruct'
+
+# The MachO module contains constants and structures related to the
+# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
+#
+# Constants and structures as defined in /usr/include/mach-o/loader.h
+# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
+# <mach-o/nlist.h>, and <mach-o/reloc.h>.
+
+class Compiler
+  # NOTE(review): declared as Compiler::MachO, whereas lib/compiler/asm/macho.rb
+  # declares Compiler::ASM::MachO -- confirm which namespace is intended.
+  module MachO
+
+    # Appears at the beginning of every Mach object file.
+    class MachHeader < CStruct
+      uint32 :magic
+      int32  :cputype
+      int32  :cpusubtype
+      uint32 :filetype
+      uint32 :ncmds
+      uint32 :sizeofcmds
+      uint32 :flags
+    end
+
+    # Values for the magic field.
+    MH_MAGIC = 0xfeedface          # Mach magic number (big-endian).
+    MH_CIGAM = 0xcefaedfe          # Little-endian version.
+
+    # Values for the filetype field.
+    MH_OBJECT     = 0x1
+    MH_EXECUTE    = 0x2
+    MH_FVMLIB     = 0x3
+    MH_CORE       = 0x4
+    MH_PRELOAD    = 0x5
+    MH_DYLIB      = 0x6
+    MH_DYLINKER   = 0x7
+    MH_BUNDLE     = 0x8
+    MH_DYLIB_STUB = 0x9
+    MH_DSYM       = 0xa
+
+    # CPU types and subtypes (only Intel for now).
+    # Presumably these are the values for the cputype and cpusubtype
+    # fields of MachHeader -- verify against <mach/machine.h>.
+    CPU_TYPE_X86 = 7
+    CPU_TYPE_I386 = CPU_TYPE_X86
+    CPU_SUBTYPE_X86_ALL = 3
+
+  end
+end
--- a/lib/compiler/asm/macho/nlist.rb
+++ b/lib/compiler/asm/macho/nlist.rb
@ -0,0 +1,50 @@
+require 'compiler/cstruct'
+
+# The MachO module contains constants and structures related to the
+# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
+#
+# Constants and structures as defined in /usr/include/mach-o/loader.h
+# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
+# <mach-o/nlist.h>, and <mach-o/reloc.h>.
+
+class Compiler
+  # NOTE(review): declared as Compiler::MachO, whereas lib/compiler/asm/macho.rb
+  # declares Compiler::ASM::MachO -- confirm which namespace is intended.
+  module MachO
+
+    ########################
+    # Symbol table support #
+    ########################
+
+    # Nlist is used to describe symbols.
+    class Nlist < CStruct
+      uint32 :n_strx     # Index into string table. Index of zero is the empty string.
+      uint8  :n_type     # Type flag (see below).
+      uint8  :n_sect     # Section number (from 1) or NO_SECT.
+      uint16 :n_desc     # TODO See <mach-o/stab.h>.
+      uint32 :n_value    # The symbol's value (or stab offset).
+    end
+
+    # Type flag (see <mach-o/nlist.h> for more details)
+    # ---------
+    #
+    # This field consists of four bitfields:
+    #
+    #   uchar N_STAB : 3
+    #   uchar N_PEXT : 1
+    #   uchar N_TYPE : 3
+    #   uchar N_EXT  : 1
+    #
+    # The constants below are the bit masks that select each of those
+    # bitfields from the n_type byte.
+    N_STAB = 0xe0   # if any bits set => symbolic debugging info
+    N_PEXT = 0x10   # private external symbol bit
+    N_TYPE = 0x0e   # mask for the type bits
+    N_EXT  = 0x01   # external symbol bit, set for external symbols (e.g. globals)
+
+    # Values for N_TYPE. (incomplete!)
+    N_UNDF = 0x0    # undefined, n_sect == NO_SECT
+    N_ABS  = 0x2    # absolute, n_sect == NO_SECT
+    N_SECT = 0xe    # defined in section number n_sect
+
+    # Values bounding the n_sect field: NO_SECT marks a symbol that is not
+    # in any section. MAX_SECT is presumably the highest usable section
+    # number -- verify against <mach-o/nlist.h>.
+    NO_SECT = 0
+    MAX_SECT = 255
+
+  end
+end
--- a/lib/compiler/asm/macho/object_file.rb
+++ b/lib/compiler/asm/macho/object_file.rb
@ -0,0 +1,373 @@
+require 'asm/macho'
+
+class Compiler
+  module MachO
+
+    class ObjectFile
+
+      attr_accessor :header, :load_commands, :sections, :data
+      attr_accessor :current_segment
+
+      # Create an empty object file description.
+      #
+      # filetype is stored in the Mach header; the segment helpers in this
+      # class only know how to handle MH_OBJECT and MH_EXECUTE.
+      def initialize(filetype = MH_OBJECT)
+        @header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0)
+        @load_commands = []              # All load commands defined so far (segments and symtab).
+        @sections = {}                   # Map of segment names to lists of sections.
+        @section_disk_size = Hash.new(0) # Sections store their VM size so we need their sizes on disk.
+        @section_offset = 0              # Offset of the next section's data, in bytes.
+        @data = []                       # Blobs of data that appear at the end of the file.
+                                         #  (text, data, relocation info, symtab, ...)
+        @current_segment = nil           # An alias for the last defined segment.
+        @text_segname = nil              # Name of the segment holding the __text section
+        @text_sect_index = nil           # Index of __text section
+        @text_data_index = nil           # Index into @data of __text section data
+        @reloc_info = nil                # Copy of relocation info array
+      end
+
+
+      # Define a LoadCommand in this file. The header's ncmds and sizeofcmds
+      # fields are updated automatically to keep things in sync. If a block is
+      # given it is passed the new LoadCommand struct after all other
+      # initialization has been done.
+      #
+      # Other methods that create any type of load command should use this
+      # method to do so. Right now the only types supported are LC_SEGMENT
+      # and LC_SYMTAB. Modify asm/macho.rb to add structs for other types, and
+      # add them to LOAD_COMMAND_STRUCT_MAP.
+
+      def load_command(cmdtype)
+        struct = LOAD_COMMAND_STRUCT_MAP[cmdtype]
+        unless struct
+          raise "unsupported load command type: #{cmdtype.inspect}," +
+                " supported types: #{LOAD_COMMAND_STRUCT_MAP.keys.sort.inspect}"
+        end
+
+        # Fill in all the unknown fields with 0, this is nonsense for
+        # string fields but that doesn't really matter.
+        dummy_vals = [0] * (struct::Members.size - 2)
+
+                           #   cmd        cmdsize          ...
+        command = struct.new(cmdtype, struct.bytesize, *dummy_vals)
+
+        @load_commands << command
+
+        @header[:ncmds] += 1
+        @header[:sizeofcmds] += command.bytesize
+
+        yield(command) if block_given?
+
+        return command
+      end
+
+
+      # Define a segment in this file. If a block is given it is passed
+      # the new segment. You can chain calls to segment, it returns self.
+      #
+      # Mach object files should only contain one anonymous segment. This
+      # is not checked but should be kept in mind when crafting files.
+      def segment(name, &block)
+        @current_segment = load_command(LC_SEGMENT) do |seg|
+          seg[:segname] = name
+          block.call(seg) if block
+        end
+        return self
+      end
+
+
+      # Define a section under the given segment. nsects and cmdsize are
+      # updated automatically. segname can't be derived from the segment
+      # that this section is defined under, as they can differ.
+      #
+      # Mach object files have the __text, __data, and other common
+      # sections all defined under one anonymous segment, but their segment
+      # names reflect their final positions after linking. The linker plonks
+      # them in the segment that they name.
+      def section(name, segname, data = '', vmsize=data.size,
+                  segment = @current_segment, type = S_REGULAR)
+
+        # Create the new section.
+        section = Section.new(name, segname, @section_offset, vmsize, 0, 0, 0, 0, 0, 0, type)
+
+        # Add this section to the map of segment names to sections.
+        (@sections[segment[:segname]] ||= []) << section
+        @section_disk_size[name] = data.size
+        @section_offset += data.size
+        @data << data if data.size > 0
+
+        # Update the header.
+        @header[:sizeofcmds] += section.bytesize
+
+        # Update the segment.
+        segment[:nsects] += 1
+        segment[:cmdsize] += section.bytesize
+
+        yield(section) if block_given?
+
+        return section
+      end
+
+
+
+      # Define a standard text section under the current segment (if present).
+      #
+      # If there is no current segment then we act according to the file's type
+      # (specified in the header). Segments are created if they do not exist.
+      #
+      # When it is MH_OBJECT the text section is defined under a single,
+      # nameless segment, but the section's segment name is set to the name
+      # given here.
+      #
+      # For MH_EXECUTE files the text section goes under the segment with the
+      # name given (__TEXT).
+
+      def text(data, sectname = '__text', segname='__TEXT')
+        real_segname = nil
+        unless @current_segment
+          real_segname = segname_based_on_filetype(segname)
+          segment(real_segname) do |seg|
+            seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
+            seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
+          end
+        end
+
+        section(sectname, segname, data) do |sect|
+          # reloff and nreloc are calculated later (in calculate_offsets)
+          sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS
+        end
+
+        # Remember where section and data are so we can update them later.
+        @text_segname = real_segname || segname
+        @text_sect_index = @sections[@text_segname].length-1
+        @text_data_index = @data.length-1
+
+        return self
+      end
+
+      def update_text(data)
+        raise 'no __text segment defined yet' unless @text_data_index
+        @data[@text_data_index] = data
+      end
+
+      # Basis for #data, #const, and #bss methods.
+      def segment_based_on_filetype(segname, options = {})
+        unless @current_segment
+          permissions = VM_PROT_READ
+          permisions |= VM_PROT_WRITE if options.delete(:writable)
+          segment(segname_based_on_filetype(segname)) do |seg|
+            seg[:initprot] = seg[:maxprot] = permissions
+          end
+        end
+        yield if block_given?
+        return self
+      end
+
+      # Define a standard data section under the current segment (if present).
+      # This behaves similarly to the text method.
+      #
+      def data(data, sectname = '__data', segname='__DATA')
+        segment_based_on_filetype(segname, :writable => true) do
+          section(sectname, segname, data)
+        end
+      end
+
+      # Define a standard const section under the current segment (if present).
+      # This behaves similarly to the data method.
+      #
+      def const(data, sectname = '__const', segname='__DATA')
+        segment_based_on_filetype(segname) do
+          section(sectname, segname, data)
+        end
+      end
+
+      # Define a standard BSS section under the current segment (if present).
+      # This behaves similarly to the data method but accepts a VM size instead
+      # of a blob, and no data is written to file since this section is for
+      # uninitialized data.
+      #
+      def bss(vmsize, sectname = '__bss', segname='__DATA')
+        segment_based_on_filetype(segname, :writable => true) do
+          section(sectname, segname, '', vmsize)
+        end
+      end
+
+      # Define a relocation table. Usually between segments and the
+      # symbol table.
+      #
+      # Accepts an array of relocation info structs.
+      def reloc(reloc_info)
+        @data << if reloc_info.respond_to?(:join)
+                   reloc_info.map {|r| r.serialize}.join
+                 else
+                   reloc_info
+                 end
+        @reloc_info = reloc_info.map {|x| x.clone}
+        return self
+      end
+
+      # Define a symbol table. This should usually be placed at the end of the
+      # file.
+      #
+      # This function is overloaded to accept either an array of Nlist structs
+      # packed into a byte string (i.e. a C array) and a string table, or a
+      # single parameter: any type of SymbolTable.
+
+      def symtab(nlist_ary_or_symtab, stab = nil)
+        if stab.nil?
+          symtab = nlist_ary_or_symtab
+          stab = symtab.stab
+          nlist_ary = symtab.nlist_ary
+        else
+          nlist_ary = nlist_ary_or_symtab
+        end
+
+        load_command(LC_SYMTAB) do |st|
+          st[:nsyms] = nlist_ary.size
+          st[:strsize] = stab.size
+          # symoff and stroff are filled in when offsets are recalculated.
+        end
+
+  #       puts ">>> Defining symbol table:"
+  #       puts ">>> #{nlist_ary.size} symbols"
+  #       puts ">>> stab = #{stab.inspect}"
+  #       puts ">>> nlist_ary = #{nlist_ary.inspect}"
+  #       puts ">>> (serialized) = #{nlist_ary.map{|n|n.serialize}.join.inspect}"
+
+        @data << nlist_ary.map {|n| n.serialize}.join
+        @data << stab
+        return self
+      end
+
+
+      # Serialize the entire MachO file into a byte string. This is simple
+      # thanks to CStruct#serialize.
+
+      def serialize
+        # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_command.size)
+        # ... perhaps an option to recalculate such data as well.
+
+        # Now that we have all the pieces of the file defined we can calculate
+        # the file offsets of segments and sections.
+        calculate_offsets
+
+        ###################################
+        # Mach-O file Part 1: Mach Header #
+        ###################################
+        @header.serialize +
+
+        #####################################
+        # Mach-O file Part 2: Load Commands #
+        #####################################
+        # dump each load command (which include the section headers under them)
+        @load_commands.map do |cmd|
+          sects = @sections[cmd[:segname]] rescue []
+          sects.inject(cmd.serialize) do |data, sect|
+            data + sect.serialize
+          end
+        end.join +
+
+        ###################################
+        # Mach-O file Part 3: Binary data #
+        ###################################
+        @data.join
+      end
+
+
+      # Update the file offsets in segments and sections.
+
+      def calculate_offsets
+
+        # Maintain the offset into the the file on disk. This is used
+        # to update the various structures.
+        offset = @header.bytesize
+
+        # First pass over load commands. Most sizes are filled in here.
+        @load_commands.each do |cmd|
+          case cmd[:cmd]
+
+          when LC_SEGMENT
+            seg = cmd
+            sections = @sections[seg[:segname]]
+            section_size = sections.size * Section.bytesize
+            section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] }
+            section_disk_size = sections.inject(0) do |total, sect|
+              total + @section_disk_size[sect[:sectname]]
+            end
+
+            ### TODO this should be redundant. try commenting it out one day.
+            seg[:nsects] = sections.size
+            seg[:cmdsize] = seg.bytesize + section_size
+            ###
+
+            seg[:vmsize] = section_vm_size
+            seg[:filesize] = section_disk_size
+
+          when LC_SYMTAB
+            # nop
+
+          else
+            raise "unsupported load command: #{cmd.inspect}"
+          end
+
+          offset += cmd[:cmdsize]
+        end
+
+
+        # offset now points to the end of the Mach-O headers, or the beginning
+        # of the binary blobs of section data at the end.
+
+        # Second pass over load commands. Fill in file offsets.
+        @load_commands.each do |cmd|
+          case cmd[:cmd]
+
+          when LC_SEGMENT
+            seg = cmd
+            sections = @sections[seg[:segname]]
+            seg[:fileoff] = offset
+            sections.each do |sect|
+              sect[:offset] = offset
+              offset += @section_disk_size[sect[:sectname]]
+            end
+
+          when LC_SYMTAB
+            if @reloc_info
+              # update text section with relocation info
+              __text = @sections[@text_segname][@text_sect_index]
+              __text[:reloff] = offset
+              __text[:nreloc] = @reloc_info.length
+              offset += @reloc_info.first.bytesize * @reloc_info.length
+            end
+            st = cmd
+            st[:symoff] = offset
+            offset += st[:nsyms] * Nlist.bytesize
+            st[:stroff] = offset
+            offset += st[:strsize]
+
+
+          # No else clause is necessary, the first iteration should have caught them.
+
+          end
+
+        end # @load_commands.each
+
+      end # def calculate_offsets
+
+
+      #######
+      private
+      #######
+
+      def segname_based_on_filetype(segname)
+        case @header[:filetype]
+        when MH_OBJECT
+          ''
+        when MH_EXECUTE
+          segname
+        else
+          raise "unsupported MachO file type: #{@header.inspect}"
+        end
+      end
+
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/macho/relocation_info.rb
+++ b/lib/compiler/asm/macho/relocation_info.rb
@ -0,0 +1,35 @@
+require 'compiler/cstruct'
+
+# The MachO module contains constants and structures related to the
+# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
+#
+# Constants and structures as defined in /usr/include/mach-o/loader.h
+# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
+# <mach-o/nlist.h>, and <mach-o/reloc.h>.
+
+class Compiler
+  module MachO
+
+    # A single relocation entry (see <mach-o/reloc.h>).
+    class RelocationInfo < CStruct
+      int32  :r_address   # offset in the section to what is being relocated
+      uint32 :r_info      # packed bit field, see the note below
+    end
+
+    # NOTE: r_info is a packed bit field with the following members,
+    # listed from the least significant bits upwards:
+    #
+    # (CStruct should eventually support bitfields, but doesn't right now.)
+    #
+    #     r_symbolnum : 24 -- symbol index if r_extern == 1 or section ordinal if r_extern == 0
+    #     r_pcrel     :  1 -- was relocated pc relative already
+    #     r_length    :  2 -- 0=byte, 1=word, 2=long, 3=quad
+    #     r_extern    :  1 -- 1 for exported symbols, 0 otherwise
+    #     r_type      :  4 -- if not 0, machine specific relocation type (always 0)
+
+    R_ABS = 0         # Absolute relocation type
+                      # (r_symbolnum == R_ABS for absolute symbols that don't need reloc)
+
+    # Relocation types (r_type)
+    GENERIC_RELOC_VANILLA = 0
+
+  end
+end
--- a/lib/compiler/asm/macho/section.rb
+++ b/lib/compiler/asm/macho/section.rb
@ -0,0 +1,34 @@
+require 'compiler/cstruct'
+
+# The MachO module contains constants and structures related to the
+# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
+#
+# Constants and structures as defined in /usr/include/mach-o/loader.h
+# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
+# <mach-o/nlist.h>, and <mach-o/reloc.h>.
+
+class Compiler
+  module MachO
+
+    # A section header. These are written directly after the segment load
+    # command they are defined under.
+    class Section < CStruct
+      string :sectname, 16   # name of this section
+      string :segname, 16    # name of the segment the section belongs to
+      uint32 :addr
+      uint32 :size           # size in memory; may differ from the size on disk
+      uint32 :offset         # file offset of the section's data
+      uint32 :align
+      uint32 :reloff         # file offset of the section's relocation entries
+      uint32 :nreloc         # number of relocation entries
+      uint32 :flags          # section type (low byte) and attributes
+      uint32 :reserved1
+      uint32 :reserved2
+    end
+
+    # Values for the type bitfield (mask 0x000000ff) of the flags field.
+    # (incomplete!)
+    S_REGULAR  = 0x0
+    S_ZEROFILL = 0x1
+    S_CSTRING_LITERALS = 0x2
+
+  end
+end
--- a/lib/compiler/asm/macho/structs.rb
+++ b/lib/compiler/asm/macho/structs.rb
@ -0,0 +1,53 @@
+require 'compiler/macho/mach_header'
+require 'compiler/macho/load_commands'
+require 'compiler/macho/section'
+require 'compiler/macho/relocation_info'
+
+# The MachO module contains constants and structures related to the
+# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
+#
+# Constants and structures as defined in /usr/include/mach-o/loader.h
+# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
+# <mach-o/nlist.h>, and <mach-o/reloc.h>.
+
+class Compiler
+  module MachO
+
+    ########################
+    # Symbol table support #
+    ########################
+
+    # NOTE(review): an identical Nlist definition and constant set also
+    # appears in lib/compiler/asm/macho/nlist.rb -- confirm which copy is
+    # meant to survive the re-organization.
+
+    # Nlist describes a single symbol table entry (see <mach-o/nlist.h>).
+    class Nlist < CStruct
+      uint32 :n_strx     # Index into string table. Index of zero is the empty string.
+      uint8  :n_type     # Type flag (see below).
+      uint8  :n_sect     # Section number (from 1) or NO_SECT.
+      uint16 :n_desc     # TODO: document this field; see <mach-o/stab.h>.
+      uint32 :n_value    # The symbol's value (or stab offset).
+    end
+
+    # Type flag (see <mach-o/nlist.h> for more details)
+    # ---------
+    #
+    # The n_type field consists of four bitfields:
+    #
+    #   uchar N_STAB : 3
+    #   uchar N_PEXT : 1
+    #   uchar N_TYPE : 3
+    #   uchar N_EXT  : 1
+    #
+    # The constants below are the bit masks for those four bitfields.
+    N_STAB = 0xe0   # if any bits set => symbolic debugging info
+    N_PEXT = 0x10   # private external symbol bit
+    N_TYPE = 0x0e   # mask for the type bits
+    N_EXT  = 0x01   # external symbol bit, set for external symbols (e.g. globals)
+
+    # Values for N_TYPE. (incomplete!)
+    N_UNDF = 0x0    # undefined, n_sect == NO_SECT
+    N_ABS  = 0x2    # absolute, n_sect == NO_SECT
+    N_SECT = 0xe    # defined in section number n_sect
+
+    # Special values for the n_sect field.
+    NO_SECT = 0     # symbol is not in any section (real section numbers start at 1)
+    MAX_SECT = 255  # largest section number that fits in the uint8 n_sect field
+
+  end
+end
--- a/lib/compiler/asm/macho/symbol.rb
+++ b/lib/compiler/asm/macho/symbol.rb
@ -0,0 +1,31 @@
+require 'compiler/macho'
+
+class Compiler
+  module MachO
+
+    class Symbol
+
+      attr_accessor :name, :type, :segnum, :desc, :value
+
+      def initialize(name, type, segnum, desc, value)
+        @name = name
+        @type = type
+        @segnum = segnum
+        @desc = desc
+        @value = value
+      end
+
+
+      def to_nlist(strx)
+        Nlist.new(strx, @type, @segnum, @desc, @value)
+      end
+
+
+      def to_s
+        @name
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/macho/symbol_table.rb
+++ b/lib/compiler/asm/macho/symbol_table.rb
@ -0,0 +1,88 @@
+require 'compiler/macho/structs'
+require 'compiler/macho/symbol'
+require 'compiler/asm/symbol_table'
+
+class Compiler
+  module MachO
+
+    class SymbolTable < Assembler::SymbolTable
+
+      def make_symbols(vars, base_addr, type, segnum)
+        # Note: Sorting a Ruby hash gives an alist, e.g. [[<key>, <value>], ...]
+        #       We can use map on it as if it were a hash so it works nicely.
+        vars.sort { |a,b| a[1] <=> b[1] }.
+             map do |name, offset|
+               Symbol.new(name, type, segnum, 0, base_addr + offset)
+             end
+      end
+
+      def all_symbols
+        # TODO FIXME:
+        # - the last var exported ends up after main somewhere... WTF?!
+        # - All labels are exported. This should be changed and only functions exported!
+
+        section = 1
+
+        # Functions (section #1, __text)
+        symbols = make_symbols(@labels, text_offset, N_SECT | N_EXT, section)
+        section += 1
+
+        # Constants (section #2, __const)
+        if @consts.size > 0
+          symbols += make_symbols(@consts, const_offset, N_SECT, section)
+          section += 1
+        end
+
+        # Variables (section #3, __bss)
+        if @vars.size > 0
+          symbols += make_symbols(@vars, bss_offset, N_SECT, section)
+        end
+
+        return symbols
+      end
+
+      # this is fairly stupid but works
+      def bss_section
+        @consts.size > 0 ? 3 : 2
+      end
+
+      def nlist_ary
+        symbols = {}
+        strx = 1
+        ary = []
+        all_symbols.each do |sym|
+          key = sym.name.to_sym
+          unless symbols.has_key?(key)
+            symbols[key] = strx
+            strx += sym.name.length + 1 # +1 for the null byte
+          end
+          ary << sym.to_nlist(symbols[key])
+        end
+        return ary
+      end
+
+      def stab
+        # The empty strings result in a string that begins and ends with a null byte
+        ['', all_symbols, ''].flatten.map { |sym| sym.to_s }.join("\0")
+      end
+
+      def reloc(r_address, r_symbolnum = 0, r_length = 2, r_extern = 0, r_pcrel = 0, r_type = 0)
+        r_info = (r_type << 28) | (r_extern << 27) | (r_length << 25) |
+          (r_pcrel << 24) | r_symbolnum
+        @reloc_info << RelocationInfo.new(r_address, r_info)
+      end
+
+      def reloc_info
+        n = bss_section
+        @reloc_info.each {|r| r[:r_info] |= n}
+      end
+
+      def calculate_offsets(text_size)
+        @const_offset = @text_offset + text_size
+        @bss_offset = @const_offset + @const_size
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/object_file.rb
+++ b/lib/compiler/asm/object_file.rb
@ -0,0 +1,28 @@
+class Compiler
+  module ASM
+
+    class UnimplementedMethodError < RuntimeError; end
+
+
+    # Abstract base class.
+    class ObjWriter
+
+      def write!(filename)
+        File.open(filename, 'wb') do |file|
+          file.print(serialize)
+        end
+      end
+
+      def fail(name)
+        raise UnimplementedMethodError.new(name)
+      end
+
+      # These methods must be defined for most uses of the library.
+      %w[header segment section text data bss symtab serialize].each do |name|
+        define_method(name) { fail(name) }
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/register_proxy.rb
+++ b/lib/compiler/asm/register_proxy.rb
@ -0,0 +1,70 @@
+class Compiler
+  module ASM
+
+    # Acts like a register and can be used as the base or index in an
+    # effective address.
+    #
+    # e.g. [EAX] or [ESI+EBX] or [EAX + 0xff] or [EAX + EDX * 2]
+    class RegisterProxy
+
+      attr_reader :name, :size, :regnum
+      attr_reader :base, :index, :scale
+
+
+      def initialize(name, size, regnum)
+        @name = name # attrs are read-only so sharing is ok
+        @size = size
+        @regnum = regnum
+        @base = self
+      end
+
+
+      def +(index)
+        raise "index already specified" if @index
+        new_reg = self.clone
+        new_reg.instance_variable_set('@index', index)
+        new_reg
+      end
+
+
+      def *(scale)
+        raise "index must come first" unless @index
+        raise "scale already specified" if scale
+        raise "unsupported scale: #{scale}" unless scale.to_s.match(/^[1248]$/)
+        @scale = scale
+        self
+      end
+
+
+      def scale?
+        @scale
+      end
+
+
+      def index?
+        @index
+      end
+
+
+      def register?
+        @scale.nil? && @index.nil?
+      end
+
+
+
+      def to_s
+        [ @name.to_s,
+          @index && "+#{@index}",
+          @scale && "*#{@scale}"
+        ].compact.join
+      end
+
+
+      def inspect
+        to_s
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/symbol_table.rb
+++ b/lib/compiler/asm/symbol_table.rb
@ -0,0 +1,99 @@
+class Compiler
+  module ASM
+
+
+    # Abstract symbol table.
+    #
+    # Basically a big map of variable, constant, and label names to
+    # offsets within their respective sections. Final addresses are
+    # calculated from these offsets on the 2nd pass when we know where
+    # things will actually live in memory.
+
+    class SymbolTable
+
+      attr_accessor :text_offset, :bss_offset, :const_offset
+      attr_reader :const_data, :const_size, :bss_size, :reloc_info
+
+      def initialize
+        @vars = {}                   # Map of variable names to offsets. (bss vars)
+        @consts = {}                 # Map of constant names to offsets.
+        @funcs = {}                  # map of function names to offsets.
+
+        # Initial data to load into memory (data for __DATA segment).
+        @const_data = ''
+
+        @const_size = 0              # Size of const section.
+        @bss_size = 0                # Size of bss section.
+
+        # Map names to locations.
+        @labels = Hash.new {|h, key| raise "undefined label: #{key}"}
+        @num_labels = 0              # Used to generate unique labels.
+        @num_labels_with_suffix = Hash.new(0)
+
+        # Relocation info. Subclasses should define a reloc method.
+        @reloc_info = []
+
+        @text_offset = 0
+        @bss_offset = 0
+        @const_offset = 0
+      end
+
+      # Generate a unique label.
+      def unique_label(suffix = nil)
+        @num_labels += 1
+        if suffix
+          @num_labels_with_suffix[suffix] += 1
+          suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
+        end
+        name = "L#{sprintf "%06d", @num_labels}#{suffix}"
+        return name
+      end
+
+      def define_label(name, offset)
+        @labels[name] = offset
+        return name
+      end
+
+
+      def lookup_label(name)
+        @labels[name]
+      end
+
+
+      def define_var(name, bytes)
+        @vars[name] = @bss_size
+        @bss_size += bytes
+      end
+
+
+      def define_const(name, value, bytes)
+        @consts[name] = @const_size
+        @const_size += bytes
+        @const_data << [value].pack('i')
+      end
+
+      def define_func(name, offset)
+        @funcs[name] = offset
+      end
+
+
+      def var(name)
+        @vars[name]
+      end
+
+      def var?(name)
+        @vars.has_key?(name)
+      end
+
+      def const(name)
+        @consts[name]
+      end
+
+      def const?(name)
+        @consts.has_key?(name)
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/text_assembler.rb
+++ b/lib/compiler/asm/text_assembler.rb
@ -0,0 +1,73 @@
+# sjs
+# may 2009
+
+require 'compiler/asm/assembler'
+
+class Compiler
+  module ASM
+
+    class TextAssembler < Assembler
+
+      def initialize(delegate)
+        super(delegate)
+
+        @vars = {}                   # Symbol table, maps names to locations in BSS.
+        @data = ''
+        @bss = ''
+        @code = ''
+
+        unless File.readable?(template_filename)
+          raise "unsupported platform/arch: #{delegate.platform}/#{arch.name}"
+        end
+      end
+
+      def template_filename
+        @template_filename ||= File.join(File.dirname(__FILE__), arch.name, "template.#{delegate.platform}.asm")
+      end
+
+      # Define a constant
+      def const(name, value)
+      end
+
+      # Define a variable with the given name and size in bytes.
+      def define_var(name, bytes = arch.bytes)
+        unless var?(name)
+          define_var_impl(name, bytes)
+        else
+          STDERR.puts "[warning] attempted to redefine #{name}"
+        end
+      end
+
+      def define_var_impl(name, bytes = arch.bytes)
+      end
+
+      def var(name)
+        @vars[name]
+      end
+      alias_method :var?, :var
+
+
+      # Emit a line of code wrapped between a tab and a newline.
+      def emit(code, options = {})
+        tab = options.has_key?(:tab) ? options[:tab] : "\t"
+        @code << "#{tab}#{code}\n"
+      end
+
+      def label(name = nil)
+        # FIXME
+        name = super
+        @labels[name] = name
+        return name
+      end
+
+      def output
+      end
+
+      def emit_label(name = label)
+        emit("#{name}:", tab: nil)
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/variable_proxy.rb
+++ b/lib/compiler/asm/variable_proxy.rb
@ -0,0 +1,43 @@
+class Compiler
+  module ASM
+
+    # Wrap a variable's address so that we can perform arithmetic on it
+    # before resolving it when we know where things will go in memory.
+    # All we do is catch arithmetic ops and then provide a means to
+    # resolve a final addres by replaying them later.
+    #
+    # e.g. [symtab.var('i')] or [symtab.var('i') * 2]
+    class VariableProxy
+
+      attr_reader :name
+      attr_accessor :ops
+
+      def initialize(name)
+        @name = name
+        @ops = []
+      end
+
+      %w[+ * / - % & |].each do |op|
+        define_method(op) do |*args|
+          new_proxy = self.class.new(@name)
+          new_proxy.ops << [op, *args]
+          return new_proxy
+        end
+      end
+
+      # XXX should this perhaps use the offset instead?
+      def resolve(base_addr)
+        @ops.inject(base_addr) do |addr, op|
+          addr.send(*op)
+        end
+      end
+
+      # Overriden by ConstantProxy
+      def const?
+        false
+      end
+
+    end
+
+  end
+end
--- a/lib/compiler/asm/x86/arch.rb
+++ b/lib/compiler/asm/x86/arch.rb
@ -0,0 +1,42 @@
+require 'compiler/asm/arch'
+
+class Compiler
+  module ASM
+    module X86
+
+      module Arch
+
+        BINARY_PREAMBLE = {
+          'linux' => [],
+
+          'darwin' => [ 0x55,                  # push ebp
+                        0x89, 0xe5,            # mov ebp, esp
+                        0x81, 0xec, 8, 0, 0, 0 # sub esp, 8
+                      ]
+        }
+
+        BINARY_POSTAMBLE = {
+          'linux' => [ 0x89, 0xc3,         # mov ebx, eax (exit code)
+                       0xb8, 1, 0, 0, 0,   # mov eax, 1
+                       0xcd, 0x80          # int 0x80
+                     ],
+
+          'darwin' => [ 0xc9,       # leave
+                        0xc3        # ret
+                      ]
+        }
+
+        def self.instance
+          @instance ||= ASM::Arch.new({
+            'bits' => 32,
+            'word_bits' => 16,
+            'preamble' => BINARY_PREAMBLE,
+            'postamble' => BINARY_POSTAMBLE
+          })
+        end
+
+      end
+
+    end
+  end
+end
--- a/lib/compiler/asm/x86/binary_assembler.rb
+++ b/lib/compiler/asm/x86/binary_assembler.rb
@ -0,0 +1,866 @@
+# A very basic x86 assembler library for Ruby. Generally the
+# instructions implemented are the minimum needed by the compiler this
+# is written for. x86 is just too big.
+#
+# sjs
+# may 2009
+#
+# Refer to the Intel[1] or AMD documentation on x86 for explanations
+# of Mod-R/M encoding, the Scale-Index-Base (SIB) byte, opcode groups.
+#
+# The start and exit shell codes were obtained by disassembling
+# minimal binaries on the respective platforms.
+
+require 'json'
+require 'compiler/asm/binary_assembler'
+require 'compiler/asm/x86/arch'
+require 'compiler/asm/x86/registers'
+
+class Compiler
+  module ASM
+    module X86
+
+      class BinaryAssembler < ASM::BinaryAssembler
+
+        include Registers
+
+        DEBUG_OUTPUT = false
+
+        SIGNED_BYTE = -128..127
+
+        # This is used for encoding instructions. Just as the equivalent
+        # assembly would contain "BITS 32", binary is generated for 32-bit
+        # protected mode.
+        DEFAULT_OPERAND_SIZE = :dword
+
+        SIZE_MAP = {
+          byte: 8,
+          word: 16,
+          dword: 32
+        }
+
+        def emit_entry_point
+          # Always include the _main entry point in our symbol table. It begins at the
+          # beginning of the __TEXT segment, 0x0.
+          define_label('_main')
+        end
+
+        # register for return values
+        def return_reg
+          EAX
+        end
+
+
+        ### Virtual ISA used by parser.
+
+        def load(n)
+          mov(return_reg, n)
+        end
+
+        def load_var(name)
+          mov(return_reg, [var(name)])
+        end
+
+        def store_var(name, reg)
+          mov([name], reg)
+        end
+
+
+        # stack_* methods expect op1 on the stack; each pops it into EBX and combines it with reg.
+
+        def stack_add(reg)
+          pop(EBX)
+          add(reg, EBX)
+        end
+
+        def stack_sub(reg)
+          pop(EBX)
+          sub(reg, EBX) # NOTE(review): this yields reg - op1; confirm the parser expects that operand order
+        end
+
+        def stack_mul_signed(reg)
+          pop(EBX)
+          imul(EBX) # one-operand form; reg is not used -- presumably it is always EAX, confirm
+        end
+
+        def stack_div(reg)
+          pop(EBX)                # Get op1
+          xchg(reg, EBX)          # Swap the divisor and dividend into
+                                  # the correct places.
+
+          # idiv uses edx:eax as the dividend so we need to ensure that edx
+          # is correctly sign-extended w.r.t. eax.
+          cdq                     # Sign-extend eax into edx (Convert Double to Quad).
+
+          idiv(EBX)               # Divide a (eax) by b (ebx).
+        end
+
+        def stack_or(reg)
+          pop(EBX)
+          self.or(reg)
+        end
+
+        def stack_xor(reg)
+          pop(EBX)
+          xor(reg)
+        end
+
+        def stack_and(reg)
+          pop(EBX)
+          self.and(reg)
+        end
+
+        def compare(reg, n)
+          cmp(reg, n)
+        end
+
+        def mov_reg_imm(reg, imm)
+          mov(reg, imm)
+        end
+
+
+        ############################
+        ### Instruction Encoding ###
+        ############################
+
+        def emit_dword(num)
+          num_to_quad(num).each { |byte| emit_byte(byte) }
+        end
+
+        def emit_modrm(addr, reg = 0) # emits the ModRM byte for addr, then any SIB byte / displacement / variable reference
+          mod = 0
+          rm = 0
+          disp8 = nil
+          disp32 = nil
+          sib = nil
+          var = nil # variable proxy
+
+          # effective address: an Array holding the address, optionally preceded by a size prefix
+          if addr.is_a?(Array)
+            eff_addr = addr[1] || addr[0] # works with or without size prefix
+            raise "invalid effective address: #{addr.inspect}" unless eff_addr
+            case eff_addr
+
+            when RegisterProxy
+
+              # Simple register addressing, e.g. [ESI].
+              #
+              # mod == 00
+              if eff_addr.register?
+                mod = 0
+
+                # [ESP] and [EBP] can't be encoded directly. The
+                # workaround is to use SIB to emit the code for [ESP+0]
+                # and [EBP+0] instead.
+                #
+                # To emit [ESP+0] we use SIB with scale=1 index=0 base=ESP.
+                if eff_addr == ESP
+                  rm = 4 # SIB
+                  sib = make_sib(1, 0, eff_addr)
+
+                # For [EBP+0] we can encode [EBP]+disp8 directly.
+                elsif eff_addr == EBP
+                  mod = 1
+                  rm = eff_addr.regnum
+                  disp8 = 0
+                else
+                  rm = eff_addr.regnum
+                end
+
+              # Numeric index (displacement). NOTE(review): rm is left at 0 in this branch instead of coming from a base register -- confirm against RegisterProxy.
+              elsif eff_addr.index? && eff_addr.index.is_a?(Numeric)
+
+                # disp8, mod == 01
+                if SIGNED_BYTE === eff_addr.index
+                  mod = 1
+                  disp8 = eff_addr.index
+
+                # disp32, mod == 10
+                elsif SignedRange === eff_addr.index # NOTE(review): SignedRange is not defined in this class; presumably inherited -- confirm
+                  mod = 2
+                  disp32 = eff_addr.index
+
+                else
+                  raise "address must fit in 32 bits, this doesn't: #{eff_addr.index}"
+                end
+
+              # SIB
+              elsif eff_addr.index?
+                # scale-index-base, mod == 00 and rm == 100
+                rm = 4
+                sib = make_sib(eff_addr.scale || 1, eff_addr.index, eff_addr.base)
+
+              else
+                raise "unsupported effective address: #{addr.inspect}"
+              end
+
+            # disp32, mod == 00
+            when Numeric
+              mod = 0
+              rm = 5  # 101
+              disp32 = eff_addr
+
+            when VariableProxy
+              mod = 0
+              rm = 5
+              var = eff_addr
+
+            else
+              raise "unsupported effective address: #{addr.inspect}"
+            end
+
+          # register content, mod == 11
+          elsif addr.register?
+            mod = 3
+            rm = addr.regnum
+
+          # XXX TODO elsif addr.respond_to?(:name)
+          #          (VariableProxy) => [:(var|const), addr.name]
+          #
+          # i.e. a pointer to that var
+
+          else
+            raise "unsupported effective address: #{addr.inspect}"
+          end
+
+          emit_byte((mod << 6) | (reg << 3) | rm) # ModRM layout: mod in bits 7-6, reg in bits 5-3, rm in bits 2-0
+          emit_byte(sib) if sib
+
+          emit_byte(disp8) if disp8
+
+          emit_dword(disp32) if disp32
+          emit_var(var) if var
+        end
+
+
+        def make_sib(scale, index, base)
+          if [1,2,4,8].include?(scale)
+            scale = log2(scale).to_i
+          else
+            raise "unsupported SIB scale: #{scale}, should be 1, 2, 4, or 8"
+          end
+          if index == 0
+            index = 4
+          elsif index.respond_to?(:regnum)
+            index = index.regnum
+          end
+          base = base.regnum if base.respond_to?(:regnum)
+          return (scale << 6) | (index << 3) | base
+        end
+
+
+        def register?(op, size = DEFAULT_OPERAND_SIZE)
+          op.is_a?(RegisterProxy) && op.size == size ||
+            op.respond_to?(:size) && op.size == SIZE_MAP[size]
+        end
+
+        def immediate?(op, size = DEFAULT_OPERAND_SIZE)
+          bits = SIZE_MAP[size] || size
+          op.is_a?(Numeric) && op >= -(2 ** bits / 2) && op <= (2 ** bits - 1)
+        end
+
+        # Return true if op is a valid operand of the specified size.
+        #     (:byte, :word, :dword)
+        #
+        # Valid operands are:
+        #
+        #   * registers
+        #
+        #   * effective addresses (wrapped in an array to look like nasm code)
+        #
+        # XXX This method is pretty ugly.
+        def rm?(op, size = DEFAULT_OPERAND_SIZE)
+          is_register = register?(op, size)
+
+          if op.is_a?(Array)
+            case op.size
+
+            # [register/memory]
+            when 1
+              is_reg_or_mem = [Numeric, RegisterProxy, VariableProxy].include?(op[0].class)
+
+            # [<size>, memory]
+            when 2
+              is_size_and_mem = op[0] == size && [Numeric, RegisterProxy, VariableProxy].include?(op[1].class)
+
+            end
+
+          else
+            is_reg_or_mem = false
+            is_size_and_mem = false
+          end
+
+          is_register || is_reg_or_mem || is_size_and_mem
+        end
+
+        def offset?(addr, size = DEFAULT_OPERAND_SIZE)
+          addr.is_a?(Array) && (addr[0].is_a?(Numeric) || addr[0].is_a?(VariableProxy))
+        end
+
+        def constant?(op)
+          immediate?(op) || offset?(op)
+        end
+
+        def log2(x, tol = 1e-13)
+          result = 0.0
+
+          # Integer part
+          while x < 1
+            resultp -= 1
+            x *= 2
+          end
+          while x >= 2
+            result += 1
+            x /= 2
+          end
+
+          # Fractional part
+          fp = 1.0
+          while fp >= tol
+            fp /= 2
+            x *= x
+            if x >= 2
+              x /= 2
+              result += fp
+            end
+          end
+          result
+        end
+
+
+        # 9 versions of the mov instruction are supported:
+        #   1. mov reg32, immediate32
+        #   2a. mov reg32, r/m32
+        #   2b. mov eax, memoffset32
+        #   3a. mov r/m32, reg32
+        #   3b. mov memoffset32, eax
+        #   4. mov r/m32, immediate32
+        #   5. mov r/m8, imm8
+        #   6. mov reg8, r/m8
+        #   7. mov r/m8, reg8
+        def mov(dest, src)
+
+          # These 2 are used in the same way, just the name differs to make the
+          # meaning clear. They are 4-byte values that are emitted at the end if
+          # they are non-nil. Only one of them will be emitted, and if both are
+          # non-nil that one is immediate.
+          immediate = nil
+          offset = nil
+
+          # This is an array of arguments to be passed to emit_modrm, if it is set.
+          modrm = nil
+
+          # version 1: mov r32, imm32
+          if register?(dest) && immediate?(src)
+            opcode = 0xb8 + dest.regnum # dest encoded in instruction
+            immediate = src
+
+          # version 2a: mov r32, r/m32
+          elsif register?(dest) && rm?(src)
+            # version 2b: mov eax, moffs32
+            if dest == EAX && offset?(src)
+              opcode = 0xa1
+              offset = src[0]
+            else
+              opcode = 0x8b
+              modrm = [src, dest.regnum]
+            end
+
+          # version 3a: mov r/m32, r32
+          elsif rm?(dest) && register?(src)
+            # version 3b: mov moffs32, eax
+            if offset?(dest) && src == EAX
+              opcode = 0xa3
+              offset = dest[0]
+            else
+              opcode = 0x89
+              modrm = [dest, src.regnum]
+            end
+
+          # version 4: mov r/m32, imm32
+          elsif rm?(dest) && immediate?(src)
+            opcode = 0xc7
+            modrm = [dest, 0]
+            immediate = src
+
+          # version 5: mov r/m8, imm8
+          #
+          # It's important that this check comes before versions 6 and 7 because
+          # src integers can pass the register? check in version 7.
+          elsif rm?(dest, :byte) && immediate?(src, :byte)
+            opcode = 0xc6
+            modrm = [dest, 0]
+            immediate_byte = src
+
+          # version 6: mov r8, r/m8
+          elsif register?(dest, :byte) && rm?(src, :byte)
+            opcode = 0x8a
+            modrm = [src, dest.regnum]
+
+          # version 7: mov r/m8, r8
+          elsif rm?(dest, :byte) && register?(src, :byte)
+            opcode = 0x88
+            modrm = [dest, src.regnum]
+
+          else
+            # puts "rm?(dest): #{rm?(dest)}\t\trm?(src): #{rm?(src)}"
+            # puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}"
+            # puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}"
+            # puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}"
+            # puts "rm?(dest, :byte): #{rm?(dest)}\t\trm?(src, :byte): #{rm?(src, :byte)}"
+            # puts "immediate?(dest, :byte): #{immediate?(dest)}\t\timmediate?(src, :byte): #{immediate?(src, :byte)}"
+            raise "unsupported MOV instruction, #{dest.inspect}, #{src.inspect}"
+          end
+
+          dword = immediate || offset
+
+          asm do
+            emit_byte(opcode)
+            emit_modrm(*modrm) if modrm
+
+            if dword.is_a?(VariableProxy)
+              if dword.const?
+                emit_const(dword)
+              else
+                emit_var(dword)
+              end
+
+            elsif dword
+              emit_dword(dword)
+
+            elsif immediate_byte # nil unless the version 5 branch above assigned it
+              emit_byte(immediate_byte)
+
+            end
+          end
+        end
+
+
+        def movzx(dest, src)
+
+          # movzx Gv, ??
+          if register?(dest)
+
+            opcode = case
+                     when rm?(src, :byte)
+                       0xb6 # movzx Gv, Eb
+                     when rm?(src, :word)
+                       0xb7 # movzx Gv, Ew
+                     else
+                       raise "unsupported MOVZX instruction, dest=#{dest.inspect} << src=#{src.inspect} >>"
+                     end
+            asm do
+              emit_byte(0x0f)
+              emit_byte(opcode)
+              emit_modrm(src, dest.regnum)
+            end
+
+          else
+
+            raise "unimplemented MOVZX instruction, << dest=#{dest.inspect} >> src=#{src.inspect}"
+          end
+        end
+
+
+        def xchg(dest, src)
+          if dest == EAX && register?(src)
+            asm { emit_byte(0x90 + src.regnum) }
+          # swap the args if EAX comes last so we only need to handle one case below.
+          elsif src == EAX && register?(dest)
+            xchg(src, dest)
+          elsif rm?(dest) && register?(src)
+            asm do
+              emit_byte(0x87)
+              emit_modrm(dest, src.regnum)
+            end
+          elsif register?(dest) && rm?(src)
+            asm do
+              emit_byte(0x87)
+              emit_modrm(src, dest.regnum)
+            end
+          else
+            raise "unsupported XCHG instruction, dest=#{dest.inspect} src=#{src.inspect}"
+          end
+        end
+
+        # convert double to quad (sign-extend EAX into EDX)
+        def cdq
+          asm { emit_byte(0x99) }
+        end
+
+
+        def add(dest, src)
+          # add r/m32, imm8
+          if rm?(dest) && immediate?(src, :byte)
+            asm do
+              emit_byte(0x83)
+              emit_modrm(dest, 0)
+              emit_byte(src)
+            end
+
+          # add r/m32, imm32
+          elsif rm?(dest) && immediate?(src)
+             asm do
+               emit_byte(0x81)
+               emit_modrm(dest, 0)
+               emit_dword(src)
+             end
+
+          # add eax, imm32
+          elsif dest == EAX && immediate?(src)
+            asm do
+              emit_byte(0x05)
+              emit_dword(src)
+            end
+
+          # add reg32, r/m32
+          elsif register?(dest) && rm?(src)
+            asm do
+              emit_byte(0x03)
+              emit_modrm(src, dest.regnum)
+            end
+
+          else
+            raise "unsupported ADD instruction, dest=#{dest.inspect} src=#{src.inspect}"
+          end
+        end
+
+
+        def sub(dest, src)
+          # sub r/m32, imm8
+          if rm?(dest) && immediate?(src, :byte)
+            asm do
+              emit_byte(0x83)
+              emit_modrm(dest, 5)
+              emit_byte(src)
+            end
+
+          # sub r/m32, imm32
+          elsif rm?(dest) && immediate?(src)
+            asm do
+              emit_byte(0x81)
+              emit_modrm(dest, 5)
+              emit_dword(src)
+            end
+
+          # sub r/m32, reg32
+          elsif rm?(dest) && register?(src)
+            asm do
+              emit_byte(0x29)
+              emit_modrm(dest, src.regnum)
+            end
+
+          # sub reg32, r/m32
+          elsif register?(dest) && rm?(src)
+            asm do
+              emit_byte(0x2b)
+              emit_modrm(src, dest.regnum)
+            end
+
+          else
+            raise "unsupported SUB instruction, dest=#{dest.inspect} src=#{src.inspect}"
+          end
+        end
+
+
+        # Signed multiply.
+        def imul(*ops)
+          case ops.size
+
+          when 1
+            group3(ops[0], 5, 'IMUL')
+
+          when 2
+            dest, src = ops
+            raise "unsupported IMUL instruction, dest=#{dest.inspect} src=#{src.inspect}"
+
+          else
+            raise ArgumentError, "IMUL accepts exactly 1 or 2 operands (got #{ops.inspect})"
+          end
+        end
+
+        # Unsigned multiply (opcode group 3, reg field 4).
+        def mul(op)
+          group3(op, 4, 'MUL')
+        end
+
+
+        # Signed divide (opcode group 3, reg field 7).
+        def idiv(op)
+          group3(op, 7, 'IDIV')
+        end
+
+        # Unsigned divide (opcode group 3, reg field 6).
+        def div(op)
+          group3(op, 6, 'DIV')
+        end
+
+
+        def inc(op)
+          asm do
+            if register?(op)
+              emit_byte(0x40 + regnum(op))
+            elsif rm?(op)
+              # emit_byte(0xff)
+              raise "unimplemented"
+            else
+              raise "unsupported op #{op}, wanted r32 or r/m32"
+            end
+          end
+        end
+
+
+        def dec(op)
+          if register?(op)
+            # dec reg32
+            asm { emit_byte(0x48 + op.regnum) }
+          else
+            raise "unsupported DEC instruction, op=#{op.inspect}"
+          end
+        end
+
+
+        def shr(op, n)
+
+          # shr r/m??, imm8
+          if SIGNED_BYTE === n
+
+            opcode = register?(op, :byte) ? 0xc0 : 0xc1
+
+            asm do
+              emit_byte(opcode)
+              emit_modrm(op, 5)
+              emit_byte(n)
+            end
+
+          else
+            raise "unsupported SHR instruction, op=#{op.inspect}, n=#{n.inspect}"
+          end
+
+        end
+
+
+        def and_(dest, src)
+          if rm?(dest) && register?(src)
+            asm do
+              emit_byte(0x21)
+              emit_modrm(dest, src.regnum)
+            end
+          elsif rm?(dest, 8) && immediate?(src, 8)
+            asm do
+              emit_byte(0x80)
+              emit_modrm(dest, 4)
+              emit_byte(src)
+            end
+          else
+            raise "unsupported AND instruction: dest=#{dest.inspect}, src=#{src.inspect}"
+          end
+        end
+        alias_method :and, :and_
+
+        def or_(dest, src)
+          if rm?(dest) && register?(src)
+            asm do
+              emit_byte(0x9)
+              emit_modrm(dest, src.regnum)
+            end
+          elsif rm?(dest, 8) && immediate?(src, 8)
+            asm do
+              emit_byte(0x80)
+              emit_modrm(dest, 1)
+              emit_byte(src)
+            end
+          else
+            raise "unsupported OR instruction: dest=#{dest.inspect}, src=#{src.inspect}"
+          end
+        end
+        alias_method :or, :or_
+
+        def xor(dest, src)
+          # xor r/m32, reg32
+          if rm?(dest) && register?(src)
+            asm do
+              emit_byte(0x31)
+              emit_modrm(dest, src.regnum)
+            end
+
+          else
+            raise "unsupported XOR instruction, dest=#{dest.inspect} src=#{src.inspect}"
+          end
+        end
+
+
+        def not_(op) # bitwise NOT; trailing underscore because "not" is a Ruby keyword
+          group3(op, 2, 'NOT') # opcode group 3, reg field 2
+        end
+        alias_method :not, :not_
+
+
+        def neg(op) # negate op
+          group3(op, 3, 'NEG') # opcode group 3, reg field 3
+        end
+
+
+        def push(op)
+          # push reg32
+          if register?(op)
+            asm { emit_byte(0x50 + op.regnum) }
+
+          elsif immediate?(op, :byte)
+            asm do
+              emit_byte(0x6a)
+              emit_byte(op)
+            end
+
+          elsif immediate?(op)
+            asm do
+              emit_byte(0x68)
+              emit_dword(op)
+            end
+
+          else
+            raise "unsupported PUSH instruction: op=#{op.inspect}"
+          end
+        end
+
+
+        def pop(op)
+          # pop reg32
+          if register?(op)
+            asm { emit_byte(0x58 + op.regnum) }
+
+          else
+            raise "unsupported POP instruction: op=#{op.inspect}"
+          end
+        end
+
+
+        def cmp(op1, op2)
+          # cmp r/m32, reg32
+          if rm?(op1) && register?(op2)
+            asm do
+              emit_byte(0x39)
+              emit_modrm(op1, op2.regnum)
+            end
+
+          # cmp eax, imm32
+          elsif op1 == EAX && immediate?(op2)
+            asm do
+              emit_byte(0x3d)
+              emit_dword(op2)
+            end
+
+          else
+            raise "unsupported CMP instruction: op1=#{op1.inspect} op2=#{op2.inspect}"
+          end
+        end
+
+
+        # Only jmp rel32 is supported.
+        def jmp(label)
+          asm do
+            emit_byte(0xe9)
+            emit_label(label)
+          end
+        end
+
+        # These all jump near (rel32).
+        JccOpcodeMap = Hash.new { |key| raise "unsupported Jcc instruction: #{key}" }.
+                            merge({
+          :jc  => 0x82,  # carry            (CF=1)
+          :je  => 0x84,  # equal            (ZF=1) --- same as jz
+          :jg  => 0x8f,  # greater          (ZF=0 and SF=OF)
+          :jl  => 0x8c,  # less than        (SF!=OF)
+          :jne => 0x85,  # not equal        (ZF=0) --- same as jnz
+          :jng => 0x8e,  # not greater than (ZF=1 or SF!=OF)
+          :jnl => 0x8d,  # not less than    (SF=OF)
+          :jnz => 0x85,  # not zero         (ZF=0)
+          :jo  => 0x80,  # overflow         (OF=1)
+          :js  => 0x88,  # sign             (SF=1)
+          :jz  => 0x84   # zero             (ZF=1)
+        })
+
+        # Only Jcc rel32 is supported.
+        def jcc(instruction, label)
+          opcode = JccOpcodeMap[instruction]
+          asm do
+            emit_byte(0x0f)
+            emit_byte(opcode)
+            emit_label(label)
+          end
+        end
+
+        JccOpcodeMap.keys.each do |name|
+          define_method(name) do |label|
+            jcc(name, label)
+          end
+        end
+
+
+        def lea(r32, mem)
+          asm do
+            emit_byte(0x8d)
+            emit_modrm(mem, r32.regnum)
+          end
+        end
+
+
+        def int(n)
+          asm do
+            emit_byte(0xcd)
+            emit_byte(n)
+          end
+        end
+
+
+        def ret
+          asm { emit_byte(0xc3) }
+        end
+
+
+        def leave
+          asm { emit_byte(0xc9) }
+        end
+
+
+        # NOTE: LOOP only accepts a 1-byte signed offset. Don't use it.
+        def loop_(label)
+          real_ip = ip + 2 # loop instruction is 2 bytes
+          delta = @symtab.lookup_label(label) - real_ip
+          unless SIGNED_BYTE === delta
+            raise "LOOP can only jump -128 to 127 bytes, #{label} is #{delta} bytes away"
+          end
+
+          asm do
+            emit_byte(0xe2)
+            emit_byte(delta)
+          end
+        end
+        alias_method :loop, :loop_
+
+
+        # Opcode group #3. 1-byte opcode, 1 operand (r/m8 or r/m32).
+        #
+        # Members of this group are: DIV, IDIV, MUL, IMUL, NEG, NOT, and TEST.
+        def group3(op, reg, instruction)
+          opcode =
+            if rm?(op, 8)
+              0xf6
+            elsif rm?(op)
+              0xf7
+            else
+              raise "unsupported #{instruction} instruction: op=#{op.inspect}"
+            end
+
+          asm do
+            emit_byte(opcode)
+            emit_modrm(op, reg)
+          end
+        end
+
+
+      end
+
+    end
+  end
+end
--- a/lib/compiler/asm/x86/registers.rb
+++ b/lib/compiler/asm/x86/registers.rb
@ -0,0 +1,32 @@
+require 'asm/regproxy'
+
+module ASM # NOTE(review): top-level ::ASM, while X86::BinaryAssembler does `include Registers` inside Compiler::ASM::X86 -- confirm this resolves
+
+  module Registers
+
+    # This structure allows for x86 registers of all sizes. The
+    # number of the register is the index of the array in which it was
+    # found. The size of a register in bytes is 2 ** index-into-sub-array.
+    Registers = [ [:al, :ax, :eax], # 0
+                  [:cl, :cx, :ecx], # 1
+                  [:dl, :dx, :edx], # 2
+                  [:bl, :bx, :ebx], # 3
+                  [:ah, :sp, :esp], # 4
+                  [:ch, :bp, :ebp], # 5
+                  [:dh, :si, :esi], # 6
+                  [:bh, :di, :edi]  # 7
+                ]
+
+    # Setup register proxies which are used both in effective address
+    # calculations, and also just as symbols representing registers.
+    Registers.each_with_index do |group, regnum|
+      group.each_with_index do |reg, i|
+        name = reg.to_s.upcase # constant name, e.g. :eax becomes "EAX"
+        const_set(name, RegisterProxy.new(reg, 8 * (2 ** i), regnum)) # second argument is the size in bits: 8, 16 or 32
+      end
+    end
+
+
+  end
+
+end
--- a/lib/compiler/asm/x86/template.darwin.asm
+++ b/lib/compiler/asm/x86/template.darwin.asm
@ -0,0 +1,11 @@
+BITS 32
+GLOBAL _main
+SECTION .data
+{data}
+SECTION .bss
+{bss}
+SECTION .text
+_main:
+{code}
+	;; The result in eax is the exit code, just return.
+	ret
--- a/lib/compiler/asm/x86/template.linux.asm
+++ b/lib/compiler/asm/x86/template.linux.asm
@ -0,0 +1,13 @@
+BITS 32
+GLOBAL _start
+SECTION .data
+{data}
+SECTION .bss
+{bss}
+SECTION .text
+_start:
+{code}
+	;; The result in eax is the exit code, move it to ebx.
+	mov ebx, eax
+	mov eax, 1		; _exit syscall
+	int 0x80		; call Linux
--- a/lib/compiler/asm/x86/text_assembler.rb
+++ b/lib/compiler/asm/x86/text_assembler.rb
@ -0,0 +1,159 @@
+# A subset of x86 assembly.
+#
+# sjs
+# may 2009
+
+require 'compiler/asm/text_assembler'
+
+class Compiler
+  module ASM
+    module X86
+
+      # ASM methods output nasm-friendly x86 asm code, line by
+      # line. This is dead easy and we can trust nasm to compile
+      # correct machine code, which isn't trivial.
+      class TextAssembler < ASM::TextAssembler
+
+        def emit_entry_point
+        end
+
+        # Define a constant in the .data section.
+        def const(name, value)
+          @data << "#{name}\tequ  #{value}"
+        end
+
+        # Define a variable with the given name and size in bytes.
+        def define_var_impl(name, bytes = nil)
+          super(name, bytes)
+          dwords = bytes / 4
+          @bss << "#{name}: resd #{dwords}\n"
+        end
+
+        def output
+          File.read(template_filename).
+            sub("{data}", @data).
+            sub("{bss}", @bss).
+            sub("{code}", @code)
+        end
+
+        def emit_label(name = label)
+          emit("#{name}:", tab: nil)
+        end
+
+        def mov(dest, src)
+          emit("mov #{dest}, #{src}#{src.is_a?(Numeric) ? " ; 0x#{src.to_s(16)}" : ''}")
+        end
+
+        def movzx(dest, src)
+          emit("movzx #{dest}, #{src}")
+        end
+
+        def add(dest, src)
+          emit("add #{dest}, #{src}")
+        end
+
+        def sub(dest, src)
+          emit("sub #{dest}, #{src}")
+        end
+
+        def imul(op)
+          emit("imul #{op}")
+        end
+
+        def idiv(op)
+          emit("idiv #{op}")
+        end
+
+        def inc(op)
+          emit("inc #{op}")
+        end
+
+        def dec(op)
+          emit("dec #{op}")
+        end
+
+        def push(reg)
+          emit("push #{reg}")
+        end
+
+        def pop(reg)
+          emit("pop #{reg}")
+        end
+
+        def call(label)
+          emit("call #{label}")
+        end
+
+        def leave
+          emit("leave")
+        end
+
+        def neg(reg)
+          emit("neg #{reg}")
+        end
+
+        def not(rm32)
+          emit("not #{rm32}")
+        end
+
+        def xchg(op1, op2)
+          emit("xchg #{op1}, #{op2}")
+        end
+
+        def and_(op1, op2)
+          emit("and #{op1}, #{op2}")
+        end
+
+        def or(op1, op2)
+          emit("or #{op1}, #{op2}")
+        end
+
+        def xor(op1, op2)
+          emit("xor #{op1}, #{op2}")
+        end
+
+        def jz(label)
+          emit("jz #{label}")
+        end
+
+        def jnz(label)
+          emit("jnz #{label}")
+        end
+
+        def jmp(label)
+          emit("jmp #{label}")
+        end
+
+        def jl(label)
+          emit("jl #{label}")
+        end
+
+        def cmp(a, b)
+          emit("cmp #{a}, #{b}")
+        end
+
+        def lea(a, b)
+          emit("lea #{a}, #{b}")
+        end
+
+        def shr(a, b)
+          emit("shr #{a}, #{b}")
+        end
+
+        def loop_(label)
+          emit("loop #{label}")
+        end
+
+        def int(num)
+          emit("int 0x#{num.to_s(16)}")
+        end
+
+        def cdq
+          emit("cdq")
+        end
+
+      end
+
+    end
+  end
+end
--- a/lib/compiler/build.rb
+++ b/lib/compiler/build.rb
@ -0,0 +1,108 @@
+#!/usr/bin/env ruby
+
+require 'compiler'
+
+# usage: build.rb <filename> [output filename] [elf | macho] [asm | bin]
+
+BIN_FORMATS = Hash.new('bin')
+BIN_FORMATS['darwin'] = 'macho'
+BIN_FORMATS['linux'] = 'elf'
+
+def main
+  filename = ARGV.shift.to_s
+  raise "can't read #{filename}" unless File.readable?(filename)
+  outdir = ARGV.shift || '.'
+  platform = `uname -s`.chomp.downcase
+  binformat = ARGV[1] ? ARGV[1].downcase : BIN_FORMATS[platform]
+  puts "Building #{filename} for #{platform}, binformat is #{binformat} ..."
+  outfile = build(filename, outdir, platform, binformat)
+  puts outfile
+  exit
+end
+
+
+def error(msg) STDERR.puts(msg) end
+
+# name part (filename minus extension)
+def base(filename)
+  filename.sub(/\.[^.]*$/, '')
+end
+
+
+# Compile infile and write the result to outfile. On a parse error the
+# message, context and caller are printed to stderr and the process
+# exits with status 1.
+#
+# infile:   input filename
+# outfile:  output filename
+# asm:      assembler to use
+def compile(infile, outfile, asm)
+
+  File.open(infile, 'r') do |input|
+    File.open(outfile, 'wb') do |out|
+      # NOTE(review): the Compiler class visible in this commit defines
+      # compile as an instance method taking only the input -- confirm
+      # that a class-level Compiler.compile(input, asm) still exists.
+      out.print(Compiler.compile(input, asm))
+    end
+  end
+
+# ParseError is defined inside the Compiler class, so it has to be
+# qualified here at the top level.
+rescue Compiler::ParseError => e
+  error("[error] #{e.message}")
+  error("[context] #{e.context}")
+  error(e.caller)
+  exit(1)
+end
+
+def run_and_warn_on_failure(command)
+  output = `#{command}`
+  if $?.exitstatus != 0
+    puts
+    print output
+    name = command.split.first
+    raise "#{name} failed: #{$?.exitstatus}"
+  end
+end
+
+# link with ld, return resulting filename.
+def link(filename, outdir, platform = 'linux')
+  f = base(filename)
+  cmd, args = *case platform
+               when 'darwin'
+                 ['gcc', '-arch i386']
+               when 'linux'
+                 ['ld', '']
+               else
+                 raise "unsupported platform: #{platform}"
+               end
+  run_and_warn_on_failure("#{cmd} #{args} -o #{f} #{filename} 2>&1")
+  `chmod u+x #{f}`
+  return f
+end
+
+def build(filename, outdir, platform = 'linux', binformat = 'macho') # compiles filename to an object file, links it, returns the executable's name
+  objfile = File.join(outdir, base(filename) + '.o')
+  symtab, objwriter_class =
+    case binformat
+    when 'elf'
+      [Compiler::ELF::SymbolTable.new, Compiler::ELF::ObjectFile] # NOTE(review): compiler.rb refers to these under ASM::ELF -- confirm the constant path
+    when 'macho'
+      [Compiler::MachO::SymbolTable.new, Compiler::MachO::ObjectFile] # NOTE(review): compiler.rb refers to these under ASM::MachO -- confirm
+    else
+      raise "unsupported binary format: #{binformat}"
+    end
+   compile(filename, objfile, Compiler::ASM::Binary.new(platform, symtab, objwriter_class)) # NOTE(review): Compiler::ASM::Binary is not defined in the files shown here -- confirm
+  exefile = link(objfile, outdir, platform)
+  return exefile
+end
+
+def build_asm(filename, outdir, platform = 'linux', binformat = 'macho')
+  asmfile = File.join(outdir, base(filename) + '.asm')
+  compile(filename, asmfile, Compiler::ASM::Text.new(platform))
+  objfile = assemble(asmfile, binformat)
+  exefile = link(objfile, platform)
+  return exefile
+end
+
+# assemble using nasm, return resulting filename.
+def assemble(filename, binformat = 'macho')
+  f = base(filename)
+  outfile = "#{f}.o"
+  run_and_warn_on_failure("nasm -f #{binformat} -g -o #{outfile} #{filename} 2>&1")
+  return outfile
+end
+
+main if $0 == __FILE__
--- a/lib/compiler/parse_error.rb
+++ b/lib/compiler/parse_error.rb
@ -0,0 +1,14 @@
+class Compiler
+
+  class ParseError < RuntimeError
+
+    attr_reader :caller, :context
+
+    def initialize(caller, context = nil)
+      @caller = caller
+      @context = context
+    end
+
+  end
+
+end
--- a/lib/compiler/parser.rb
+++ b/lib/compiler/parser.rb
@ -0,0 +1,966 @@
+# A compiler as described by Jack Crenshaw in his famous book "Let's
+# Build a Compiler". At least in the beginning, this code will
+# closely reflect the Pascal code written by Jack. Over time it may
+# become more idiomatic, however this is an academic exercise.
+#
+# sjs
+# may 2009
+
+require 'compiler/parse_error'
+
+class Compiler
+
+  class Parser
+
+    KEYWORDS = {                 # keyword => name of the Parser method that handles it
+      'if'     => :if_else_stmt,
+      'while'  => :while_stmt,
+      'until'  => :until_stmt,
+      'repeat' => :repeat_stmt,
+      'for'    => :for_stmt,
+      'do'     => :do_stmt,
+      'break'  => :break_stmt,
+      'print'  => :print_stmt,
+      'else'   => nil,           # no handler: Parser#block stops when it scans this
+      'end'    => nil            # no handler: Parser#block stops when it scans this
+    }
+
+    # Grouped by precedence.
+    OPS = {
+      :add    => %w[+ -],
+      :mul    => %w[* /],
+      :rel    => %w[== != < > <= >=],
+      :or     => %w[||],
+      :and    => %w[&&],
+      :bit    => %w[| ^ &],
+      :unary  => %w[- +]
+    }
+    # Op chars are chars that can begin an op, so OP_CHARS needs to be a
+    # map of kinds of operators to a list of valid prefix chars.
+    OP_CHARS = OPS.inject({}) { |hash, kv|
+      key, val = *kv
+      hash[key] = val.map {|op| op[0, 1]} # slice off first char for each op
+      hash
+    # Include :all for a very general test.
+    }.merge(:all => OPS.values.flatten.map{|op| op[0, 1]}.sort.uniq)
+
+    FALSE = 0                    # value the generated code uses for false
+    TRUE = -1                    # value the generated code uses for true (see not_factor: a 1's complement NOT flips it)
+
+    attr_reader :asm             # the assembler object that all code is emitted through
+
+    def initialize(input, asm)
+      @indent = 0                  # for pretty printing
+      @look = ''                   # Next lookahead char.
+      @token = nil                 # Type of last read token.
+      @value = nil                 # Value of last read token.
+      @input = input               # Stream to read from.
+      @asm = asm                   # assembler
+      @keywords = KEYWORDS.clone
+      @keyword_names = @keywords.keys
+      @label_stack = []
+
+      # seed the lexer
+      get_char
+    end
+
+    def parse
+      block # parse a block of code
+      expected(:'end of file') unless eof?
+    end
+
+    def compile                  # Returns whatever the assembler's #output returns.
+      asm.output
+    end
+
+    # Scan the input stream for the next token.
+    def scan
+      return if @look.nil?        # eof
+
+      if alpha?(@look)
+        get_name
+
+      elsif digit?(@look)
+        get_number
+
+      elsif op_char?(@look)
+        get_op
+
+      elsif newline?(@look)
+        skip_any_whitespace
+        scan
+
+      elsif comment_char?(@look)
+        skip_comment
+        scan
+
+      else
+        # XXX default to single char op... should probably raise.
+        @token = :op
+        @value = @look
+        get_char
+      end
+    end
+
+    # put back the most recently parsed value
+    def backtrack                 # Pushes the lookahead char and the text of @value back onto @input.
+      @input.ungetc(@look[0])     # lookahead goes back first so it is re-read after the token
+      @value.reverse.each_byte {|i| @input.ungetc(i)}  # token bytes go back in reverse so they re-read in order
+      get_char                    # re-seed the lookahead from the restored input
+    end
+
+    # Parse and translate an identifier or function call.
+    def identifier
+      name = get_name
+
+      if @look == '('
+        # function call
+        match('(')
+        # TODO arg list
+        match(')')
+        asm.call(name)
+      else
+        # variable access
+        asm.load_var(name)
+      end
+    end
+
+    # Parse and translate a single factor. Result is in eax.
+    def factor
+      if @look == '('                     # parenthesized sub-expression
+        match('(')
+        boolean_expression
+        match(')')
+      elsif alpha?(@look)
+        identifier                # variable access or function call
+      elsif digit?(@look)
+        asm.load(get_number.to_i)         # integer literal
+      else
+        expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
+      end
+    end
+
+    # Parse a signed factor.
+    def signed_factor
+      sign = @look                              # remember a possible leading sign
+      match(sign) if op?(:unary, sign)          # consume it only when it is a unary operator (- or +)
+      factor
+      asm.neg(return_reg) if sign == '-'        # negate the factor's result for unary minus
+    end
+
+    # Parse and translate a single term (factor or mulop). Result is in
+    # eax.
+    def term
+      signed_factor                      # Result in eax.
+
+      while op?(:mul, @look)
+        asm.push(return_reg)
+        case @look
+        when '*'
+          multiply
+        when '/'
+          divide
+        end
+      end
+    end
+
+    # Parse and translate a general expression of terms. Result is
+    # in eax.
+    def arithmetic_expression
+      term                      # Result is in eax.
+
+      while op_char?(@look, :add)
+        asm.push(return_reg)
+        case @look
+        when '+'
+          add
+        when '-'
+          subtract
+        end
+      end
+    end
+
+    # Parse an addition operator and the 2nd term (b). The result is
+    # left in eax. The 1st term (a) is expected on the stack.
+    def add
+      match('+')                    # consume the operator
+      term                          # Result, b, is in eax.
+      asm.stack_add(return_reg)     # Add a to b.
+    end
+
+    # Parse a subtraction operator and the 2nd term (b). The result is
+    # left in eax. The 1st term (a) is expected on the stack.
+    def subtract
+      match('-')                # consume the operator
+      term                      # Result, b, is in eax.
+      asm.neg(return_reg)       # Fake the subtraction. a - b == a + -b
+      asm.stack_add(return_reg) # Add a to -b.
+    end
+
+    # Parse a multiplication operator and the 2nd factor (b). The result
+    # is left in eax. The 1st factor (a) is expected on the stack.
+    def multiply
+      match('*')                       # consume the operator
+      signed_factor                    # Result, b, is in return_reg.
+      asm.stack_mul_signed(return_reg) # Multiply a by b.
+    end
+
+    # Parse a division operator and the divisor (b). The result is
+    # left in eax. The dividend (a) is expected on the stack.
+    def divide
+      match('/')                  # consume the operator
+      signed_factor               # Result, b, is in eax.
+      asm.stack_div(return_reg)   # Divide a by b.
+    end
+
+
+    ###################
+    # bit expressions #
+    ###################
+
+    def bit_expression                    # Parses arithmetic expressions joined by |, ^ or &.
+      arithmetic_expression
+      while op?(:bit, @look)              # lookahead is |, ^ or &
+        scan                              # read the whole operator into @value
+        case @value
+        when '|'
+          bitor_expression
+        when '^'
+          bitxor_expression
+        when '&'
+          bitand_expression
+        else
+          backtrack                       # a longer operator (e.g. || or &&): put it back for the caller
+          return
+        end
+      end
+    end
+
+    def bit_op(op, token)                       # op: :or, :xor or :and; token is not used by this method
+      asm.push(return_reg)                      # save the left operand on the stack
+      arithmetic_expression                     # parse the right operand
+      asm.send("stack_#{op}", return_reg)       # calls asm.stack_or, asm.stack_xor or asm.stack_and
+    end
+
+    def bitor_expression          # Handles the right-hand side of a bitwise OR (|).
+      bit_op(:or, '|')
+    end
+
+    def bitxor_expression         # Handles the right-hand side of a bitwise XOR (^).
+      bit_op(:xor, '^')
+    end
+
+    def bitand_expression         # Handles the right-hand side of a bitwise AND (&).
+      bit_op(:and, '&')
+    end
+
+
+    #######################
+    # boolean expressions #
+    #######################
+
+    def boolean_expression                # Parses boolean terms joined by ||, emitting short-circuit code.
+      boolean_term
+      while @look == '|'
+        scan
+        expected('||') unless match_word('||')
+
+        false_label = asm.make_label(:false)
+        truthy_label = asm.make_label(:truthy)
+        done_label = asm.make_label(:done)
+
+        asm.compare(return_reg, FALSE)
+        asm.jne(truthy_label)             # short-circuit: the left operand is already truthy
+
+        boolean_term
+        asm.compare(return_reg, FALSE)
+        asm.je(false_label)               # both operands were false
+
+        asm.define_label(truthy_label)
+        asm.mov_reg_imm(return_reg, TRUE) # normalize any truthy value to TRUE
+        asm.jmp(done_label)
+
+        asm.define_label(false_label)
+        asm.mov_reg_imm(return_reg, FALSE)
+
+        asm.define_label(done_label)
+      end
+    end
+
+    def boolean_term
+      not_factor
+      while @look == '&'
+        scan
+        expected('&&') unless match_word('&&')
+        false_label = asm.make_label(:false)
+        done_label = asm.make_label(:done)
+
+        asm.compare(return_reg, FALSE)
+        asm.je(false_label)
+
+        not_factor
+        asm.compare(return_reg, FALSE)
+        asm.je(false_label)
+
+        asm.mov_reg_imm(return_reg, TRUE)
+        asm.jmp(done_label)
+
+        asm.define_label(false_label)
+        asm.mov_reg_imm(return_reg, TRUE)
+
+        asm.define_label(done_label)
+      end
+    end
+
+    def boolean_factor
+      if boolean?(@look)                      # a true/false literal (boolean? currently always answers false)
+        if get_boolean == 'true'
+          asm.mov_reg_imm(return_reg, TRUE)
+        else
+          asm.xor(return_reg, return_reg)     # register XOR itself yields 0, i.e. FALSE
+        end
+        scan
+      else
+        relation
+      end
+    end
+
+    def not_factor                 # Parses a boolean factor with an optional leading "!".
+      if @look == '!'
+        match('!')
+        boolean_factor
+        make_boolean(return_reg) # ensure it is -1 or 0...
+        asm.not_(return_reg)     # so that 1's complement NOT is also boolean not
+      else
+        boolean_factor
+      end
+    end
+
+    # Convert any identifier to a boolean (-1 or 0). This is
+    # semantically equivalent to !!reg in C or Ruby.
+    def make_boolean(reg)
+      end_label = asm.make_label(:endmakebool)
+      asm.compare(reg, FALSE)         # if false do nothing
+      asm.jz(end_label)               # reg already holds FALSE: skip the store
+      asm.mov_reg_imm(reg, TRUE)      # truthy, make it true
+      asm.define_label(end_label)
+    end
+
+    def relation
+      bit_expression
+      if op_char?(@look, :rel)
+        scan
+        asm.push(return_reg)
+
+        case @value
+        when '=='
+          eq_relation
+        when '!='
+          neq_relation
+        when '>'
+          gt_relation
+        when '>='
+          ge_relation
+        when '<'
+          lt_relation
+        when '<='
+          le_relation
+        end
+      end
+    end
+
+    # a: <on the stack>
+    # b: eax
+    #
+    # If b - a is zero then a = b, and make_boolean will leave the zero
+    # to effectively return false. If b - a is non-zero then a != b,
+    # and make_boolean will leave -1 (true) for us in eax.
+    def neq_relation
+      bit_expression
+      asm.stack_sub(return_reg)
+      make_boolean
+    end
+
+    # Invert the != test for equal.
+    def eq_relation
+      neq_relation
+      asm.not(return_reg)
+    end
+
+    # > and < are both implemented in terms of jl (jump if less than).
+    # We exploit the fact that cmp is the subtraction of src from dest
+    # and order the terms appropriately for each function. As for >=
+    # and <=, they in turn are implemented in terms of > and <. a is
+    # greater than or equal to b if and only if a is *not* less than b.
+    #
+    # Note: This was done to minimize the number of instructions that
+    #       the assembler needed to implement, but since the Jcc
+    #       instructions are very cheap to implement this is no longer
+    #       a concern.
+
+
+    # The next 4 relations all compare 2 values a and b, then return
+    # true (-1) if the difference was below zero and false (0)
+    # otherwise (using JL, jump if less than).
+    def cmp_relation(a, b, options = {})   # a, b: operands handed to asm.compare; options[:invert] flips the result
+      bit_expression                # parse the right operand
+      asm.pop(EBX)                  # left operand, pushed by relation
+
+      # Invert the sense of the test?
+      invert = options[:invert]
+
+      true_label = asm.make_label(:cmp)
+      end_label = asm.make_label(:endcmp)
+      asm.compare(a, b)
+      asm.jl(true_label)
+
+      asm.mov(EAX, FALSE)           # not less: return false
+      asm.not_(EAX) if invert       # (or true if inverted)
+      asm.jmp(end_label)
+
+      asm.define_label(true_label)
+      asm.mov(EAX, FALSE)           # less: load false...
+      asm.not_(EAX) unless invert   # ...and flip it to true (left false if inverted)
+
+      asm.define_label(end_label)
+    end
+
+    # a: <on the stack>
+    # b: eax
+    #
+    # if a > b then b - a < 0
+    def gt_relation
+      TODO: fix this
+      cmp_relation(EAX, EBX) # b - a
+    end
+
+    # a: <on the stack>
+    # b: eax
+    #
+    # if a < b then a - b < 0
+    def lt_relation
+      cmp_relation(EBX, EAX) # a - b: true when a (EBX) is less than b (EAX)
+    end
+
+    # a: <on the stack>
+    # b: eax
+    #
+    # if a >= b then !(a < b)
+    def ge_relation
+      # Compare them as in less than but invert the result.
+      cmp_relation(EBX, EAX, :invert => true)   # same operand order as lt_relation
+    end
+
+    # a: <on the stack>
+    # b: eax
+    #
+    # if a <= b then !(a > b)
+    def le_relation
+      # Compare them as in greater than but invert the result.
+      cmp_relation(EAX, EBX, :invert => true)   # same operand order as gt_relation
+    end
+
+
+    ######################################
+    # statements and controls structures #
+    ######################################
+
+    def keyword
+      unless action = @keywords[@value]
+        raise "unsupported keyword: #{@value}"
+      end
+      send(action)
+    end
+
+    # Parse an assignment statement. Value is in eax.
+    def assignment
+      name = @value                       # the token most recently scanned is the target's name
+      match('=')
+      boolean_expression                  # right-hand side
+      lval = asm.var!(name)
+      asm.store_var(lval, return_reg)
+    end
+
+    # Parse a code block.
+    def block                     # Parses statements until 'else', 'end' or end of file.
+      @indent += 1                # nesting depth, used by #indent for debug printing
+
+      # scan a token, type ends up in @token and value in @value
+      scan
+
+      until @value == 'else' || @value == 'end' || eof?
+        if @token == :keyword
+          keyword                 # dispatch to the keyword's statement handler
+        else
+          assignment              # anything else is treated as an assignment
+        end
+
+        scan
+      end
+
+      @indent -= 1
+    end
+
+    # Parse an if-else statement.
+    def if_else_stmt
+      else_label = asm.make_label(:end_or_else)
+      end_label = else_label      # without an else clause both names
+                                  # refer to the same label
+      condition
+      skip_any_whitespace
+      asm.jz(else_label)          # condition false: skip the "if" body
+      block
+      if @token == :keyword && @value == 'else'
+        skip_any_whitespace
+        end_label = asm.make_label(:endif) # now we need the 2nd label
+        asm.jmp(end_label)        # end of the "if" body: jump over the else body
+        asm.define_label(else_label)
+        block
+      end
+      match_word('end')
+      asm.define_label(end_label)
+    end
+
+    # Used to implement the Two-Label-Loops (while, until, repeat).
+    #
+    # name:  Name of the loop for readable labels.
+    # block: Code to execute at the start of each iteration. (e.g. a
+    #        condition)
+    def simple_loop(name)
+      start_label = asm.make_label(:"#{name}_loop")
+      end_label = asm.make_label(:"end_#{name}")
+      asm.define_label(start_label)
+      yield(end_label)                       # caller's per-iteration code (e.g. the loop condition)
+      pushing_label(end_label) { block }     # loop body; end_label is the target of a break inside it
+      match_word('end')
+      asm.jmp(start_label)                   # next iteration
+      asm.define_label(end_label)
+    end
+
+    def condition_loop(name, jump_instruction)   # jump_instruction: asm method (e.g. :jz) that leaves the loop
+      simple_loop(name) do |end_label|
+        condition
+        skip_any_whitespace
+        asm.send(jump_instruction, end_label)
+      end
+    end
+
+    def while_stmt
+      condition_loop('while', :jz) # leave the loop when the condition == 0 (falsish)
+    end
+
+    def until_stmt
+      condition_loop('until', :jnz) # leave the loop when the condition != 0 (truthy)
+    end
+
+    def repeat_stmt                          # Loop with no condition of its own.
+      simple_loop('repeat') do |end_label|   # end_label is not used here
+        skip_any_whitespace
+      end
+    end
+
+    # s = 0
+    # for x = 1 to 5
+    #   s = s + x
+    # end
+    def for_stmt
+      name = get_name                    # name of the loop counter variable
+      counter = asm.define_var(name)
+      match('=')
+
+      boolean_expression                 # initial value
+
+      asm.sub(return_reg, 1)             # pre-decrement because of the
+                                         # following pre-increment
+
+      asm.mov([counter], EAX)            # stash the counter in memory
+      match_word('to', :scan => true)
+
+      boolean_expression                 # final value
+      skip_any_whitespace
+
+      asm.push(EAX)                      # stash final value on stack
+      final = [ESP]                      # operand that refers to the value just pushed
+
+      simple_loop('for') do |end_label|
+        asm.mov(ECX, [counter])          # get the counter
+        asm.add(ECX, 1)                  # increment
+        asm.mov([counter], ECX)          # store the counter
+        asm.cmp(final, ECX)              # check if we're done
+        asm.jz(end_label)                # if so jump to the end
+      end
+
+      asm.add(ESP, 4)                    # clean up the stack
+    end
+
+    # do 5
+    #   ...
+    # end
+    def do_stmt
+
+      boolean_expression                 # number of iterations
+      skip_any_whitespace
+      asm.mov(ECX, EAX)                  # ECX holds the remaining count
+
+      start_label = asm.make_label(:do)
+      end_label = asm.make_label(:enddo)
+      asm.define_label(start_label)
+
+      asm.push(ECX)                      # save the count across the body
+
+      pushing_label(end_label) { block }
+
+      asm.pop(ECX)                       # restore the count
+
+      match_word('end')
+      asm.dec(ECX)
+      asm.jnz(start_label)               # repeat while the count is non-zero
+
+      # Phony push!  break needs to clean up the stack, but since we
+      # don't know if there is a break at this point we fake a push and
+      # always clean up the stack after.
+      asm.sub(ESP, 4)
+
+      asm.define_label(end_label)
+
+      # If there was a break we have to clean up the stack here. If
+      # there was no break we clean up the phony push above.
+      asm.add(ESP, 4)
+    end
+
+    def break_stmt
+      if top_label
+        asm.jmp(top_label)
+      else
+        expected(:'break to be somewhere useful',
+                 :got => :'a break outside a loop')
+      end
+    end
+
+    # Evaluates a boolean expression and compares the result with zero; the caller emits the conditional jump.
+    def condition
+      boolean_expression         # result is left in EAX
+      skip_whitespace
+      asm.cmp(EAX, 0)            # 0 is false, anything else is true
+    end
+
+    # print eax in hex format
+    def print_stmt
+      # names of the assembler variables used by print
+      d = '__DIGITS'
+      h = '__HEX'
+
+      digits = if asm.var?(d)       # reuse the digit table if it was already defined
+                 asm.var(d)
+               else
+                 d_var = asm.define_var(d, 16)
+                 asm.block do
+                   # define a lookup table of digits
+                   mov([d_var],    0x33323130)
+                   mov([d_var+4],  0x37363534)
+                   mov([d_var+8],  0x62613938)
+                   mov([d_var+12], 0x66656463)
+                 end
+                 d_var
+               end
+
+      # 12 bytes: 2 for "0x", 8 hex digits, 2 for newline + null terminator
+      hex = asm.var!(h, 12)
+
+      asm.block do
+        # TODO check sign and prepend '-' if negative
+        mov([hex], 0x7830)  # "0x" ==> 0x30 (48), 0x78 (120)
+        mov([hex+4], 0)     # zero the rest
+        mov([hex+8], 0)
+        mov([:byte, hex+10], 0xa)  # newline
+        mov([:byte, hex+11], 0)    # null terminator
+      end
+      boolean_expression # result in EAX
+      asm.block do
+        # convert eax to a hex string
+        lea(ESI, [digits])
+        lea(EDI, [hex+9])
+        # build the string backwards (right to left), byte by byte
+        mov(ECX, 4)
+      end
+      asm.block do
+        define_label(loop_label = make_label)
+        # low nybble of nth byte
+        movzx(EBX, AL)
+        and_(BL, 0x0f)        # isolate low nybble
+        movzx(EDX, [:byte, ESI+EBX])
+        mov([EDI], DL)
+        dec(EDI)
+        # high nybble of nth byte
+        movzx(EBX, AL)
+        and_(BL, 0xf0)        # isolate high nybble
+        shr(BL, 4)
+        mov(DL, [ESI+EBX])
+        mov([EDI], DL)
+        dec(EDI)
+        shr(EAX, 8)           # move on to the next byte of the value
+        loop_(loop_label)
+        # write(int fd, char *s, int n)
+        mov(EAX, 4)               # SYS_write
+        lea(ECX, [hex])           # ecx = &s
+        args = [1,                # fd = 1 (STDOUT)
+                ECX,              # s = &s
+                11]               # n = 11 (excluding term, max # of chars to print)
+        case platform
+        when 'darwin'             # on the stack, right to left (right @ highest addr)
+          ####
+          # setup bogus stack frame
+          push(EBP)
+          mov(EBP, ESP)
+          sub(ESP, 36)
+          ####
+          args.reverse.each { |a| push(a) }
+          push(EAX)
+          int(0x80)
+          ####
+          # teardown bogus stack frame
+          xor(EAX, EAX)
+          add(ESP, 36)
+          pop(EBX)
+          leave
+          ####
+        when 'linux'              # arguments are passed in EBX, ECX and EDX
+          mov(EBX, args[0])
+          mov(ECX, args[1])
+          mov(EDX, args[2])
+          int(0x80)
+        end
+      end
+    end
+
+
+  ############
+  # internal #
+  ############
+
+
+    def eof?                        # True once the input is exhausted and no lookahead char is left.
+      @input.eof? && @look.nil?
+    end
+
+    def op_char?(char, kind = :all)       # Can char begin an operator of this kind? kind must be a key of OP_CHARS.
+      OP_CHARS[kind].include?(char)
+    end
+
+    def op?(kind, token)                  # Is token a complete operator of this kind? kind must be a key of OPS.
+      OPS[kind].include?(token)
+    end
+
+    # Read the next character from the input stream.
+    def get_char
+      @look = @input.readbyte.chr unless @input.eof?
+    end
+
+    # Report what was expected
+    def expected(what, options = {})
+      got = options.has_key?(:got) ? options[:got] : @value
+      got, what = *[got, what].map {|x| x.is_a?(Symbol) ? x : "'#{x}'" }
+      if eof?
+        raise ParseError.new(caller), "Premature end of file, expected: #{what}."
+      else
+        context = (@input.readline rescue '(EOF)').gsub("\n", "\\n")
+        raise ParseError.new(caller, context), "Expected #{what} but got #{got}."
+      end
+    end
+
+
+
+    # Recognize an alphabetical character.
+    def alpha?(char)
+      ('A'..'Z') === char.upcase
+    end
+
+    # Recognize a decimal digit.
+    def digit?(char)
+      ('0'..'9') === char
+    end
+
+    # Recognize an alphanumeric character.
+    def alnum?(char)
+      alpha?(char) || digit?(char) || char == '_'   # the underscore also counts
+    end
+
+    # XXX disabled! ... should treat true/false as constants
+    #     once again we need a token of lookahead
+    def boolean?(char)
+      #char == 't' || char == 'f'
+      false                       # disabled: always answers false whatever char is
+    end
+
+    def whitespace?(char)
+      char == ' ' || char == "\t"
+    end
+
+    def newline?(char)
+      char == "\n" || char == "\r"
+    end
+
+    def comment_char?(char)       # Does char start a comment? scan skips comments with skip_comment.
+      char == '#'
+    end
+
+    def any_whitespace?(char)     # Space, tab, line feed or carriage return.
+      whitespace?(char) || newline?(char)
+    end
+
+    # Parse one or more newlines.
+    def get_newline
+      expected(:newline, :got => @look) unless newline?(@look)
+      many(:newline?)             # consume the whole run of newline characters
+      @token = :newline
+      @value = "\n"               # the run is always recorded as a single "\n"
+    end
+
+    # Match literal input.
+    def match(char)               # Consumes the lookahead if it equals char; otherwise reports an error via expected.
+      expected(char, :got => @look) unless @look == char
+  #     puts "[ch] #{indent}#{char}"
+      get_char
+      skip_whitespace             # spaces and tabs after the character are skipped; newlines are not
+    end
+
+    # Match literal input.
+    def match_word(word, options = {})
+      scan if options[:scan]
+      match = @value == word
+      expected(word) unless match
+      match
+    end
+
+    # Parse zero or more consecutive characters for which the test is
+    # true.
+    def many(test)
+      test = method(test) if test.is_a?(Symbol)
+      token = ''
+      while !eof? && test[@look]
+        token << @look
+        get_char
+      end
+      skip_whitespace
+      token
+    end
+
+    # Parse a "name" (keyword or identifier).
+    def get_name
+      expected(:identifier) unless alpha?(@look)    # a name must start with a letter
+      @value = many(:alnum?)
+      @token = @keyword_names.include?(@value) ? :keyword : :identifier
+      @value                                        # the name is also the return value
+    end
+
+    # Parse a number.
+    def get_number
+      expected(:integer) unless digit?(@look)
+      @token = :number
+      @value = many(:digit?)        # the digits, kept as a String
+      # puts "[nu] #{indent}#{@value} (0x#{@value.to_i.to_s(16)})"
+      @value
+    end
+
+    def get_boolean
+      get_name                      # read a name, then insist it is 'true' or 'false'
+      expected(:boolean) unless @value == 'true' || @value == 'false'
+      @token = :boolean
+      # puts "[bo] #{indent}#{@value}"
+      @value
+    end
+
+    def get_op
+      expected(:operator) unless op_char?(@look)
+      @token = :op
+      @value = many(:op_char?)      # greedy: takes every consecutive operator character
+    end
+
+    # Skip leading whitespace.
+    def skip_whitespace
+      get_char while whitespace?(@look)       # spaces and tabs only
+    end
+
+    # Skip leading whitespace including newlines.
+    def skip_any_whitespace
+      get_char while any_whitespace?(@look)   # spaces, tabs, line feeds and carriage returns
+    end
+
+    def skip_comment
+      get_char until newline?(@look)
+      skip_any_whitespace
+    end
+
+
+    def indent
+      real_indent = if @value == 'else' || @value == 'end'
+                      @indent - 1
+                    else
+                      @indent
+                    end
+      ' ' * (real_indent * 4)
+    end
+
+    def pushing(reg)              # Runs the given block with reg pushed on the stack.
+      asm.push(reg)
+      yield
+      asm.add(ESP, 4)             # drop the pushed value by adjusting ESP instead of popping it
+    end
+
+    def print_token
+      print(case @token
+            when :keyword
+              '[kw] '
+            when :number
+              '[nu] '
+            when :identifier
+              '[id] '
+            when :op
+              '[op] '
+            when :boolean
+              '[bo] '
+            when :newline
+              ''
+            else
+              raise "print doesn't know about #{@token}: #{@value}"
+            end)
+      print indent
+      puts @value
+    end
+
+    def pushing_label(label)      # Runs the given block with label on top of the label stack.
+      push_label(label)
+      yield
+      pop_label                   # not reached if the block raises
+    end
+
+    def push_label(label)
+      @label_stack.push(label)
+    end
+
+    def top_label
+      @label_stack[-1]
+    end
+
+    def pop_label                 # Removes and returns the top label (nil if the stack is empty).
+      @label_stack.pop
+    end
+
+
+    # for debugging
+    def self.hook(callback, methods)    # Wraps each named method so callback runs after it.
+      methods.each do |m|
+        orig = :"orig_#{m}"
+        alias_method orig, m            # keep the original reachable as orig_<name>
+        define_method(m) do             # the replacement takes no arguments
+          val = send(orig)
+          send(callback)
+          val                           # preserve the original's return value
+        end
+      end
+    end
+
+    # hook(:print_token, [:get_name, :get_newline, :get_number, :get_op, :get_boolean])
+
+  end
+end