diff --git a/asm/asm.rb b/asm/asm.rb index 3465cc9..e7b5f74 100644 --- a/asm/asm.rb +++ b/asm/asm.rb @@ -1,7 +1,7 @@ # Assembler container module. Sub modules are Text and Binary, which # both export the same interface for generating either assembly or # machine code for x86. -# +# # sjs # may 2009 diff --git a/asm/binary.rb b/asm/binary.rb index caa8231..f7c8a79 100644 --- a/asm/binary.rb +++ b/asm/binary.rb @@ -1,7 +1,7 @@ # A very basic x86 assembler library for Ruby. Generally the # instructions implemented are the minimum needed by the compiler this # is written for. x86 is just too big. -# +# # sjs # may 2009 # @@ -23,7 +23,7 @@ module Assembler DEBUG_OUTPUT = false # 0.size gives the real answer, we only do x86-32 though - MachineBytes = 4 + MachineBytes = 4 MachineBits = MachineBytes * 8 MinSigned = -1 * 2**(MachineBits-1) MaxSigned = 2**(MachineBits-1) - 1 @@ -65,7 +65,7 @@ module Assembler @symtab = symtab @objwriter_class = objwriter_class # @objwriter = objwriter - + # Almost a byte array, except for addresses. # # Addresses take the form [:, ] @@ -96,7 +96,7 @@ module Assembler X86_exit[@platform].each {|byte| emit_byte(byte)} byte_array = resolve_labels - + #puts "1st pass: " + byte_array.inspect if DEBUG_OUTPUT binary = package(byte_array) @@ -114,7 +114,7 @@ module Assembler # outline: # - resolve all variable proxies in @proxies replacing # the 4 bytes (0xff) with the real address - + bss_offset = @symtab.bss_offset const_offset = @symtab.const_offset @proxies.each do |i, proxy| @@ -197,7 +197,7 @@ module Assembler def label?(x) x.is_a?(Array) && x[0] == :label end - + # XXX this should probably evaluate the value somehow def defconst(name, bytes, value) @symtab.defconst(name, bytes, value) @@ -247,18 +247,18 @@ module Assembler def asm # stash the current number of bytes written instruction_offset = @ip - + print "0x#{@ip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT yield - + # return the number of bytes written @ip - instruction_offset - + puts if DEBUG_OUTPUT end - - + + def emit_byte(byte) ##### The joke's on me! Array#pack('c*') already does this. It is nice to see @@ -276,12 +276,12 @@ module Assembler # make sure it's a byte raise "not a byte: #{byte.inspect}" unless byte == byte & 0xff - + byte = byte & 0xff ### end of pointless code - + print (byte >= 0 && byte < 0x10 ? '0' : '') + byte.to_s(16) + ' ' if DEBUG_OUTPUT - + @ir << byte @ip += 1 end @@ -314,7 +314,7 @@ module Assembler def emit_dword(num) num_to_quad(num).each { |byte| emit_byte(byte) } end - + def mklabel(suffix=nil) @symtab.unique_label(suffix) end @@ -372,7 +372,7 @@ module Assembler mod = 1 disp8 = eff_addr.index - # disp32, mod == 10 + # disp32, mod == 10 elsif SignedRange === eff_addr.index mod = 2 disp32 = eff_addr.index @@ -501,7 +501,7 @@ module Assembler def log2(x, tol=1e-13) result = 0.0 - + # Integer part while x < 1 resultp -= 1 @@ -511,7 +511,7 @@ module Assembler result += 1 x /= 2 end - + # Fractional part fp = 1.0 while fp >= tol @@ -521,7 +521,7 @@ module Assembler x /= 2 result += fp end - end + end result end @@ -537,14 +537,14 @@ module Assembler # 6. mov reg8, r/m8 # 7. mov r/m8, imm8 def mov(dest, src) - + # These 2 are used in the same way, just the name differs to make the # meaning clear. They are 4-byte values that are emited at the end if # they are non-nil. Only one of them will be emited, and if both are # non-nil that one is immediate. immediate = nil offset = nil - + # This is an array of arguments to be passed to emit_modrm, if it is set. modrm = nil @@ -580,7 +580,7 @@ module Assembler opcode = 0xc7 modrm = [dest, 0] immediate = src - + # version 5: mov r/m8, r8 elsif rm?(dest, :byte) && register?(src, :byte) opcode = 0x88 @@ -627,12 +627,12 @@ module Assembler emit_byte(immediate_byte) end - end + end end def movzx(dest, src) - + # movzx Gv, ?? if register?(dest) @@ -648,10 +648,10 @@ module Assembler emit_byte(0x0f) emit_byte(opcode) emit_modrm(src, dest.regnum) - end - + end + else - + raise "unimplemented MOVZX instruction, << dest=#{dest.inspect} >> src=#{src.inspect}" end end @@ -700,21 +700,21 @@ module Assembler emit_modrm(dest, 0) emit_dword(src) end - + # add eax, imm32 elsif dest == EAX && immediate?(src) asm do emit_byte(0x05) emit_dword(src) end - + # add reg32, r/m32 elsif register?(dest) && rm?(src) asm do emit_byte(0x03) emit_modrm(src, dest.regnum) end - + else raise "unsupported ADD instruction, dest=#{dest.inspect} src=#{src.inspect}" end @@ -729,7 +729,7 @@ module Assembler emit_modrm(dest, 5) emit_byte(src) end - + # sub r/m32, imm32 elsif rm?(dest) && immediate?(src) asm do @@ -737,7 +737,7 @@ module Assembler emit_modrm(dest, 5) emit_dword(src) end - + # sub r/m32, reg32 elsif rm?(dest) && register?(src) asm do @@ -803,8 +803,8 @@ module Assembler end end end - - + + def dec(op) if register?(op) # dec reg32 @@ -827,7 +827,7 @@ module Assembler emit_modrm(op, 5) emit_byte(n) end - + else raise "unsupported SHR instruction, op=#{op.inspect}, n=#{n.inspect}" end @@ -878,18 +878,18 @@ module Assembler emit_byte(0x31) emit_modrm(dest, src.regnum) end - + else raise "unsupported XOR instruction, dest=#{dest.inspect} src=#{src.inspect}" end end - - + + def not_(op) group3(op, 2, 'NOT') end alias_method :not, :not_ - + def neg(op) group3(op, 3, 'NEG') @@ -900,19 +900,19 @@ module Assembler # push reg32 if register?(op) asm { emit_byte(0x50 + op.regnum) } - + elsif immediate?(op, :byte) asm do emit_byte(0x6a) emit_byte(op) end - + elsif immediate?(op) asm do emit_byte(0x68) emit_dword(op) end - + else raise "unsupported PUSH instruction: op=#{op.inspect}" end @@ -923,7 +923,7 @@ module Assembler # pop reg32 if register?(op) asm { emit_byte(0x58 + op.regnum) } - + else raise "unsupported POP instruction: op=#{op.inspect}" end @@ -937,14 +937,14 @@ module Assembler emit_byte(0x39) emit_modrm(op1, op2.regnum) end - + # cmp eax, imm32 elsif op1 == EAX && immediate?(op2) asm do emit_byte(0x3d) emit_dword(op2) end - + else raise "unsupported CMP instruction: op1=#{op1.inspect} op2=#{op2.inspect}" end @@ -977,7 +977,7 @@ module Assembler # Only Jcc rel32 is supported. def jcc(instruction, label) - opcode = JccOpcodeMap[instruction] + opcode = JccOpcodeMap[instruction] asm do emit_byte(0x0f) emit_byte(opcode) @@ -1025,7 +1025,7 @@ module Assembler unless SignedByte === delta raise "LOOP can only jump -128 to 127 bytes, #{label} is #{delta} bytes away" end - + asm do emit_byte(0xe2) emit_byte(delta) @@ -1035,10 +1035,10 @@ module Assembler # Opcode group #3. 1-byte opcode, 1 operand (r/m8 or r/m32). - # + # # Members of this group are: DIV, IDIV, MUL, IMUL, NEG, NOT, and TEST. def group3(op, reg, instruction) - opcode = + opcode = if rm?(op, 8) 0xf6 elsif rm?(op) @@ -1055,5 +1055,5 @@ module Assembler end # class Binary - + end # module Assembler diff --git a/asm/cstruct.rb b/asm/cstruct.rb index 9db42bc..d0602e7 100644 --- a/asm/cstruct.rb +++ b/asm/cstruct.rb @@ -4,7 +4,7 @@ # subclassed just like any other class. A nice side-effect of this # syntax is that it is always clear that a CStruct is just a class and # instances of the struct are objects. -# +# # Some light metaprogramming is used to make the following syntax possible: # # class MachHeader < CStruct @@ -21,12 +21,12 @@ # uint32 :cmd # uint32 :cmdsize # end -# +# # # inherits cmd and cmdsize as the first 2 fields # class SegmentCommand < LoadCommand # string :segname, 16 # uint32 :vmaddr -# uint32 +# uint32 # end # # Nothing tricky or confusing there. Members of a CStruct class are @@ -56,7 +56,7 @@ class CStruct ################### # Class Constants # ################### - + # Size in bytes. SizeMap = { :int8 => 1, @@ -89,7 +89,7 @@ class CStruct :uint => 'I', :char => 'C' } - + # Only needed when unpacking is different from packing, i.e. strings w/ lambdas in PackMap. UnpackMap = { :string => lambda do |str, *opts| @@ -99,11 +99,11 @@ class CStruct val end } - + ########################## # Class Instance Methods # ########################## - + # Note: const_get and const_set are used so the constants are bound # at runtime, to the real class that has subclassed CStruct. # I figured Ruby would do this but I haven't looked at the @@ -112,31 +112,31 @@ class CStruct # All of this could probably be avoided with Ruby 1.9 and # private class variables. That is definitely something to # experiment with. - + class <, # , and . @@ -13,7 +13,7 @@ module MachO ############### # Mach header # ############### - + # Appears at the beginning of every Mach object file. class MachHeader < CStruct uint32 :magic @@ -22,7 +22,7 @@ module MachO uint32 :filetype uint32 :ncmds uint32 :sizeofcmds - uint32 :flags + uint32 :flags end # Values for the magic field. @@ -40,13 +40,13 @@ module MachO MH_BUNDLE = 0x8 MH_DYLIB_STUB = 0x9 MH_DSYM = 0xa - + # CPU types and subtypes (only Intel for now). CPU_TYPE_X86 = 7 - CPU_TYPE_I386 = CPU_TYPE_X86 + CPU_TYPE_I386 = CPU_TYPE_X86 CPU_SUBTYPE_X86_ALL = 3 - - + + ############################ # Load commands / segments # ############################ @@ -61,7 +61,7 @@ module MachO LC_SYMTAB = 0x2 LC_SYMSEG = 0x3 LC_THREAD = 0x4 - LC_UNIXTHREAD = 0x5 + LC_UNIXTHREAD = 0x5 class SegmentCommand < LoadCommand string :segname, 16 @@ -74,7 +74,7 @@ module MachO uint32 :nsects uint32 :flags end - + # Values for protection fields, maxprot and initprot. VM_PROT_NONE = 0x00 @@ -91,18 +91,18 @@ module MachO uint32 :stroff # Offset of the string table. uint32 :strsize # Size of the string table in bytes. end - + LoadCommandStructMap = { LC_SEGMENT => SegmentCommand, LC_SYMTAB => SymtabCommand } - + ############ # Sections # ############ - + class Section < CStruct string :sectname, 16 string :segname, 16 @@ -116,18 +116,18 @@ module MachO uint32 :reserved1 uint32 :reserved2 end - + # Values for the type bitfield (mask 0x000000ff) of the flags field. # (incomplete!) S_REGULAR = 0x0 S_ZEROFILL = 0x1 S_CSTRING_LITERALS = 0x2 - + ########################### # Relocation info support # ########################### - + class RelocationInfo < CStruct int32 :r_address # offset in the section to what is being relocated uint32 :r_info @@ -149,12 +149,12 @@ module MachO # Relocation types (r_type) GENERIC_RELOC_VANILLA = 0 - + ######################## # Symbol table support # ######################## - - + + # Nlist is used to describe symbols. class Nlist < CStruct uint32 :n_strx # Index into string table. Index of zero is the empty string. @@ -163,7 +163,7 @@ module MachO uint16 :n_desc # TODO See . uint32 :n_value # The symbol's value (or stab offset). end - + # Type flag (see for more details) # --------- # @@ -178,13 +178,13 @@ module MachO N_PEXT = 0x10 # private external symbol bit N_TYPE = 0x0e # mask for the type bits N_EXT = 0x01 # external symbol bit, set for external symbols (e.g. globals) - + # Values for N_TYPE. (incomplete!) N_UNDF = 0x0 # undefined, n_sect == NO_SECT N_ABS = 0x2 # absolute, n_sect == NO_SECT N_SECT = 0xe # defined in section number n_sect - + NO_SECT = 0 MAX_SECT = 255 - + end diff --git a/asm/machofile.rb b/asm/machofile.rb index 6064b7e..68cc73b 100644 --- a/asm/machofile.rb +++ b/asm/machofile.rb @@ -1,18 +1,18 @@ require 'asm/macho' module Assembler - + class MachOFile - + include MachO - + attr_accessor :header, :load_commands, :sections, :data attr_accessor :current_segment - + def initialize(filetype=MH_OBJECT) @header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0) @load_commands = [] # All defined segments. - @sections = {} # Map of segment names to lists of sections. + @sections = {} # Map of segment names to lists of sections. @section_disk_size = Hash.new(0) # Sections store their VM size so we need their sizes on disk. @section_offset = 0 # Offset of the next section's data, in bytes. @data = [] # Blobs of data that appear at the end of the file. @@ -38,28 +38,28 @@ module Assembler def load_command(cmdtype) struct = LoadCommandStructMap[cmdtype] unless struct - raise "unsupported load command type: #{cmdtype.inspect}," + + raise "unsupported load command type: #{cmdtype.inspect}," + " supported types: #{LoadCommandStructMap.keys.sort.inspect}" end - + # Fill in all the unknown fields with 0, this is nonsense for # string fields but that doesn't really matter. dummy_vals = [0] * (struct::Members.size - 2) - + # cmd cmdsize ... command = struct.new(cmdtype, struct.bytesize, *dummy_vals) - + @load_commands << command - + @header[:ncmds] += 1 @header[:sizeofcmds] += command.bytesize - + yield(command) if block_given? - + return command end - + # Define a segment in this file. If a block is given it is passed # the new segment. You can chain calls to segment, it returns self. # @@ -84,25 +84,25 @@ module Assembler # them in the segment that they name. def section(name, segname, data='', vmsize=data.size, segment=@current_segment, type=S_REGULAR) - + # Create the new section. section = Section.new(name, segname, @section_offset, vmsize, 0, 0, 0, 0, 0, 0, type) - + # Add this section to the map of segment names to sections. (@sections[segment[:segname]] ||= []) << section @section_disk_size[name] = data.size @section_offset += data.size @data << data if data.size > 0 - + # Update the header. @header[:sizeofcmds] += section.bytesize - + # Update the segment. segment[:nsects] += 1 segment[:cmdsize] += section.bytesize yield(section) if block_given? - + return section end @@ -119,7 +119,7 @@ module Assembler # # For MH_EXECUTE files the text section goes under the segment with the # name given (__TEXT). - + def text(data, sectname='__text', segname='__TEXT') real_segname = nil unless @current_segment @@ -129,7 +129,7 @@ module Assembler seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE end end - + section(sectname, segname, data) do |sect| # reloff and nreloc are calculated later (in calculate_offsets) sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS @@ -139,7 +139,7 @@ module Assembler @text_segname = real_segname || segname @text_sect_index = @sections[@text_segname].length-1 @text_data_index = @data.length-1 - + return self end @@ -203,14 +203,14 @@ module Assembler @reloc_info = reloc_info.map {|x| x.clone} return self end - + # Define a symbol table. This should usually be placed at the end of the # file. # # This function is overloaded to accept either an array of Nlist structs # packed into a byte string (i.e. a C array) and a string table, or a # single parameter: any type of Symtab. - + def symtab(nlist_ary_or_symtab, stab=nil) if stab.nil? symtab = nlist_ary_or_symtab @@ -219,19 +219,19 @@ module Assembler else nlist_ary = nlist_ary_or_symtab end - + load_command(LC_SYMTAB) do |st| st[:nsyms] = nlist_ary.size st[:strsize] = stab.size # symoff and stroff are filled in when offsets are recalculated. end - + # puts ">>> Defining symbol table:" # puts ">>> #{nlist_ary.size} symbols" # puts ">>> stab = #{stab.inspect}" # puts ">>> nlist_ary = #{nlist_ary.inspect}" # puts ">>> (serialized) = #{nlist_ary.map{|n|n.serialize}.join.inspect}" - + @data << nlist_ary.map {|n| n.serialize}.join @data << stab return self @@ -240,11 +240,11 @@ module Assembler # Serialize the entire MachO file into a byte string. This is simple # thanks to CStruct#serialize. - + def serialize # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_command.size) # ... perhaps an option to recalculate such data as well. - + # Now that we have all the pieces of the file defined we can calculate # the file offsets of segments and sections. calculate_offsets @@ -258,7 +258,7 @@ module Assembler # Mach-O file Part 2: Load Commands # ##################################### # dump each load command (which include the section headers under them) - @load_commands.map do |cmd| + @load_commands.map do |cmd| sects = @sections[cmd[:segname]] rescue [] sects.inject(cmd.serialize) do |data, sect| data + sect.serialize @@ -271,19 +271,19 @@ module Assembler @data.join end - + # Update the file offsets in segments and sections. - + def calculate_offsets # Maintain the offset into the the file on disk. This is used # to update the various structures. offset = @header.bytesize - + # First pass over load commands. Most sizes are filled in here. @load_commands.each do |cmd| case cmd[:cmd] - + when LC_SEGMENT seg = cmd sections = @sections[seg[:segname]] @@ -292,25 +292,25 @@ module Assembler section_disk_size = sections.inject(0) do |total, sect| total + @section_disk_size[sect[:sectname]] end - + ### TODO this should be redundant. try commenting it out one day. seg[:nsects] = sections.size seg[:cmdsize] = seg.bytesize + section_size ### - + seg[:vmsize] = section_vm_size seg[:filesize] = section_disk_size - + when LC_SYMTAB # nop - + else raise "unsupported load command: #{cmd.inspect}" end offset += cmd[:cmdsize] end - + # offset now points to the end of the Mach-O headers, or the beginning # of the binary blobs of section data at the end. @@ -318,16 +318,16 @@ module Assembler # Second pass over load commands. Fill in file offsets. @load_commands.each do |cmd| case cmd[:cmd] - + when LC_SEGMENT seg = cmd - sections = @sections[seg[:segname]] + sections = @sections[seg[:segname]] seg[:fileoff] = offset sections.each do |sect| sect[:offset] = offset offset += @section_disk_size[sect[:sectname]] end - + when LC_SYMTAB if @reloc_info # update text section with relocation info @@ -344,14 +344,14 @@ module Assembler # No else clause is necessary, the first iteration should have caught them. - + end - + end # @load_commands.each end # def calculate_offsets - - + + ####### private ####### @@ -366,8 +366,8 @@ module Assembler raise "unsupported MachO file type: #{@header.inspect}" end end - - + + end # class MachOFile - + end # module Assembler diff --git a/asm/machosym.rb b/asm/machosym.rb index 6f70e83..7939e69 100644 --- a/asm/machosym.rb +++ b/asm/machosym.rb @@ -1,11 +1,11 @@ require 'asm/macho' module Assembler - + class MachOSym - + attr_accessor :name, :type, :segnum, :desc, :value - + def initialize(name, type, segnum, desc, value) @name = name @type = type @@ -13,17 +13,17 @@ module Assembler @desc = desc @value = value end - - + + def to_nlist(strx) MachO::Nlist.new(strx, @type, @segnum, @desc, @value) end - - + + def to_s @name end - + end - + end \ No newline at end of file diff --git a/asm/machosymtab.rb b/asm/machosymtab.rb index 329ec38..66b1e3a 100644 --- a/asm/machosymtab.rb +++ b/asm/machosymtab.rb @@ -3,9 +3,9 @@ require 'asm/machosym' require 'asm/symtab' module Assembler - + class MachOSymtab < Symtab - + include MachO def make_symbols(vars, base_addr, type, segnum) @@ -23,7 +23,7 @@ module Assembler # - All labels are exported. This should be changed and only functions exported! section = 1 - + # Functions (section #1, __text) symbols = make_symbols(@labels, text_offset, N_SECT | N_EXT, section) section += 1 @@ -46,7 +46,7 @@ module Assembler def bss_section @consts.size > 0 ? 3 : 2 end - + def nlist_ary symbols = {} strx = 1 @@ -61,7 +61,7 @@ module Assembler end return ary end - + def stab # The empty strings result in a string that begins and ends with a null byte ['', all_symbols, ''].flatten.map { |sym| sym.to_s }.join("\0") @@ -84,5 +84,5 @@ module Assembler end end - + end diff --git a/asm/machowriter.rb b/asm/machowriter.rb index 9ecf83c..202aac7 100644 --- a/asm/machowriter.rb +++ b/asm/machowriter.rb @@ -1,4 +1,4 @@ - + ### XXX development hack! def stub_symtab! text_segnum = 1 @@ -8,11 +8,11 @@ ['_main', N_SECT | N_EXT, text_segunm, 0x0] ] } - + nlist_ary = [] stab = "\0" strx = 1 # string index (1-based) - + symtab[:functions].each do |name, type, segnum, addr| nlist_ary << MachO::Nlist.new(strx, type, segnum, 0, addr) stab << "#{name}\0" @@ -20,7 +20,7 @@ end symtab(nlist_ary, stab) end - + end - + end \ No newline at end of file diff --git a/asm/objwriter.rb b/asm/objwriter.rb index 485bd10..2fd6037 100644 --- a/asm/objwriter.rb +++ b/asm/objwriter.rb @@ -1,11 +1,11 @@ module Assembler - + class UnimplementedMethodError < RuntimeError; end - - + + # Abstract base class. class ObjWriter - + def write!(filename) File.open(filename, 'wb') do |file| file.print(serialize) @@ -15,12 +15,12 @@ module Assembler def fail(name) raise UnimplementedMethodError, name end - + # These methods must be defined for most uses of the library. %w[header segment section text data bss symtab serialize].each do |name| define_method(name) { fail(name) } end - + end - + end \ No newline at end of file diff --git a/asm/registers.rb b/asm/registers.rb index fcef603..33e9f5f 100644 --- a/asm/registers.rb +++ b/asm/registers.rb @@ -1,9 +1,9 @@ require 'asm/regproxy' module Assembler - + module Registers - + # This structure allows for x86 registers of all sizes. The # number of the register is the index of the array in which it was # found. The size of a register in bytes is 2 ** index-into-sub-array. @@ -25,8 +25,8 @@ module Assembler const_set(name, RegisterProxy.new(reg, 8 * (2 ** i), regnum)) end end - - + + end - + end \ No newline at end of file diff --git a/asm/regproxy.rb b/asm/regproxy.rb index e7ac6b0..0b0a8d4 100644 --- a/asm/regproxy.rb +++ b/asm/regproxy.rb @@ -1,5 +1,5 @@ module Assembler - + # Acts like a register and can be used as the base or index in an # effective address. # @@ -9,7 +9,7 @@ module Assembler attr_reader :name, :size, :regnum attr_reader :base, :index, :scale - + def initialize(name, size, regnum) @name = name # attrs are read-only so sharing is ok @size = size @@ -17,7 +17,7 @@ module Assembler @base = self end - + def +(index) raise "index already specified" if @index new_reg = self.clone @@ -25,7 +25,7 @@ module Assembler new_reg end - + def *(scale) raise "index must come first" unless @index raise "scale already specified" if scale @@ -39,7 +39,7 @@ module Assembler @scale end - + def index? @index end @@ -50,18 +50,18 @@ module Assembler end - + def to_s @name.to_s + (@index ? "+#{@index}" : '') + (@scale ? "*#{@scale}" : '') end - - + + def inspect to_s end - + end end \ No newline at end of file diff --git a/asm/symtab.rb b/asm/symtab.rb index da095d0..1633d0e 100644 --- a/asm/symtab.rb +++ b/asm/symtab.rb @@ -9,15 +9,15 @@ module Assembler # things will actually live in memory. class Symtab - + attr_accessor :text_offset, :bss_offset, :const_offset attr_reader :const_data, :const_size, :bss_size, :reloc_info - + def initialize @vars = {} # Map of variable names to offsets. (bss vars) @consts = {} # Map of constant names to offsets. @funcs = {} # map of function names to offsets. - + # Initial data to load into memory (data for __DATA segment). @const_data = '' @@ -36,7 +36,7 @@ module Assembler @bss_offset = 0 @const_offset = 0 end - + # Generate a unique label. def unique_label(suffix=nil) @num_labels += 1 @@ -52,19 +52,19 @@ module Assembler @labels[name] = offset return name end - + def lookup_label(name) @labels[name] end - - + + def defvar(name, bytes) @vars[name] = @bss_size @bss_size += bytes end - + def defconst(name, value, bytes) @consts[name] = @const_size @const_size += bytes @@ -76,15 +76,15 @@ module Assembler @funcs[name] = offset end - + def var(name) @vars[name] end - + def var?(name) @vars.has_key?(name) end - + def const(name) @consts[name] end @@ -94,5 +94,5 @@ module Assembler end end - + end diff --git a/asm/text.rb b/asm/text.rb index 474087e..5f65c16 100644 --- a/asm/text.rb +++ b/asm/text.rb @@ -1,5 +1,5 @@ # A subset of x86 assembly. -# +# # sjs # may 2009 @@ -174,7 +174,7 @@ module Assembler def int(num) emit("int 0x#{num.to_s(16)}") end - + def cdq emit("cdq") end diff --git a/asm/varproxy.rb b/asm/varproxy.rb index 6f25c0d..50cfa51 100644 --- a/asm/varproxy.rb +++ b/asm/varproxy.rb @@ -1,5 +1,5 @@ module Assembler - + # Wrap a variable's address so that we can perform arithmetic on it # before resolving it when we know where things will go in memory. # All we do is catch arithmetic ops and then provide a means to @@ -10,7 +10,7 @@ module Assembler attr_reader :name attr_accessor :ops - + def initialize(name, const=false) @name = name @const = const @@ -31,7 +31,7 @@ module Assembler addr.send(*op) end end - + def const? @const end diff --git a/compiler.rb b/compiler.rb index c21beed..3ca04f0 100644 --- a/compiler.rb +++ b/compiler.rb @@ -18,7 +18,7 @@ class ParseError < StandardError end class Compiler - + include Assembler::Registers Keywords = { @@ -280,7 +280,7 @@ class Compiler false_label = asm.mklabel(:false) truthy_label = asm.mklabel(:truthy) done_label = asm.mklabel(:done) - + asm.cmp(EAX, FALSE) asm.jne(truthy_label) @@ -306,7 +306,7 @@ class Compiler expected('&&') unless match_word('&&') false_label = asm.mklabel(:false) done_label = asm.mklabel(:done) - + asm.cmp(EAX, FALSE) asm.je(false_label) @@ -382,7 +382,7 @@ class Compiler # a: # b: eax - # + # # If b - a is zero then a = b, and make_boolean will leave the zero # to effectively return false. If b - a is non-zero then a != b, # and make_boolean will leave -1 (true) for us in eax. @@ -409,8 +409,8 @@ class Compiler # the assembler needed to implement, but since the Jcc # instructions are very cheap to implement this is no longer # a concern. - - + + # The next 4 relations all compare 2 values a and b, then return # true (-1) if the difference was below zero and false (0) # otherwise (using JL, jump if less than). @@ -464,7 +464,7 @@ class Compiler # a: # b: eax - # + # # if a <= b then !(a > b) def le_relation # Compare them as in greater than but invert the result. @@ -506,7 +506,7 @@ class Compiler end @indent -= 1 end - + # Parse an if-else statement. def if_else_stmt else_label = asm.mklabel(:end_or_else) @@ -528,7 +528,7 @@ class Compiler end # Used to implement the Two-Label-Loops (while, until, repeat). - # + # # name: Name of the loop for readable labels. # block: Code to execute at the start of each iteration. (e.g. a # condition) @@ -667,7 +667,7 @@ class Compiler # 12 bytes: 2 for "0x", 8 hex digits, 2 for newline + null terminator hex = asm.var!(h, 12) - + asm.block do # TODO check sign and prepend '-' if negative mov([hex], 0x7830) # "0x" ==> 48, 120 @@ -757,10 +757,10 @@ class Compiler @look = if @input.eof? nil else - @input.readbyte.chr + @input.readbyte.chr end end - + # Report error and halt def abort(msg) raise ParseError, msg @@ -777,7 +777,7 @@ class Compiler raise ParseError.new(caller, context), "Expected #{what} but got #{got}." end end - + # Recognize an alphabetical character.