From 3f070cd0db5f34210e3c3630eea86ea2628aa51f Mon Sep 17 00:00:00 2001 From: sjs Date: Tue, 19 May 2009 17:01:14 -0700 Subject: [PATCH 1/7] [NEW] Emit x86 code for the mov instruction. Barely works 1/2 the time. The supporting infrastructure includes a C program for reading a binary blob of x86 code and wrapping it in an ELF executable for Linux x86. Unsure about getting the data for other sections of the binary besides .text. --- Makefile | 12 +- compiler.rb | 357 ++++++++++++++++++++++++++++++++++++++++++++++----- elfwriter.c | 266 ++++++++++++++++++++++++++++++++++++++ lea.asm | 12 ++ min.asm | 4 + mov.asm | 89 +++++++++++++ opcode.rb | 25 ++++ prologue.asm | 1 + template.asm | 1 + test.code | 2 +- test.rb | 5 +- 11 files changed, 737 insertions(+), 37 deletions(-) create mode 100644 elfwriter.c create mode 100644 lea.asm create mode 100644 min.asm create mode 100644 mov.asm create mode 100644 opcode.rb diff --git a/Makefile b/Makefile index 37fc257..3c797fb 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,18 @@ build: test.rb test.code # $? indicates success as per unix convention ./test +elfwriter: elfwriter.c + gcc -o elfwriter elfwriter.c -lelf + +test_elf: elfwriter build + ./elfwriter test.bin 4 test_elf.o + ld -o test_elf test_elf.o + ./test_elf + clean: @rm -f test.o @rm -f test @rm -f test.asm - + @rm -f elfwriter + @rm -f test_elf.o + @rm -f test_elf diff --git a/compiler.rb b/compiler.rb index 6c582f8..a714db5 100644 --- a/compiler.rb +++ b/compiler.rb @@ -6,6 +6,9 @@ # sjs # may 2009 +require 'opcode' +#require 'assembler' + class ParseError < StandardError attr_reader :caller, :context def initialize(caller, context=nil) @@ -15,24 +18,47 @@ class ParseError < StandardError end class Compiler - attr_reader :data, :bss, :code +# include Assembler - def initialize(input=STDIN) - @look = '' # next lookahead char - @input = input # stream to read from - @data = '' # data section - @bss = '' # bss section - @code = '' # code section - @vars = {} # symbol table - @num_labels = 0 # used to generate unique labels - @num_labels_with_suffix = Hash.new(0) - @num_conditions = 0 - @break_stack = [] # for breaking out of loops + attr_reader :data, :bss, :code - # reserved words (... constant?) - # - # if, else, end, while, until, repeat, break - @keywords = %w[i l e w u r b] + def initialize(input=STDIN) + @look = '' # Next lookahead char. + @input = input # Stream to read from. + @data = '' # Data section. + @bss = '' # BSS section. + @code = '' # Code section. + @vars = {} # Symbol table, maps names to locations in BSS. + @num_labels = 0 # Used to generate unique labels. + @num_labels_with_suffix = Hash.new(0) + @break_stack = [] # for breaking out of loops + @binary = [] # Byte array of machine code. + @machine_code = '' # Byte string of machine code. + + @header_size = 0x100 # ELF, Linux, x86 + @text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86). + @text_size = 0x02be00 # Size of text section. + @data_offset = @text_offset + @text_size # Offset of data section. + @data_size = 0x4e00 # Size of data section. + @bss_offset = @data_offset + @data_size # Offset of bss section. + @bss_size = 0 # Size of bss section. + + # Labels for the assembler. Maps names to locations. + @labels = Hash.new {|h, key| raise "undefined label: #{key}"} + + # Dispatch table for keywords. + @dispatch = { + 'b' => method(:break_stmt), # break + 'e' => nil, # end + 'l' => nil, # else + 'i' => method(:if_else_stmt), # if-else + 'r' => method(:repeat_stmt), # repeat + 'u' => method(:until_stmt), # until + 'w' => method(:while_stmt) # while + } + + # Reserved words (... constant?) + @keywords = @dispatch.keys # seed the lexer get_char @@ -41,7 +67,8 @@ class Compiler def parse block expected(:'end of file') unless eof? - [@data, @bss, @code] + compile + [@data, @bss, @code, @machine_code] end @@ -135,17 +162,8 @@ class Compiler # Parse a statement. def statement - case @look - when 'i' - if_else_stmt - when 'w' - while_stmt - when 'u' - until_stmt - when 'r' - repeat_stmt - when 'b' - break_stmt + if handler = @dispatch[@look] + handler.call else assignment newline @@ -234,8 +252,6 @@ class Compiler # Evaluates any expression for now. There are no boolean operators. def condition - # @num_conditions += 1 - # emit("") expression x86_cmp(:eax, 0) # 0 is false, anything else is true skip_whitespace @@ -394,7 +410,7 @@ class Compiler # Get a number. def get_num expected(:integer) unless digit?(@look) - many(method(:digit?)) + many(method(:digit?)).to_i end # Skip leading whitespace. @@ -417,7 +433,8 @@ class Compiler def var(name, dwords=1) unless @vars[name] @bss << "#{name}: resd #{dwords}\n" - @vars[name] = name + @vars[name] = @bss_size + @bss_size += dwords # else # raise ParseError, "identifier #{name} redefined" end @@ -431,6 +448,12 @@ class Compiler def emit_label(name=unique_label) emit("#{name}:", :tab => nil) + + @labels[name] = @binary.length + end + + def resolve_label(label) + @labels[label] end # Generate a unique label. @@ -444,12 +467,194 @@ class Compiler end - # Some asm methods for convenience and arity checks. + # x86 machine code generation + def emit_byte(byte) + @binary << byte + end + + def emit_dword(num) + @binary += num_to_quad(num) + end + + # 0-2: r/m + # 3-5: reg/opcode + # 6-7: mod + # + # dest and src are tuples of the form [type, value] where type is + # any of :reg, :rm32, :imm32. Max _one_ :rm32 arg per call. + def emit_modrm(dest, src, override) + if dest[0] == :reg + reg = override[:op] || regnum(dest[1]) + + # mod == 11 (register content) + if src[0] == :reg + mod = 3 + rm = regnum(src[1]) + + # mod == 00 (pointer) + elsif src[0] == :rm32 + mod = 0 + parts = decode_addr(src[1]) + rm = case parts[0] + # mod == 00 (direct pointer e.g. [eax]) + when :reg + regnum(parts[1]) + when :sib + sib = parts[1..-1] + 4 + when :disp + disp = parts[1] + 5 + end + end + elsif src[0] == :reg + reg = override[:op] || regnum(src[1]) + else + raise "unsupported mod r/m byte! dest=#{dest} src=#{src}" + end + emit_byte((mod << 6) & (reg << 3) & rm) + emit_sib(sib) if defined? sib + emit_dword(disp) if defined? disp + end + + def emit_sib(sib) + scale, index, base = *sib + if [1,2,4,8].include?(scale) + scale = log2(scale) + else + raise "unsupported SIB scale: #{scale}, should be [1, 2, 4, 8]" + end + emit_byte((scale << 6) & (index << 3) & base) + end + + def compile + @machine_code = @binary.pack('c*') + end + + + # Some asm methods for convenience and arity checks. Now emits + # some real machine code too. + + # This is the full set of x86 registers. + # Registers = [:eax, :ecx, :edx, :ebx, :esp, :ebp, :esi, :edi] + + # This will do for early work. + # Position indicates value in op codes. + Registers = [:eax, :ecx, :edx, :ebx] + + # Regex to match any x86 register name, and then some. Should be + # sufficient. + RegisterRegex = 'e[acdbsd][xip]' + + # Match a literal number in binary, octal, decimal, or hex + NumberRegex = '(0[xXbB]?)?[0-9a-fA-F]+' + + # Match a variable name. + NameRegex = '[a-zA-Z][a-zA-Z0-9]*' + + # 0.size gives the real answer, we only do x86 though + MachineBytes = 4 + MachineBits = MachineBytes * 8 + MinSigned = -1 * 2**(MachineBits-1) + MaxSigned = 2**(MachineBits-1) - 1 + MinUnsigned = 0 + MaxUnsigned = 2**MachineBits - 1 + SignedRange = MinSigned..MaxSigned + + # assemble x86 machine code + def asm + # stash the current number of bytes written + instruction_offset = @binary.length + + yield + + # return the number of bytes written + @binary.length - instruction_offset + end + + def register?(op) + Registers.index(op) + end + alias_method :regnum, :register? + + def immediate?(op) + op.is_a?(Numeric) || (op.is_a?(String) && op.match(/^#{NumberRegex}$/)) + end + + def rm32?(op) + offset?(op) || op.respond_to?(:match) && op.match(/^ + \[ + #{RegisterRegex} # base register + (\+#{RegisterRegex} # optional index register + (\*[1248])? # optional scale + )? + \] + $/x) + end + + # 6 versions of the mov instruction are supported: + # 1. mov reg32, immediate32 (0xb8+destreg, imm32) + # 2. mov reg32, r/m32 (0x8b, mod r/m, maybe sib) + # 2a. mov eax, memoffset32 (0xa1, disp32) + # 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib) + # 3a. mov memoffset32, eax (0xa3, disp32) + # 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32) def x86_mov(dest, src) emit("mov #{dest}, #{src}") + + dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword ' + src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword ' + + asm do + + # version 1: mov r32, imm32 + if register?(dest) && immediate?(src) + emit_byte(0xb8 + regnum(dest)) # dest encoded in instruction + emit_dword(parse_num(src)) + + # version 2: mov r32, r/m32 + elsif register?(dest) && rm32?(src) + # version 2a: mov eax, moffs32 + if dest == :eax && offset?(src) + emit_byte(0xa1) + num = decode_addr(src)[1] + emit_dword(num) + else + emit_byte(0x8b) + emit_modrm([:reg, dest], [:rm32, src]) + end + + # version 3: mov r/m32, r32 + elsif rm32?(dest) && register?(src) + # version 3a: mov moffs32, eax + if offset?(dest) && src == :eax + emit_byte(0xa3) + num = decode_addr(dest)[1] + emit_dword(num) + else + emit_byte(0x89) + emit_modrm([:rm32, dest], [:reg, src]) + end + + # version 4: mov r/m32, imm32 + elsif rm32?(dest) && immediate?(src) + emit_byte(0xc7) + emit_modrm([:rm32, dest], [:imm32, src], :op => 0) + else + puts "rm32?(dest): #{rm32?(dest)}\t\trm32?(src): #{rm32?(src)}" + puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}" + puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}" + puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}" + #raise "unsupported mov format: mov #{dest}, #{src}" + puts "!!! unsupported mov format: mov #{dest}, #{src}" + end + + end # asm do + end + def x86_add(dest, src) emit("add #{dest}, #{src}") end @@ -501,4 +706,90 @@ class Compiler def x86_cmp(a, b) emit("cmp #{a}, #{b}") end + + + def offset?(addr) + addr.respond_to?(:match) && addr.match(/^\[(#{NameRegex}|#{NumberRegex})\]$/) + end + + def decode_addr(addr) + addr = addr[1..-2] # strip brackets + + if matches = addr.match(/^#{NameRegex}$/) + unless loc = @vars[matches[0]] + raise "undefined variable #{matches[0]}" + end + [:disp, @bss_offset + loc] + elsif matches = addr.match(/^#{NumberRegex}$/) + [:disp, parse_num(matches[0])] + elsif addr.index('*') + bi, scale = *addr.split('*') + base, index = *bi.split('+') + [:sib, scale.to_i, index.to_sym, base.to_sym] + elsif addr.index('+') + base, index = *addr.split('+') + [:sib, 1, index.to_sym, base.to_sym] + else + [:reg, addr.to_sym] + end + end + + # Parse a number from a string. Used by emit_dword. + def parse_num(str) + # If it's not a string it's a number, just return it. + return str unless str.is_a?(String) + + str.downcase! + base = 10 # default to base 10 + if str[0, 1] == '0' + base = case str[1, 1] + when 'x' + 16 + when 'b' + str.slice!(2..-1) + 2 + else + 8 + end + end + str.to_i(base) + end + + # Convert a number to a quad of bytes, discarding excess bits. + # Little endian! + def num_to_quad(num) + [ + num & 0xff, + (num >> 8) & 0xff, + (num >> 16) & 0xff, + (num >> 24) & 0xff + ] + end + + def log2(x, tol=1e-13) + result = 0.0 + + # Integer part + while x < 1 + resultp -= 1 + x *= 2 + end + while x >= 2 + result += 1 + x /= 2 + end + + # Fractional part + fp = 1.0 + while fp >= tol + fp /= 2 + x *= x + if x >= 2 + x /= 2 + result += fp + end + end + result + end + end diff --git a/elfwriter.c b/elfwriter.c new file mode 100644 index 0000000..8955b78 --- /dev/null +++ b/elfwriter.c @@ -0,0 +1,266 @@ +#include +#include +#include +#include +#include +#include +#include + +/* _exit(0) */ +/* uint8_t shell_code[] = { */ +/* 0xbb, 0, 0, 0, 0, /\* mov ebx, 0 *\/ */ +/* 0xb8, 1, 0, 0, 0, /\* mov eax, 1 *\/ */ +/* 0xcd, 0x80 /\* int 0x80 *\/ */ +/* }; */ + +/* uint32_t hash_words[] = { */ +/* 0x12345678, */ +/* 0xdeadc0de, */ +/* 0x1234abcd */ +/* }; */ + +#define header_size 0x100 +#define text_addr 0x8048000 + header_size +#define text_size 0x02be00 +#define data_addr text_addr + text_size +#define data_size 0x4e00 +#define bss_addr data_addr + data_size +size_t bss_size = 0; + +char string_table[] = { + /* Offset 0 */ '\0', + /* Offset 1 */ '.', 't', 'e', 'x', 't', '\0' , + /* Offset 7 */ '.', 'b', 's', 's', '\0', + /* Offset 12 */ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', '\0' +}; + + +/* Write a static 32-bit x86 ELF binary to filename. The file is + * clobbered without confirmation! + */ +int +elf_write(const char *filename, uint8_t *code, size_t code_size) +{ + int fd; + size_t shstrndx; + Elf *elf; + Elf_Scn *scn; + Elf_Data *data; + Elf32_Ehdr *ehdr; + Elf32_Phdr *phdr, *load; + Elf32_Shdr *shdr; + + if (elf_version(EV_CURRENT) == EV_NONE) { + printf("Failed to initialize ELF library!\n"); + return -1; + } + if ((fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, 0666)) < 0) { + printf("Can't open %s for writing.\n", filename); + perror("[elf_write]"); + return -2; + } + if ((elf = elf_begin(fd, ELF_C_WRITE, (Elf *)0)) == 0) { + printf("elf_begin failed!\n"); + return -3; + } + + + /************** + * ELF Header * + **************/ + + if ((ehdr = elf32_newehdr(elf)) == NULL) { + printf("elf32_newehdr failed!\n"); + return -4; + } + ehdr->e_ident[EI_DATA] = ELFDATA2LSB; /* 2's complement, little endian */ + ehdr->e_type = ET_EXEC; + ehdr->e_machine = EM_386; /* x86 */ + + /* Image starts at 0x8048000, x86 32-bit abi. We need a bit + * of room for headers and such. TODO figure out how much + * room is needed! + * + * Current entry point is .text section. + */ + ehdr->e_entry = text_addr; + + + /******************* + * Program Headers * + *******************/ + + if ((phdr = elf32_newphdr(elf, 2)) == NULL) { + printf("elf32_newphdr failed!\n"); + return -5; + } + load = phdr+1; + + + /***************** + * .text section * + *****************/ + + if ((scn = elf_newscn(elf)) == NULL) { + printf("elf_newscn failed!\n"); + return -6; + } + if ((data = elf_newdata(scn)) == NULL) { + printf("elf_newdata failed!\n"); + return -7; + } + data->d_align = 16; + data->d_buf = code; + data->d_off = 0LL; + data->d_type = ELF_T_BYTE; + data->d_size = code_size; + data->d_version = EV_CURRENT; + + if ((shdr = elf32_getshdr(scn)) == NULL) { + printf("elf32_getshdr failed!\n"); + return -8; + } + shdr->sh_name = 1; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + shdr->sh_addr = text_addr; + + + /**************** + * .bss section * + ****************/ + + if ((scn = elf_newscn(elf)) == NULL) { + printf("elf_newscn failed!\n"); + return -6; + } + if ((data = elf_newdata(scn)) == NULL) { + printf("elf_newdata failed!\n"); + return -7; + } + data->d_align = 4; + data->d_off = 0LL; + data->d_type = ELF_T_BYTE; + data->d_size = bss_size; + data->d_version = EV_CURRENT; + + if ((shdr = elf32_getshdr(scn)) == NULL) { + printf("elf32_getshdr failed!\n"); + return -8; + } + shdr->sh_name = 7; + shdr->sh_type = SHT_NOBITS; + shdr->sh_flags = SHF_WRITE | SHF_ALLOC; + shdr->sh_addr = bss_addr; + + + /******************************* + * section header string table * + *******************************/ + + if ((scn = elf_newscn(elf)) == NULL) { + printf("elf_newscn failed!\n"); + return -9; + } + if ((data = elf_newdata(scn)) == NULL) { + printf("elf_newdata failed!\n"); + return -10; + } + data->d_align = 1; + data->d_buf = string_table; + data->d_off = 0LL; + data->d_type = ELF_T_BYTE; + data->d_size = sizeof(string_table); + data->d_version = EV_CURRENT; + + if ((shdr = elf32_getshdr(scn)) == NULL) { + printf("elf32_getshdr failed!\n"); + return -11; + } + shdr->sh_name = 12; + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = SHF_STRINGS | SHF_ALLOC; + shdr->sh_entsize = 0; + + + /* int elf_setshstrndx(Elf *e, Elf32_Ehdr *eh, size_t shstrndx) */ + shstrndx = elf_ndxscn(scn); + if (shstrndx >= SHN_LORESERVE) { + if ((scn = elf_getscn(elf, 0)) == NULL) { + printf("elf_getscn failed!\n"); + return -12; + } + /* assert(scn->s_ndx == SHN_UNDEF); */ + /* scn->s_shdr.s_shdr32.sh_link = shstrndx; */ + elf_flagshdr(scn, ELF_C_SET, ELF_F_DIRTY); + shstrndx = SHN_XINDEX; + } + ehdr->e_shstrndx = shstrndx; + + if (elf_update(elf, ELF_C_NULL) < 0) { + printf("elf_update failed!\n"); + return -12; + } + + phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; + phdr->p_type = PT_PHDR; + phdr->p_offset = ehdr->e_phoff; + phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); + + load->p_vaddr = phdr->p_paddr = 0x8048000; + load->p_type = PT_LOAD; + load->p_offset = 0; + load->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); + load->p_flags = PF_R | PF_X; + load->p_align = 0x1000; + + elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY); + + if (elf_update(elf, ELF_C_WRITE) < 0) { + printf("elf_update failed!\n"); + return -13; + } + + elf_end(elf); + close(fd); + return 0; +} + +int +main(int argc, const char *argv[]) +{ + int result; + pid_t pid; + FILE *fd; + uint8_t *code = NULL; + size_t code_size = 0, chunk_size = 1024, bytes_read; + + if (argc < 4) { + printf("usage: %s \n", argv[0]); + printf(" Wraps the input file in an ELF binary.\n"); + return 1; + } + + bss_size = strtoul(argv[2], 0, 10); + + if ((fd = fopen(argv[1], "r")) < 0) { + printf("[error] can't open %s for reading.\n", argv[1]); + perror("[main]"); + return 2; + } + while (!feof(fd) && !ferror(fd)) { + code = realloc(code, code_size + chunk_size); + bytes_read = fread(code+code_size, 1, chunk_size, fd); + code_size += bytes_read; + } + fclose(fd); + + printf("Writing x86 ELF binary to %s...\n", argv[1]); + result = elf_write(argv[3], code, code_size); + if (result < 0) { + printf("[error] elf_write failed.\n"); + return 3; + } + + return 0; +} diff --git a/lea.asm b/lea.asm new file mode 100644 index 0000000..b2c9155 --- /dev/null +++ b/lea.asm @@ -0,0 +1,12 @@ +BITS 32 + +lea eax, [ebx+ecx*4] +lea ebx, [eax+ecx*4] +lea eax, [ecx+ebx*4] +lea eax, [ecx+ebx*8] +lea eax, [ecx+ebx] +lea eax, [0x1000+10*4] +lea eax, [eax] +lea eax, [ecx] +lea ecx, [eax] +lea eax, [0xdeadbeef] diff --git a/min.asm b/min.asm new file mode 100644 index 0000000..e84d3ac --- /dev/null +++ b/min.asm @@ -0,0 +1,4 @@ +BITS 32 +mov ebx,0 +mov eax,1 +int 0x80 diff --git a/mov.asm b/mov.asm new file mode 100644 index 0000000..85d9ebf --- /dev/null +++ b/mov.asm @@ -0,0 +1,89 @@ +BITS 32 + +;;; 00000000 b8 78 56 34 12 b9 78 56 34 12 ba 78 56 34 12 bb |.xV4..xV4..xV4..| +;;; 00000010 78 56 34 12 89 c0 89 c8 89 d0 89 d8 89 c1 89 c9 |xV4.............| +;;; 00000020 89 d1 89 d9 89 c2 89 ca 89 d2 89 da 89 c3 89 cb |................| +;;; 00000030 89 d3 89 db a1 ef be ad de 8b 0d ef be ad de 8b |................| +;;; 00000040 15 ef be ad de 8b 1d ef be ad de a3 ef be ad de |................| +;;; 00000050 89 0d ef be ad de 89 15 ef be ad de 89 1d ef be |................| +;;; 00000060 ad de 8b 00 8b 01 8b 02 8b 03 8b 08 8b 09 8b 0a |................| +;;; 00000070 8b 0b 8b 10 8b 11 8b 12 8b 13 8b 18 8b 19 8b 1a |................| +;;; 00000080 8b 1b 89 00 89 01 89 02 89 03 89 08 89 09 89 0a |................| +;;; 00000090 89 0b 89 10 89 11 89 12 89 13 89 18 89 19 89 1a |................| +;;; 000000a0 89 1b |..| +;;; 000000a2 + +mov eax, 0x12345678 ; b8 78 56 34 12 +mov ecx, 0x12345678 ; b9 78 56 34 12 +mov edx, 0x12345678 ; ba 78 56 34 12 +mov ebx, 0x12345678 ; bb 78 56 34 12 + +mov eax, eax ; 89 c0 +mov eax, ecx ; 89 c8 +mov eax, edx ; 89 d0 +mov eax, ebx ; 89 d8 + +mov ecx, eax ; 89 c1 +mov ecx, ecx ; 89 c9 +mov ecx, edx ; 89 d1 +mov ecx, ebx ; 89 d9 + +mov edx, eax ; 89 c2 +mov edx, ecx ; 89 ca +mov edx, edx ; 89 d2 +mov edx, ebx ; 89 da + +mov ebx, eax ; 89 c3 +mov ebx, ecx ; 89 cb +mov ebx, edx ; 89 d3 +mov ebx, ebx ; 89 db + +mov eax, dword [0xdeadbeef] ; a1 ef be ad de +mov ecx, dword [0xdeadbeef] ; 8b 0e ef be ad de +mov edx, dword [0xdeadbeef] ; 8b 16 ef be ad de +mov ebx, dword [0xdeadbeef] ; 8b 1e ef be ad de + +mov [0xdeadbeef], eax ; a3 ef be ad de +mov [0xdeadbeef], ecx ; 89 0e ef be ad de +mov [0xdeadbeef], edx ; 89 16 ef be ad de +mov [0xdeadbeef], ebx ; 89 1e ef be ad de + +mov eax, dword [eax] ; 8b 00 +mov eax, dword [ecx] ; 8b 01 +mov eax, dword [edx] ; 8b 02 +mov eax, dword [ebx] ; 8b 03 + +mov ecx, dword [eax] ; 8b 08 +mov ecx, dword [ecx] ; 8b 09 +mov ecx, dword [edx] ; 8b 0a +mov ecx, dword [ebx] ; 8b 0b + +mov edx, dword [eax] ; 8b 10 +mov edx, dword [ecx] ; 8b 11 +mov edx, dword [edx] ; 8b 12 +mov edx, dword [ebx] ; 8b 13 + +mov ebx, dword [eax] ; 8b 18 +mov ebx, dword [ecx] ; 8b 19 +mov ebx, dword [edx] ; 8b 1a +mov ebx, dword [ebx] ; 8b 1b + +mov [eax], eax ; 89 00 +mov [ecx], eax ; 89 01 +mov [edx], eax ; 89 02 +mov [ebx], eax ; 89 03 + +mov [eax], ecx ; 89 08 +mov [ecx], ecx ; 89 09 +mov [edx], ecx ; 89 0a +mov [ebx], ecx ; 89 0b + +mov [eax], edx ; 89 10 +mov [ecx], edx ; 89 11 +mov [edx], edx ; 89 12 +mov [ebx], edx ; 89 13 + +mov [eax], ebx ; 89 18 +mov [ecx], ebx ; 89 19 +mov [edx], ebx ; 89 1a +mov [ebx], ebx ; 89 1b diff --git a/opcode.rb b/opcode.rb new file mode 100644 index 0000000..772bd47 --- /dev/null +++ b/opcode.rb @@ -0,0 +1,25 @@ +class OpCode + Attrs = [:prefix, :op, :modrm, :sib, :extra] + attr_accessor *Attrs + + def initialize(attrs) + Attrs.each do |attr| + send("#{attr}=", attrs[attr]) + end + end + + def size + Attrs.inject(0) {|sum, attr| + iv = instance_variable_get("@#{attr}") + if iv.is_a?(Enumerable) + sum + iv.size + else + sum + 1 + end + } + end + + def binary + Attrs.map {|attr| send(attr)}.flatten.pack('c*') + end +end diff --git a/prologue.asm b/prologue.asm index db8840a..6f69371 100644 --- a/prologue.asm +++ b/prologue.asm @@ -1,3 +1,4 @@ +BITS 32 GLOBAL _start SECTION .text _start: diff --git a/template.asm b/template.asm index bd95f4b..0c851d1 100644 --- a/template.asm +++ b/template.asm @@ -1,3 +1,4 @@ +BITS 32 GLOBAL _start SECTION .data {data} diff --git a/test.code b/test.code index b6f83b3..2022291 100644 --- a/test.code +++ b/test.code @@ -39,7 +39,7 @@ u 1 e r - cc = cc * 2 + cc = c * 2 i 1 b e diff --git a/test.rb b/test.rb index 2c539f1..2ec4c4f 100644 --- a/test.rb +++ b/test.rb @@ -5,7 +5,7 @@ def error(msg) STDERR.puts(msg) end def parse(input) compiler = Compiler.new(input) - compiler.parse # tuple of [data, bss, code] + compiler.parse # tuple of [data, bss, code, binary] rescue ParseError => e error("[error] #{e.message}") @@ -29,10 +29,11 @@ def main(arg) # StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n") StringIO.new("abc=999\nabc-888\n") end - data, bss, code = *parse(input) + data, bss, code, binary = *parse(input) template = File.read("template.asm") asm = interpolate(template, :data => data, :bss => bss, :code => code) File.open("test.asm", "w") { |f| f.puts(asm) } + File.open("test.bin", "wb") { |f| f.write(binary) } end main(ARGV[0].to_s) From e20e1399bd07d6d8fbdbcd48fb64d5a7baaa5154 Mon Sep 17 00:00:00 2001 From: sjs Date: Tue, 19 May 2009 17:04:18 -0700 Subject: [PATCH 2/7] [NEW] some notes on x86 encoding. --- x86.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 x86.txt diff --git a/x86.txt b/x86.txt new file mode 100644 index 0000000..594ea9c --- /dev/null +++ b/x86.txt @@ -0,0 +1,11 @@ +mov (0x66) { + reg32, reg32 (0x89) { + op2 - src + + eax ecx edx ebx + op1 eax c0 c8 d0 d8 + dest ecx c1 c9 d1 d9 + edx c2 ca d2 da + ebx c3 cb d3 db + } +} From a93c8d2fa626d01bd14ca141637dc7a0d6380fae Mon Sep 17 00:00:00 2001 From: sjs Date: Tue, 19 May 2009 17:30:56 -0700 Subject: [PATCH 3/7] updated .gitignore --- .gitignore | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.gitignore b/.gitignore index c578301..b9da27a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,13 @@ test *.o test.asm +elfwriter +lea +min +min.bin +min_elf +miv +show_elf_magic +test.bin +test_elf.bin +test_elf From b7a4b408bb23210ac0a65b012ec1855b247464b3 Mon Sep 17 00:00:00 2001 From: sjs Date: Tue, 19 May 2009 17:31:31 -0700 Subject: [PATCH 4/7] [CHANGED] elfwriter now outputs 3 segments/phdrs: text, data, bss (with new sizes) --- elfwriter.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/elfwriter.c b/elfwriter.c index 8955b78..c4ec5e5 100644 --- a/elfwriter.c +++ b/elfwriter.c @@ -47,7 +47,7 @@ elf_write(const char *filename, uint8_t *code, size_t code_size) Elf_Scn *scn; Elf_Data *data; Elf32_Ehdr *ehdr; - Elf32_Phdr *phdr, *load; + Elf32_Phdr *phdr; Elf32_Shdr *shdr; if (elf_version(EV_CURRENT) == EV_NONE) { @@ -89,12 +89,12 @@ elf_write(const char *filename, uint8_t *code, size_t code_size) /******************* * Program Headers * *******************/ - - if ((phdr = elf32_newphdr(elf, 2)) == NULL) { + + /* 3 segments => 3 program headers (text, data, bss) */ + if ((phdr = elf32_newphdr(elf, 3)) == NULL) { printf("elf32_newphdr failed!\n"); return -5; } - load = phdr+1; /***************** @@ -202,17 +202,39 @@ elf_write(const char *filename, uint8_t *code, size_t code_size) return -12; } - phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; - phdr->p_type = PT_PHDR; - phdr->p_offset = ehdr->e_phoff; - phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); + /* phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; */ + /* phdr->p_type = PT_PHDR; */ + /* phdr->p_offset = ehdr->e_phoff; */ + /* phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); */ - load->p_vaddr = phdr->p_paddr = 0x8048000; - load->p_type = PT_LOAD; - load->p_offset = 0; - load->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); - load->p_flags = PF_R | PF_X; - load->p_align = 0x1000; + /* text segment */ + phdr->p_vaddr = text_addr; + phdr->p_type = PT_LOAD; + phdr->p_offset = header_size; + phdr->p_filesz = text_size; + phdr->p_memsz = text_size; + phdr->p_flags = PF_R | PF_X; + phdr->p_align = 0x1000; + + /* data segment */ + phdr++; + phdr->p_vaddr = data_addr; + phdr->p_type = PT_LOAD; + phdr->p_offset = header_size + text_size; + phdr->p_filesz = data_size; + phdr->p_memsz = data_size + 0x1024; /* XXX unsure why the abi specifies + 0x1024 */ + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = 0x1000; + + /* bss segment */ + phdr++; + phdr->p_vaddr = bss_addr; + phdr->p_type = PT_LOAD; + phdr->p_offset = header_size + text_size + data_size; + phdr->p_filesz = bss_size; + phdr->p_memsz = bss_size; + phdr->p_flags = PF_R | PF_W; + phdr->p_align = 0x1000; elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY); From d22e3b63061b9ee706056eebc3ebd7a36668241e Mon Sep 17 00:00:00 2001 From: sjs Date: Tue, 19 May 2009 18:09:28 -0700 Subject: [PATCH 5/7] [FIXED] compiler appends _exit code to binary output --- elfwriter.c | 2 +- min.code | 1 + min2.asm | 4 ++++ test.rb | 10 +++++++++- 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 min.code create mode 100644 min2.asm diff --git a/elfwriter.c b/elfwriter.c index c4ec5e5..95ce6f9 100644 --- a/elfwriter.c +++ b/elfwriter.c @@ -277,7 +277,7 @@ main(int argc, const char *argv[]) } fclose(fd); - printf("Writing x86 ELF binary to %s...\n", argv[1]); + printf("Writing x86 ELF binary to %s...\n", argv[3]); result = elf_write(argv[3], code, code_size); if (result < 0) { printf("[error] elf_write failed.\n"); diff --git a/min.code b/min.code new file mode 100644 index 0000000..e128181 --- /dev/null +++ b/min.code @@ -0,0 +1 @@ +a=0 diff --git a/min2.asm b/min2.asm new file mode 100644 index 0000000..8238ae5 --- /dev/null +++ b/min2.asm @@ -0,0 +1,4 @@ +BITS 32 +mov ebx,eax +mov eax,1 +int 0x80 diff --git a/test.rb b/test.rb index 2ec4c4f..a004c9a 100644 --- a/test.rb +++ b/test.rb @@ -1,6 +1,11 @@ require 'compiler' require 'stringio' +X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code) + 0xb8, 1, 0, 0, 0, # mov eax, 1 + 0xcd, 0x80 # int 0x80 + ].pack('c*') + def error(msg) STDERR.puts(msg) end def parse(input) @@ -33,7 +38,10 @@ def main(arg) template = File.read("template.asm") asm = interpolate(template, :data => data, :bss => bss, :code => code) File.open("test.asm", "w") { |f| f.puts(asm) } - File.open("test.bin", "wb") { |f| f.write(binary) } + File.open("test.bin", "wb") { |f| + f.write(binary) + f.write(X86_exit) + } end main(ARGV[0].to_s) From 9607df73d2deeb92f222dc99df69f27f348dac1c Mon Sep 17 00:00:00 2001 From: sjs Date: Tue, 19 May 2009 18:38:28 -0700 Subject: [PATCH 6/7] added min2 to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b9da27a..3cbea5b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ show_elf_magic test.bin test_elf.bin test_elf +min2 From 83accb5bba38146bbbf470348aa9a3c14fb802ba Mon Sep 17 00:00:00 2001 From: sjs Date: Sun, 24 May 2009 14:05:40 -0700 Subject: [PATCH 7/7] [NEW] x86_inc instruction with some binary encoding. --- compiler.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/compiler.rb b/compiler.rb index a714db5..95ee45a 100644 --- a/compiler.rb +++ b/compiler.rb @@ -671,6 +671,21 @@ class Compiler emit("idiv #{op}") end + def x86_inc(op) + emit("inc #{op}") + + asm do + if register?(op) + emit_byte(0x40 + regnum(op)) + elsif rm32?(op) + emit_byte(0xff) + emit_modrm( + else + raise "unsupported op #{op}, wanted r32 or r/m32" + end + end + end + def x86_push(reg) emit("push #{reg}") end