[BUNK] moving to macbook, broken commit.

This commit is contained in:
sjs 2009-05-24 20:27:28 -07:00
commit 76d4d2be3a
19 changed files with 1066 additions and 156 deletions

16
.gitignore vendored
View file

@ -14,4 +14,18 @@ test/test_repeat
test/test_do
test/test_for
test/test_break
test/test_print
test/test_print
*.o
test.asm
elfwriter
lea
min
min.bin
min_elf
mov
show_elf_magic
test.bin
test_elf.bin
test_elf
min2

View file

@ -1,5 +1,17 @@
test:
cd test && make all
elfwriter: elfwriter.c
gcc -o elfwriter elfwriter.c -lelf
.PHONY: test
test_elf: elfwriter build
./elfwriter test.bin 4 test_elf.o
ld -o test_elf test_elf.o
./test_elf
clean:
@rm -f elfwriter
@rm -f test_elf.o
@rm -f test_elf
.PHONY: test

124
asm.rb
View file

@ -1,124 +0,0 @@
# A very basic x86 assembler library for Ruby. Generally the
# instructions implemented are the minimum needed by the compiler this
# is written for. x86 is just too big.
#
# sjs
# may 2009
module Assembler
  # Define a method named `emit` and include this module.  Calling the
  # assembler methods will output nasm-friendly x86 asm code, line by
  # line.  Every method hands exactly one instruction string to `emit`
  # and returns whatever `emit` returns.
  module X86
    #####################
    # assembler methods #
    #####################

    # mov dest, src.  Integer sources are written as hex literals; the
    # sign is kept in front of the `0x` prefix so negative immediates
    # stay valid nasm (-5 becomes -0x5, not 0x-5).
    def x86_mov(dest, src)
      if src.is_a?(Integer)
        sign = src < 0 ? '-' : ''
        src = "#{sign}0x#{src.abs.to_s(16)}"
      end
      emit("mov #{dest}, #{src}")
    end

    def x86_movzx(dest, src)
      emit("movzx #{dest}, #{src}")
    end

    def x86_add(dest, src)
      emit("add #{dest}, #{src}")
    end

    def x86_sub(dest, src)
      emit("sub #{dest}, #{src}")
    end

    def x86_imul(op)
      emit("imul #{op}")
    end

    def x86_idiv(op)
      emit("idiv #{op}")
    end

    def x86_inc(op)
      emit("inc #{op}")
    end

    def x86_dec(op)
      emit("dec #{op}")
    end

    def x86_push(reg)
      emit("push #{reg}")
    end

    def x86_pop(reg)
      emit("pop #{reg}")
    end

    def x86_call(label)
      emit("call #{label}")
    end

    def x86_neg(reg)
      emit("neg #{reg}")
    end

    def x86_not(rm32)
      emit("not #{rm32}")
    end

    def x86_xchg(op1, op2)
      emit("xchg #{op1}, #{op2}")
    end

    def x86_and(op1, op2)
      emit("and #{op1}, #{op2}")
    end

    def x86_or(op1, op2)
      emit("or #{op1}, #{op2}")
    end

    def x86_xor(op1, op2)
      emit("xor #{op1}, #{op2}")
    end

    def x86_jz(label)
      emit("jz #{label}")
    end

    def x86_jnz(label)
      emit("jnz #{label}")
    end

    def x86_jmp(label)
      emit("jmp #{label}")
    end

    def x86_jl(label)
      emit("jl #{label}")
    end

    def x86_cmp(a, b)
      emit("cmp #{a}, #{b}")
    end

    def x86_lea(a, b)
      emit("lea #{a}, #{b}")
    end

    def x86_shr(a, b)
      emit("shr #{a}, #{b}")
    end

    def x86_loop(label)
      emit("loop #{label}")
    end

    # int num, with the interrupt number written in hex.
    def x86_int(num)
      emit("int 0x#{num.to_s(16)}")
    end
  end
end

9
asm/asm.rb Normal file
View file

@ -0,0 +1,9 @@
# Assembler container module. Sub modules are Text and Binary, which
# both export the same interface for generating either assembly or
# machine code for x86.
#
# sjs
# may 2009
module Assembler
end

320
asm/binary.rb Normal file
View file

@ -0,0 +1,320 @@
# A very basic x86 assembler library for Ruby. Generally the
# instructions implemented are the minimum needed by the compiler this
# is written for. x86 is just too big.
#
# sjs
# may 2009
module Assembler
# Define a method named `emit_byte` and one named `bytes_written` and
# include this module. Calling the assembler methods will output
# x86 machine code ... hopefully. So far it's incomplete and
# binaries just segfault.
class Binary
# This structure allows for x86 registers of all sizes. The
# number of the register is the index of the array in which it was
# found.
Registers = [ [:eax, :ax, :al], # 0
[:ecx, :cx, :cl], # 1
[:edx, :dx, :dl], # 2
[:ebx, :bx, :bl], # 3
[:esp, :sp, :ah], # 4
[:ebp, :bp, :ch], # 5
[:esi, :si, :dh], # 6
[:edi, :di, :bh] # 7
]
# Regex to match any x86 register name.
RegisterRegex = '(e?[acdb]x|e?[sb]p|e?[sd]i|[acdb][hl])'
# Match a literal number in binary, octal, decimal, or hex
NumberRegex = '(0[xXbB]?)?[0-9a-fA-F]+'
# Match a variable name.
NameRegex = '[a-zA-Z][a-zA-Z0-9]*'
# 0.size gives the real answer, we only do x86 though
MachineBytes = 4
MachineBits = MachineBytes * 8
MinSigned = -1 * 2**(MachineBits-1)
MaxSigned = 2**(MachineBits-1) - 1
MinUnsigned = 0
MaxUnsigned = 2**MachineBits - 1
SignedRange = MinSigned..MaxSigned
# Run the given block and report how many bytes it emitted, measured
# as the change in `bytes_written` across the block.
def asm
  before = bytes_written
  yield
  after = bytes_written
  after - before
end
# Emit num as four bytes, in the order produced by num_to_quad.
def emit_dword(num)
  quad = num_to_quad(num)
  quad.each do |octet|
    emit_byte(octet)
  end
end
# Emit a ModR/M byte, followed by a SIB byte and/or a 32-bit
# displacement when the addressing mode needs them.
#
# ModR/M bit layout:
#   0-2: r/m
#   3-5: reg/opcode
#   6-7: mod
#
# dest and src are tuples of the form [type, value] where type is
# any of :reg, :rm32, :imm32. Max _one_ :rm32 arg per call.
#
# override[:op], when given, is placed in the reg/opcode field
# instead of a register number (an opcode extension such as /0).
#
# Raises RuntimeError for operand combinations that cannot be encoded.
def emit_modrm(dest, src, override={})
  # Decide which operand fills the reg field and which the r/m field.
  if dest[0] == :reg
    reg_operand, rm_operand = dest, src
  elsif src[0] == :reg
    reg_operand, rm_operand = src, dest
  elsif override[:op]
    # No register operand at all (e.g. r/m32, imm32): reg holds the
    # opcode extension.
    reg_operand, rm_operand = nil, dest
  else
    raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
  end
  reg = override[:op] || regnum(reg_operand[1])

  sib = disp = nil
  case rm_operand[0]
  when :reg
    # mod == 11 (register content)
    mod = 3
    rm = regnum(rm_operand[1])
  when :rm32
    # mod == 00 (pointer)
    mod = 0
    parts = decode_addr(rm_operand[1])
    case parts[0]
    when :reg
      # direct pointer e.g. [eax]
      rm = regnum(parts[1])
      # With mod == 00, r/m 100 means "SIB follows" and r/m 101 means
      # "disp32 follows", so [esp] and [ebp] need a different encoding.
      if rm == 4 || rm == 5
        raise "unsupported mod r/m byte! [#{parts[1]}] needs a SIB byte or displacement"
      end
    when :sib
      sib = parts[1..-1]
      rm = 4
    when :disp
      disp = parts[1]
      rm = 5
    else
      raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
    end
  else
    raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
  end

  # The three fields occupy disjoint bits, so they are OR-ed together.
  emit_byte((mod << 6) | (reg << 3) | rm)
  emit_sib(sib) if sib
  emit_dword(disp) if disp
end
# Emit a SIB (scale-index-base) byte.
#   0-2: base
#   3-5: index
#   6-7: scale (log2 of the multiplier)
#
# sib is a triple [scale, index, base].  scale must be 1, 2, 4 or 8;
# index and base are register names (or already-resolved register
# numbers).  Raises RuntimeError on an unsupported scale or index.
def emit_sib(sib)
  scale, index, base = *sib
  scale_bits = { 1 => 0, 2 => 1, 4 => 2, 8 => 3 }[scale]
  unless scale_bits
    raise "unsupported SIB scale: #{scale}, should be [1, 2, 4, 8]"
  end
  index = regnum(index) unless index.is_a?(Integer)
  base = regnum(base) unless base.is_a?(Integer)
  # Index field 100 means "no index", so esp can never be an index.
  raise "unsupported SIB index: esp cannot be scaled" if index == 4
  emit_byte((scale_bits << 6) | (index << 3) | base)
end
# Look op up in the Registers table.  Returns the register number
# (the index of the row that lists op), or nil when op is not a
# register name.
def register?(op)
  Registers.index { |names| names.include?(op) }
end
# Return the register number for op.  Unlike register?, this raises
# RuntimeError when op is not a register name.
def regnum(op)
  num = register?(op)
  raise "not a register: #{op.inspect}" unless num
  num
end
# Truthy when op is an immediate value: any Numeric, or a String that
# consists entirely of a number literal as described by NumberRegex.
def immediate?(op)
  return true if op.is_a?(Numeric)
  return false unless op.is_a?(String)
  op.match(/^#{NumberRegex}$/)
end
# Truthy when op can serve as an r/m32 memory operand: either a plain
# offset (see offset?) or a bracketed base register with an optional
# "+index" and optional "*scale".
def rm32?(op)
  as_offset = offset?(op)
  return as_offset if as_offset
  return false unless op.respond_to?(:match)
  op.match(/^
    \[
    #{RegisterRegex} # base register
    (\+#{RegisterRegex} # optional index register
    (\*[1248])? # optional scale
    )?
    \]
  $/x)
end
# Encode a 32-bit mov.
#
# 6 versions of the mov instruction are supported:
# 1. mov reg32, immediate32 (0xb8+destreg, imm32)
# 2. mov reg32, r/m32 (0x8b, mod r/m, maybe sib)
# 2a. mov eax, memoffset32 (0xa1, disp32)
# 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib)
# 3a. mov memoffset32, eax (0xa3, disp32)
# 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32)
#
# A leading 'dword ' size specifier on either operand is ignored.
# Returns the number of bytes emitted.  Operand combinations that are
# not supported are reported on stdout and emit nothing (the raise is
# disabled while this is under development).
def x86_mov(dest, src)
  dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
  src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '
  asm do
    # version 1: mov r32, imm32
    if register?(dest) && immediate?(src)
      emit_byte(0xb8 + regnum(dest)) # dest encoded in instruction
      emit_dword(parse_num(src))
    # version 2: mov r32, r/m32
    elsif register?(dest) && rm32?(src)
      # version 2a: mov eax, moffs32
      if dest == :eax && offset?(src)
        emit_byte(0xa1)
        num = decode_addr(src)[1]
        emit_dword(num)
      else
        emit_byte(0x8b)
        # The empty override is passed explicitly so the call matches
        # emit_modrm's three-parameter signature.
        emit_modrm([:reg, dest], [:rm32, src], {})
      end
    # version 3: mov r/m32, r32
    elsif rm32?(dest) && register?(src)
      # version 3a: mov moffs32, eax
      if offset?(dest) && src == :eax
        emit_byte(0xa3)
        num = decode_addr(dest)[1]
        emit_dword(num)
      else
        emit_byte(0x89)
        emit_modrm([:rm32, dest], [:reg, src], {})
      end
    # version 4: mov r/m32, imm32
    elsif rm32?(dest) && immediate?(src)
      emit_byte(0xc7)
      emit_modrm([:rm32, dest], [:imm32, src], :op => 0)
      # The immediate follows the mod r/m (and sib/disp) bytes.
      emit_dword(parse_num(src))
    else
      puts "rm32?(dest): #{rm32?(dest)}\t\trm32?(src): #{rm32?(src)}"
      puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}"
      puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}"
      puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}"
      #raise "unsupported mov format: mov #{dest}, #{src}"
      puts "!!! unsupported mov format: mov #{dest}, #{src}"
    end
  end # asm do
end
# TODO: binary encoding of add is not implemented; emits nothing.
def x86_add(dest, src)
end
# TODO: binary encoding of sub is not implemented; emits nothing.
def x86_sub(dest, src)
end
# TODO: binary encoding of imul is not implemented; emits nothing.
def x86_imul(op)
end
# TODO: binary encoding of idiv is not implemented; emits nothing.
def x86_idiv(op)
end
# Encode inc for a 32-bit register (0x40+reg) or an r/m32 memory
# operand (0xff /0).  Returns the number of bytes emitted; raises
# RuntimeError for any other operand.
def x86_inc(op)
  asm do
    if register?(op)
      emit_byte(0x40 + regnum(op))
    elsif rm32?(op)
      emit_byte(0xff)
      # No register operand: the reg field carries the opcode
      # extension /0, supplied through the :op override.
      emit_modrm([:reg, nil], [:rm32, op], :op => 0)
    else
      raise "unsupported op #{op}, wanted r32 or r/m32"
    end
  end
end
# TODO: binary encoding of push is not implemented; emits nothing.
def x86_push(reg)
end
# TODO: binary encoding of cmp is not implemented; emits nothing.
def x86_cmp(a, b)
end
# Truthy when addr is a bracketed variable name or number literal,
# i.e. a plain memory offset such as "[foo]" or "[0xdeadbeef]".
# Register names also look like variable names, so "[eax]" and
# friends are excluded explicitly: they are register-indirect
# operands, not offsets.  Returns false for anything that cannot be
# pattern-matched.
def offset?(addr)
  return false unless addr.respond_to?(:match)
  return nil if addr.match(/^\[#{RegisterRegex}\]$/)
  addr.match(/^\[(#{NameRegex}|#{NumberRegex})\]$/)
end
# Decode a bracketed address string into a tagged tuple:
#   [:reg, name]                 register-indirect, e.g. "[eax]"
#   [:disp, address]             variable or literal address
#   [:sib, scale, index, base]   base+index or base+index*scale
#
# Registers are recognised before variable names, because every
# register name also matches NameRegex.  Variables are looked up in
# @vars and placed relative to @bss_offset.  Raises RuntimeError for
# an undefined variable.
def decode_addr(addr)
  addr = addr[1..-2] # strip brackets
  if addr.match(/^#{RegisterRegex}$/)
    [:reg, addr.to_sym]
  elsif matches = addr.match(/^#{NameRegex}$/)
    unless loc = @vars[matches[0]]
      raise "undefined variable #{matches[0]}"
    end
    [:disp, @bss_offset + loc]
  elsif matches = addr.match(/^#{NumberRegex}$/)
    [:disp, parse_num(matches[0])]
  elsif addr.index('*')
    bi, scale = *addr.split('*')
    base, index = *bi.split('+')
    [:sib, scale.to_i, index.to_sym, base.to_sym]
  elsif addr.index('+')
    base, index = *addr.split('+')
    [:sib, 1, index.to_sym, base.to_sym]
  else
    [:reg, addr.to_sym]
  end
end
# Parse a number from a string.  The result is typically handed to
# emit_dword.
#
# Non-strings are assumed to be numbers already and are returned
# unchanged.  A leading "0x" selects hex, "0b" binary, any other
# leading "0" octal; everything else is decimal.  Matching is
# case-insensitive and the caller's string is left untouched.
def parse_num(str)
  # If it's not a string it's a number, just return it.
  return str unless str.is_a?(String)
  digits = str.downcase
  base = 10 # default to base 10
  if digits[0, 1] == '0'
    base = case digits[1, 1]
           when 'x' then 16
           when 'b' then 2
           else 8
           end
  end
  # String#to_i skips a "0x"/"0b" prefix that matches the base.
  digits.to_i(base)
end
# Split a number into its four low-order bytes, least significant
# byte first (little endian).  Bits above the low 32 are discarded.
def num_to_quad(num)
  (0...4).map { |i| (num >> (8 * i)) & 0xff }
end
# Base-2 logarithm of x, returned as a Float.
#
# The integer part is found by repeated halving/doubling until x lies
# in [1, 2); the fractional part is then built one binary digit at a
# time by squaring, until the digit weight drops below tol.
#
# Raises ArgumentError unless x is positive.
def log2(x, tol=1e-13)
  raise ArgumentError, "log2 is undefined for #{x}" unless x > 0
  # Work in floating point so Integer arguments are not truncated by
  # integer division.
  x = x.to_f
  result = 0.0
  # Integer part
  while x < 1
    result -= 1
    x *= 2
  end
  while x >= 2
    result += 1
    x /= 2
  end
  # Fractional part
  fp = 1.0
  while fp >= tol
    fp /= 2
    x *= x
    if x >= 2
      x /= 2
      result += fp
    end
  end
  result
end
end # module Binary
end # module Assembler

127
asm/text.rb Normal file
View file

@ -0,0 +1,127 @@
# A subset of x86 assembly.
#
# sjs
# may 2009
module Assembler
  # Define a method named `emit` and include this module.  Calling
  # the assembler methods will output nasm-friendly x86 asm code,
  # line by line.  This is dead easy and we can trust nasm to
  # compile correct machine code, which is tricky.
  #
  # Every method hands exactly one instruction string to `emit` and
  # returns whatever `emit` returns.
  module Text
    # Refuse to be mixed into a class that has no `emit` instance
    # method at the time of the include.
    def self.included(other)
      unless other.method_defined?(:emit)
        raise "#{self.name} requires the including class define the emit method"
      end
    end

    # mov dest, src.  Integer sources are written as hex literals; the
    # sign is kept in front of the `0x` prefix so negative immediates
    # stay valid nasm (-5 becomes -0x5, not 0x-5).
    def x86_mov(dest, src)
      if src.is_a?(Integer)
        sign = src < 0 ? '-' : ''
        src = "#{sign}0x#{src.abs.to_s(16)}"
      end
      emit("mov #{dest}, #{src}")
    end

    def x86_movzx(dest, src)
      emit("movzx #{dest}, #{src}")
    end

    def x86_add(dest, src)
      emit("add #{dest}, #{src}")
    end

    def x86_sub(dest, src)
      emit("sub #{dest}, #{src}")
    end

    def x86_imul(op)
      emit("imul #{op}")
    end

    def x86_idiv(op)
      emit("idiv #{op}")
    end

    def x86_inc(op)
      emit("inc #{op}")
    end

    def x86_dec(op)
      emit("dec #{op}")
    end

    def x86_push(reg)
      emit("push #{reg}")
    end

    def x86_pop(reg)
      emit("pop #{reg}")
    end

    def x86_call(label)
      emit("call #{label}")
    end

    def x86_neg(reg)
      emit("neg #{reg}")
    end

    def x86_not(rm32)
      emit("not #{rm32}")
    end

    def x86_xchg(op1, op2)
      emit("xchg #{op1}, #{op2}")
    end

    def x86_and(op1, op2)
      emit("and #{op1}, #{op2}")
    end

    def x86_or(op1, op2)
      emit("or #{op1}, #{op2}")
    end

    def x86_xor(op1, op2)
      emit("xor #{op1}, #{op2}")
    end

    def x86_jz(label)
      emit("jz #{label}")
    end

    def x86_jnz(label)
      emit("jnz #{label}")
    end

    def x86_jmp(label)
      emit("jmp #{label}")
    end

    def x86_jl(label)
      emit("jl #{label}")
    end

    def x86_cmp(a, b)
      emit("cmp #{a}, #{b}")
    end

    def x86_lea(a, b)
      emit("lea #{a}, #{b}")
    end

    def x86_shr(a, b)
      emit("shr #{a}, #{b}")
    end

    def x86_loop(label)
      emit("loop #{label}")
    end

    # int num, with the interrupt number written in hex.
    def x86_int(num)
      emit("int 0x#{num.to_s(16)}")
    end
  end
end

View file

@ -4,6 +4,12 @@ ROOT = __FILE__.sub(/\/build\.rb$/, '') unless defined? ROOT
require 'compiler'
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
0xb8, 1, 0, 0, 0, # mov eax, 1
0xcd, 0x80 # int 0x80
].pack('c*')
def main
filename = ARGV[0].to_s
raise "can't read #{filename}" unless File.readable?(filename)
@ -25,18 +31,32 @@ def interpolate(templatefile, data)
end
end
# input: filename
# output: filename
def compile(filename)
data, bss, code = nil
# filename: input filename
# format: output format, nasm or binary
# returns: output filename
def compile(filename, format='asm')
# compile to asm or binary
output = nil
File.open(filename, 'r') do |input|
compiler = Compiler.new(input)
data, bss, code = compiler.compile
compiler = Compiler.new(input, format)
output = compiler.compile
end
if format == 'asm'
mode = 'w'
data, bss, code = *output
output = interpolate("#{ROOT}/template.asm",
:data => data, :bss => bss, :code => code)
else
mode = 'wb'
output += X86_exit
end
outfile = "#{base(filename)}.#{format}"
File.open(outfile, mode) do |out|
if format == 'asm'
out.puts(output)
end
end
asm = interpolate("#{ROOT}/template.asm",
:data => data, :bss => bss, :code => code)
outfile = "#{base(filename)}.asm"
File.open(outfile, 'w') { |out| out.puts(asm) }
return outfile
rescue ParseError => e
@ -69,8 +89,12 @@ def link(filename)
return f
end
def build(filename)
link( asm( compile(filename) ) )
def build(filename, format='asm')
if format == 'asm'
link( asm( compile(filename) ) )
else # binary
link( compile(filename, format='bin') )
end
end
def run(filename)

View file

@ -13,6 +13,7 @@
# require 'unroller'
require 'asm'
require 'opcode'
class ParseError < StandardError
attr_reader :caller, :context
@ -23,37 +24,58 @@ class ParseError < StandardError
end
class Compiler
# This module uses our `emit` method to output x86 code for nasm.
include Assembler::X86
attr_reader :data, :bss, :code
# This module uses our `emit_byte` method to output x86 machine code
# directly using the assembler library.
# include Assembler::Binary
Keywords = %w[
if else end while until repeat for to do break
print
]
def initialize(input=STDIN)
@look = '' # lookahead char
@token = nil # type of last read token
@value = nil # value of last read token
@input = input # stream to read from
@data = '' # data section
@bss = '' # bss section
@code = '' # code section
@vars = {} # defined variables
@num_labels = 0 # used to generate unique labels
@num_labels_with_suffix = Hash.new(0)
attr_reader :data, :bss, :code
def initialize(input, asm=Assembler::Text.new)
# XXX for development only!
@indent = 0 # for pretty printing
@look = '' # Next lookahead char.
@token = nil # Type of last read token.
@value = nil # Value of last read token.
@input = input # Stream to read from.
@data = '' # Data section.
@bss = '' # BSS section.
@code = '' # Code section.
@binary = [] # Byte array of machine code.
@vars = {} # Symbol table, maps names to locations in BSS.
@num_labels = 0 # Used to generate unique labels.
@num_labels_with_suffix = Hash.new(0)
@header_size = 0x100 # ELF, Linux, x86
@text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86).
@text_size = 0x02be00 # Size of text section.
@data_offset = @text_offset + @text_size # Offset of data section.
@data_size = 0x4e00 # Size of data section.
@bss_offset = @data_offset + @data_size # Offset of bss section.
@bss_size = 0 # Size of bss section.
# Labels for the assembler. Maps names to locations.
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
@asm = asm
# seed the lexer
get_char
end
def asm
@asm
end
def compile
block
expected(:'end of file') unless eof?
[@data, @bss, @code]
asm.output
end
# Scan the input stream for the next token.
@ -790,7 +812,8 @@ class Compiler
def defvar(name, dwords=1)
unless var?(name)
@bss << "#{name}: resd #{dwords}\n"
@vars[name] = name
@vars[name] = @bss_size
@bss_size += dwords
else
STDERR.puts "[warning] attempted to redefine #{name}"
end
@ -804,14 +827,30 @@ class Compiler
@vars[name]
end
# Emit a line of code wrapped between a tab and a newline.
# Emit a line of code wrapped between a tab and a newline. Required
# by Assembler::Text.
def emit(code, options={})
tab = options.has_key?(:tab) ? options[:tab] : "\t"
@code << "#{tab}#{code}\n"
end
# emit_byte and bytes_written are required by Assembler::Binary.
def emit_byte(byte)
@binary << byte
end
def bytes_written
@binary.size
end
def emit_label(name=unique_label)
emit("#{name}:", :tab => nil)
@labels[name] = @binary.length
end
def resolve_label(label)
@labels[label]
end
# Generate a unique label.
@ -833,6 +872,12 @@ class Compiler
' ' * (real_indent * 4)
end
# Pack the array into a byte string.
def binary
@binary.pack('c*')
end
def pushing(reg)
x86_push(reg)
yield

288
elfwriter.c Normal file
View file

@ -0,0 +1,288 @@
#include <libelf.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* _exit(0) */
/* uint8_t shell_code[] = { */
/* 0xbb, 0, 0, 0, 0, /\* mov ebx, 0 *\/ */
/* 0xb8, 1, 0, 0, 0, /\* mov eax, 1 *\/ */
/* 0xcd, 0x80 /\* int 0x80 *\/ */
/* }; */
/* uint32_t hash_words[] = { */
/* 0x12345678, */
/* 0xdeadc0de, */
/* 0x1234abcd */
/* }; */
/* Fixed memory layout of the generated image.  Every expression macro
 * is parenthesized so it expands safely inside larger expressions.
 */
#define header_size 0x100
#define text_addr (0x8048000 + header_size)
#define text_size 0x02be00
#define data_addr (text_addr + text_size)
#define data_size 0x4e00
#define bss_addr (data_addr + data_size)
size_t bss_size = 0;
char string_table[] = {
/* Offset 0 */ '\0',
/* Offset 1 */ '.', 't', 'e', 'x', 't', '\0' ,
/* Offset 7 */ '.', 'b', 's', 's', '\0',
/* Offset 12 */ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', '\0'
};
/* Write a static 32-bit x86 ELF binary to filename. The file is
 * clobbered without confirmation!
 *
 * filename:  path of the ELF file to create (truncated if it exists).
 * code:      machine code placed in the .text section.
 * code_size: number of bytes in code.
 *
 * The size of .bss comes from the global bss_size.
 *
 * Returns 0 on success and a negative number identifying the failed
 * step otherwise (a few codes are shared between steps).
 *
 * NOTE(review): the error returns leave fd open, and elf un-ended
 * once it has been created.
 */
int
elf_write(const char *filename, uint8_t *code, size_t code_size)
{
int fd;
size_t shstrndx;
Elf *elf;
Elf_Scn *scn;
Elf_Data *data;
Elf32_Ehdr *ehdr;
Elf32_Phdr *phdr;
Elf32_Shdr *shdr;
if (elf_version(EV_CURRENT) == EV_NONE) {
printf("Failed to initialize ELF library!\n");
return -1;
}
if ((fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, 0666)) < 0) {
printf("Can't open %s for writing.\n", filename);
perror("[elf_write]");
return -2;
}
if ((elf = elf_begin(fd, ELF_C_WRITE, (Elf *)0)) == 0) {
printf("elf_begin failed!\n");
return -3;
}
/**************
* ELF Header *
**************/
if ((ehdr = elf32_newehdr(elf)) == NULL) {
printf("elf32_newehdr failed!\n");
return -4;
}
ehdr->e_ident[EI_DATA] = ELFDATA2LSB; /* 2's complement, little endian */
ehdr->e_type = ET_EXEC;
ehdr->e_machine = EM_386; /* x86 */
/* Image starts at 0x8048000, x86 32-bit abi. We need a bit
* of room for headers and such. TODO figure out how much
* room is needed!
*
* Current entry point is .text section.
*/
ehdr->e_entry = text_addr;
/*******************
* Program Headers *
*******************/
/* 3 segments => 3 program headers (text, data, bss) */
if ((phdr = elf32_newphdr(elf, 3)) == NULL) {
printf("elf32_newphdr failed!\n");
return -5;
}
/*****************
* .text section *
*****************/
if ((scn = elf_newscn(elf)) == NULL) {
printf("elf_newscn failed!\n");
return -6;
}
if ((data = elf_newdata(scn)) == NULL) {
printf("elf_newdata failed!\n");
return -7;
}
data->d_align = 16;
data->d_buf = code;
data->d_off = 0LL;
data->d_type = ELF_T_BYTE;
data->d_size = code_size;
data->d_version = EV_CURRENT;
if ((shdr = elf32_getshdr(scn)) == NULL) {
printf("elf32_getshdr failed!\n");
return -8;
}
shdr->sh_name = 1; /* offset of ".text" in string_table */
shdr->sh_type = SHT_PROGBITS;
shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
shdr->sh_addr = text_addr;
/****************
* .bss section *
****************/
if ((scn = elf_newscn(elf)) == NULL) {
printf("elf_newscn failed!\n");
return -6;
}
if ((data = elf_newdata(scn)) == NULL) {
printf("elf_newdata failed!\n");
return -7;
}
/* No d_buf is set here: the section is SHT_NOBITS, size only. */
data->d_align = 4;
data->d_off = 0LL;
data->d_type = ELF_T_BYTE;
data->d_size = bss_size;
data->d_version = EV_CURRENT;
if ((shdr = elf32_getshdr(scn)) == NULL) {
printf("elf32_getshdr failed!\n");
return -8;
}
shdr->sh_name = 7; /* offset of ".bss" in string_table */
shdr->sh_type = SHT_NOBITS;
shdr->sh_flags = SHF_WRITE | SHF_ALLOC;
shdr->sh_addr = bss_addr;
/*******************************
* section header string table *
*******************************/
if ((scn = elf_newscn(elf)) == NULL) {
printf("elf_newscn failed!\n");
return -9;
}
if ((data = elf_newdata(scn)) == NULL) {
printf("elf_newdata failed!\n");
return -10;
}
data->d_align = 1;
data->d_buf = string_table;
data->d_off = 0LL;
data->d_type = ELF_T_BYTE;
data->d_size = sizeof(string_table);
data->d_version = EV_CURRENT;
if ((shdr = elf32_getshdr(scn)) == NULL) {
printf("elf32_getshdr failed!\n");
return -11;
}
shdr->sh_name = 12; /* offset of ".shstrtab" in string_table */
shdr->sh_type = SHT_STRTAB;
shdr->sh_flags = SHF_STRINGS | SHF_ALLOC;
shdr->sh_entsize = 0;
/* int elf_setshstrndx(Elf *e, Elf32_Ehdr *eh, size_t shstrndx) */
/* Record which section holds the section names; indices at or above
* SHN_LORESERVE are replaced by SHN_XINDEX in the header.
*/
shstrndx = elf_ndxscn(scn);
if (shstrndx >= SHN_LORESERVE) {
if ((scn = elf_getscn(elf, 0)) == NULL) {
printf("elf_getscn failed!\n");
return -12;
}
/* assert(scn->s_ndx == SHN_UNDEF); */
/* scn->s_shdr.s_shdr32.sh_link = shstrndx; */
elf_flagshdr(scn, ELF_C_SET, ELF_F_DIRTY);
shstrndx = SHN_XINDEX;
}
ehdr->e_shstrndx = shstrndx;
/* First elf_update pass uses ELF_C_NULL; the file itself is written
* by the ELF_C_WRITE pass after the program headers are filled in.
*/
if (elf_update(elf, ELF_C_NULL) < 0) {
printf("elf_update failed!\n");
return -12;
}
/* phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff; */
/* phdr->p_type = PT_PHDR; */
/* phdr->p_offset = ehdr->e_phoff; */
/* phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT); */
/* text segment */
phdr->p_vaddr = text_addr;
phdr->p_type = PT_LOAD;
phdr->p_offset = header_size;
phdr->p_filesz = text_size;
phdr->p_memsz = text_size;
phdr->p_flags = PF_R | PF_X;
phdr->p_align = 0x1000;
/* data segment */
phdr++;
phdr->p_vaddr = data_addr;
phdr->p_type = PT_LOAD;
phdr->p_offset = header_size + text_size;
phdr->p_filesz = data_size;
phdr->p_memsz = data_size + 0x1024; /* XXX unsure why the abi specifies + 0x1024 */
phdr->p_flags = PF_R | PF_W | PF_X;
phdr->p_align = 0x1000;
/* bss segment */
phdr++;
phdr->p_vaddr = bss_addr;
phdr->p_type = PT_LOAD;
phdr->p_offset = header_size + text_size + data_size;
phdr->p_filesz = bss_size;
phdr->p_memsz = bss_size;
phdr->p_flags = PF_R | PF_W;
phdr->p_align = 0x1000;
/* The program headers were edited by hand, so mark them dirty before
* the final write.
*/
elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY);
if (elf_update(elf, ELF_C_WRITE) < 0) {
printf("elf_update failed!\n");
return -13;
}
elf_end(elf);
close(fd);
return 0;
}
/* Command-line entry point: elfwriter <input> <bss_size> <output>.
 *
 * Reads the whole input file into memory, stores bss_size in the
 * global of the same name and wraps the code in an ELF binary via
 * elf_write().
 *
 * Exit status: 0 on success, 1 on bad usage, 2 when the input cannot
 * be opened, 3 when elf_write() fails, 4 when reading the input or
 * allocating the buffer fails.
 */
int
main(int argc, const char *argv[])
{
    int result;
    FILE *input;
    uint8_t *code = NULL, *grown;
    size_t code_size = 0, chunk_size = 1024, bytes_read;

    if (argc < 4) {
        printf("usage: %s <input> <bss_size> <output>\n", argv[0]);
        printf(" Wraps the input file in an ELF binary.\n");
        return 1;
    }
    bss_size = strtoul(argv[2], 0, 10);

    /* fopen reports failure with NULL, not a negative value.  The
     * input is raw machine code, so open it in binary mode.
     */
    if ((input = fopen(argv[1], "rb")) == NULL) {
        printf("[error] can't open %s for reading.\n", argv[1]);
        perror("[main]");
        return 2;
    }

    /* Grow the buffer one chunk at a time until EOF or a read error. */
    while (!feof(input) && !ferror(input)) {
        grown = realloc(code, code_size + chunk_size);
        if (grown == NULL) {
            printf("[error] out of memory while reading %s.\n", argv[1]);
            free(code);
            fclose(input);
            return 4;
        }
        code = grown;
        bytes_read = fread(code + code_size, 1, chunk_size, input);
        code_size += bytes_read;
    }
    if (ferror(input)) {
        printf("[error] failed to read %s.\n", argv[1]);
        free(code);
        fclose(input);
        return 4;
    }
    fclose(input);

    printf("Writing x86 ELF binary to %s...\n", argv[3]);
    result = elf_write(argv[3], code, code_size);
    free(code);
    if (result < 0) {
        printf("[error] elf_write failed.\n");
        return 3;
    }
    return 0;
}

12
lea.asm Normal file
View file

@ -0,0 +1,12 @@
; Sample lea instructions covering the addressing forms of interest:
; base+index*scale, base+index, plain register and plain address.
BITS 32
lea eax, [ebx+ecx*4] ; base ebx, index ecx, scale 4
lea ebx, [eax+ecx*4] ; base eax, index ecx, scale 4
lea eax, [ecx+ebx*4] ; base ecx, index ebx, scale 4
lea eax, [ecx+ebx*8] ; base ecx, index ebx, scale 8
lea eax, [ecx+ebx] ; base ecx, index ebx, no scale
lea eax, [0x1000+10*4] ; constant address expression, no registers
lea eax, [eax] ; register only
lea eax, [ecx] ; register only
lea ecx, [eax] ; register only
lea eax, [0xdeadbeef] ; literal address

4
min.asm Normal file
View file

@ -0,0 +1,4 @@
; Minimal program: exit with status 0 through int 0x80.
BITS 32
mov ebx,0 ; exit code 0
mov eax,1 ; call number 1 selects exit
int 0x80 ; trap into the kernel

1
min.code Normal file
View file

@ -0,0 +1 @@
a=0

4
min2.asm Normal file
View file

@ -0,0 +1,4 @@
; Minimal program: exit through int 0x80, using eax as the status.
BITS 32
mov ebx,eax ; exit code = current value of eax
mov eax,1 ; call number 1 selects exit
int 0x80 ; trap into the kernel

89
mov.asm Normal file
View file

@ -0,0 +1,89 @@
BITS 32
;;; 00000000 b8 78 56 34 12 b9 78 56 34 12 ba 78 56 34 12 bb |.xV4..xV4..xV4..|
;;; 00000010 78 56 34 12 89 c0 89 c8 89 d0 89 d8 89 c1 89 c9 |xV4.............|
;;; 00000020 89 d1 89 d9 89 c2 89 ca 89 d2 89 da 89 c3 89 cb |................|
;;; 00000030 89 d3 89 db a1 ef be ad de 8b 0d ef be ad de 8b |................|
;;; 00000040 15 ef be ad de 8b 1d ef be ad de a3 ef be ad de |................|
;;; 00000050 89 0d ef be ad de 89 15 ef be ad de 89 1d ef be |................|
;;; 00000060 ad de 8b 00 8b 01 8b 02 8b 03 8b 08 8b 09 8b 0a |................|
;;; 00000070 8b 0b 8b 10 8b 11 8b 12 8b 13 8b 18 8b 19 8b 1a |................|
;;; 00000080 8b 1b 89 00 89 01 89 02 89 03 89 08 89 09 89 0a |................|
;;; 00000090 89 0b 89 10 89 11 89 12 89 13 89 18 89 19 89 1a |................|
;;; 000000a0 89 1b |..|
;;; 000000a2
mov eax, 0x12345678 ; b8 78 56 34 12
mov ecx, 0x12345678 ; b9 78 56 34 12
mov edx, 0x12345678 ; ba 78 56 34 12
mov ebx, 0x12345678 ; bb 78 56 34 12
mov eax, eax ; 89 c0
mov eax, ecx ; 89 c8
mov eax, edx ; 89 d0
mov eax, ebx ; 89 d8
mov ecx, eax ; 89 c1
mov ecx, ecx ; 89 c9
mov ecx, edx ; 89 d1
mov ecx, ebx ; 89 d9
mov edx, eax ; 89 c2
mov edx, ecx ; 89 ca
mov edx, edx ; 89 d2
mov edx, ebx ; 89 da
mov ebx, eax ; 89 c3
mov ebx, ecx ; 89 cb
mov ebx, edx ; 89 d3
mov ebx, ebx ; 89 db
mov eax, dword [0xdeadbeef] ; a1 ef be ad de
mov ecx, dword [0xdeadbeef] ; 8b 0d ef be ad de
mov edx, dword [0xdeadbeef] ; 8b 15 ef be ad de
mov ebx, dword [0xdeadbeef] ; 8b 1d ef be ad de
mov [0xdeadbeef], eax ; a3 ef be ad de
mov [0xdeadbeef], ecx ; 89 0d ef be ad de
mov [0xdeadbeef], edx ; 89 15 ef be ad de
mov [0xdeadbeef], ebx ; 89 1d ef be ad de
mov eax, dword [eax] ; 8b 00
mov eax, dword [ecx] ; 8b 01
mov eax, dword [edx] ; 8b 02
mov eax, dword [ebx] ; 8b 03
mov ecx, dword [eax] ; 8b 08
mov ecx, dword [ecx] ; 8b 09
mov ecx, dword [edx] ; 8b 0a
mov ecx, dword [ebx] ; 8b 0b
mov edx, dword [eax] ; 8b 10
mov edx, dword [ecx] ; 8b 11
mov edx, dword [edx] ; 8b 12
mov edx, dword [ebx] ; 8b 13
mov ebx, dword [eax] ; 8b 18
mov ebx, dword [ecx] ; 8b 19
mov ebx, dword [edx] ; 8b 1a
mov ebx, dword [ebx] ; 8b 1b
mov [eax], eax ; 89 00
mov [ecx], eax ; 89 01
mov [edx], eax ; 89 02
mov [ebx], eax ; 89 03
mov [eax], ecx ; 89 08
mov [ecx], ecx ; 89 09
mov [edx], ecx ; 89 0a
mov [ebx], ecx ; 89 0b
mov [eax], edx ; 89 10
mov [ecx], edx ; 89 11
mov [edx], edx ; 89 12
mov [ebx], edx ; 89 13
mov [eax], ebx ; 89 18
mov [ecx], ebx ; 89 19
mov [edx], ebx ; 89 1a
mov [ebx], ebx ; 89 1b

25
opcode.rb Normal file
View file

@ -0,0 +1,25 @@
# One encoded x86 instruction, split into its parts.  Each part is
# either nil (absent), a single byte, or a collection of bytes.
class OpCode
  Attrs = [:prefix, :op, :modrm, :sib, :extra]
  attr_accessor(*Attrs)

  # attrs maps the names in Attrs to values; names that are left out
  # are stored as nil.
  def initialize(attrs)
    Attrs.each do |attr|
      send("#{attr}=", attrs[attr])
    end
  end

  # Number of bytes in the encoded instruction.  Absent (nil) parts
  # contribute nothing, collections contribute their size, and any
  # other value counts as one byte.
  def size
    Attrs.inject(0) do |sum, attr|
      part = instance_variable_get("@#{attr}")
      if part.nil?
        sum
      elsif part.is_a?(Enumerable)
        sum + part.size
      else
        sum + 1
      end
    end
  end

  # The instruction as a packed byte string, parts in Attrs order.
  # Absent parts are skipped so they cannot break the packing.
  def binary
    Attrs.map { |attr| send(attr) }.flatten.compact.pack('c*')
  end
end

View file

@ -1,3 +1,4 @@
BITS 32
GLOBAL _start
SECTION .text
_start:

View file

@ -1,3 +1,4 @@
BITS 32
GLOBAL _start
SECTION .data
{data}

47
test.rb Normal file
View file

@ -0,0 +1,47 @@
require 'compiler'
require 'stringio'
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
0xb8, 1, 0, 0, 0, # mov eax, 1
0xcd, 0x80 # int 0x80
].pack('c*')
# Print msg on standard error.
def error(msg)
  STDERR.puts(msg)
end
# Run a Compiler over input and return the result of its parse.  A
# ParseError is reported on standard error (message, context and
# caller) and then ends the process with status 1.
def parse(input)
  begin
    return Compiler.new(input).parse # tuple of [data, bss, code, binary]
  rescue ParseError => failure
    ["[error] #{failure.message}", "[context] #{failure.context}"].each do |line|
      error(line)
    end
    # error("Aborting!")
    error(failure.caller)
    exit(1)
  end
end
# Fill a template: for every token => replacement pair in data, the
# first "{token}" in template is replaced.  Returns the filled-in
# copy; template itself is not modified.
#
# The replacement is supplied through a block so it is inserted
# literally.  Passed as a plain argument, String#sub would interpret
# backslash sequences such as \0 or \\ inside the generated code.
def interpolate(template, data)
  data.inject(template) do |text, (token, replacement)|
    text.sub("{#{token}}") { replacement.to_s }
  end
end
# Compile arg (or a built-in sample program when arg is not a readable
# file) and write the results to test.asm and test.bin in the current
# directory.  test.bin receives the machine code followed by X86_exit.
def main(arg)
  source = if File.readable?(arg)
             File.open(arg)
           else
             # StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n")
             StringIO.new("abc=999\nabc-888\n")
           end
  data, bss, code, binary = *parse(source)
  listing = interpolate(File.read("template.asm"),
                        :data => data, :bss => bss, :code => code)
  File.open("test.asm", "w") { |out| out.puts(listing) }
  File.open("test.bin", "wb") do |out|
    out.write(binary)
    out.write(X86_exit)
  end
end
main(ARGV[0].to_s)

11
x86.txt Normal file
View file

@ -0,0 +1,11 @@
mov (0x66) {
reg32, reg32 (0x89) {
op2 - src
eax ecx edx ebx
op1 eax c0 c8 d0 d8
dest ecx c1 c9 d1 d9
edx c2 ca d2 da
ebx c3 cb d3 db
}
}