mirror of
https://github.com/samsonjs/compiler.git
synced 2026-03-26 08:55:48 +00:00
[NEW] Emit x86 code for the mov instruction. Barely works 1/2 the time.
The supporting infrastructure includes a C program for reading a binary blob of x86 code and wrapping it in an ELF executable for Linux x86. Unsure about getting the data for other sections of the binary besides .text.
This commit is contained in:
parent
221efca282
commit
3f070cd0db
11 changed files with 737 additions and 37 deletions
12
Makefile
12
Makefile
|
|
@ -5,8 +5,18 @@ build: test.rb test.code
|
|||
# $? indicates success as per unix convention
|
||||
./test
|
||||
|
||||
elfwriter: elfwriter.c
|
||||
gcc -o elfwriter elfwriter.c -lelf
|
||||
|
||||
test_elf: elfwriter build
|
||||
./elfwriter test.bin 4 test_elf.o
|
||||
ld -o test_elf test_elf.o
|
||||
./test_elf
|
||||
|
||||
clean:
|
||||
@rm -f test.o
|
||||
@rm -f test
|
||||
@rm -f test.asm
|
||||
|
||||
@rm -f elfwriter
|
||||
@rm -f test_elf.o
|
||||
@rm -f test_elf
|
||||
|
|
|
|||
357
compiler.rb
357
compiler.rb
|
|
@ -6,6 +6,9 @@
|
|||
# sjs
|
||||
# may 2009
|
||||
|
||||
require 'opcode'
|
||||
#require 'assembler'
|
||||
|
||||
class ParseError < StandardError
|
||||
attr_reader :caller, :context
|
||||
def initialize(caller, context=nil)
|
||||
|
|
@ -15,24 +18,47 @@ class ParseError < StandardError
|
|||
end
|
||||
|
||||
class Compiler
|
||||
attr_reader :data, :bss, :code
|
||||
# include Assembler
|
||||
|
||||
def initialize(input=STDIN)
|
||||
@look = '' # next lookahead char
|
||||
@input = input # stream to read from
|
||||
@data = '' # data section
|
||||
@bss = '' # bss section
|
||||
@code = '' # code section
|
||||
@vars = {} # symbol table
|
||||
@num_labels = 0 # used to generate unique labels
|
||||
@num_labels_with_suffix = Hash.new(0)
|
||||
@num_conditions = 0
|
||||
@break_stack = [] # for breaking out of loops
|
||||
attr_reader :data, :bss, :code
|
||||
|
||||
# reserved words (... constant?)
|
||||
#
|
||||
# if, else, end, while, until, repeat, break
|
||||
@keywords = %w[i l e w u r b]
|
||||
def initialize(input=STDIN)
|
||||
@look = '' # Next lookahead char.
|
||||
@input = input # Stream to read from.
|
||||
@data = '' # Data section.
|
||||
@bss = '' # BSS section.
|
||||
@code = '' # Code section.
|
||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
||||
@num_labels = 0 # Used to generate unique labels.
|
||||
@num_labels_with_suffix = Hash.new(0)
|
||||
@break_stack = [] # for breaking out of loops
|
||||
@binary = [] # Byte array of machine code.
|
||||
@machine_code = '' # Byte string of machine code.
|
||||
|
||||
@header_size = 0x100 # ELF, Linux, x86
|
||||
@text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86).
|
||||
@text_size = 0x02be00 # Size of text section.
|
||||
@data_offset = @text_offset + @text_size # Offset of data section.
|
||||
@data_size = 0x4e00 # Size of data section.
|
||||
@bss_offset = @data_offset + @data_size # Offset of bss section.
|
||||
@bss_size = 0 # Size of bss section.
|
||||
|
||||
# Labels for the assembler. Maps names to locations.
|
||||
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
|
||||
|
||||
# Dispatch table for keywords.
|
||||
@dispatch = {
|
||||
'b' => method(:break_stmt), # break
|
||||
'e' => nil, # end
|
||||
'l' => nil, # else
|
||||
'i' => method(:if_else_stmt), # if-else
|
||||
'r' => method(:repeat_stmt), # repeat
|
||||
'u' => method(:until_stmt), # until
|
||||
'w' => method(:while_stmt) # while
|
||||
}
|
||||
|
||||
# Reserved words (... constant?)
|
||||
@keywords = @dispatch.keys
|
||||
|
||||
# seed the lexer
|
||||
get_char
|
||||
|
|
@ -41,7 +67,8 @@ class Compiler
|
|||
def parse
|
||||
block
|
||||
expected(:'end of file') unless eof?
|
||||
[@data, @bss, @code]
|
||||
compile
|
||||
[@data, @bss, @code, @machine_code]
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -135,17 +162,8 @@ class Compiler
|
|||
|
||||
# Parse a statement.
|
||||
def statement
|
||||
case @look
|
||||
when 'i'
|
||||
if_else_stmt
|
||||
when 'w'
|
||||
while_stmt
|
||||
when 'u'
|
||||
until_stmt
|
||||
when 'r'
|
||||
repeat_stmt
|
||||
when 'b'
|
||||
break_stmt
|
||||
if handler = @dispatch[@look]
|
||||
handler.call
|
||||
else
|
||||
assignment
|
||||
newline
|
||||
|
|
@ -234,8 +252,6 @@ class Compiler
|
|||
|
||||
# Evaluates any expression for now. There are no boolean operators.
|
||||
def condition
|
||||
# @num_conditions += 1
|
||||
# emit("<condition ##{@num_conditions}>")
|
||||
expression
|
||||
x86_cmp(:eax, 0) # 0 is false, anything else is true
|
||||
skip_whitespace
|
||||
|
|
@ -394,7 +410,7 @@ class Compiler
|
|||
# Get a number.
|
||||
def get_num
|
||||
expected(:integer) unless digit?(@look)
|
||||
many(method(:digit?))
|
||||
many(method(:digit?)).to_i
|
||||
end
|
||||
|
||||
# Skip leading whitespace.
|
||||
|
|
@ -417,7 +433,8 @@ class Compiler
|
|||
def var(name, dwords=1)
|
||||
unless @vars[name]
|
||||
@bss << "#{name}: resd #{dwords}\n"
|
||||
@vars[name] = name
|
||||
@vars[name] = @bss_size
|
||||
@bss_size += dwords
|
||||
# else
|
||||
# raise ParseError, "identifier #{name} redefined"
|
||||
end
|
||||
|
|
@ -431,6 +448,12 @@ class Compiler
|
|||
|
||||
def emit_label(name=unique_label)
|
||||
emit("#{name}:", :tab => nil)
|
||||
|
||||
@labels[name] = @binary.length
|
||||
end
|
||||
|
||||
def resolve_label(label)
|
||||
@labels[label]
|
||||
end
|
||||
|
||||
# Generate a unique label.
|
||||
|
|
@ -444,12 +467,194 @@ class Compiler
|
|||
end
|
||||
|
||||
|
||||
# Some asm methods for convenience and arity checks.
|
||||
# x86 machine code generation
|
||||
|
||||
def emit_byte(byte)
|
||||
@binary << byte
|
||||
end
|
||||
|
||||
def emit_dword(num)
|
||||
@binary += num_to_quad(num)
|
||||
end
|
||||
|
||||
# Emit a ModR/M byte, followed by a SIB byte and/or a 32-bit
# displacement when the addressing form requires them.
#
# Bit layout of the ModR/M byte:
#   0-2: r/m
#   3-5: reg/opcode
#   6-7: mod
#
# dest and src are tuples of the form [type, value] where type is
# any of :reg, :rm32, :imm32. Max _one_ :rm32 arg per call.
#
# When dest is a :reg it fills the reg field and src fills r/m;
# when dest is an :rm32 it fills r/m and src fills the reg field.
# override[:op], when given, replaces the reg field with an opcode
# extension (the "/digit" of the instruction reference), which is how
# forms with an immediate source are encoded.
#
# Raises RuntimeError when the operand combination cannot be encoded.
def emit_modrm(dest, src, override={})
  # Decide which operand lands in the reg field and which in r/m.
  case dest[0]
  when :reg
    reg_operand, rm_operand = dest, src
  when :rm32
    reg_operand, rm_operand = src, dest
  else
    raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
  end

  # reg field: explicit opcode extension wins, otherwise a register number.
  # (0 is truthy in Ruby, so :op => 0 is honoured.)
  reg = override[:op]
  reg = regnum(reg_operand[1]) if reg.nil? && reg_operand[0] == :reg
  raise "unsupported mod r/m byte! dest=#{dest} src=#{src}" if reg.nil?

  sib = nil
  disp = nil

  case rm_operand[0]
  when :reg
    # mod == 11 (register content)
    mod = 3
    rm = regnum(rm_operand[1])
  when :rm32
    # mod == 00 (pointer)
    mod = 0
    parts = decode_addr(rm_operand[1])
    rm = case parts[0]
         when :reg
           # direct pointer e.g. [eax]
           regnum(parts[1])
         when :sib
           # r/m == 100 announces a SIB byte
           sib = parts[1..-1]
           4
         when :disp
           # r/m == 101 with mod == 00 announces a bare disp32
           disp = parts[1]
           5
         end
  else
    raise "unsupported mod r/m byte! dest=#{dest} src=#{src}"
  end
  raise "unsupported mod r/m byte! dest=#{dest} src=#{src}" if rm.nil?

  emit_byte((mod << 6) | (reg << 3) | rm)
  emit_sib(sib) if sib
  emit_dword(disp) if disp
end
|
||||
|
||||
# Emit a SIB (scale-index-base) byte.
#
# sib is a triple [scale, index, base]. scale must be 1, 2, 4 or 8;
# index and base are register names (symbols, as produced by
# decode_addr) or already-resolved register numbers.
#
# Bit layout: 0-2 base, 3-5 index, 6-7 log2(scale).
#
# Raises RuntimeError for an unsupported scale or register.
def emit_sib(sib)
  scale, index, base = *sib

  # Integer lookup instead of log2: the shift below needs an Integer.
  scale_bits = { 1 => 0, 2 => 1, 4 => 2, 8 => 3 }[scale]
  unless scale_bits
    raise "unsupported SIB scale: #{scale}, should be [1, 2, 4, 8]"
  end

  index_num = index.is_a?(Symbol) ? regnum(index) : index
  base_num = base.is_a?(Symbol) ? regnum(base) : base
  if index_num.nil? || base_num.nil?
    raise "unsupported SIB register: index=#{index} base=#{base}"
  end

  emit_byte((scale_bits << 6) | (index_num << 3) | base_num)
end
|
||||
|
||||
def compile
|
||||
@machine_code = @binary.pack('c*')
|
||||
end
|
||||
|
||||
|
||||
# Some asm methods for convenience and arity checks. Now emits
|
||||
# some real machine code too.
|
||||
|
||||
# This is the full set of x86 registers.
|
||||
# Registers = [:eax, :ecx, :edx, :ebx, :esp, :ebp, :esi, :edi]
|
||||
|
||||
# This will do for early work.
|
||||
# Position indicates value in op codes.
|
||||
Registers = [:eax, :ecx, :edx, :ebx]
|
||||
|
||||
# Regex to match any x86 register name, and then some. Should be
|
||||
# sufficient.
|
||||
RegisterRegex = 'e[acdbsd][xip]'
|
||||
|
||||
# Match a literal number in binary, octal, decimal, or hex
|
||||
NumberRegex = '(0[xXbB]?)?[0-9a-fA-F]+'
|
||||
|
||||
# Match a variable name.
|
||||
NameRegex = '[a-zA-Z][a-zA-Z0-9]*'
|
||||
|
||||
# 0.size gives the real answer, we only do x86 though
|
||||
MachineBytes = 4
|
||||
MachineBits = MachineBytes * 8
|
||||
MinSigned = -1 * 2**(MachineBits-1)
|
||||
MaxSigned = 2**(MachineBits-1) - 1
|
||||
MinUnsigned = 0
|
||||
MaxUnsigned = 2**MachineBits - 1
|
||||
SignedRange = MinSigned..MaxSigned
|
||||
|
||||
# assemble x86 machine code
|
||||
def asm
|
||||
# stash the current number of bytes written
|
||||
instruction_offset = @binary.length
|
||||
|
||||
yield
|
||||
|
||||
# return the number of bytes written
|
||||
@binary.length - instruction_offset
|
||||
end
|
||||
|
||||
def register?(op)
|
||||
Registers.index(op)
|
||||
end
|
||||
alias_method :regnum, :register?
|
||||
|
||||
def immediate?(op)
|
||||
op.is_a?(Numeric) || (op.is_a?(String) && op.match(/^#{NumberRegex}$/))
|
||||
end
|
||||
|
||||
def rm32?(op)
|
||||
offset?(op) || op.respond_to?(:match) && op.match(/^
|
||||
\[
|
||||
#{RegisterRegex} # base register
|
||||
(\+#{RegisterRegex} # optional index register
|
||||
(\*[1248])? # optional scale
|
||||
)?
|
||||
\]
|
||||
$/x)
|
||||
end
|
||||
|
||||
# Emit a mov instruction: the assembly text via emit, and the machine
# code via the emit_* helpers. Returns the number of bytes written
# (the value of the asm block).
#
# Supported forms:
#   1.  mov reg32, immediate32    (0xb8+destreg, imm32)
#   1a. mov reg32, reg32          (0x89, mod r/m)
#   2.  mov reg32, r/m32          (0x8b, mod r/m, maybe sib)
#   2a. mov eax, memoffset32      (0xa1, disp32)
#   3.  mov r/m32, reg32          (0x89, mod r/m, maybe sib)
#   3a. mov memoffset32, eax      (0xa3, disp32)
#   4.  mov r/m32, immediate32    (0xc7, mod r/m, maybe sib, imm32)
#
# An unsupported operand combination is reported on stdout and emits
# no machine code.
def x86_mov(dest, src)
  emit("mov #{dest}, #{src}")

  # The size prefix is only meaningful to the assembler text.
  dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
  src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '

  asm do

    # version 1: mov r32, imm32
    if register?(dest) && immediate?(src)
      emit_byte(0xb8 + regnum(dest)) # dest encoded in instruction
      emit_dword(parse_num(src))

    # version 1a: mov r32, r32 (dest goes in r/m, src in reg)
    elsif register?(dest) && register?(src)
      emit_byte(0x89)
      emit_modrm([:reg, src], [:reg, dest], {})

    # version 2: mov r32, r/m32
    elsif register?(dest) && rm32?(src)
      # version 2a: mov eax, moffs32
      if dest == :eax && offset?(src)
        emit_byte(0xa1)
        emit_dword(decode_addr(src)[1])
      else
        emit_byte(0x8b)
        emit_modrm([:reg, dest], [:rm32, src], {})
      end

    # version 3: mov r/m32, r32
    elsif rm32?(dest) && register?(src)
      # version 3a: mov moffs32, eax
      if offset?(dest) && src == :eax
        emit_byte(0xa3)
        emit_dword(decode_addr(dest)[1])
      else
        emit_byte(0x89)
        emit_modrm([:rm32, dest], [:reg, src], {})
      end

    # version 4: mov r/m32, imm32
    elsif rm32?(dest) && immediate?(src)
      emit_byte(0xc7)
      emit_modrm([:rm32, dest], [:imm32, src], :op => 0)
      # The immediate follows the addressing bytes.
      emit_dword(parse_num(src))

    else
      puts "rm32?(dest): #{rm32?(dest)}\t\trm32?(src): #{rm32?(src)}"
      puts "register?(dest): #{register?(dest)}\t\tregister?(src): #{register?(src)}"
      puts "immediate?(dest): #{immediate?(dest)}\t\timmediate?(src): #{immediate?(src)}"
      puts "offset?(dest): #{offset?(dest)}\t\toffset?(src): #{offset?(src)}"
      #raise "unsupported mov format: mov #{dest}, #{src}"
      puts "!!! unsupported mov format: mov #{dest}, #{src}"
    end

  end # asm do
end
|
||||
|
||||
|
||||
def x86_add(dest, src)
|
||||
emit("add #{dest}, #{src}")
|
||||
end
|
||||
|
|
@ -501,4 +706,90 @@ class Compiler
|
|||
def x86_cmp(a, b)
|
||||
emit("cmp #{a}, #{b}")
|
||||
end
|
||||
|
||||
|
||||
# True when addr is a bracketed absolute memory operand: a variable
# name or a literal number, e.g. "[abc]" or "[0xdeadbeef]".
#
# A bracketed register such as "[eax]" is NOT an offset even though a
# register name also looks like a variable name; it is checked first
# so register-indirect operands are not mistaken for moffs32 forms.
#
# Returns a MatchData (truthy) on success, nil or false otherwise.
def offset?(addr)
  return false unless addr.respond_to?(:match)
  return false if addr.match(/^\[#{RegisterRegex}\]$/)

  addr.match(/^\[(#{NameRegex}|#{NumberRegex})\]$/)
end
|
||||
|
||||
# Decode a bracketed memory operand into a tagged tuple:
#
#   "[eax]"         => [:reg, :eax]
#   "[name]"        => [:disp, @bss_offset + location of name]
#   "[0x1234]"      => [:disp, 0x1234]
#   "[base+index]"  => [:sib, 1, :index, :base]
#   "[base+index*4]" => [:sib, 4, :index, :base]
#
# Raises RuntimeError when a variable name is not in the symbol table.
def decode_addr(addr)
  addr = addr[1..-2] # strip brackets

  # Registers must be recognised before names: every register name
  # also matches NameRegex.
  if addr.match(/^#{RegisterRegex}$/)
    [:reg, addr.to_sym]
  elsif matches = addr.match(/^#{NameRegex}$/)
    unless loc = @vars[matches[0]]
      raise "undefined variable #{matches[0]}"
    end
    # NOTE(review): var appears to record locations in dwords while
    # this is used as a byte address -- confirm the intended unit.
    [:disp, @bss_offset + loc]
  elsif matches = addr.match(/^#{NumberRegex}$/)
    [:disp, parse_num(matches[0])]
  elsif addr.index('*')
    bi, scale = *addr.split('*')
    base, index = *bi.split('+')
    [:sib, scale.to_i, index.to_sym, base.to_sym]
  elsif addr.index('+')
    base, index = *addr.split('+')
    [:sib, 1, index.to_sym, base.to_sym]
  else
    [:reg, addr.to_sym]
  end
end
|
||||
|
||||
# Parse a number from a string. Used by emit_dword.
#
# Accepts hexadecimal ("0x1f"), binary ("0b101"), octal (any other
# leading "0", e.g. "017") and decimal literals, case-insensitively.
# Anything that is not a String is assumed to already be a number and
# is returned untouched. The argument is never modified.
def parse_num(str)
  # If it's not a string it's a number, just return it.
  return str unless str.is_a?(String)

  text = str.downcase # work on a copy; callers may pass frozen strings

  case text[0, 2]
  when '0x'
    text[2..-1].to_i(16)
  when '0b'
    text[2..-1].to_i(2)
  else
    text[0, 1] == '0' ? text.to_i(8) : text.to_i(10)
  end
end
|
||||
|
||||
# Convert a number to a quad of bytes, discarding excess bits.
|
||||
# Little endian!
|
||||
def num_to_quad(num)
|
||||
[
|
||||
num & 0xff,
|
||||
(num >> 8) & 0xff,
|
||||
(num >> 16) & 0xff,
|
||||
(num >> 24) & 0xff
|
||||
]
|
||||
end
|
||||
|
||||
# Base-2 logarithm of a positive, finite number, computed by repeated
# halving/doubling (integer part) and repeated squaring (fractional
# part).
#
# x   - the number to take the logarithm of; must be > 0 and finite.
# tol - the fractional bits are refined until their weight drops
#       below this tolerance.
#
# Returns a Float. Raises ArgumentError for x <= 0, NaN or infinity,
# for which the loops below would never terminate.
def log2(x, tol=1e-13)
  x = x.to_f # integer division would silently drop the fraction
  unless x > 0 && x.finite?
    raise ArgumentError, "log2 is undefined for #{x}"
  end

  result = 0.0

  # Integer part: scale x into [1, 2), counting the powers of two.
  while x < 1
    result -= 1
    x *= 2
  end
  while x >= 2
    result += 1
    x /= 2
  end

  # Fractional part: squaring x doubles its logarithm, so each pass
  # exposes the next binary digit of the fraction.
  fp = 1.0
  while fp >= tol
    fp /= 2
    x *= x
    if x >= 2
      x /= 2
      result += fp
    end
  end
  result
end
|
||||
|
||||
end
|
||||
|
|
|
|||
266
elfwriter.c
Normal file
266
elfwriter.c
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
#include <libelf.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
/* _exit(0) */
|
||||
/* uint8_t shell_code[] = { */
|
||||
/* 0xbb, 0, 0, 0, 0, /\* mov ebx, 0 *\/ */
|
||||
/* 0xb8, 1, 0, 0, 0, /\* mov eax, 1 *\/ */
|
||||
/* 0xcd, 0x80 /\* int 0x80 *\/ */
|
||||
/* }; */
|
||||
|
||||
/* uint32_t hash_words[] = { */
|
||||
/* 0x12345678, */
|
||||
/* 0xdeadc0de, */
|
||||
/* 0x1234abcd */
|
||||
/* }; */
|
||||
|
||||
/* Memory layout of the generated image. Each address is derived from
 * the previous section's address and size. The expansions are
 * parenthesized so they stay correct inside larger expressions
 * (e.g. `2 * text_addr` or `bss_addr - text_addr`).
 */
#define header_size 0x100
#define text_addr   (0x8048000 + header_size)
#define text_size   0x02be00
#define data_addr   (text_addr + text_size)
#define data_size   0x4e00
#define bss_addr    (data_addr + data_size)

/* Size of the .bss section; set from the command line in main(). */
size_t bss_size = 0;
|
||||
|
||||
char string_table[] = {
|
||||
/* Offset 0 */ '\0',
|
||||
/* Offset 1 */ '.', 't', 'e', 'x', 't', '\0' ,
|
||||
/* Offset 7 */ '.', 'b', 's', 's', '\0',
|
||||
/* Offset 12 */ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', '\0'
|
||||
};
|
||||
|
||||
|
||||
/* Write a static 32-bit x86 ELF binary to filename. The file is
|
||||
* clobbered without confirmation!
|
||||
*/
|
||||
int
|
||||
elf_write(const char *filename, uint8_t *code, size_t code_size)
|
||||
{
|
||||
int fd;
|
||||
size_t shstrndx;
|
||||
Elf *elf;
|
||||
Elf_Scn *scn;
|
||||
Elf_Data *data;
|
||||
Elf32_Ehdr *ehdr;
|
||||
Elf32_Phdr *phdr, *load;
|
||||
Elf32_Shdr *shdr;
|
||||
|
||||
if (elf_version(EV_CURRENT) == EV_NONE) {
|
||||
printf("Failed to initialize ELF library!\n");
|
||||
return -1;
|
||||
}
|
||||
if ((fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, 0666)) < 0) {
|
||||
printf("Can't open %s for writing.\n", filename);
|
||||
perror("[elf_write]");
|
||||
return -2;
|
||||
}
|
||||
if ((elf = elf_begin(fd, ELF_C_WRITE, (Elf *)0)) == 0) {
|
||||
printf("elf_begin failed!\n");
|
||||
return -3;
|
||||
}
|
||||
|
||||
|
||||
/**************
|
||||
* ELF Header *
|
||||
**************/
|
||||
|
||||
if ((ehdr = elf32_newehdr(elf)) == NULL) {
|
||||
printf("elf32_newehdr failed!\n");
|
||||
return -4;
|
||||
}
|
||||
ehdr->e_ident[EI_DATA] = ELFDATA2LSB; /* 2's complement, little endian */
|
||||
ehdr->e_type = ET_EXEC;
|
||||
ehdr->e_machine = EM_386; /* x86 */
|
||||
|
||||
/* Image starts at 0x8048000, x86 32-bit abi. We need a bit
|
||||
* of room for headers and such. TODO figure out how much
|
||||
* room is needed!
|
||||
*
|
||||
* Current entry point is .text section.
|
||||
*/
|
||||
ehdr->e_entry = text_addr;
|
||||
|
||||
|
||||
/*******************
|
||||
* Program Headers *
|
||||
*******************/
|
||||
|
||||
if ((phdr = elf32_newphdr(elf, 2)) == NULL) {
|
||||
printf("elf32_newphdr failed!\n");
|
||||
return -5;
|
||||
}
|
||||
load = phdr+1;
|
||||
|
||||
|
||||
/*****************
|
||||
* .text section *
|
||||
*****************/
|
||||
|
||||
if ((scn = elf_newscn(elf)) == NULL) {
|
||||
printf("elf_newscn failed!\n");
|
||||
return -6;
|
||||
}
|
||||
if ((data = elf_newdata(scn)) == NULL) {
|
||||
printf("elf_newdata failed!\n");
|
||||
return -7;
|
||||
}
|
||||
data->d_align = 16;
|
||||
data->d_buf = code;
|
||||
data->d_off = 0LL;
|
||||
data->d_type = ELF_T_BYTE;
|
||||
data->d_size = code_size;
|
||||
data->d_version = EV_CURRENT;
|
||||
|
||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
||||
printf("elf32_getshdr failed!\n");
|
||||
return -8;
|
||||
}
|
||||
shdr->sh_name = 1;
|
||||
shdr->sh_type = SHT_PROGBITS;
|
||||
shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
|
||||
shdr->sh_addr = text_addr;
|
||||
|
||||
|
||||
/****************
|
||||
* .bss section *
|
||||
****************/
|
||||
|
||||
if ((scn = elf_newscn(elf)) == NULL) {
|
||||
printf("elf_newscn failed!\n");
|
||||
return -6;
|
||||
}
|
||||
if ((data = elf_newdata(scn)) == NULL) {
|
||||
printf("elf_newdata failed!\n");
|
||||
return -7;
|
||||
}
|
||||
data->d_align = 4;
|
||||
data->d_off = 0LL;
|
||||
data->d_type = ELF_T_BYTE;
|
||||
data->d_size = bss_size;
|
||||
data->d_version = EV_CURRENT;
|
||||
|
||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
||||
printf("elf32_getshdr failed!\n");
|
||||
return -8;
|
||||
}
|
||||
shdr->sh_name = 7;
|
||||
shdr->sh_type = SHT_NOBITS;
|
||||
shdr->sh_flags = SHF_WRITE | SHF_ALLOC;
|
||||
shdr->sh_addr = bss_addr;
|
||||
|
||||
|
||||
/*******************************
|
||||
* section header string table *
|
||||
*******************************/
|
||||
|
||||
if ((scn = elf_newscn(elf)) == NULL) {
|
||||
printf("elf_newscn failed!\n");
|
||||
return -9;
|
||||
}
|
||||
if ((data = elf_newdata(scn)) == NULL) {
|
||||
printf("elf_newdata failed!\n");
|
||||
return -10;
|
||||
}
|
||||
data->d_align = 1;
|
||||
data->d_buf = string_table;
|
||||
data->d_off = 0LL;
|
||||
data->d_type = ELF_T_BYTE;
|
||||
data->d_size = sizeof(string_table);
|
||||
data->d_version = EV_CURRENT;
|
||||
|
||||
if ((shdr = elf32_getshdr(scn)) == NULL) {
|
||||
printf("elf32_getshdr failed!\n");
|
||||
return -11;
|
||||
}
|
||||
shdr->sh_name = 12;
|
||||
shdr->sh_type = SHT_STRTAB;
|
||||
shdr->sh_flags = SHF_STRINGS | SHF_ALLOC;
|
||||
shdr->sh_entsize = 0;
|
||||
|
||||
|
||||
/* int elf_setshstrndx(Elf *e, Elf32_Ehdr *eh, size_t shstrndx) */
|
||||
shstrndx = elf_ndxscn(scn);
|
||||
if (shstrndx >= SHN_LORESERVE) {
|
||||
if ((scn = elf_getscn(elf, 0)) == NULL) {
|
||||
printf("elf_getscn failed!\n");
|
||||
return -12;
|
||||
}
|
||||
/* assert(scn->s_ndx == SHN_UNDEF); */
|
||||
/* scn->s_shdr.s_shdr32.sh_link = shstrndx; */
|
||||
elf_flagshdr(scn, ELF_C_SET, ELF_F_DIRTY);
|
||||
shstrndx = SHN_XINDEX;
|
||||
}
|
||||
ehdr->e_shstrndx = shstrndx;
|
||||
|
||||
if (elf_update(elf, ELF_C_NULL) < 0) {
|
||||
printf("elf_update failed!\n");
|
||||
return -12;
|
||||
}
|
||||
|
||||
phdr->p_vaddr = phdr->p_paddr = 0x8048000 + ehdr->e_phoff;
|
||||
phdr->p_type = PT_PHDR;
|
||||
phdr->p_offset = ehdr->e_phoff;
|
||||
phdr->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT);
|
||||
|
||||
load->p_vaddr = phdr->p_paddr = 0x8048000;
|
||||
load->p_type = PT_LOAD;
|
||||
load->p_offset = 0;
|
||||
load->p_filesz = elf32_fsize(ELF_T_PHDR, 1, EV_CURRENT);
|
||||
load->p_flags = PF_R | PF_X;
|
||||
load->p_align = 0x1000;
|
||||
|
||||
elf_flagphdr(elf, ELF_C_SET, ELF_F_DIRTY);
|
||||
|
||||
if (elf_update(elf, ELF_C_WRITE) < 0) {
|
||||
printf("elf_update failed!\n");
|
||||
return -13;
|
||||
}
|
||||
|
||||
elf_end(elf);
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char *argv[])
|
||||
{
|
||||
int result;
|
||||
pid_t pid;
|
||||
FILE *fd;
|
||||
uint8_t *code = NULL;
|
||||
size_t code_size = 0, chunk_size = 1024, bytes_read;
|
||||
|
||||
if (argc < 4) {
|
||||
printf("usage: %s <input> <bss_size> <output>\n", argv[0]);
|
||||
printf(" Wraps the input file in an ELF binary.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bss_size = strtoul(argv[2], 0, 10);
|
||||
|
||||
if ((fd = fopen(argv[1], "r")) < 0) {
|
||||
printf("[error] can't open %s for reading.\n", argv[1]);
|
||||
perror("[main]");
|
||||
return 2;
|
||||
}
|
||||
while (!feof(fd) && !ferror(fd)) {
|
||||
code = realloc(code, code_size + chunk_size);
|
||||
bytes_read = fread(code+code_size, 1, chunk_size, fd);
|
||||
code_size += bytes_read;
|
||||
}
|
||||
fclose(fd);
|
||||
|
||||
printf("Writing x86 ELF binary to %s...\n", argv[1]);
|
||||
result = elf_write(argv[3], code, code_size);
|
||||
if (result < 0) {
|
||||
printf("[error] elf_write failed.\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
12
lea.asm
Normal file
12
lea.asm
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
BITS 32
|
||||
|
||||
lea eax, [ebx+ecx*4]
|
||||
lea ebx, [eax+ecx*4]
|
||||
lea eax, [ecx+ebx*4]
|
||||
lea eax, [ecx+ebx*8]
|
||||
lea eax, [ecx+ebx]
|
||||
lea eax, [0x1000+10*4]
|
||||
lea eax, [eax]
|
||||
lea eax, [ecx]
|
||||
lea ecx, [eax]
|
||||
lea eax, [0xdeadbeef]
|
||||
4
min.asm
Normal file
4
min.asm
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
BITS 32
|
||||
mov ebx,0
|
||||
mov eax,1
|
||||
int 0x80
|
||||
89
mov.asm
Normal file
89
mov.asm
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
BITS 32
|
||||
|
||||
;;; 00000000 b8 78 56 34 12 b9 78 56 34 12 ba 78 56 34 12 bb |.xV4..xV4..xV4..|
|
||||
;;; 00000010 78 56 34 12 89 c0 89 c8 89 d0 89 d8 89 c1 89 c9 |xV4.............|
|
||||
;;; 00000020 89 d1 89 d9 89 c2 89 ca 89 d2 89 da 89 c3 89 cb |................|
|
||||
;;; 00000030 89 d3 89 db a1 ef be ad de 8b 0d ef be ad de 8b |................|
|
||||
;;; 00000040 15 ef be ad de 8b 1d ef be ad de a3 ef be ad de |................|
|
||||
;;; 00000050 89 0d ef be ad de 89 15 ef be ad de 89 1d ef be |................|
|
||||
;;; 00000060 ad de 8b 00 8b 01 8b 02 8b 03 8b 08 8b 09 8b 0a |................|
|
||||
;;; 00000070 8b 0b 8b 10 8b 11 8b 12 8b 13 8b 18 8b 19 8b 1a |................|
|
||||
;;; 00000080 8b 1b 89 00 89 01 89 02 89 03 89 08 89 09 89 0a |................|
|
||||
;;; 00000090 89 0b 89 10 89 11 89 12 89 13 89 18 89 19 89 1a |................|
|
||||
;;; 000000a0 89 1b |..|
|
||||
;;; 000000a2
|
||||
|
||||
mov eax, 0x12345678 ; b8 78 56 34 12
|
||||
mov ecx, 0x12345678 ; b9 78 56 34 12
|
||||
mov edx, 0x12345678 ; ba 78 56 34 12
|
||||
mov ebx, 0x12345678 ; bb 78 56 34 12
|
||||
|
||||
mov eax, eax ; 89 c0
|
||||
mov eax, ecx ; 89 c8
|
||||
mov eax, edx ; 89 d0
|
||||
mov eax, ebx ; 89 d8
|
||||
|
||||
mov ecx, eax ; 89 c1
|
||||
mov ecx, ecx ; 89 c9
|
||||
mov ecx, edx ; 89 d1
|
||||
mov ecx, ebx ; 89 d9
|
||||
|
||||
mov edx, eax ; 89 c2
|
||||
mov edx, ecx ; 89 ca
|
||||
mov edx, edx ; 89 d2
|
||||
mov edx, ebx ; 89 da
|
||||
|
||||
mov ebx, eax ; 89 c3
|
||||
mov ebx, ecx ; 89 cb
|
||||
mov ebx, edx ; 89 d3
|
||||
mov ebx, ebx ; 89 db
|
||||
|
||||
mov eax, dword [0xdeadbeef] ; a1 ef be ad de
|
||||
mov ecx, dword [0xdeadbeef] ; 8b 0d ef be ad de
|
||||
mov edx, dword [0xdeadbeef] ; 8b 15 ef be ad de
|
||||
mov ebx, dword [0xdeadbeef] ; 8b 1d ef be ad de
|
||||
|
||||
mov [0xdeadbeef], eax ; a3 ef be ad de
|
||||
mov [0xdeadbeef], ecx ; 89 0d ef be ad de
|
||||
mov [0xdeadbeef], edx ; 89 15 ef be ad de
|
||||
mov [0xdeadbeef], ebx ; 89 1d ef be ad de
|
||||
|
||||
mov eax, dword [eax] ; 8b 00
|
||||
mov eax, dword [ecx] ; 8b 01
|
||||
mov eax, dword [edx] ; 8b 02
|
||||
mov eax, dword [ebx] ; 8b 03
|
||||
|
||||
mov ecx, dword [eax] ; 8b 08
|
||||
mov ecx, dword [ecx] ; 8b 09
|
||||
mov ecx, dword [edx] ; 8b 0a
|
||||
mov ecx, dword [ebx] ; 8b 0b
|
||||
|
||||
mov edx, dword [eax] ; 8b 10
|
||||
mov edx, dword [ecx] ; 8b 11
|
||||
mov edx, dword [edx] ; 8b 12
|
||||
mov edx, dword [ebx] ; 8b 13
|
||||
|
||||
mov ebx, dword [eax] ; 8b 18
|
||||
mov ebx, dword [ecx] ; 8b 19
|
||||
mov ebx, dword [edx] ; 8b 1a
|
||||
mov ebx, dword [ebx] ; 8b 1b
|
||||
|
||||
mov [eax], eax ; 89 00
|
||||
mov [ecx], eax ; 89 01
|
||||
mov [edx], eax ; 89 02
|
||||
mov [ebx], eax ; 89 03
|
||||
|
||||
mov [eax], ecx ; 89 08
|
||||
mov [ecx], ecx ; 89 09
|
||||
mov [edx], ecx ; 89 0a
|
||||
mov [ebx], ecx ; 89 0b
|
||||
|
||||
mov [eax], edx ; 89 10
|
||||
mov [ecx], edx ; 89 11
|
||||
mov [edx], edx ; 89 12
|
||||
mov [ebx], edx ; 89 13
|
||||
|
||||
mov [eax], ebx ; 89 18
|
||||
mov [ecx], ebx ; 89 19
|
||||
mov [edx], ebx ; 89 1a
|
||||
mov [ebx], ebx ; 89 1b
|
||||
25
opcode.rb
Normal file
25
opcode.rb
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# One encoded x86 instruction, split into its parts. Each part is
# either a single byte (Integer), a list of bytes, or nil when the
# instruction does not have that part.
class OpCode
  # Parts of an instruction, in the order they are emitted.
  Attrs = [:prefix, :op, :modrm, :sib, :extra]
  attr_accessor(*Attrs)

  # attrs - Hash mapping any of Attrs to a byte or a list of bytes.
  #         Parts that are left out stay nil.
  def initialize(attrs)
    Attrs.each do |attr|
      send("#{attr}=", attrs[attr])
    end
  end

  # Number of bytes in the encoded instruction. Absent (nil) parts
  # contribute nothing.
  def size
    bytes.size
  end

  # The encoded instruction as a byte string.
  def binary
    bytes.pack('C*')
  end

  private

  # All present parts flattened into one list of bytes, in emit order.
  def bytes
    Attrs.map { |attr| send(attr) }.flatten.compact
  end
end
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
BITS 32
|
||||
GLOBAL _start
|
||||
SECTION .text
|
||||
_start:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
BITS 32
|
||||
GLOBAL _start
|
||||
SECTION .data
|
||||
{data}
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ u 1
|
|||
e
|
||||
|
||||
r
|
||||
cc = cc * 2
|
||||
cc = c * 2
|
||||
i 1
|
||||
b
|
||||
e
|
||||
|
|
|
|||
5
test.rb
5
test.rb
|
|
@ -5,7 +5,7 @@ def error(msg) STDERR.puts(msg) end
|
|||
|
||||
def parse(input)
|
||||
compiler = Compiler.new(input)
|
||||
compiler.parse # tuple of [data, bss, code]
|
||||
compiler.parse # tuple of [data, bss, code, binary]
|
||||
|
||||
rescue ParseError => e
|
||||
error("[error] #{e.message}")
|
||||
|
|
@ -29,10 +29,11 @@ def main(arg)
|
|||
# StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n")
|
||||
StringIO.new("abc=999\nabc-888\n")
|
||||
end
|
||||
data, bss, code = *parse(input)
|
||||
data, bss, code, binary = *parse(input)
|
||||
template = File.read("template.asm")
|
||||
asm = interpolate(template, :data => data, :bss => bss, :code => code)
|
||||
File.open("test.asm", "w") { |f| f.puts(asm) }
|
||||
File.open("test.bin", "wb") { |f| f.write(binary) }
|
||||
end
|
||||
|
||||
main(ARGV[0].to_s)
|
||||
|
|
|
|||
Loading…
Reference in a new issue