[NEW] Boolean expressions. Print statement.

This commit is contained in:
sjs 2009-05-21 13:39:20 -07:00
parent 55c047d0ef
commit dc23e23b92
8 changed files with 575 additions and 39 deletions

View file

@ -1,5 +1,19 @@
lt: test.rb test_lt.code
ruby test.rb test_lt.code > test_lt.asm
nasm -f elf -g -o test_lt.o test_lt.asm
ld -o test_lt test_lt.o
# $? indicates success as per unix convention
./test_lt
print: test.rb test_print.code
ruby test.rb test_print.code > test_print.asm
nasm -f elf -g -o test_print.o test_print.asm
ld -o test_print test_print.o
# $? indicates success as per unix convention
./test_print
build: test.rb test.code
ruby test.rb test.code
ruby test.rb test.code > test.asm
nasm -f elf -g -o test.o test.asm
ld -o test test.o
# $? indicates success as per unix convention

View file

@ -29,8 +29,9 @@ class Compiler
# reserved words (... constant?)
#
# if, else, end, while, until, repeat, for, do, break
@keywords = %w[i l e w u r f d b]
# if, else, end, while, until, repeat, for, do, break, true, false, print,
# not, and, or, add, subtract, multiply, divide, xor, bool tests
@keywords = %w[i l e w u r f d b t f p ! & | + - * / ^ = < > #]
# seed the lexer
get_char
@ -63,12 +64,12 @@ class Compiler
def factor
if @look == '('
match('(')
expression
boolean_expression
match(')')
elsif alpha?(@look)
identifier # or call
elsif digit?(@look)
x86_mov(:eax, get_num)
x86_mov(:eax, get_number)
else
expected(:'integer, identifier, function call, or parenthesized expression')
end
@ -76,15 +77,10 @@ class Compiler
# Parse a signed factor: an optional leading '-' or '+' followed by a
# factor.  The factor is parsed by `factor`; afterwards eax is negated
# when the leading sign was '-'.
# NOTE(review): two sign-handling variants appear back to back below (the
# `unary_op` form and the shorter `sign` form) -- presumably only one of
# them belongs in the final method; confirm against the real file.
def signed_factor
# Variant 1: remember the sign as :neg / :pos (nil when there is none).
unary_op = if @look == '-'
match('-')
:neg
elsif @look == '+'
match('+')
:pos
end
# Variant 2: remember the lookahead character itself and consume it only
# when it is a sign.
sign = @look
match(sign) if sign == '-' || sign == '+'
factor
x86_neg(:eax) if unary_op == :neg
x86_neg(:eax) if sign == '-'
end
# Parse and translate a single term (factor or mulop). Result is in
@ -172,12 +168,162 @@ class Compiler
end
#######################
# boolean expressions #
#######################
# Parse and translate a boolean expression: one boolean term followed by
# any number of '|' (or) / '^' (xor) operations.  The result is left in eax.
def boolean_expression
  boolean_term
  while orop?
    # Stash the left operand; or_expr / xor_expr combine it with the
    # right operand through [esp].
    x86_push(:eax)
    # `when x then y` instead of the Ruby 1.8-only `when x: y`, which no
    # longer parses on Ruby 1.9 and later.
    case @look
    when '|' then or_expr
    when '^' then xor_expr
    end
    # Drop the stashed left operand from the stack.
    x86_add(:esp, 4)
  end
end
# Parse the right-hand side of an "or": consume '|', translate the next
# boolean term into eax, then OR eax with the value on top of the stack.
def or_expr
  stacked_operand = '[esp]'
  match('|')
  boolean_term
  x86_or(:eax, stacked_operand)
end
# Parse the right-hand side of an "xor": consume '^', translate the next
# boolean term into eax, then XOR eax with the value on top of the stack.
def xor_expr
  stacked_operand = '[esp]'
  match('^')
  boolean_term
  x86_xor(:eax, stacked_operand)
end
# Parse and translate a boolean term: a not-factor followed by any number
# of '&' (and) operations.  The result is left in eax.
def boolean_term
  not_factor
  loop do
    break unless andop?

    x86_push(:eax)            # stash the left operand
    match('&')
    not_factor                # right operand -> eax
    x86_and(:eax, '[esp]')
    x86_add(:esp, 4)          # drop the stashed operand
  end
end
# Parse a boolean factor: either a boolean literal or a relation.
# A true literal loads -1 into eax, a false literal clears eax.
def boolean_factor
  return relation unless boolean?(@look)

  get_boolean ? x86_mov(:eax, -1) : x86_xor(:eax, :eax)
end
# Parse a boolean factor with an optional leading '!'.  When negated, the
# value is first normalised to -1 or 0 so that the bitwise not that
# follows behaves as a logical not.
def not_factor
  return boolean_factor unless @look == '!'

  match('!')
  boolean_factor
  make_boolean(:eax)
  x86_not(:eax)
end
# Normalise the value in +reg+ to a boolean: 0 stays 0, anything else
# becomes -1.  Semantically equivalent to !!reg in C or Ruby.
def make_boolean(reg=:eax)
  done = unique_label(:endmakebool)
  x86_cmp(reg, 0)
  x86_jz(done)          # already false: leave the 0 alone
  x86_mov(reg, -1)      # truthy: force the canonical true value
  emit_label(done)
end
# Consume a boolean literal from the input.  Reports an expected-boolean
# error when the lookahead is not a boolean character; returns true when
# the lookahead was 't', false otherwise.
def get_boolean
  boolean?(@look) || expected(:boolean)
  is_true = (@look == 't')
  get_char
  is_true
end
# Parse an expression optionally followed by one relational operator
# ('=', '#', '>' or '<') and a second expression.  With an operator, the
# left operand is pushed and the matching *_relation method pops it and
# leaves the comparison result in eax.
def relation
  expression
  if relop?
    x86_push(:eax)
    # `when x then y` instead of the Ruby 1.8-only `when x: y`, which no
    # longer parses on Ruby 1.9 and later.
    case @look
    when '=' then eq_relation
    when '#' then neq_relation
    when '>' then gt_relation
    when '<' then lt_relation
    # TODO ge, le (needs real tokens)
    end
  end
end
# Translate an equality test.  The difference of the two operands is
# normalised to 0 / -1 and then inverted, so eax is -1 when they are
# equal and 0 otherwise.
def eq_relation
  rhs, lhs = :eax, :ebx
  match('=')
  expression            # right operand -> eax
  x86_pop(lhs)          # previously pushed operand -> ebx
  x86_sub(rhs, lhs)
  make_boolean
  x86_not(rhs)
end
# Translate an inequality test.  The difference of the two operands is
# normalised, so eax is -1 when they differ and 0 when they are equal.
def neq_relation
  rhs, lhs = :eax, :ebx
  match('#')
  expression            # right operand -> eax
  x86_pop(lhs)          # previously pushed operand -> ebx
  x86_sub(rhs, lhs)
  make_boolean
end
# Translate a "greater than" test.  The previously pushed operand (a) is
# popped into ebx and the freshly parsed operand (b) is in eax; eax ends
# up -1 when a > b and 0 otherwise.
def gt_relation
  match('>')
  taken, done = unique_label(:gt), unique_label(:endgt)
  expression
  x86_pop(:ebx)
  x86_cmp(:eax, :ebx)   # b - a < 0 if a > b
  x86_jl(taken)
  x86_xor(:eax, :eax)   # false: eax = 0
  x86_jmp(done)
  emit_label(taken)
  x86_xor(:eax, :eax)   # true: eax = ~0
  x86_not(:eax)
  emit_label(done)
end
# Translate a "less than" test.  The previously pushed operand (a) is
# popped into ebx and the freshly parsed operand (b) is in eax; eax ends
# up -1 when a < b and 0 otherwise.
def lt_relation
  match('<')
  taken, done = unique_label(:lt), unique_label(:endlt)
  expression
  x86_pop(:ebx)
  x86_cmp(:ebx, :eax)   # a - b < 0 if a < b
  x86_jl(taken)
  x86_xor(:eax, :eax)   # false: eax = 0
  x86_jmp(done)
  emit_label(taken)
  x86_xor(:eax, :eax)   # true: eax = ~0
  x86_not(:eax)
  emit_label(done)
end
######################################
# statements and controls structures #
######################################
# Parse an assignment statement of the form name=<expression>.  The
# right-hand side is translated so that its value is in eax, the variable
# is defined if it has not been seen before, and eax is stored into the
# variable's dword.
# NOTE(review): both an `expression` / `var(name)` pair and a
# `boolean_expression` / `defvar` pair appear below -- presumably only one
# pair belongs in the final method; confirm against the real file.
def assignment
name = get_name
match('=')
expression
var(name)
boolean_expression
# Only reserve storage the first time this name is assigned.
defvar(name) unless var?(name)
x86_mov("dword [#{name}]", :eax)
end
@ -199,6 +345,9 @@ class Compiler
do_stmt
when 'b'
break_stmt(label)
when 'p'
print_stmt
newline
else
assignment
newline
@ -210,10 +359,10 @@ class Compiler
# Parse an if-else statement.
def if_else_stmt(label)
match('i')
condition
skip_any_whitespace
else_label = unique_label(:end_or_else)
end_label = else_label # only generated if else clause present
condition
skip_any_whitespace
x86_jz(else_label)
block(label)
if @look == 'l'
@ -278,12 +427,12 @@ class Compiler
end_label = unique_label(:endfor)
counter = "[#{get_name}]"
match('=')
expression # initial value
boolean_expression # initial value
x86_sub(:eax, 1) # pre-decrement because of the
# following pre-increment
x86_mov(counter, :eax) # stash the counter in memory
match('>'); match('>')
expression # final value
match('.'); match('.')
boolean_expression # final value
skip_any_whitespace
x86_push(:eax) # stash final value on stack
final = '[esp]'
@ -307,7 +456,7 @@ class Compiler
match('d')
start_label = unique_label(:do)
end_label = unique_label(:enddo)
expression
boolean_expression
skip_any_whitespace
x86_mov(:ecx, :eax)
x86_push(:ecx)
@ -335,11 +484,54 @@ class Compiler
# Parse a condition and compare its value in eax against 0, so the caller
# can branch on the flags: 0 is false, anything else is true.
# NOTE(review): the body shows both an `expression` + cmp pair and a
# `boolean_expression` + cmp pair -- presumably only the latter belongs in
# the final method; confirm against the real file.
def condition
expression
x86_cmp(:eax, 0) # 0 is false, anything else is true
boolean_expression
skip_whitespace
x86_cmp(:eax, 0) # 0 is false, anything else is true
end
# Parse a print statement ('p' followed by a boolean expression) and emit
# code that writes the value to stdout as "0x", eight hex digits and a
# newline.
def print_stmt
  match('p')

  # The first time a print statement is compiled, reserve the digit lookup
  # table and emit the code that fills it with "0123456789abcdef".
  unless var?('DIGITS')
    defvar('DIGITS', 4)
    [0x33323130, 0x37363534, 0x62613938, 0x66656463].each_with_index do |packed, i|
      slot = i.zero? ? 'dword [DIGITS]' : "dword [DIGITS+#{i * 4}]"
      x86_mov(slot, packed)
    end
  end

  # Output buffer: 3 dwords == 12 chars.
  var?('HEX') || defvar('HEX', 3)

  # TODO check sign and prepend '-' if negative
  x86_mov('word [HEX]', 0x7830)      # "0x" == [48, 120]
  x86_mov('word [HEX+10]', 0xa)      # newline + null terminator

  boolean_expression                 # value to print -> eax

  # Build the string backwards (right to left): one byte of eax, i.e. two
  # hex digits, per pass of the loop.
  x86_lea(:esi, '[DIGITS]')
  x86_lea(:edi, '[HEX+9]')
  x86_mov(:ecx, 4)
  next_byte = unique_label
  emit_label(next_byte)

  # low nybble of the current byte
  x86_movzx(:ebx, :al)
  x86_and(:bl, 0x0f)
  x86_movzx(:edx, 'byte [esi+ebx]')
  x86_mov('byte [edi]', :dl)
  x86_dec(:edi)

  # high nybble of the current byte
  x86_movzx(:ebx, :al)
  x86_and(:bl, 0xf0)
  x86_shr(:bl, 4)
  x86_mov(:dl, 'byte [esi+ebx]')
  x86_mov('byte [edi]', :dl)
  x86_dec(:edi)

  x86_shr(:eax, 8)                   # move on to the next byte
  x86_loop(next_byte)

  x86_mov(:eax, 4)                   # SYS_write
  x86_mov(:ebx, 1)                   # STDOUT
  x86_lea(:ecx, '[HEX]')
  x86_mov(:edx, 11)                  # excluding term, max # of chars to print
  x86_int(0x80)
end
############
@ -359,6 +551,17 @@ class Compiler
@look == '*' || @look == '/'
end
# True when the lookahead character is a relational operator.
def relop?
  ['=', '#', '<', '>'].include?(@look)
end

# True when the lookahead character is an or-level operator (or / xor).
def orop?
  ['|', '^'].include?(@look)
end

# True when the lookahead character is the and operator.
def andop?
  '&' == @look
end
# Read the next character from the input stream.
def get_char
@ -403,6 +606,10 @@ class Compiler
alpha?(char) || digit?(char)
end
# True when +char+ starts a boolean literal ('t' or 'f').
def boolean?(char)
  ['t', 'f'].include?(char)
end

# True when +char+ is a space or a tab.
def whitespace?(char)
  [' ', "\t"].include?(char)
end
@ -451,7 +658,7 @@ class Compiler
end
# Parse a number.
def get_num
def get_number
expected(:integer) unless digit?(@look)
many(method(:digit?))
end
@ -473,15 +680,23 @@ class Compiler
end
# Define a variable with the given name and size (in dwords).
def var(name, dwords=1)
unless @vars[name]
def defvar(name, dwords=1)
unless var?(name)
@bss << "#{name}: resd #{dwords}\n"
@vars[name] = name
# else
# raise ParseError, "identifier #{name} redefined"
else
STDERR.puts "[warning] attempted to redefine #{name}"
end
end
# Look +name+ up in @vars; the result is truthy when the variable has been
# recorded there.
def var?(name)
  @vars[name]
end

# Return the @vars entry stored for +name+.
def var(name)
  @vars[name]
end
# Emit a line of code wrapped between a tab and a newline.
def emit(code, options={})
tab = options.has_key?(:tab) ? options[:tab] : "\t"
@ -506,7 +721,11 @@ class Compiler
# Some asm methods for convenience and arity checks.

# Emit a single mov instruction.  Integer sources are written as hex
# immediates; a negative value keeps its sign in front of the prefix
# ("-0x1"), because formatting it directly would yield the malformed
# literal "0x-1".  Any other source (register symbol, memory operand or
# numeric string) is emitted unchanged.
def x86_mov(dest, src)
  operand =
    if src.is_a?(Integer)
      src < 0 ? "-0x#{(-src).to_s(16)}" : "0x#{src.to_s(16)}"
    else
      src
    end
  emit("mov #{dest}, #{operand}")
end
# Emit a movzx (move with zero extension) from +src+ into +dest+.
def x86_movzx(dest, src)
  emit(format('movzx %s, %s', dest, src))
end
def x86_add(dest, src)
@ -529,10 +748,18 @@ class Compiler
emit("inc #{op}")
end
# Emit a dec of the given operand.
def x86_dec(op)
  emit(format('dec %s', op))
end

# Emit a push of the given operand.
def x86_push(reg)
  emit(format('push %s', reg))
end

# Emit a pop into the given operand.
def x86_pop(reg)
  emit(format('pop %s', reg))
end

# Emit a call to the given label.
def x86_call(label)
  emit(format('call %s', label))
end
@ -541,10 +768,22 @@ class Compiler
emit("neg #{reg}")
end
# Emit a bitwise not of the given operand.
def x86_not(rm32)
  emit(format('not %s', rm32))
end

# Emit an xchg of the two operands.
def x86_xchg(op1, op2)
  emit(format('xchg %s, %s', op1, op2))
end

# Emit a bitwise and of op2 into op1.
def x86_and(op1, op2)
  emit(format('and %s, %s', op1, op2))
end

# Emit a bitwise or of op2 into op1.
def x86_or(op1, op2)
  emit(format('or %s, %s', op1, op2))
end

# Emit a bitwise xor of op2 into op1.
def x86_xor(op1, op2)
  emit(format('xor %s, %s', op1, op2))
end
@ -561,11 +800,27 @@ class Compiler
emit("jmp #{label}")
end
# Emit a jl (jump if less) to the given label.
def x86_jl(label)
  emit(format('jl %s', label))
end

# Emit a cmp of the two operands.
def x86_cmp(a, b)
  emit(format('cmp %s, %s', a, b))
end

# Emit a lea of operand b into a.
def x86_lea(a, b)
  emit(format('lea %s, %s', a, b))
end

# Emit a shr (shift right) of a by b.
def x86_shr(a, b)
  emit(format('shr %s, %s', a, b))
end

# Emit a loop instruction targeting the given label.
def x86_loop(label)
  emit(format('loop %s', label))
end

# Emit a software interrupt; the interrupt number is written in hex.
def x86_int(num)
  emit(format('int 0x%s', num.to_s(16)))
end
end

View file

@ -1,4 +1,4 @@
a=9
a=1
aa=10
somethinglong=65536
x=5*(3-5)
@ -19,7 +19,7 @@ i1x=3
i1c=4
ee
i 1
i 1 > 2
x=3
i 1 c=4
e
@ -28,25 +28,28 @@ l
e
w 0
a = a + 1
w a < 10
a = a + 1
e
e
u 1
u a = 0
a = a - 1
i 1
i t
b
e
e
cc = c
r
cc = c * 2
i 1
cc = cc * 2
i cc = 32
b
e
e
s=0
f x = 1 >> 5
f x = 1 .. 5
s = s + x
e
@ -54,4 +57,4 @@ d 10
a = a * a
e
xitcode=a-a
xitcode=(1 < 1)

View file

@ -32,7 +32,7 @@ def main(arg)
data, bss, code = *parse(input)
template = File.read("template.asm")
asm = interpolate(template, :data => data, :bss => bss, :code => code)
File.open("test.asm", "w") { |f| f.puts(asm) }
STDOUT.puts(asm)
end
main(ARGV[0].to_s)

54
test_lt.asm Normal file
View file

@ -0,0 +1,54 @@
GLOBAL _start
SECTION .data
SECTION .bss
z: resd 1
x: resd 1
y: resd 1
SECTION .text
_start:
mov eax, 1
push eax
mov eax, 1
pop ebx
cmp ebx, eax
jl L000001_lt_1
xor eax, eax
jmp L000002_endlt_1
L000001_lt_1:
xor eax, eax
not eax
L000002_endlt_1:
mov dword [z], eax
mov eax, 1
push eax
mov eax, 3
pop ebx
cmp ebx, eax
jl L000003_lt_2
xor eax, eax
jmp L000004_endlt_2
L000003_lt_2:
xor eax, eax
not eax
L000004_endlt_2:
mov dword [x], eax
mov eax, 3
push eax
mov eax, 1
pop ebx
cmp ebx, eax
jl L000005_lt_3
xor eax, eax
jmp L000006_endlt_3
L000005_lt_3:
xor eax, eax
not eax
L000006_endlt_3:
mov dword [y], eax
;; The result in eax is the exit code, move it to ebx.
mov ebx, eax
mov eax, 1 ; _exit syscall
int 0x80 ; call Linux

3
test_lt.code Normal file
View file

@ -0,0 +1,3 @@
x = 1 < 3
y = 3 < 1
z = 1 < 1

199
test_print.asm Normal file
View file

@ -0,0 +1,199 @@
GLOBAL _start
SECTION .data
SECTION .bss
DIGITS: resd 4
HEX: resd 3
xitcode: resd 1
SECTION .text
_start:
mov dword [DIGITS], 0x33323130
mov dword [DIGITS+4], 0x37363534
mov dword [DIGITS+8], 0x62613938
mov dword [DIGITS+12], 0x66656463
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 0
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000001:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000001
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 1
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000002:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000002
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 1
neg eax
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000003:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000003
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 123
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000004:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000004
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 123
neg eax
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000005:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000005
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 4096
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000006:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000006
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov word [HEX], 0x7830
mov word [HEX+10], 0xa
mov eax, 4096
neg eax
lea esi, [DIGITS]
lea edi, [HEX+9]
mov ecx, 0x4
L000007:
movzx ebx, al
and bl, 15
movzx edx, byte [esi+ebx]
mov byte [edi], dl
dec edi
movzx ebx, al
and bl, 240
shr bl, 4
mov dl, byte [esi+ebx]
mov byte [edi], dl
dec edi
shr eax, 8
loop L000007
mov eax, 0x4
mov ebx, 0x1
lea ecx, [HEX]
mov edx, 0xb
int 0x80
mov eax, 0
mov dword [xitcode], eax
;; The result in eax is the exit code, move it to ebx.
mov ebx, eax
mov eax, 1 ; _exit syscall
int 0x80 ; call Linux

8
test_print.code Normal file
View file

@ -0,0 +1,8 @@
p 0
p 1
p -1
p 123
p -123
p 4096
p -4096
xitcode=0