; sectorforth - a 512-byte, bootable x86 Forth. ; Copyright (c) 2020 Cesar Blum ; Distributed under the MIT license. See LICENSE for details. ; ; sectorforth is a 16-bit x86 Forth that fits entirely within a ; boot sector (512 bytes). ; ; It's a direct threaded Forth, with SI acting as the Forth ; instruction pointer. Words are executed using LODSW to advance ; SI and load the next word's address into AX, which is then ; jumped to. ; ; The SP register is used as the data stack pointer, and the BP ; register acts as the return stack pointer. ; ; The minimum CPU required to run sectorforth is the 386, to use ; the SETNZ instruction. bits 16 cpu 386 ; Set CS to a known value by performing a far jump. Memory up to ; 0x0500 is used by the BIOS. Setting the segment to 0x0500 gives ; sectorforth an entire free segment to work with. jmp 0x0050:start ; On x86, the boot sector is loaded at 0x7c00 on boot. In segment ; 0x0500, that's 0x7700 (0x0500 << 4 + 0x7700 == 0x7c00). org 0x7700 ; Define constants for the memory map. Everything is organized ; within a single 64 KB segment. TIB is placed at 0x0000 to ; simplify input parsing (the Forth variable >IN ends up being ; also a pointer into TIB, so there's no need to add >IN to TIB ; to get a pointer to the parse area). TIB is 4 KB long. TIB equ 0x0000 ; terminal input buffer (TIB) STATE equ 0x1000 ; current state (0=interpret, 1=compile) TOIN equ 0x1002 ; current read offset into TIB (>IN) RP0 equ 0x76fe ; bottom of return stack SP0 equ 0xfffe ; bottom of data stack ; Each dictionary entry is laid out in memory as such: ; ; *--------------*--------------*--------------*--------------* ; | Link pointer | Flags+Length | Name... | Code... | ; *--------------*--------------*--------------*--------------* ; 2 bytes 1 byte Length bytes Variable ; ; Flags IMMEDIATE and HIDDEN are used in assembly code. Room is ; left for an additional, user-defined flag, so word names are ; limited to 32 characters. F_IMMEDIATE equ 0x80 F_HIDDEN equ 0x40 LENMASK equ 0x1f ; Each dictionary entry needs a link to the previous entry. The ; initlink macro uses the nasm context stack to push a context and ; define a context-local macro called %$link that represents the ; end of the dictionary. %macro initlink 0 %push %define %$link 0 %endmacro initlink ; expand initlink so %$link is initialized ; The link macro links dictionary entries by pushing a new context ; and redefining %$link as a context-local label at the current ; location (address) in the code. It then writes a 16-bit data ; word with the value of %$link in the previous context (i.e. the ; previous expansion of link), effectively writing a link field to ; the previous location. %macro link 0 %push %$link: dw %$$link %endmacro ; defword lays out a dictionary entry where it is expanded. %macro defword 3-4 0 ; name, length, label, flags word_%3: link ; link to previous word db %4+%2 ; flags+length db %1 ; name %3: ; code starts here %endmacro ; NEXT advances execution to the next word. The actual code is ; placed further ahead for strategic reasons. The macro has to be ; defined here, since it's used in the words defined ahead. %define NEXT jmp next ; sectorforth has only eight primitive words, with which ; everything else can be built in Forth: ; ; @ ( addr -- x ) Fetch memory at addr ; ! ( x addr -- ) Store x at addr ; sp@ ( -- addr ) Get current data stack pointer ; rp@ ( -- addr ) Get current return stack pointer ; 0= ( x -- f ) -1 if top of stack is 0, 0 otherwise ; + ( x1 x2 -- n ) Add the two values at the top of the stack ; nand ( x1 x2 -- n ) NAND the two values at the top of the stack ; exit ( r:addr -- ) Resume execution at address at the top of ; the return stack defword "@",1,FETCH pop bx push word [bx] NEXT defword "!",1,STORE pop bx pop word [bx] NEXT defword "sp@",3,SPFETCH push sp NEXT defword "rp@",3,RPFETCH push bp NEXT defword "0=",2,ZEROEQUALS pop ax test ax,ax setnz al ; AL=0 if ZF=1, else AL=1 dec ax ; AL=ff if AL=0, else AL=0 cbw ; AH=AL push ax NEXT defword "+",1,PLUS pop bx pop ax add ax,bx push ax NEXT defword "nand",4,NAND pop bx pop ax and ax,bx not ax push ax NEXT defword "exit",4,EXIT xchg sp,bp ; swap SP and BP, SP controls return stack pop si ; pop address to next word xchg sp,bp ; restore SP and BP NEXT ; Besides primitives, a few variables are exposed to Forth code: ; TIB, STATE, >IN, HERE, and LATEST. With sectorforth's >IN being ; both an offset and a pointer into TIB (as TIB starts at 0x0000), ; TIB could be left out. But it is exposed so that sectorforth ; code that accesses the parse area can be written in an idiomatic ; fashion (e.g. TIB >IN @ +). defword "tib",3,TIBVAR push word TIB NEXT defword "state",5,STATEVAR push word STATE NEXT defword ">in",3,TOINVAR push word TOIN NEXT ; Strategically define next here so most jumps to it are short, ; saving extra bytes that would be taken by near jumps. next: lodsw ; load next word's address into AX jmp ax ; jump directly to it ; Words and data space for the HERE and LATEST variables. defword "here",4,HEREVAR push word HERE NEXT HERE: dw start_HERE defword "latest",6,LATESTVAR push word LATEST NEXT LATEST: dw word_SEMICOLON ; initialized to last word in dictionary ; Define a couple of I/O primitives to make things interactive. ; They can also be used to build a richer interpreter loop. ; ; KEY waits for a key press and pushes its scan code (AH) and ; ASCII character (AL) to the stack, both in a single cell. defword "key",3,KEY mov ah,0 int 0x16 push ax NEXT ; EMIT writes to the screen the ASCII character corresponding to ; the lowest 8 bits of the value at the top of the stack. defword "emit",4,EMIT pop ax call writechar NEXT ; The colon compiler reads a name from the terminal input buffer, ; creates a dictionary entry for it, writes machine code to jump ; to DOCOL, updates LATEST and HERE, and switches to compilation ; state. defword ":",1,COLON call token ; parse word from input push si mov si,di ; set parsed word as string copy source mov di,[HERE] ; set current value of HERE as destination mov ax,[LATEST] ; get pointer to latest defined word mov [LATEST],di ; update LATEST to new word being defined stosw ; link pointer mov al,cl or al,F_HIDDEN ; hide new word while it's being defined stosb ; word length rep movsb ; word name mov ax,0x26ff stosw ; compile near jump, absolute indirect... mov ax,DOCOL.addr stosw ; ...to DOCOL mov [HERE],di ; update HERE to next free position mov byte [STATE],1 ; switch to compilation state pop si NEXT ; DOCOL sets up and starts execution of a user-defined words. ; Those differ from words defined in machine code by being ; sequences of addresses to other words, so a bit of code is ; needed to save the current value of SI (this Forth's instruction ; pointer), and point it to the sequence of addresses that makes ; up a word's body. ; ; DOCOL advances AX 4 bytes, and then moves that value to SI. When ; DOCOL is jumped to, AX points to the code field of the word ; about to be executed. The 4 bytes being skipped are the actual ; jump instruction to DOCOL itself, inserted by the colon compiler ; when it creates a new entry in the dictionary. DOCOL: xchg sp,bp ; swap SP and BP, SP controls return stack push si ; push current "instruction pointer" xchg sp,bp ; restore SP and BP add ax,4 ; skip word's code field mov si,ax ; point "instruction pointer" to word body NEXT ; start executing the word ; The jump instruction inserted by the compiler is an indirect ; jump, so it needs to read the location to jump to from another ; memory location. .addr: dw DOCOL ; Semicolon is the only immediate primitive. It writes the address ; of EXIT to the end of a new word definition, makes the word ; visible in the dictionary, and switches back to interpretation ; state. defword ";",1,SEMICOLON,F_IMMEDIATE mov bx,[LATEST] and byte [bx+2],~F_HIDDEN ; reveal new word mov byte [STATE],0 ; switch to interpretation state mov ax,EXIT ; prepare to compile EXIT compile: mov di,[HERE] stosw ; compile contents of AX to HERE mov [HERE],di ; advance HERE to next cell NEXT ; Execution starts here. start: cld ; clear direction flag ; Set up segment registers to point to the same segment as CS. push cs push cs push cs pop ds pop es pop ss ; Skip error signaling on initialization jmp init ; Display a red '!!' to let the user know an error happened and the ; interpreter is being reset error: mov ax,0x0921 ; write '!' mov bx,0x0004 ; black background, red text mov cx,2 ; twice int 0x10 ; Initialize stack pointers, state, and terminal input buffer. init: mov bp,RP0 ; BP is the return stack pointer mov sp,SP0 ; SP is the data stack pointer ; Fill TIB with zeros, and set STATE and >IN to 0 mov al,0 mov cx,STATE+4 mov di,TIB rep stosb ; Enter the interpreter loop. ; ; Words are read one at time and searched for in the dictionary. ; If a word is found in the dictionary, it is either interpreted ; (i.e. executed) or compiled, depending on the current state and ; the word's IMMEDIATE flag. ; ; When a word is not found, the state of the interpreter is reset: ; the data and return stacks are cleared as well as the terminal ; input buffer, and the interpreter goes into interpretation mode. interpreter: call token ; parse word from input mov bx,[LATEST] ; start searching for it in the dictionary .1: test bx,bx ; zero? jz error ; not found, reset interpreter state mov si,bx lodsw ; skip link lodsb ; read flags+length mov dl,al ; save those for later use test al,F_HIDDEN ; entry hidden? jnz .2 ; if so, skip it and al,LENMASK ; mask out flags cmp al,cl ; same length? jne .2 ; if not, skip entry push cx push di repe cmpsb ; compare strings pop di pop cx je .3 ; if equal, search is over .2: mov bx,[bx] ; skip to next entry jmp .1 ; try again .3: mov ax,si ; after comparison, SI points to code field mov si,.loop ; set SI so NEXT loops back to interpreter ; Decide whether to interpret or compile the word. The IMMEDIATE ; flag is located in the most significant bit of the flags+length ; byte. STATE can only be 0 or 1. When ORing those two, these are ; the possibilities: ; ; IMMEDIATE STATE OR ACTION ; 0000000 0000000 00000000 Interpret ; 0000000 0000001 00000001 Compile ; 1000000 0000000 10000000 Interpret ; 1000000 0000001 10000001 Interpret ; ; A word is only compiled when the result of that OR is 1. ; Decrementing that result sets the zero flag for a conditional ; jump. and dl,F_IMMEDIATE ; isolate IMMEDIATE flag or dl,[STATE] ; OR with state dec dl ; decrement jz compile ; if result is zero, compile jmp ax ; otherwise, interpret (execute) the word .loop: dw interpreter ; Parse a word from the terminal input buffer and return its ; address and length in DI and CX, respectively. ; ; If after skipping spaces a 0 is found, more input is read from ; the keyboard into the terminal input buffer until return is ; pressed, at which point execution jumps back to the beginning of ; token so it can attempt to parse a word again. ; ; Before reading input from the keyboard, a CRLF is emitted so ; the user can enter input on a fresh, blank line on the screen. token: mov di,[TOIN] ; starting at the current position in TIB mov cx,-1 ; search "indefinitely" mov al,32 ; for a character that's not a space repe scasb dec di ; result is one byte past found character cmp byte [di],0 ; found a 0? je .readline ; if so, read more input mov cx,-1 ; search "indefinitely" again repne scasb ; this time, for a space dec di ; adjust DI again mov [TOIN],di ; update current position in TIB not cx ; after ones' complement, CX=length+1 dec cx ; adjust CX to correct length sub di,cx ; point to start of parsed word ret .readline: mov al,13 call writechar ; CR mov al,10 call writechar ; LF mov di,TIB ; read into TIB .1: mov ah,0 ; wait until a key is pressed int 0x16 cmp al,13 ; return pressed? je .3 ; if so, finish reading cmp al,8 ; backspace pressed? je .2 ; if so, erase character call writechar ; otherwise, write character to screen stosb ; store character in TIB jmp .1 ; keep reading .2: cmp di,TIB ; start of TIB? je .1 ; if so, there's nothing to erase dec di ; erase character in TIB call writechar ; move cursor back one character mov ax,0x0a20 ; erase without moving cursor mov cx,1 int 0x10 ; (BH already set to 0 by writechar) jmp .1 ; keep reading .3: mov ax,0x0020 stosw ; put final delimiter and 0 in TIB call writechar ; write a space between user input and ; execution output mov word [TOIN],0 ; point >IN to start of TIB jmp token ; try parsing a word again ; writechar writes a character to the screen. It uses INT 10/AH=0e ; to perform teletype output, writing the character, updating the ; cursor, and scrolling the screen, all in one go. Writing ; backspace using the BIOS only moves the cursor backwards within ; a line, but does not move it back to the previous line. ; writechar addresses that. writechar: mov bh,0 ; video page 0 for all BIOS calls mov ah,3 ; get cursor position (DH=row, DL=column) int 0x10 mov ah,0x0e ; teletype output mov bl,0x7 ; black background, light grey text int 0x10 cmp al,8 ; backspace? jne .1 ; if not, nothing else to do test dl,dl ; was cursor in first column? jnz .1 ; if not, nothing else to do mov ah,2 ; move cursor mov dl,79 ; to last column dec dh ; of previous row int 0x10 .1: ret times 510-($-$$) db 0 db 0x55, 0xaa ; New dictionary entries will be written starting here. start_HERE: