; bf.asm - Tiny BrainFuck compiler (128 bytes)
;
; Robert Oestling 
; http://www.robos.org/

; nasm -f bin bf.asm -o bf.com
; bf <file.bf >file.com
;

; The length of the output file is fixed; programs larger than 19201 bytes
; will NOT run very well. Neither will programs using more than 510 bytes of
; memory. Maximum length of jumps is about 128 bytes. Unlike certain other
; implementations, I do check for EOF. 8086 compatible code (I hope).
;
; Here are some other more or less tiny brainfuck compilers:
;
; Name              Size  Comments
; ==========================================================================
; Jeffry Johnston   123   '@' is EOF. Cheater. 16-bit jumps.
; INT-E             136   16-bit jumps, variable output size. 386+ - Cheater.
; Ben Olmstead      331   16-bit jumps (I think). 286+
; ==========================================================================


[section .text]

        org     0x100

;-----------------------------------------------------------------------
; main - compiler entry point (DOS .COM image, org 0x100).
;
; Reads Brainfuck source from stdin one byte at a time and appends the
; machine code for each command after the 6 bytes of runtime
; initialization code at code_buffer. On EOF a "ret" is appended and a
; fixed-size image starting at code_buffer is written to stdout.
;
; Register roles while compiling:
;   di    = next free byte of compiled output; also the 1-byte read buffer
;   bx    = 1 after every read (stdout handle, and the +1 patch offset
;           used by .loop_end)
;   cx    = read count; only cl is reloaded before each read, so ch has
;           to be 0 there (every path back to .read_byte leaves ch = 0;
;           the very first read assumes ch = 0 at program entry --
;           TODO confirm against the DOS entry state)
;   stack = addresses of the jump placeholders of still-open '[' loops
;
; LAYOUT DEPENDENCY: two bytes of this routine double as code templates
; and are referenced from `list` by hard-coded offsets:
;   0x10d  "inc bx"                                -> '>'
;   0x119  0x4b (immediate of "mov ch,0x4b")       -> '<'
; If the size of any instruction before those points changes, update
; the matching offsets in `list`.
;-----------------------------------------------------------------------
main:
        mov     di,code_buffer+6        ; Output starts after the init code.
.read_byte:
        xor     bx,bx                   ; Handle 0 = stdin.
        mov     ah,0x3f                 ; DOS: read from file.
        mov     cl,1                    ; One byte (ch is 0 here).
        mov     dx,di                   ; Read straight into the output slot.
        int     0x21
        inc     bx                      ; bx = 1 (stdout). Template for '>'.
        dec     ax                      ; ax = bytes read - 1.
        jz      .not_eof                ; Exactly one byte read: compile it.

        ; EOF (or anything other than one byte read): finish and write out.
        mov     ax,0x40c3               ; al = 0xc3 ("ret"), ah = 0x40 (write).
        stosb                           ; Compile the final "ret".
        ; Load dx in full: it still holds di from the last read, so setting
        ; only the low byte would leave a wrong high byte as soon as the
        ; compiled code has grown past 0x1ff.
        mov     dx,code_buffer          ; Start of the image to write.
        mov     ch,0x4b                 ; cx = 0x4b01 bytes to write. The byte
                                        ; 0x4b is also the "dec bx" template
                                        ; for '<'.
        int     0x21                    ; Write the image to stdout (bx = 1).
        ret                             ; Quit.
.not_eof:
        cmp     byte[di],'['
        jz      .loop_start
        cmp     byte[di],']'
        jz      .loop_end
        mov     si,list                 ; Walk the 3-byte template entries.
.next_try:
        lodsw
        xchg    ax,cx                   ; cl = template size, ch = command char.
        lodsb                           ; al = template address (low 8 bits).
        mov     ah,1                    ; High 8 bits of address always 1.
        or      ch,ch                   ; Terminator (command char 0)?
        jz      .read_byte              ; Yes: not a command, treat as comment.
                                        ; di is unchanged, so the next read
                                        ; overwrites the ignored character.
        cmp     ch,[di]                 ; Does this entry match the input byte?
        jnz     .next_try               ; No, try the next entry.
.found:
        xchg    ax,si                   ; si = address of the template.
        mov     ch,0                    ; cx = cl (length of template).
        rep     movsb                   ; Copy it over the input byte at di.
        jmp     short .read_byte

.loop_start:
        push    di                      ; Remember where the jump is compiled.
        mov     al,0xeb                 ; Opcode of "jmp short".
        stosw                           ; Emit opcode + displacement byte; the
        jmp     short .read_byte        ; displacement is patched by .loop_end.
.loop_end:
        pop     ax                      ; ax = address of the matching '['
        push    ax                      ; placeholder; keep a copy on the stack.
        mov     si,code_loop            ; "cmp [bx],ch" + jnz opcode.
        movsw                           ; Emit the 2-byte cmp.
        sub     ax,di                   ; ax = placeholder - (cmp address + 2).
        movsb                           ; Emit the jnz opcode.
        stosb                           ; Emit its displacement: jumps back to
                                        ; placeholder + 2, i.e. the loop body.
        neg     al
        sub     al,4                    ; al = cmp address - (placeholder + 2):
                                        ; forward displacement to the cmp.
        pop     si                      ; si = address of the placeholder.
        mov     byte[si+bx],al          ; Patch byte at placeholder + 1
                                        ; (bx is always 1 here).
        jmp     short .read_byte

; Code templates for '+', '-' and '.'. main never falls through into them
; (the preceding code ends in an unconditional jump); they are only copied
; into the generated program by the "rep movsb" in main. In the generated
; program bx is the cell pointer (set up by the code at code_buffer).
; Their sizes and low address bytes are listed in `list`, so the encodings
; and positions of these instructions must stay in step with that table.
code_inc:
        inc     byte[bx]                ; '+' : 2 bytes, listed at offset 0x57.
code_dec:
        dec     byte[bx]                ; '-' : 2 bytes, listed at offset 0x59.
code_dot:
        mov     al,[bx]                 ; '.' : 4 bytes, listed at offset 0x5b.
        int     0x29                    ; Output the character in al
                                        ; (DOS fast console output).


; Template table searched by main, one 3-byte entry per command:
;   byte 0 = length of the template in bytes
;   byte 1 = Brainfuck command character
;   byte 2 = low byte of the template's address (high byte is always 1;
;            with the image loaded at 0x100 this equals the offset from
;            the start of the section, $$)
; The table has no explicit terminator: the search stops when it reads a
; zero command character, which is supplied by the second byte of the
; instruction that immediately follows the table.
list:
        ; '>' and '<' reuse single bytes inside main that carry no label,
        ; so their offsets are given numerically.
        db      1, '>', 0x0d
        db      1, '<', 0x19
        ; The remaining entries are derived from the template labels.
        db      code_dec - code_inc, '+', code_inc - $$
        db      code_dot - code_dec, '-', code_dec - $$
        db      list - code_dot, '.', code_dot - $$
        db      code_loop - code_comma, ',', code_comma - $$
; Template for ',' (6 bytes, listed in `list` at offset 0x71), followed by
; the 3-byte prefix of the code compiled for ']'.
; This block must directly follow `list`: when main's table search runs
; past the last entry, the two bytes of "mov ah,0x00" are read as an entry
; whose command character is 0, which ends the search.
code_comma:
        mov     ah,0x00                 ; 00 shows that the list is over.
        int     0x16                    ; Read a key; character returned in al
                                        ; (BIOS keyboard service).
        mov     [bx],al                 ; Store it in the current cell.
; Loop-end test copied by main's .loop_end: the compare plus the jnz opcode.
; main appends the jump displacement itself.
code_loop:
        cmp     [bx],ch                 ; ch is always 0 run-time.
        db      0x75                    ; jnz ...

; Start of the generated program image. These three instructions (6 bytes)
; are the first thing written to the output file; main appends the compiled
; Brainfuck code directly after them (it starts at code_buffer+6).
; NOTE(review): the register values quoted below (bl = 0, cx = 0x00ff, and
; a zero in ax for stosw to store) are the generated program's entry state
; as assumed by the original author -- confirm for the target DOS.
code_buffer:                            ; Runtime initialization code:
        mov     bh,0x7f                 ; bx = 0x7f00 (middle of segment)
        mov     di,bx                   ; di = 0x7f00
        rep     stosw                   ; cx = 0x00ff, only 1/2kB is zeroed

