; AES-256 implementation for 6502 (ACME assembler syntax)
; Robert Oestling
; http://www.robos.org/
;
; Target: Commodore 64 .prg (see "!to ...,cbm" and the BASIC stub below).
;
; The program builds its lookup tables at run time, installs a test key
; (bytes $00..$1f) and a test block (bytes $00..$0f), expands the key,
; runs `encrypt` eight times over the same block (each pass encrypts the
; previous pass's output in place) and finally writes the 16 result bytes
; as 32 hex digits to SCREEN.
;
; Routines (none take register arguments; all clobber A, X, Y and flags):
;   gentabs  - build sbox / ssm0 / ssm3                (scratch: tmp+0, tmp+1)
;   expand   - expkey[0..$1f] -> expkey[0..$ef]        (scratch: tmp+0, tmp+1)
;   encrypt  - aesblock := AES-256 encryption of aesblock under expkey
;                                                      (scratch: tmpblock)

        !to "aes.prg",cbm
        !convtab scr

        * = $0801
        ; BASIC stub "10 SYS2061"; 2061 = $080d, the first byte after
        ; these 12 stub bytes, i.e. the address of `main`.
        !byte $0c,$08,$0a,$00,$9e,$32,$30,$36,$31,$00,$00,$00

; ---------------------------------------------------------------------
; Sizes and constants
; ---------------------------------------------------------------------
KEY_BYTES       = $20                   ; cipher key length
HALF_KEY_BYTES  = $10                   ; offset of the SubWord-only word
BLOCK_BYTES     = $10                   ; block / round key length
EXPKEY_BYTES    = $f0                   ; expanded key: 15 round keys
PREV_WORD       = KEY_BYTES - 4         ; $1c: word preceding the new one
LAST_KEY        = EXPKEY_BYTES - BLOCK_BYTES    ; $e0: final round key
GF_POLY         = $1b                   ; reduction applied after asl carry
SBOX_AFFINE     = $63                   ; constant XORed into each sbox entry
SCREEN          = $0400                 ; hex output goes here (C64 default
                                        ; screen RAM)

; ---------------------------------------------------------------------
; Memory map
;
; exptab/logtab/invtab are only needed while gentabs runs; the final
; tables deliberately reuse the same pages:
;   sbox overwrites exptab, ssm0 overwrites logtab, ssm3 overwrites invtab.
; ssm1 and ssm2 are both aliases for sbox (multiplication by 1).
; ---------------------------------------------------------------------
data_area       = $2000
expkey          = data_area + $0000     ; f0 bytes
aesblock        = data_area + $00f0     ; 10 bytes

exptab          = data_area + $0100
logtab          = data_area + $0200
invtab          = data_area + $0300

sbox            = data_area + $0100
ssm0            = data_area + $0200     ; 2 * sbox[x] in GF(2^8)
ssm1            = sbox
ssm2            = sbox
ssm3            = data_area + $0300     ; 3 * sbox[x] in GF(2^8)

tmpblock        = $e0                   ; zero page, 16 bytes of round state
tmp             = $f0                   ; zero page, 2 scratch bytes


; =====================================================================
; main - test driver
; =====================================================================
!zone main
main
        ; Interrupts stay disabled for the rest of the program.
        ; NOTE(review): presumably because zero page $e0-$f1 is used as
        ; scratch; the source does not say - confirm.
        sei
        jsr gentabs

        ; Test key: expkey[i] = i for i = $1f..$00
        ldx #KEY_BYTES-1
.setkey
        txa
        sta expkey,x
        dex
        bpl .setkey

        ; Test block: aesblock[i] = i for i = $0f..$00
        ldx #BLOCK_BYTES-1
.setdata
        txa
        sta aesblock,x
        dex
        bpl .setdata

        jsr expand

        ; Eight back-to-back encryptions of the same buffer.
        jsr encrypt
        jsr encrypt
        jsr encrypt
        jsr encrypt
        jsr encrypt
        jsr encrypt
        jsr encrypt
        jsr encrypt

        ; Print aesblock as hex. Y is the screen column and is advanced by
        ; two inside printbyte, so the byte index is Y/2.
        ldy #0
.print
        tya
        lsr
        tax
        lda aesblock,x
        jsr printbyte
        cpy #2*BLOCK_BYTES
        bne .print

        jmp *                           ; done: spin forever

; ---------------------------------------------------------------------
; printbyte - write A as two hex digits to SCREEN+Y and SCREEN+Y+1
; In:    A = byte, Y = screen offset
; Out:   Y = Y + 2
; Clobb: A, X, flags
; ---------------------------------------------------------------------
printbyte
        pha                             ; keep the byte for the low nibble
        lsr
        lsr
        lsr
        lsr
        tax
        lda .hextab,x                   ; high nibble
        sta SCREEN,y
        iny
        pla
        and #$0f
        tax
        lda .hextab,x                   ; low nibble
        sta SCREEN,y
        iny
        rts

.hextab
        !tx "0123456789abcdef"


; =====================================================================
; gentabs - build the lookup tables used by expand and encrypt
;
; Steps, each a full 256-iteration loop that ends with Y wrapped to 0:
;   1. exptab[0] = 1, exptab[y] = 3 * exptab[y-1]   (GF(2^8) multiply)
;   2. logtab[exptab[y]] = y for y = 1..255, logtab[0] = 0
;   3. invtab[y] = exptab[255 - logtab[y]] for y = 1..255, invtab[0] = 0
;   4. sbox[y] = b ^ rol(b,1) ^ rol(b,2) ^ rol(b,3) ^ rol(b,4) ^ SBOX_AFFINE
;      with b = invtab[y]                 (overwrites exptab)
;   5. ssm0[y] = 2 * sbox[y], ssm3[y] = 3 * sbox[y]
;                                         (overwrite logtab and invtab)
;
; Clobb: A, X, Y, flags, tmp+0, tmp+1
; =====================================================================
!zone gentabs
gentabs
        lda #1
        sta exptab+0
        tay                             ; Y = 1
.genexp
        ; A holds exptab[y-1] on entry to every iteration.
        asl                             ; A = 2 * exptab[y-1] ...
        bcc .genexp_noreduce
        eor #GF_POLY                    ; ... reduced if bit 7 shifted out
.genexp_noreduce
        eor exptab-1,y                  ; 2*e ^ e = 3*e
        sta exptab,y
        iny
        bne .genexp

        sty logtab+0                    ; Y = 0 here
        iny
.genlog
        ldx exptab,y
        tya
        sta logtab,x
        iny
        bne .genlog

        sty invtab+0                    ; Y = 0 here
        iny
.geninv
        sec
        lda #$ff
        sbc logtab,y                    ; A = 255 - log(y)
        tax
        lda exptab,x
        sta invtab,y
        iny
        bne .geninv

        ; Y = 0 here
.gensbox
        lda invtab,y
        sta tmp+0                       ; tmp+0: value being rotated
        eor #SBOX_AFFINE
        sta tmp+1                       ; tmp+1: XOR accumulator
        ldx #4
.rotfour
        lda tmp+0
        cmp #$80                        ; carry := bit 7, so that ...
        rol                             ; ... rol is an 8-bit rotate left
        sta tmp+0
        eor tmp+1
        sta tmp+1
        dex
        bne .rotfour
        sta sbox,y                      ; A still holds tmp+1
        iny
        bne .gensbox

        ; Y = 0 here
.genssm
        lda sbox,y
        asl
        bcc .genssm_noreduce
        eor #GF_POLY
.genssm_noreduce
        sta ssm0,y                      ; 2 * sbox[y]
        eor sbox,y
        sta ssm3,y                      ; 3 * sbox[y]
        iny
        bne .genssm
        rts


; =====================================================================
; expand - key expansion
;
; In:    expkey[0..KEY_BYTES-1] = cipher key; sbox valid (see gentabs)
; Out:   expkey[KEY_BYTES..EXPKEY_BYTES-1] filled in
; Clobb: A, X, Y, flags, tmp+0 (round constant), tmp+1 (byte counter)
;
; Y is the offset of the word KEY_BYTES bytes before the one being
; produced; each pass through .expandword writes 4 bytes at
; expkey+KEY_BYTES+Y and advances Y by 4:
;   Y mod 32 == 0  : new = old ^ sbox(previous word rotated by one byte),
;                    with the round constant XORed into byte 0
;   Y mod 32 == 16 : new = old ^ sbox(previous word)
;   otherwise      : new = old ^ previous word
; =====================================================================
!zone expand
expand
        lda #1
        sta tmp+0                       ; round constant, doubled after use
        ldy #0

.expandword
        tya
        and #KEY_BYTES-1
        bne .notzero

        ; Rotated + substituted word. Source byte (n+1) mod 4 of the
        ; previous word feeds destination byte n.
        ldx expkey+PREV_WORD+0,y
        lda sbox,x
        eor expkey+3,y
        sta expkey+KEY_BYTES+3,y
        ldx expkey+PREV_WORD+3,y
        lda sbox,x
        eor expkey+2,y
        sta expkey+KEY_BYTES+2,y
        ldx expkey+PREV_WORD+2,y
        lda sbox,x
        eor expkey+1,y
        sta expkey+KEY_BYTES+1,y
        ldx expkey+PREV_WORD+1,y
        lda sbox,x
        eor tmp+0                       ; round constant
        eor expkey+0,y
        sta expkey+KEY_BYTES+0,y
        iny
        iny
        iny
        iny
        asl tmp+0
        ; The constant is shifted at most seven times starting from 1, so
        ; it never becomes zero and this branch is always taken.
        bne .done

.notzero
        cmp #HALF_KEY_BYTES
        bne .notfour

        ; Substituted (not rotated) word.
        lda #4
        sta tmp+1
.subcopy
        ldx expkey+PREV_WORD+0,y
        lda sbox,x
        eor expkey+0,y
        sta expkey+KEY_BYTES+0,y
        iny
        dec tmp+1
        bne .subcopy
        beq .done                       ; always taken

.notfour
        ; Plain XOR with the previous word.
        ldx #4
.copy
        lda expkey+0,y
        eor expkey+PREV_WORD+0,y
        sta expkey+KEY_BYTES+0,y
        iny
        dex
        bne .copy

.done
        cpy #EXPKEY_BYTES-KEY_BYTES
        beq .return
        jmp .expandword
.return
        rts


; =====================================================================
; encrypt - encrypt one 16-byte block in place
;
; In:    aesblock = plaintext; expkey expanded (see expand);
;        sbox/ssm0/ssm3 valid (see gentabs)
; Out:   aesblock = ciphertext
; Clobb: A, X, Y, flags, tmpblock[0..15]
;
; Structure: XOR with round key 0, then 13 table-driven rounds using the
; round keys at offsets $10..$d0, then a final round (.ssa) without the
; ssm0/ssm3 mixing, using the round key at LAST_KEY.
;
; The numbers in the right-hand comments are running cycle counts within
; each loop body.
;
; ~10302 cycles without jsr/rts (~644 per byte), summing the annotations
; below and assuming no page-crossing penalties. (The figure previously
; quoted here, ~10318 / ~645 per byte, counted each zero-page,x store in
; .addfirst as 5 cycles instead of 4.)
; =====================================================================
!zone encrypt
encrypt
        ldx #$07
.addfirst
        lda aesblock+0,x                ; 4
        eor expkey+0,x                  ; 8
        sta tmpblock+0,x                ; 12
        lda aesblock+8,x                ; 16
        eor expkey+8,x                  ; 20
        sta tmpblock+8,x                ; 24
        dex                             ; 26
        bpl .addfirst                   ; 29

        ldy #BLOCK_BYTES                ; Y = offset of current round key

.round
        ; Each output byte = round key byte ^ four table lookups indexed by
        ; state bytes taken along a diagonal of the 4x4 state (index
        ; 4*column+row). The table order rotates one step per output row.

        ; ---- output column 0: state bytes 0, 5, 10, 15 ----
        lda expkey+$00,y                ; 4
        ldx tmpblock+4*0+0              ; 7
        eor ssm0,x                      ; 11
        ldx tmpblock+4*1+1              ; 14
        eor ssm3,x                      ; 18
        ldx tmpblock+4*2+2              ; 21
        eor ssm2,x                      ; 25
        ldx tmpblock+4*3+3              ; 28
        eor ssm1,x                      ; 32
        sta aesblock+$00                ; 36

        lda expkey+$01,y
        ldx tmpblock+4*0+0
        eor ssm1,x
        ldx tmpblock+4*1+1
        eor ssm0,x
        ldx tmpblock+4*2+2
        eor ssm3,x
        ldx tmpblock+4*3+3
        eor ssm2,x
        sta aesblock+$01

        lda expkey+$02,y
        ldx tmpblock+4*0+0
        eor ssm2,x
        ldx tmpblock+4*1+1
        eor ssm1,x
        ldx tmpblock+4*2+2
        eor ssm0,x
        ldx tmpblock+4*3+3
        eor ssm3,x
        sta aesblock+$02

        lda expkey+$03,y
        ldx tmpblock+4*0+0
        eor ssm3,x
        ldx tmpblock+4*1+1
        eor ssm2,x
        ldx tmpblock+4*2+2
        eor ssm1,x
        ldx tmpblock+4*3+3
        eor ssm0,x
        sta aesblock+$03

        ; ---- output column 1: state bytes 4, 9, 14, 3 ----
        lda expkey+$04,y
        ldx tmpblock+4*1+0
        eor ssm0,x
        ldx tmpblock+4*2+1
        eor ssm3,x
        ldx tmpblock+4*3+2
        eor ssm2,x
        ldx tmpblock+4*0+3
        eor ssm1,x
        sta aesblock+$04

        lda expkey+$05,y
        ldx tmpblock+4*1+0
        eor ssm1,x
        ldx tmpblock+4*2+1
        eor ssm0,x
        ldx tmpblock+4*3+2
        eor ssm3,x
        ldx tmpblock+4*0+3
        eor ssm2,x
        sta aesblock+$05

        lda expkey+$06,y
        ldx tmpblock+4*1+0
        eor ssm2,x
        ldx tmpblock+4*2+1
        eor ssm1,x
        ldx tmpblock+4*3+2
        eor ssm0,x
        ldx tmpblock+4*0+3
        eor ssm3,x
        sta aesblock+$06

        lda expkey+$07,y
        ldx tmpblock+4*1+0
        eor ssm3,x
        ldx tmpblock+4*2+1
        eor ssm2,x
        ldx tmpblock+4*3+2
        eor ssm1,x
        ldx tmpblock+4*0+3
        eor ssm0,x
        sta aesblock+$07

        ; ---- output column 2: state bytes 8, 13, 2, 7 ----
        lda expkey+$08,y
        ldx tmpblock+4*2+0
        eor ssm0,x
        ldx tmpblock+4*3+1
        eor ssm3,x
        ldx tmpblock+4*0+2
        eor ssm2,x
        ldx tmpblock+4*1+3
        eor ssm1,x
        sta aesblock+$08

        lda expkey+$09,y
        ldx tmpblock+4*2+0
        eor ssm1,x
        ldx tmpblock+4*3+1
        eor ssm0,x
        ldx tmpblock+4*0+2
        eor ssm3,x
        ldx tmpblock+4*1+3
        eor ssm2,x
        sta aesblock+$09

        lda expkey+$0a,y
        ldx tmpblock+4*2+0
        eor ssm2,x
        ldx tmpblock+4*3+1
        eor ssm1,x
        ldx tmpblock+4*0+2
        eor ssm0,x
        ldx tmpblock+4*1+3
        eor ssm3,x
        sta aesblock+$0a

        lda expkey+$0b,y
        ldx tmpblock+4*2+0
        eor ssm3,x
        ldx tmpblock+4*3+1
        eor ssm2,x
        ldx tmpblock+4*0+2
        eor ssm1,x
        ldx tmpblock+4*1+3
        eor ssm0,x
        sta aesblock+$0b

        ; ---- output column 3: state bytes 12, 1, 6, 11 ----
        lda expkey+$0c,y
        ldx tmpblock+4*3+0
        eor ssm0,x
        ldx tmpblock+4*0+1
        eor ssm3,x
        ldx tmpblock+4*1+2
        eor ssm2,x
        ldx tmpblock+4*2+3
        eor ssm1,x
        sta aesblock+$0c

        lda expkey+$0d,y
        ldx tmpblock+4*3+0
        eor ssm1,x
        ldx tmpblock+4*0+1
        eor ssm0,x
        ldx tmpblock+4*1+2
        eor ssm3,x
        ldx tmpblock+4*2+3
        eor ssm2,x
        sta aesblock+$0d

        lda expkey+$0e,y
        ldx tmpblock+4*3+0
        eor ssm2,x
        ldx tmpblock+4*0+1
        eor ssm1,x
        ldx tmpblock+4*1+2
        eor ssm0,x
        ldx tmpblock+4*2+3
        eor ssm3,x
        sta aesblock+$0e

        lda expkey+$0f,y
        ldx tmpblock+4*3+0
        eor ssm3,x
        ldx tmpblock+4*0+1
        eor ssm2,x
        ldx tmpblock+4*1+2
        eor ssm1,x
        ldx tmpblock+4*2+3
        eor ssm0,x
        sta aesblock+$0f

        ; Copy the round output back into the zero-page working state.
        ldx #$03
.copyblock
; Reducing the unrolling by half saves 10 bytes, at the cost of 260 cycles per
; block.
        lda aesblock+0,x                ; 4
        sta tmpblock+0,x                ; 8
        lda aesblock+4,x                ; 12
        sta tmpblock+4,x                ; 16
        lda aesblock+8,x                ; 20
        sta tmpblock+8,x                ; 24
        lda aesblock+12,x               ; 28
        sta tmpblock+12,x               ; 32
        dex                             ; 34
        bpl .copyblock                  ; 37

        ; Advance to the next round key; stop once the final one is next.
        clc
        tya
        adc #BLOCK_BYTES
        tay
        cpy #LAST_KEY
        beq .lastround
        jmp .round                      ; .round is out of branch range

.lastround
        ; Final round: aesblock[y] = sbox[tmpblock[subtab[y]]] ^ last key[y]
        ldy #$0f
.ssa
; Computing x directly as 5*y mod 16 saves 8 bytes, but requires another 224
; cycles per block
;
;       tya                             ; 2
;       sta tmp+0                       ; 5
;       asl                             ; 7
;       asl                             ; 9
;       clc                             ; 11
;       adc tmp+0                       ; 14
;       and #$0f                        ; 16
;       tax                             ; 18
        ldx .subtab,y                   ; 4
        lda tmpblock,x                  ; 8
        tax                             ; 10
        lda sbox,x                      ; 14
        eor expkey+LAST_KEY,y           ; 18
        sta aesblock,y                  ; 23
        dey                             ; 25
        bpl .ssa                        ; 28
        rts

; Source index in tmpblock for each output byte of the final round
; (same diagonals as in .round; entry y equals 5*y mod 16).
.subtab
        !byte 4*0+0, 4*1+1, 4*2+2, 4*3+3
        !byte 4*1+0, 4*2+1, 4*3+2, 4*0+3
        !byte 4*2+0, 4*3+1, 4*0+2, 4*1+3
        !byte 4*3+0, 4*0+1, 4*1+2, 4*2+3