;
; Rijndael (AES-128) block cipher
; this code assumes 128b data and a 128b key, so Nk = Nc = 4
; (nine full rounds followed by one final round without MixColumns)
;
; (c) 2003 Henk van Kampen, www.mediatronix.com
;
; based on documents by Dr. Brian Gladman,
; . http://fp.gladman.plus.com/cryptography_technology/rijndael/spec.v311.pdf
;
; Memory layout used throughout: 16-byte blocks are stored column by column,
; i.e. byte (row r, column c) lives at offset 4 * c + r.
;
; generate a VHDL initialization file based on a template and with entity called 'mpu_rom'
                VHDL    "ROM_blank_JTAG.vhd", "MPU_ROM.vhd", "mpu_rom"

; cipher key: first operand is the port address, followed by the 16 key bytes
inkey           DSROM   $00, $2B, $7E, $15, $16, $28, $AE, $D2, $A6, $AB, $F7, $15, $88, $09, $CF, $4F, $3C
; working copy of the key; holds the current round key during encryption
key             DSRAM   $10
;
; input block: first operand is the port address, followed by the 16 data bytes
input           DSROM   $30, $32, $43, $F6, $A8, $88, $5A, $30, $8D, $31, $31, $98, $A2, $E0, $37, $07, $34
; cipher state; contains the output block when Encrypt returns
state           DSRAM   $40
;
; a block RAM used as ROM with a byte wide interface at $50 of size 256 and inited by file: 'sbox.mem'
; usage in this file: OUT <index>, s_box followed by IN <result>, s_box
s_box           BROM    $50, 256, "sbox.mem"
;
; sbox.mem should contain: (without the semicolons)
; @00 63 7c 77 7b f2 6b 6f c5 30 01 67 2b fe d7 ab 76
; @10 ca 82 c9 7d fa 59 47 f0 ad d4 a2 af 9c a4 72 c0
; @20 b7 fd 93 26 36 3f f7 cc 34 a5 e5 f1 71 d8 31 15
; @30 04 c7 23 c3 18 96 05 9a 07 12 80 e2 eb 27 b2 75
; @40 09 83 2c 1a 1b 6e 5a a0 52 3b d6 b3 29 e3 2f 84
; @50 53 d1 00 ed 20 fc b1 5b 6a cb be 39 4a 4c 58 cf
; @60 d0 ef aa fb 43 4d 33 85 45 f9 02 7f 50 3c 9f a8
; @70 51 a3 40 8f 92 9d 38 f5 bc b6 da 21 10 ff f3 d2
; @80 cd 0c 13 ec 5f 97 44 17 c4 a7 7e 3d 64 5d 19 73
; @90 60 81 4f dc 22 2a 90 88 46 ee b8 14 de 5e 0b db
; @a0 e0 32 3a 0a 49 06 24 5c c2 d3 ac 62 91 95 e4 79
; @b0 e7 c8 37 6d 8d d5 4e a9 6c 56 f4 ea 65 7a ae 08
; @c0 ba 78 25 2e 1c a6 b4 c6 e8 dd 74 1f 4b bd 8b 8a
; @d0 70 3e b5 66 48 03 f6 0e 61 35 57 b9 86 c1 1d 9e
; @e0 e1 f8 98 11 69 d9 8e 94 9b 1e 87 e9 ce 55 28 df
; @f0 8c a1 89 0d bf e6 42 68 41 99 2d 0f b0 54 bb 16
;
;
pKey            EQU     s1                  ; pointer into key material (source pointer in move128)
pState          EQU     s2                  ; pointer into the state (destination pointer in move128)
X               EQU     $01                 ; initial round constant
G               EQU     $1B                 ; low byte of the field polynomial 0x11B
b128            EQU     128 / 8             ; 128 bits / 8 bits per byte = 16 bytes per block

;------------------------------------------------------------------------------
; Encrypt - Rijndael encrypt entry
;   in   : {inkey} holds the cipher key, {input} holds the block to encrypt
;   out  : {state} holds the encrypted block, {key} holds the last round key
;   regs : s3 = remaining full rounds, sF = current round constant;
;          the called routines use s0-s2 and s4-sB
;
; result should be: (Gladman)
;   R[10].k_sch   d014f9a8c9ee2589e13f0cc8b6630ca6
;   R[10].output  3925841d02dc09fbdc118597196a0b32
;------------------------------------------------------------------------------
Encrypt:
                CALL    InkeyToKey          ; key = inkey
                CALL    InToState           ; state = input
                CALL    XorRoundKey         ; state ^= k[0]
                LOAD    sF, X               ; round constant for the first key expansion
                LOAD    s3, 9               ; nine full rounds; the tenth is done after the loop
enc_round:
                CALL    SubBytes
                CALL    ShiftRows
                CALL    MixColumns
                CALL    NextRoundKey        ; key = k[round]
                CALL    XorRoundKey         ; state ^= k[round]
                SUB     s3, 1
                JUMP    NZ, enc_round
                                            ; final round: same as above minus MixColumns
                CALL    SubBytes
                CALL    ShiftRows
                CALL    NextRoundKey
                CALL    XorRoundKey
                RET                         ; output is the last {state}

;------------------------------------------------------------------------------
; XorRoundKey - state[i] ^= key[i] for all 16 bytes
;   xor128 is a second entry for callers that set pKey/pState themselves
;   regs : s0 = byte counter, s4/s5 = scratch, pKey/pState advanced by 16
;------------------------------------------------------------------------------
XorRoundKey:
                LOAD    pState, state
                LOAD    pKey, key
xor128:
                LOAD    s0, b128
xor_byte:
                IN      s4, pKey            ; key byte
                IN      s5, pState          ; state byte
                XOR     s4, s5
                OUT     s4, pState          ; write back into the state
                ADD     pState, 1
                ADD     pKey, 1
                SUB     s0, 1               ; sets Z after the 16th byte
                JUMP    NZ, xor_byte
                RET

;------------------------------------------------------------------------------
; InToState  - copy {input} to {state}
; InkeyToKey - copy {inkey} to {key}
; move128    - copy 16 bytes from the port in pKey to the port in pState
;   regs : s0 = byte counter, s4 = scratch, pKey/pState advanced by 16
;------------------------------------------------------------------------------
InToState:
                LOAD    pState, state       ; destination
                LOAD    pKey, input         ; source
                JUMP    move128

InkeyToKey:
                LOAD    pState, key         ; destination
                LOAD    pKey, inkey         ; source
                                            ; falls through into move128
move128:
                LOAD    s0, b128
move_byte:
                IN      s4, pKey
                OUT     s4, pState
                ADD     pState, 1
                ADD     pKey, 1
                SUB     s0, 1
                JUMP    NZ, move_byte
                RET

;------------------------------------------------------------------------------
; NextRoundKey - replace {key} in place by the next round key
;   temp = SubWord( RotWord( last word of key ) ), first byte ^= round constant
;   then for each of the four words: k[i] = k[i - Nk] ^ temp, temp = k[i]
;   in/out : sF = round constant, advanced to the next one on return
;   regs   : s4-s7 = temp word, s8 = scratch, s0 = byte counter, pKey
;------------------------------------------------------------------------------
NextRoundKey:
                IN      s4, key + 12        ; temp = last word of the previous round key
                IN      s5, key + 13
                IN      s6, key + 14
                IN      s7, key + 15

                LOAD    s8, s4              ; RotWord: rotate the four bytes by one position
                LOAD    s4, s5
                LOAD    s5, s6
                LOAD    s6, s7
                LOAD    s7, s8

                OUT     s4, s_box           ; S-box lookup of the first byte
                IN      s4, s_box
                XOR     s4, sF              ; add the round constant
                SL0     sF                  ; next round constant = old one times x ...
                JUMP    NC, rcon_done
                XOR     sF, G               ; ... reduced by the field polynomial on overflow
rcon_done:
                OUT     s5, s_box           ; S-box lookup of the remaining three bytes
                IN      s5, s_box
                OUT     s6, s_box
                IN      s6, s_box
                OUT     s7, s_box
                IN      s7, s_box

                LOAD    pKey, key
                LOAD    s0, b128
key_word:                                   ; one 4-byte word per pass, four passes
                IN      s8, pKey
                XOR     s4, s8              ; new byte, also kept as temp for the next word
                OUT     s4, pKey
                ADD     pKey, 1

                IN      s8, pKey
                XOR     s5, s8
                OUT     s5, pKey
                ADD     pKey, 1

                IN      s8, pKey
                XOR     s6, s8
                OUT     s6, pKey
                ADD     pKey, 1

                IN      s8, pKey
                XOR     s7, s8
                OUT     s7, pKey
                ADD     pKey, 1

                SUB     s0, 4
                JUMP    NZ, key_word
                RET

;------------------------------------------------------------------------------
; SubWord - S-box substitute the four bytes pointed at by pKey, in place
;   regs : s0 = byte counter, s8 = scratch, pKey advanced by 4
;------------------------------------------------------------------------------
SubWord:
                LOAD    s0, 4
subword_byte:
                IN      s8, pKey
                OUT     s8, s_box           ; inline table lookup
                IN      s8, s_box
                OUT     s8, pKey
                ADD     pKey, 1
                SUB     s0, 1
                JUMP    NZ, subword_byte
                RET

;------------------------------------------------------------------------------
; SubBytes - S-box substitute all 16 state bytes, in place
;   regs : s0 = byte counter, s8 = scratch, pState advanced by 16
;------------------------------------------------------------------------------
SubBytes:
                LOAD    pState, state
                LOAD    s0, b128
subbytes_byte:
                IN      s8, pState
                OUT     s8, s_box           ; inline table lookup
                IN      s8, s_box
                OUT     s8, pState
                ADD     pState, 1
                SUB     s0, 1
                JUMP    NZ, subbytes_byte
                RET

;------------------------------------------------------------------------------
; SBoxFast - s8 = S-box( s8 ) using the s_box table
;------------------------------------------------------------------------------
SBoxFast:
                OUT     s8, s_box
                IN      s8, s_box
                RET

;------------------------------------------------------------------------------
; SBoxSlow - s8 = S-box( s8 ) computed without the table
;   s9 = MulInverse( s8 ), then falls through into SBoxAffine
; SBoxAffine - s8 = s9 ^ rotl(s9,1) ^ rotl(s9,2) ^ rotl(s9,3) ^ rotl(s9,4) ^ $63
;   regs : s9 is left rotated by four positions; MulInverse uses sC-sE
;------------------------------------------------------------------------------
SBoxSlow:
                CALL    MulInverse
SBoxAffine:
                LOAD    s8, s9
                RL      s9
                XOR     s8, s9
                RL      s9
                XOR     s8, s9
                RL      s9
                XOR     s8, s9
                RL      s9
                XOR     s8, s9
                XOR     s8, $63
                RET

;------------------------------------------------------------------------------
; MulInverse - multiplicative inverse by trial and error
;   in   : s8
;   out  : s9 = first candidate in 1..255 with GMul( s8, s9 ) == 1;
;          s9 = 0 when s8 is 0 or when no candidate matched
;   regs : sC, sD, sE used by GMul; s8 is preserved
;------------------------------------------------------------------------------
MulInverse:
                LOAD    s9, 0
                OR      s8, s8              ; only to set Z when the input is zero
                RET     Z
inv_try:
                ADD     s9, 1               ; next candidate
                RET     Z                   ; wrapped to 0: all candidates tried
                LOAD    sC, s8
                LOAD    sD, s9
                CALL    GMul
                SUB     sE, 1               ; Z when the product was exactly 1
                JUMP    NZ, inv_try
                RET

;------------------------------------------------------------------------------
; GMul - shift-and-add multiply with reduction by G
;   in   : sC, sD = factors
;   out  : sE = product
;   regs : sC and sD are consumed (sD ends at 0)
;------------------------------------------------------------------------------
GMul:
                LOAD    sE, 0
gmul_bit:
                SR0     sD                  ; take the next low bit of sD
                JUMP    C, gmul_add         ; bit was 1: add the current multiple of sC
                RET     Z                   ; bit was 0 and nothing is left in sD
                JUMP    gmul_double
gmul_add:
                XOR     sE, sC
gmul_double:
                SL0     sC                  ; sC = sC times x ...
                JUMP    NC, gmul_bit
                XOR     sC, G               ; ... reduced on overflow
                JUMP    gmul_bit

;------------------------------------------------------------------------------
; ShiftRows - rotate row r of the state left by r positions (r = 1, 2, 3)
;   row r occupies offsets r, r + 4, r + 8, r + 12
;   regs : s4-s7 = scratch
;------------------------------------------------------------------------------
ShiftRows:
                IN      s7, state + 1       ; row 1: rotate by one
                IN      s4, state + 5
                IN      s5, state + 9
                IN      s6, state + 13
                OUT     s4, state + 1
                OUT     s5, state + 5
                OUT     s6, state + 9
                OUT     s7, state + 13

                IN      s6, state + 2       ; row 2: rotate by two
                IN      s7, state + 6
                IN      s4, state + 10
                IN      s5, state + 14
                OUT     s4, state + 2
                OUT     s5, state + 6
                OUT     s6, state + 10
                OUT     s7, state + 14

                IN      s5, state + 3       ; row 3: rotate by three
                IN      s6, state + 7
                IN      s7, state + 11
                IN      s4, state + 15
                OUT     s4, state + 3
                OUT     s5, state + 7
                OUT     s6, state + 11
                OUT     s7, state + 15

                RET

;------------------------------------------------------------------------------
; MixColumns - apply MixColumn to each of the four state columns, in place
;   regs : s4-s7 = column bytes, s8-sB used by MixColumn
;------------------------------------------------------------------------------
MixColumns:
                IN      s4, state + 0       ; column 0
                IN      s5, state + 1
                IN      s6, state + 2
                IN      s7, state + 3
                CALL    MixColumn
                OUT     s4, state + 0
                OUT     s5, state + 1
                OUT     s6, state + 2
                OUT     s7, state + 3

                IN      s4, state + 4       ; column 1
                IN      s5, state + 5
                IN      s6, state + 6
                IN      s7, state + 7
                CALL    MixColumn
                OUT     s4, state + 4
                OUT     s5, state + 5
                OUT     s6, state + 6
                OUT     s7, state + 7

                IN      s4, state + 8       ; column 2
                IN      s5, state + 9
                IN      s6, state + 10
                IN      s7, state + 11
                CALL    MixColumn
                OUT     s4, state + 8
                OUT     s5, state + 9
                OUT     s6, state + 10
                OUT     s7, state + 11

                IN      s4, state + 12      ; column 3
                IN      s5, state + 13
                IN      s6, state + 14
                IN      s7, state + 15
                CALL    MixColumn
                OUT     s4, state + 12
                OUT     s5, state + 13
                OUT     s6, state + 14
                OUT     s7, state + 15

                RET

;------------------------------------------------------------------------------
; MixColumn - mix one column held in registers
;   in/out : s4..s7 = c[0]..c[3]
;   regs   : s9 = t = c[0] ^ c[3], sA = u = c[1] ^ c[2], sB = v = t ^ u,
;            s8 = scratch for the doubled term
;   every c[i] becomes c[i] ^ v ^ FFmul( 0x02, c[i] ^ c[i + 1] ), indices mod 4;
;   t and u are taken before any c[i] is overwritten
;------------------------------------------------------------------------------
MixColumn:
                LOAD    s9, s4              ; t = c[0] ^ c[3]
                XOR     s9, s7
                LOAD    sA, s5              ; u = c[1] ^ c[2]
                XOR     sA, s6
                LOAD    sB, s9              ; v = t ^ u
                XOR     sB, sA

                LOAD    s8, s4              ; c[0] ^= v ^ FFmul( 0x02, c[0] ^ c[1] )
                XOR     s8, s5
                SL0     s8
                JUMP    NC, mix_c0
                XOR     s8, G
mix_c0:
                XOR     s8, sB
                XOR     s4, s8

                LOAD    s8, sA              ; c[1] ^= v ^ FFmul( 0x02, u )
                SL0     s8
                JUMP    NC, mix_c1
                XOR     s8, G
mix_c1:
                XOR     s8, sB
                XOR     s5, s8

                LOAD    s8, s6              ; c[2] ^= v ^ FFmul( 0x02, c[2] ^ c[3] )
                XOR     s8, s7
                SL0     s8
                JUMP    NC, mix_c2
                XOR     s8, G
mix_c2:
                XOR     s8, sB
                XOR     s6, s8

                LOAD    s8, s9              ; c[3] ^= v ^ FFmul( 0x02, t )
                SL0     s8
                JUMP    NC, mix_c3
                XOR     s8, G
mix_c3:
                XOR     s8, sB
                XOR     s7, s8

                RET
;