1// ------------------------------------------------------------------------- 2// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. 3// All rights reserved. 4// 5// LICENSE TERMS 6// 7// The free distribution and use of this software in both source and binary 8// form is allowed (with or without changes) provided that: 9// 10// 1. distributions of this source code include the above copyright 11// notice, this list of conditions and the following disclaimer// 12// 13// 2. distributions in binary form include the above copyright 14// notice, this list of conditions and the following disclaimer 15// in the documentation and/or other associated materials// 16// 17// 3. the copyright holder's name is not used to endorse products 18// built using this software without specific written permission. 19// 20// 21// ALTERNATIVELY, provided that this notice is retained in full, this product 22// may be distributed under the terms of the GNU General Public License (GPL), 23// in which case the provisions of the GPL apply INSTEAD OF those given above. 24// 25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> 26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> 27 28// DISCLAIMER 29// 30// This software is provided 'as is' with no explicit or implied warranties 31// in respect of its properties including, but not limited to, correctness 32// and fitness for purpose. 
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002

.file "aes-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)

/* offsets to parameters with one register pushed onto stack */
/*
 * These are byte displacements from %esp.  With one register pushed,
 * 0(%esp) is that saved register and 4(%esp) is the return address, so
 * the three stack arguments sit at 8, 12 and 16.  Code that has pushed
 * further registers adds 4 per extra push (e.g. in_blk+4, out_blk+12).
 */
#define ctx 8
#define out_blk 12
#define in_blk 16

/* offsets in crypto_aes_ctx structure */
/*
 * klen is where the key size is read from, ekey/dkey are the starts of the
 * key schedules used by aes_enc_blk/aes_dec_blk respectively.
 * NOTE(review): these are hard-coded here; presumably they must agree with
 * the C layout of struct crypto_aes_ctx -- confirm against its definition.
 */
#define klen (480)
#define ekey (0)
#define dkey (240)

// register mapping for encrypt and decrypt subroutines
//
// The round macros are written in terms of r0..r5 and always prefix the
// name with '%', so %r0 becomes %eax after preprocessing, and so on.

#define r0  eax
#define r1  ebx
#define r2  ecx
#define r3  edx
#define r4  esi
#define r5  edi

// Low-byte and high-byte names for the four registers that have them.
// Only eax, ebx, ecx and edx (r0..r3) get an entry, so only those four
// can be passed to l() and h().

#define eaxl  al
#define eaxh  ah
#define ebxl  bl
#define ebxh  bh
#define ecxl  cl
#define ecxh  ch
#define edxl  dl
#define edxh  dh

// h(reg) / l(reg) yield the high-byte / low-byte register of reg.
// The extra level of indirection lets the argument be macro-expanded
// first (r0 -> eax) before the suffix is pasted on (eax##h -> eaxh -> ah).

#define _h(reg) reg##h
#define h(reg) _h(reg)

#define _l(reg) reg##l
#define l(reg) _l(reg)

// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.
// Parameters:
//      table   table base address
//      a1      out_state[0]
//      a2      out_state[1]
//      a3      out_state[2]
//      a4      out_state[3]
//      idx     input register for the round (destroyed)
//      tmp     scratch register for the round
//      sched   key schedule (do_fcol and do_icol only)
//
// idx must be one of r0..r3: l(idx) and h(idx) need a byte-register name,
// and those are only defined for eax, ebx, ecx and edx.
//
// do_col: xor one table entry into each of a1..a4, selected by the four
// bytes of idx:
//      a1 ^= table        [byte 0 of idx]
//      a2 ^= table+tlen   [byte 1 of idx]
//      a3 ^= table+2*tlen [byte 2 of idx]
//      a4 ^= table+3*tlen [byte 3 of idx]
// On exit idx holds its former byte 3, zero-extended.

#define do_col(table, a1,a2,a3,a4, idx, tmp)                            \
        movzx   %l(idx),%tmp;           /* tmp = byte 0 */              \
        xor     table(,%tmp,4),%a1;                                     \
        movzx   %h(idx),%tmp;           /* tmp = byte 1 */              \
        shr     $16,%idx;               /* bring bytes 2,3 down */      \
        xor     table+tlen(,%tmp,4),%a2;                                \
        movzx   %l(idx),%tmp;           /* tmp = byte 2 */              \
        movzx   %h(idx),%idx;           /* idx = byte 3 */              \
        xor     table+2*tlen(,%tmp,4),%a3;                              \
        xor     table+3*tlen(,%idx,4),%a4;

// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
//
// do_fcol: same table lookups as do_col, but a1..a4 are first loaded from
// the key schedule instead of being accumulated into:
//      a1 = sched[0], a2 = sched[12], a3 = sched[8], a4 = sched[4]
// The old a3 is copied into idx before a3 is overwritten, so the caller
// can feed it straight into the next do_col as the index.
//
// sched is pasted textually after the byte offset ("12 sched"), so the
// assembler sees e.g. "12 +16(%ebp)" or "12 -64(%ebp)".  This is why
// callers write the displacement with an explicit sign.

#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)                    \
        mov     0 sched,%a1;                                            \
        movzx   %l(idx),%tmp;           /* tmp = byte 0 */              \
        mov     12 sched,%a2;                                           \
        xor     table(,%tmp,4),%a1;                                     \
        mov     4 sched,%a4;                                            \
        movzx   %h(idx),%tmp;           /* tmp = byte 1 */              \
        shr     $16,%idx;                                               \
        xor     table+tlen(,%tmp,4),%a2;                                \
        movzx   %l(idx),%tmp;           /* tmp = byte 2 */              \
        movzx   %h(idx),%idx;           /* idx = byte 3 */              \
        xor     table+3*tlen(,%idx,4),%a4;                              \
        mov     %a3,%idx;               /* keep old a3 for caller */    \
        mov     8 sched,%a3;                                            \
        xor     table+2*tlen(,%tmp,4),%a3;

// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
//
// do_icol: identical to do_fcol except for which key-schedule words
// initialise a2 and a4:
//      a1 = sched[0], a2 = sched[4], a3 = sched[8], a4 = sched[12]

#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched)                    \
        mov     0 sched,%a1;                                            \
        movzx   %l(idx),%tmp;           /* tmp = byte 0 */              \
        mov     4 sched,%a2;                                            \
        xor     table(,%tmp,4),%a1;                                     \
        mov     12 sched,%a4;                                           \
        movzx   %h(idx),%tmp;           /* tmp = byte 1 */              \
        shr     $16,%idx;                                               \
        xor     table+tlen(,%tmp,4),%a2;                                \
        movzx   %l(idx),%tmp;           /* tmp = byte 2 */              \
        movzx   %h(idx),%idx;           /* idx = byte 3 */              \
        xor     table+3*tlen(,%idx,4),%a4;                              \
        mov     %a3,%idx;               /* keep old a3 for caller */    \
        mov     8 sched,%a3;                                            \
        xor     table+2*tlen(,%tmp,4),%a3;


// original Gladman had conditional saves to MMX regs.
//
// save(slot, reg) stores reg in 32-bit stack slot number 'slot';
// restore(reg, slot) loads it back.  Note that the argument order is
// reversed between the two.  The slots live at 4*slot(%esp), i.e. in the
// area the callers reserve with "sub $8,%esp".
#define save(a1, a2)            \
        mov     %a2,4*a1(%esp)

#define restore(a1, a2)         \
        mov     4*a2(%esp),%a1

// These macros perform a forward encryption cycle.
// They are entered with the first previous round column values in
// r0,r1,r4,r5 and exit with the final values in the same registers,
// using stack for temporary storage.
//
// That holds for a fwd_rnd1/fwd_rnd2 pair: fwd_rnd1 alone leaves the first
// column in r2 instead of r0, and fwd_rnd2 moves it back.  r3 is the
// scratch register in both.  'arg' is the round-key operand handed to
// do_fcol as 'sched'; 'table' is the lookup table base.
//
// Sequence inside each macro:
//   1. r1 and r5 are about to be overwritten as outputs of do_fcol, so
//      they are parked in stack slots 0 and 1.
//   2. do_fcol consumes the first column, loads the round key into the
//      four output registers and leaves the old r4 in the index register.
//   3. The three do_col steps consume old r4, then old r1 and old r5
//      (each reloaded from its stack slot into the index register).

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define fwd_rnd1(arg, table)                                            \
        save   (0,r1);                                                  \
        save   (1,r5);                                                  \
                                                                        \
        /* compute new column values */                                 \
        do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0 */    \
        do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 */    \
        restore(r0,0);                                                  \
        do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 */    \
        restore(r0,1);                                                  \
        do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define fwd_rnd2(arg, table)                                            \
        save   (0,r1);                                                  \
        save   (1,r5);                                                  \
                                                                        \
        /* compute new column values */                                 \
        do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2 */    \
        do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 */    \
        restore(r2,0);                                                  \
        do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 */    \
        restore(r2,1);                                                  \
        do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 */

// These macros perform an inverse encryption cycle.
// They are entered with the first previous round column values in
// r0,r1,r4,r5 and exit with the final values in the same registers,
// using stack for temporary storage.
//
// As with the forward macros this holds for an inv_rnd1/inv_rnd2 pair:
// inv_rnd1 leaves the first column in r2, inv_rnd2 moves it back to r0.
// They use do_icol for the first column, and the output registers handed
// to each do_col are permuted differently from the forward macros.

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define inv_rnd1(arg, table)                                            \
        save    (0,r1);                                                 \
        save    (1,r5);                                                 \
                                                                        \
        /* compute new column values */                                 \
        do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0 */    \
        do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 */    \
        restore(r0,0);                                                  \
        do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 */    \
        restore(r0,1);                                                  \
        do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define inv_rnd2(arg, table)                                            \
        save    (0,r1);                                                 \
        save    (1,r5);                                                 \
                                                                        \
        /* compute new column values */                                 \
        do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2 */    \
        do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 */    \
        restore(r2,0);                                                  \
        do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 */    \
        restore(r2,1);                                                  \
        do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 */

// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// All three arguments are read from the stack.  %ebp, %ebx, %esi and %edi
// are pushed on entry and popped again before returning; %eax, %ecx and
// %edx are overwritten.  A further 8 bytes of stack are used as the two
// save()/restore() slots while the rounds run.
//
// Register use in the body:
//      %ebp            pointer into the key schedule
//      r2 (%ecx)       input block pointer, until the rounds start
//      r3 (%edx)       key size, until the rounds start
//      r0,r1,r4,r5     the four 32-bit columns of the state

.extern  crypto_ft_tab
.extern  crypto_fl_tab

ENTRY(aes_enc_blk)
        push    %ebp
        mov     ctx(%esp),%ebp          // one register pushed: offsets apply as defined

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: a second register is now on the stack
        push    %esi
        mov     klen(%ebp),%r3   // key size
        push    %edi
#if ekey != 0
        lea     ekey(%ebp),%ebp  // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

// Choose the entry point into the unrolled rounds from the key size.
// Both branches test the flags of the single cmp below; the lea between
// them adjusts %ebp without modifying the flags.  %ebp is advanced by a
// further 32 (192-bit) or 64 (256-bit) bytes so that the common tail from
// label 4 onwards can use the same displacements for every key size.

        sub     $8,%esp         // space for register saves on stack
        add     $16,%ebp        // increment to next round key
        cmp     $24,%r3
        jb      4f              // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp
        je      3f              // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      fwd_rnd1( -64(%ebp), crypto_ft_tab)     // 14 rounds for 256-bit key
        fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:      fwd_rnd1( -32(%ebp), crypto_ft_tab)     // 12 rounds for 192-bit key
        fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:      fwd_rnd1(    (%ebp), crypto_ft_tab)     // 10 rounds for 128-bit key
        fwd_rnd2( +16(%ebp), crypto_ft_tab)
        fwd_rnd1( +32(%ebp), crypto_ft_tab)
        fwd_rnd2( +48(%ebp), crypto_ft_tab)
        fwd_rnd1( +64(%ebp), crypto_ft_tab)
        fwd_rnd2( +80(%ebp), crypto_ft_tab)
        fwd_rnd1( +96(%ebp), crypto_ft_tab)
        fwd_rnd2(+112(%ebp), crypto_ft_tab)
        fwd_rnd1(+128(%ebp), crypto_ft_tab)
        fwd_rnd2(+144(%ebp), crypto_fl_tab)     // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings
//
// The sequence ends on fwd_rnd2, so the result is in r0,r1,r4,r5.
// %edi, %esi and %ebx each hold a result column (r5, r4, r1), so every
// column is stored before the pop that overwrites its register.

        add     $8,%esp                 // release the two save slots
        mov     out_blk+12(%esp),%ebp   // +12: three more registers pushed than out_blk assumes
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        ret
ENDPROC(aes_enc_blk)

// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Same structure, stack usage and register roles as aes_enc_blk.  The
// differences are that the key pointer starts at ctx+dkey, the rounds use
// the inv_rnd1/inv_rnd2 macros, and the tables are crypto_it_tab with
// crypto_il_tab for the last round.

.extern  crypto_it_tab
.extern  crypto_il_tab

ENTRY(aes_dec_blk)
        push    %ebp
        mov     ctx(%esp),%ebp          // one register pushed: offsets apply as defined

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: a second register is now on the stack
        push    %esi
        mov     klen(%ebp),%r3   // key size
        push    %edi
#if dkey != 0
        lea     dkey(%ebp),%ebp  // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

// Key-size dispatch: both branches use the flags of the single cmp below;
// the intervening lea leaves the flags untouched.

        sub     $8,%esp         // space for register saves on stack
        add     $16,%ebp        // increment to next round key
        cmp     $24,%r3
        jb      4f              // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp
        je      3f              // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      inv_rnd1( -64(%ebp), crypto_it_tab)     // 14 rounds for 256-bit key
        inv_rnd2( -48(%ebp), crypto_it_tab)
3:      inv_rnd1( -32(%ebp), crypto_it_tab)     // 12 rounds for 192-bit key
        inv_rnd2( -16(%ebp), crypto_it_tab)
4:      inv_rnd1(    (%ebp), crypto_it_tab)     // 10 rounds for 128-bit key
        inv_rnd2( +16(%ebp), crypto_it_tab)
        inv_rnd1( +32(%ebp), crypto_it_tab)
        inv_rnd2( +48(%ebp), crypto_it_tab)
        inv_rnd1( +64(%ebp), crypto_it_tab)
        inv_rnd2( +80(%ebp), crypto_it_tab)
        inv_rnd1( +96(%ebp), crypto_it_tab)
        inv_rnd2(+112(%ebp), crypto_it_tab)
        inv_rnd1(+128(%ebp), crypto_it_tab)
        inv_rnd2(+144(%ebp), crypto_il_tab)     // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings
//
// The sequence ends on inv_rnd2, so the result is in r0,r1,r4,r5; each
// column is stored before the pop that overwrites its register.

        add     $8,%esp                 // release the two save slots
        mov     out_blk+12(%esp),%ebp   // +12: three more registers pushed than out_blk assumes
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        ret
ENDPROC(aes_dec_blk)