// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
//   1. distributions of this source code include the above copyright
//      notice, this list of conditions and the following disclaimer;
//
//   2. distributions in binary form include the above copyright
//      notice, this list of conditions and the following disclaimer
//      in the documentation and/or other associated materials;
//
//   3. the copyright holder's name is not used to endorse products
//      built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>

// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002

// This file is meant to be run through the C preprocessor before it is
// assembled: the #include and #define lines are preprocessor directives
// and must start with '#' as the first token on their line.
.file "aes-i586-asm.S"
.text

// ENTRY() and ENDPROC() used by the two subroutines are not defined in
// this file; presumably they come from <linux/linkage.h> (confirm).
#include <linux/linkage.h>
#include <asm/asm-offsets.h>

// Length in bytes of each of the 4 'xor' arrays (256 32-bit words).
// The column macros address the four arrays as table, table+tlen,
// table+2*tlen and table+3*tlen, i.e. they expect them back to back.
#define tlen 1024

/*
 * Offsets from %esp to the three stack parameters once exactly one
 * register has been pushed after entry.  Code that reads a parameter
 * after further pushes adds 4 per extra push, which is why the
 * subroutines use in_blk+4 (two pushes) and out_blk+12 (four pushes).
 */
#define ctx 8
#define out_blk 12
#define in_blk 16

/*
 * Offsets in crypto_aes_ctx structure:
 *   klen  key size field, compared against 24 to choose the round count
 *   ekey  start of the round keys read by aes_enc_blk
 *   dkey  start of the round keys read by aes_dec_blk
 */
#define klen (480)
#define ekey (0)
#define dkey (240)

// Register mapping for the encrypt and decrypt subroutines.
// All code below names registers only as r0..r5; several instruction
// orderings (see the CAUTION notes in the subroutines) depend on exactly
// this assignment.

#define r0  eax
#define r1  ebx
#define r2  ecx
#define r3  edx
#define r4  esi
#define r5  edi

// Low/high byte register names spelled as <reg>l / <reg>h so that they can
// be built by token pasting.  Only eax, ebx, ecx and edx are listed, so
// only r0..r3 can be handed to l() and h().

#define eaxl  al
#define eaxh  ah
#define ebxl  bl
#define ebxh  bh
#define ecxl  cl
#define ecxh  ch
#define edxl  dl
#define edxh  dh

// h(reg): byte register holding bits 8..15 of reg.
// l(reg): byte register holding bits 0..7 of reg.
// Each is a two-step macro: the outer step lets an argument such as r0
// expand to eax first, the inner step then pastes the suffix (eax -> eaxh
// -> ah).  Pasting directly would produce r0h, which is not defined.

#define _h(reg) reg##h
#define h(reg) _h(reg)

#define _l(reg) reg##l
#define l(reg) _l(reg)

// do_col: fold one input column into the four output columns.
//
// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers.  Byte k of the word (k = 0 is the least significant
// byte) selects an entry of table k, and that entry is xored into the
// k-th output register.
//
// Parameters:
//   table  base address of table 0; table k starts at table + k*tlen
//   a1     output register receiving the table 0 entry (byte 0 of idx)
//   a2     output register receiving the table 1 entry (byte 1 of idx)
//   a3     output register receiving the table 2 entry (byte 2 of idx)
//   a4     output register receiving the table 3 entry (byte 3 of idx)
//   idx    input register for the round (destroyed); must be one of
//          r0..r3 because its low and high byte registers are used
//   tmp    scratch register for the round (destroyed)
//
// The outputs are accumulated into (xor), not overwritten.

#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
	movzx   %l(idx),%tmp;			/* tmp = byte 0 of idx */	\
	xor     table(,%tmp,4),%a1;		/* a1 ^= table 0 entry */	\
	movzx   %h(idx),%tmp;			/* tmp = byte 1 of idx */	\
	shr     $16,%idx;			/* move bytes 2,3 down */	\
	xor     table+tlen(,%tmp,4),%a2;	/* a2 ^= table 1 entry */	\
	movzx   %l(idx),%tmp;			/* tmp = byte 2 */		\
	movzx   %h(idx),%idx;			/* idx = byte 3 */		\
	xor     table+2*tlen(,%tmp,4),%a3;	/* a3 ^= table 2 entry */	\
	xor     table+3*tlen(,%idx,4),%a4;	/* a4 ^= table 3 entry */

// do_fcol: first column step of a forward round.
//
// Does the same four table lookups on the bytes of idx as do_col, but
// initialises the output registers from the key schedule instead of
// accumulating into their old contents.
//
// Parameters:
//   table  base address of table 0; table k starts at table + k*tlen
//   a1     loaded from round key offset  0, then xored with table 0 entry
//   a2     loaded from round key offset 12, then xored with table 1 entry
//   a3     loaded from round key offset  8, then xored with table 2 entry
//   a4     loaded from round key offset  4, then xored with table 3 entry
//   idx    input column (one of r0..r3); replaced, see NB1
//   tmp    scratch register (destroyed)
//   sched  memory operand of the round key.  The byte offset is written
//          directly in front of it, so "12 sched" with sched = -64(%ebp)
//          becomes the text "12 -64(%ebp)", one operand whose
//          displacement is the expression 12 -64.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov     0 sched,%a1;			/* a1 = key word at 0 */	\
	movzx   %l(idx),%tmp;			/* tmp = byte 0 of idx */	\
	mov     12 sched,%a2;			/* a2 = key word at 12 */	\
	xor     table(,%tmp,4),%a1;		/* a1 ^= table 0 entry */	\
	mov     4 sched,%a4;			/* a4 = key word at 4 */	\
	movzx   %h(idx),%tmp;			/* tmp = byte 1 of idx */	\
	shr     $16,%idx;			/* move bytes 2,3 down */	\
	xor     table+tlen(,%tmp,4),%a2;	/* a2 ^= table 1 entry */	\
	movzx   %l(idx),%tmp;			/* tmp = byte 2 */		\
	movzx   %h(idx),%idx;			/* idx = byte 3 */		\
	xor     table+3*tlen(,%idx,4),%a4;	/* a4 ^= table 3 entry */	\
	mov     %a3,%idx;			/* hand old a3 back in idx */	\
	mov     8 sched,%a3;			/* a3 = key word at 8 */	\
	xor     table+2*tlen(,%tmp,4),%a3;	/* a3 ^= table 2 entry */

// do_icol: first column step of an inverse round.
//
// Same instruction sequence as do_fcol except for which round key word
// initialises a2 and a4: here a2 takes the word at offset 4 and a4 the
// word at offset 12.
//
// Parameters:
//   table  base address of table 0; table k starts at table + k*tlen
//   a1     loaded from round key offset  0, then xored with table 0 entry
//   a2     loaded from round key offset  4, then xored with table 1 entry
//   a3     loaded from round key offset  8, then xored with table 2 entry
//   a4     loaded from round key offset 12, then xored with table 3 entry
//   idx    input column (one of r0..r3); replaced, see NB1
//   tmp    scratch register (destroyed)
//   sched  memory operand of the round key.  The byte offset is written
//          directly in front of it, so "12 sched" with sched = -64(%ebp)
//          becomes the text "12 -64(%ebp)", one operand whose
//          displacement is the expression 12 -64.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov     0 sched,%a1;			/* a1 = key word at 0 */	\
	movzx   %l(idx),%tmp;			/* tmp = byte 0 of idx */	\
	mov     4 sched,%a2;			/* a2 = key word at 4 */	\
	xor     table(,%tmp,4),%a1;		/* a1 ^= table 0 entry */	\
	mov     12 sched,%a4;			/* a4 = key word at 12 */	\
	movzx   %h(idx),%tmp;			/* tmp = byte 1 of idx */	\
	shr     $16,%idx;			/* move bytes 2,3 down */	\
	xor     table+tlen(,%tmp,4),%a2;	/* a2 ^= table 1 entry */	\
	movzx   %l(idx),%tmp;			/* tmp = byte 2 */		\
	movzx   %h(idx),%idx;			/* idx = byte 3 */		\
	xor     table+3*tlen(,%idx,4),%a4;	/* a4 ^= table 3 entry */	\
	mov     %a3,%idx;			/* hand old a3 back in idx */	\
	mov     8 sched,%a3;			/* a3 = key word at 8 */	\
	xor     table+2*tlen(,%tmp,4),%a3;	/* a3 ^= table 2 entry */


// original Gladman had conditional saves to MMX regs.
//
// Here a register is simply parked in a 32-bit slot on the stack; slot n
// is the word at 4*n(%esp).  The subroutines reserve slots 0 and 1 with
// "sub $8,%esp" before the first round.
//
// save(slot, reg):     store register reg into stack slot number slot
// restore(reg, slot):  load register reg from stack slot number slot
// Note the argument order differs between the two macros.
#define save(slot, reg)		\
	mov     %reg,4*slot(%esp)

#define restore(reg, slot)	\
	mov     4*slot(%esp),%reg

// These macros perform a forward encryption cycle.  The four column
// values of the previous round are the input; the four new column values
// are built from the round key at arg and the table entries selected by
// the input bytes.  Columns 1, 2 and 3 stay in r1, r4 and r5.  Column 0
// alternates: fwd_rnd1 reads it from r0 and leaves it in r2, fwd_rnd2
// reads it from r2 and leaves it in r0, so an rnd1/rnd2 pair ends with
// all values back in r0,r1,r4,r5.  r3 is scratch and two stack slots at
// (%esp) are used for temporary storage.

// fwd_rnd1(arg, table)
//   arg    memory operand of this round's key, e.g. +16(%ebp)
//   table  base address of the four lookup tables
//
// r1 and r5 are copied to the stack first because the opening do_fcol
// overwrites both with new values before they have served as inputs;
// r4 needs no copy since do_fcol returns its old value in the idx
// register.
//
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define fwd_rnd1(arg, table)						\
	save   (0,r1);			/* slot 0 = input column 1 */	\
	save   (1,r5);			/* slot 1 = input column 3 */	\
									\
	/* compute new column values */					\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */	\
	restore(r0,0);			/* r0 = input column 1 */	\
	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */	\
	restore(r0,1);			/* r0 = input column 3 */	\
	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */

// fwd_rnd2(arg, table): forward round with column 0 entering in r2 and
// leaving in r0; the counterpart of fwd_rnd1 with r0 and r2 exchanged.
//   arg    memory operand of this round's key, e.g. +16(%ebp)
//   table  base address of the four lookup tables
// r3 is scratch; stack slots 0 and 1 at (%esp) are overwritten.
//
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define fwd_rnd2(arg, table)						\
	save   (0,r1);			/* slot 0 = input column 1 */	\
	save   (1,r5);			/* slot 1 = input column 3 */	\
									\
	/* compute new column values */					\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */	\
	restore(r2,0);			/* r2 = input column 1 */	\
	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */	\
	restore(r2,1);			/* r2 = input column 3 */	\
	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */

// These macros perform an inverse encryption cycle.  The four column
// values of the previous round are the input; the four new column values
// are built from the round key at arg and the table entries selected by
// the input bytes.  Columns 1, 2 and 3 stay in r1, r4 and r5.  Column 0
// alternates: inv_rnd1 reads it from r0 and leaves it in r2, inv_rnd2
// reads it from r2 and leaves it in r0, so an rnd1/rnd2 pair ends with
// all values back in r0,r1,r4,r5.  r3 is scratch and two stack slots at
// (%esp) are used for temporary storage.

// inv_rnd1(arg, table)
//   arg    memory operand of this round's key, e.g. +16(%ebp)
//   table  base address of the four lookup tables
//
// Compared with fwd_rnd1 the output registers are passed to the column
// macros in a different order, and do_icol replaces do_fcol.
// r1 and r5 are copied to the stack first because the opening do_icol
// overwrites both with new values before they have served as inputs.
//
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define inv_rnd1(arg, table)						\
	save    (0,r1);			/* slot 0 = input column 1 */	\
	save    (1,r5);			/* slot 1 = input column 3 */	\
									\
	/* compute new column values */					\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */	\
	restore(r0,0);			/* r0 = input column 1 */	\
	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */	\
	restore(r0,1);			/* r0 = input column 3 */	\
	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */

// inv_rnd2(arg, table): inverse round with column 0 entering in r2 and
// leaving in r0; the counterpart of inv_rnd1 with r0 and r2 exchanged.
//   arg    memory operand of this round's key, e.g. +16(%ebp)
//   table  base address of the four lookup tables
// r3 is scratch; stack slots 0 and 1 at (%esp) are overwritten.
//
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define inv_rnd2(arg, table)						\
	save    (0,r1);			/* slot 0 = input column 1 */	\
	save    (1,r5);			/* slot 1 = input column 3 */	\
									\
	/* compute new column values */					\
	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r5,r0,r1, r2,r3);		/* idx=r4 */	\
	restore(r2,0);			/* r2 = input column 1 */	\
	do_col (table, r1,r4,r5,r0, r2,r3);		/* idx=r1 */	\
	restore(r2,1);			/* r2 = input column 3 */	\
	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */

// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Encrypts the 16 bytes at in_blk and stores the 16 result bytes at
// out_blk.
//
// In:     all three arguments are read from the stack (offsets ctx,
//         out_blk, in_blk defined above)
// Reads:  klen(ctx) for the key size, round keys starting at ekey(ctx)
// Saved:  ebp, ebx, esi, edi are pushed on entry and popped on exit
// Clobb:  eax, ecx, edx, flags
// Stack:  four pushes plus 8 bytes for the two slots used by the round
//         macros
//
// Lookup tables defined outside this file: crypto_ft_tab is used for
// every round except the last, crypto_fl_tab for the last one.

.extern  crypto_ft_tab
.extern  crypto_fl_tab

ENTRY(aes_enc_blk)
	push    %ebp
	mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

// The pushes are interleaved with the argument loads; in_blk is read
// after the second push, hence in_blk+4.
1:	push    %ebx
	mov     in_blk+4(%esp),%r2
	push    %esi
	mov     klen(%ebp),%r3   // key size
	push    %edi
#if ekey != 0
	lea     ekey(%ebp),%ebp  // key pointer
#endif

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

// Choose the entry point into the unrolled rounds from the key size.
// The round sequence always ends at +144(%ebp), so ebp is advanced by a
// further 32 bytes for each extra pair of rounds.  lea does not change
// the flags, so the je below still tests the result of the cmp.

	sub     $8,%esp		// space for register saves on stack
	add     $16,%ebp	// increment to next round key
	cmp     $24,%r3
	jb      4f		// 10 rounds for 128-bit key
	lea     32(%ebp),%ebp
	je      3f		// 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

2:	fwd_rnd1( -64(%ebp), crypto_ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:	fwd_rnd1( -32(%ebp), crypto_ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:	fwd_rnd1(    (%ebp), crypto_ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp), crypto_ft_tab)
	fwd_rnd1( +32(%ebp), crypto_ft_tab)
	fwd_rnd2( +48(%ebp), crypto_ft_tab)
	fwd_rnd1( +64(%ebp), crypto_ft_tab)
	fwd_rnd2( +80(%ebp), crypto_ft_tab)
	fwd_rnd1( +96(%ebp), crypto_ft_tab)
	fwd_rnd2(+112(%ebp), crypto_ft_tab)
	fwd_rnd1(+128(%ebp), crypto_ft_tab)
	fwd_rnd2(+144(%ebp), crypto_fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings
//
// r5, r4 and r1 are edi, esi and ebx, so each result word is stored
// before the pop that reloads the saved value of that register.
// out_blk is read with four registers still pushed, hence out_blk+12.

	add     $8,%esp
	mov     out_blk+12(%esp),%ebp
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	ret
ENDPROC(aes_enc_blk)

// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Decrypts the 16 bytes at in_blk and stores the 16 result bytes at
// out_blk.
//
// In:     all three arguments are read from the stack (offsets ctx,
//         out_blk, in_blk defined above)
// Reads:  klen(ctx) for the key size, round keys starting at dkey(ctx)
// Saved:  ebp, ebx, esi, edi are pushed on entry and popped on exit
// Clobb:  eax, ecx, edx, flags
// Stack:  four pushes plus 8 bytes for the two slots used by the round
//         macros
//
// Lookup tables defined outside this file: crypto_it_tab is used for
// every round except the last, crypto_il_tab for the last one.

.extern  crypto_it_tab
.extern  crypto_il_tab

ENTRY(aes_dec_blk)
	push    %ebp
	mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

// The pushes are interleaved with the argument loads; in_blk is read
// after the second push, hence in_blk+4.  klen must be fetched before
// ebp is moved on from the context base to the key schedule.
1:	push    %ebx
	mov     in_blk+4(%esp),%r2
	push    %esi
	mov     klen(%ebp),%r3   // key size
	push    %edi
#if dkey != 0
	lea     dkey(%ebp),%ebp  // key pointer
#endif

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

// Choose the entry point into the unrolled rounds from the key size.
// The round sequence always ends at +144(%ebp), so ebp is advanced by a
// further 32 bytes for each extra pair of rounds.  lea does not change
// the flags, so the je below still tests the result of the cmp.

	sub     $8,%esp		// space for register saves on stack
	add     $16,%ebp	// increment to next round key
	cmp     $24,%r3
	jb      4f		// 10 rounds for 128-bit key
	lea     32(%ebp),%ebp
	je      3f		// 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

2:	inv_rnd1( -64(%ebp), crypto_it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( -48(%ebp), crypto_it_tab)
3:	inv_rnd1( -32(%ebp), crypto_it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( -16(%ebp), crypto_it_tab)
4:	inv_rnd1(    (%ebp), crypto_it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( +16(%ebp), crypto_it_tab)
	inv_rnd1( +32(%ebp), crypto_it_tab)
	inv_rnd2( +48(%ebp), crypto_it_tab)
	inv_rnd1( +64(%ebp), crypto_it_tab)
	inv_rnd2( +80(%ebp), crypto_it_tab)
	inv_rnd1( +96(%ebp), crypto_it_tab)
	inv_rnd2(+112(%ebp), crypto_it_tab)
	inv_rnd1(+128(%ebp), crypto_it_tab)
	inv_rnd2(+144(%ebp), crypto_il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings
//
// r5, r4 and r1 are edi, esi and ebx, so each result word is stored
// before the pop that reloads the saved value of that register.
// out_blk is read with four registers still pushed, hence out_blk+12.

	add     $8,%esp
	mov     out_blk+12(%esp),%ebp
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	ret
ENDPROC(aes_dec_blk)
