1/*
2 * Glue code for AES implementation for SPE instructions (PPC)
3 *
 * Based on the generic implementation. The assembler module takes care
 * of the SPE registers so it can run from interrupt context.
6 *
7 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */
15
16#include <crypto/aes.h>
17#include <linux/module.h>
18#include <linux/init.h>
19#include <linux/types.h>
20#include <linux/errno.h>
21#include <linux/crypto.h>
22#include <asm/byteorder.h>
23#include <asm/switch_to.h>
24#include <crypto/algapi.h>
25
/*
 * MAX_BYTES caps the number of bytes that may be processed in one go
 * between preempt_disable() and preempt_enable(). e500 cores can issue two
 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
 * bit unit (SU2). One of these can be a memory access that is executed via
 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
 * 16 byte block, or 25 cycles per byte. Thus 768 bytes of input data will
 * need an estimated maximum of 20,000 cycles, headroom for cache misses
 * included. Even with the low end model clocked at 667 MHz this equals a
 * critical time window of less than 30us. The value has been chosen to
 * process a 512 byte disk block in one run, or a large 1400 byte IPsec
 * network packet in two runs.
 */
#define MAX_BYTES 768
41
42struct ppc_aes_ctx {
43	u32 key_enc[AES_MAX_KEYLENGTH_U32];
44	u32 key_dec[AES_MAX_KEYLENGTH_U32];
45	u32 rounds;
46};
47
48struct ppc_xts_ctx {
49	u32 key_enc[AES_MAX_KEYLENGTH_U32];
50	u32 key_dec[AES_MAX_KEYLENGTH_U32];
51	u32 key_twk[AES_MAX_KEYLENGTH_U32];
52	u32 rounds;
53};
54
55extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
56extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
57extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
58			    u32 bytes);
59extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
60			    u32 bytes);
61extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
62			    u32 bytes, u8 *iv);
63extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
64			    u32 bytes, u8 *iv);
65extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
66			    u32 bytes, u8 *iv);
67extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
68			    u32 bytes, u8 *iv, u32 *key_twk);
69extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
70			    u32 bytes, u8 *iv, u32 *key_twk);
71
72extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
73extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
74extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
75
76extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
77				     unsigned int key_len);
78
/*
 * Open a critical section in which the SPE unit may be used by the kernel.
 * Must be paired with spe_end().
 */
static void spe_begin(void)
{
	/* no preemption from here on ... */
	preempt_disable();
	/* ... and save the user's SPE registers if required */
	enable_kernel_spe();
}
85
/*
 * Close the critical section opened by spe_begin().
 */
static void spe_end(void)
{
	/* allow preemption again */
	preempt_enable();
}
91
92static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
93		unsigned int key_len)
94{
95	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
96
97	if (key_len != AES_KEYSIZE_128 &&
98	    key_len != AES_KEYSIZE_192 &&
99	    key_len != AES_KEYSIZE_256) {
100		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
101		return -EINVAL;
102	}
103
104	switch (key_len) {
105	case AES_KEYSIZE_128:
106		ctx->rounds = 4;
107		ppc_expand_key_128(ctx->key_enc, in_key);
108		break;
109	case AES_KEYSIZE_192:
110		ctx->rounds = 5;
111		ppc_expand_key_192(ctx->key_enc, in_key);
112		break;
113	case AES_KEYSIZE_256:
114		ctx->rounds = 6;
115		ppc_expand_key_256(ctx->key_enc, in_key);
116		break;
117	}
118
119	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
120
121	return 0;
122}
123
124static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
125		   unsigned int key_len)
126{
127	struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
128
129	key_len >>= 1;
130
131	if (key_len != AES_KEYSIZE_128 &&
132	    key_len != AES_KEYSIZE_192 &&
133	    key_len != AES_KEYSIZE_256) {
134		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
135		return -EINVAL;
136	}
137
138	switch (key_len) {
139	case AES_KEYSIZE_128:
140		ctx->rounds = 4;
141		ppc_expand_key_128(ctx->key_enc, in_key);
142		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
143		break;
144	case AES_KEYSIZE_192:
145		ctx->rounds = 5;
146		ppc_expand_key_192(ctx->key_enc, in_key);
147		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
148		break;
149	case AES_KEYSIZE_256:
150		ctx->rounds = 6;
151		ppc_expand_key_256(ctx->key_enc, in_key);
152		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
153		break;
154	}
155
156	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
157
158	return 0;
159}
160
161static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
162{
163	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
164
165	spe_begin();
166	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
167	spe_end();
168}
169
170static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
171{
172	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
173
174	spe_begin();
175	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
176	spe_end();
177}
178
179static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180			   struct scatterlist *src, unsigned int nbytes)
181{
182	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
183	struct blkcipher_walk walk;
184	unsigned int ubytes;
185	int err;
186
187	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
188	blkcipher_walk_init(&walk, dst, src, nbytes);
189	err = blkcipher_walk_virt(desc, &walk);
190
191	while ((nbytes = walk.nbytes)) {
192		ubytes = nbytes > MAX_BYTES ?
193			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
194		nbytes -= ubytes;
195
196		spe_begin();
197		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
198				ctx->key_enc, ctx->rounds, nbytes);
199		spe_end();
200
201		err = blkcipher_walk_done(desc, &walk, ubytes);
202	}
203
204	return err;
205}
206
207static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
208			   struct scatterlist *src, unsigned int nbytes)
209{
210	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
211	struct blkcipher_walk walk;
212	unsigned int ubytes;
213	int err;
214
215	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
216	blkcipher_walk_init(&walk, dst, src, nbytes);
217	err = blkcipher_walk_virt(desc, &walk);
218
219	while ((nbytes = walk.nbytes)) {
220		ubytes = nbytes > MAX_BYTES ?
221			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
222		nbytes -= ubytes;
223
224		spe_begin();
225		ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
226				ctx->key_dec, ctx->rounds, nbytes);
227		spe_end();
228
229		err = blkcipher_walk_done(desc, &walk, ubytes);
230	}
231
232	return err;
233}
234
235static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
236			   struct scatterlist *src, unsigned int nbytes)
237{
238	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
239	struct blkcipher_walk walk;
240	unsigned int ubytes;
241	int err;
242
243	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
244	blkcipher_walk_init(&walk, dst, src, nbytes);
245	err = blkcipher_walk_virt(desc, &walk);
246
247	while ((nbytes = walk.nbytes)) {
248		ubytes = nbytes > MAX_BYTES ?
249			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
250		nbytes -= ubytes;
251
252		spe_begin();
253		ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
254				ctx->key_enc, ctx->rounds, nbytes, walk.iv);
255		spe_end();
256
257		err = blkcipher_walk_done(desc, &walk, ubytes);
258	}
259
260	return err;
261}
262
263static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
264			   struct scatterlist *src, unsigned int nbytes)
265{
266	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
267	struct blkcipher_walk walk;
268	unsigned int ubytes;
269	int err;
270
271	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
272	blkcipher_walk_init(&walk, dst, src, nbytes);
273	err = blkcipher_walk_virt(desc, &walk);
274
275	while ((nbytes = walk.nbytes)) {
276		ubytes = nbytes > MAX_BYTES ?
277			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
278		nbytes -= ubytes;
279
280		spe_begin();
281		ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
282				ctx->key_dec, ctx->rounds, nbytes, walk.iv);
283		spe_end();
284
285		err = blkcipher_walk_done(desc, &walk, ubytes);
286	}
287
288	return err;
289}
290
291static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292			 struct scatterlist *src, unsigned int nbytes)
293{
294	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
295	struct blkcipher_walk walk;
296	unsigned int pbytes, ubytes;
297	int err;
298
299	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
300	blkcipher_walk_init(&walk, dst, src, nbytes);
301	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
302
303	while ((pbytes = walk.nbytes)) {
304		pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
305		pbytes = pbytes == nbytes ?
306			 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
307		ubytes = walk.nbytes - pbytes;
308
309		spe_begin();
310		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
311			      ctx->key_enc, ctx->rounds, pbytes , walk.iv);
312		spe_end();
313
314		nbytes -= pbytes;
315		err = blkcipher_walk_done(desc, &walk, ubytes);
316	}
317
318	return err;
319}
320
321static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
322			   struct scatterlist *src, unsigned int nbytes)
323{
324	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
325	struct blkcipher_walk walk;
326	unsigned int ubytes;
327	int err;
328	u32 *twk;
329
330	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
331	blkcipher_walk_init(&walk, dst, src, nbytes);
332	err = blkcipher_walk_virt(desc, &walk);
333	twk = ctx->key_twk;
334
335	while ((nbytes = walk.nbytes)) {
336		ubytes = nbytes > MAX_BYTES ?
337			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
338		nbytes -= ubytes;
339
340		spe_begin();
341		ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
342				ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
343		spe_end();
344
345		twk = NULL;
346		err = blkcipher_walk_done(desc, &walk, ubytes);
347	}
348
349	return err;
350}
351
352static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
353			   struct scatterlist *src, unsigned int nbytes)
354{
355	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
356	struct blkcipher_walk walk;
357	unsigned int ubytes;
358	int err;
359	u32 *twk;
360
361	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
362	blkcipher_walk_init(&walk, dst, src, nbytes);
363	err = blkcipher_walk_virt(desc, &walk);
364	twk = ctx->key_twk;
365
366	while ((nbytes = walk.nbytes)) {
367		ubytes = nbytes > MAX_BYTES ?
368			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
369		nbytes -= ubytes;
370
371		spe_begin();
372		ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
373				ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
374		spe_end();
375
376		twk = NULL;
377		err = blkcipher_walk_done(desc, &walk, ubytes);
378	}
379
380	return err;
381}
382
/*
 * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
 * because the e500 platform can handle unaligned reads/writes very
 * efficiently. This improves IPsec throughput by another few percent.
 * Additionally we assume that AES context is always aligned to at least
 * 8 bytes because it is created with kmalloc() in the crypto infrastructure.
 */
391static struct crypto_alg aes_algs[] = { {
392	.cra_name		=	"aes",
393	.cra_driver_name	=	"aes-ppc-spe",
394	.cra_priority		=	300,
395	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
396	.cra_blocksize		=	AES_BLOCK_SIZE,
397	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
398	.cra_alignmask		=	0,
399	.cra_module		=	THIS_MODULE,
400	.cra_u			=	{
401		.cipher = {
402			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
403			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
404			.cia_setkey		=	ppc_aes_setkey,
405			.cia_encrypt		=	ppc_aes_encrypt,
406			.cia_decrypt		=	ppc_aes_decrypt
407		}
408	}
409}, {
410	.cra_name		=	"ecb(aes)",
411	.cra_driver_name	=	"ecb-ppc-spe",
412	.cra_priority		=	300,
413	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
414	.cra_blocksize		=	AES_BLOCK_SIZE,
415	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
416	.cra_alignmask		=	0,
417	.cra_type		=	&crypto_blkcipher_type,
418	.cra_module		=	THIS_MODULE,
419	.cra_u = {
420		.blkcipher = {
421			.min_keysize		=	AES_MIN_KEY_SIZE,
422			.max_keysize		=	AES_MAX_KEY_SIZE,
423			.ivsize			=	AES_BLOCK_SIZE,
424			.setkey			=	ppc_aes_setkey,
425			.encrypt		=	ppc_ecb_encrypt,
426			.decrypt		=	ppc_ecb_decrypt,
427		}
428	}
429}, {
430	.cra_name		=	"cbc(aes)",
431	.cra_driver_name	=	"cbc-ppc-spe",
432	.cra_priority		=	300,
433	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
434	.cra_blocksize		=	AES_BLOCK_SIZE,
435	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
436	.cra_alignmask		=	0,
437	.cra_type		=	&crypto_blkcipher_type,
438	.cra_module		=	THIS_MODULE,
439	.cra_u = {
440		.blkcipher = {
441			.min_keysize		=	AES_MIN_KEY_SIZE,
442			.max_keysize		=	AES_MAX_KEY_SIZE,
443			.ivsize			=	AES_BLOCK_SIZE,
444			.setkey			=	ppc_aes_setkey,
445			.encrypt		=	ppc_cbc_encrypt,
446			.decrypt		=	ppc_cbc_decrypt,
447		}
448	}
449}, {
450	.cra_name		=	"ctr(aes)",
451	.cra_driver_name	=	"ctr-ppc-spe",
452	.cra_priority		=	300,
453	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
454	.cra_blocksize		=	1,
455	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
456	.cra_alignmask		=	0,
457	.cra_type		=	&crypto_blkcipher_type,
458	.cra_module		=	THIS_MODULE,
459	.cra_u = {
460		.blkcipher = {
461			.min_keysize		=	AES_MIN_KEY_SIZE,
462			.max_keysize		=	AES_MAX_KEY_SIZE,
463			.ivsize			=	AES_BLOCK_SIZE,
464			.setkey			=	ppc_aes_setkey,
465			.encrypt		=	ppc_ctr_crypt,
466			.decrypt		=	ppc_ctr_crypt,
467		}
468	}
469}, {
470	.cra_name		=	"xts(aes)",
471	.cra_driver_name	=	"xts-ppc-spe",
472	.cra_priority		=	300,
473	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
474	.cra_blocksize		=	AES_BLOCK_SIZE,
475	.cra_ctxsize		=	sizeof(struct ppc_xts_ctx),
476	.cra_alignmask		=	0,
477	.cra_type		=	&crypto_blkcipher_type,
478	.cra_module		=	THIS_MODULE,
479	.cra_u = {
480		.blkcipher = {
481			.min_keysize		=	AES_MIN_KEY_SIZE * 2,
482			.max_keysize		=	AES_MAX_KEY_SIZE * 2,
483			.ivsize			=	AES_BLOCK_SIZE,
484			.setkey			=	ppc_xts_setkey,
485			.encrypt		=	ppc_xts_encrypt,
486			.decrypt		=	ppc_xts_decrypt,
487		}
488	}
489} };
490
491static int __init ppc_aes_mod_init(void)
492{
493	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
494}
495
496static void __exit ppc_aes_mod_fini(void)
497{
498	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
499}
500
501module_init(ppc_aes_mod_init);
502module_exit(ppc_aes_mod_fini);
503
504MODULE_LICENSE("GPL");
505MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
506
507MODULE_ALIAS_CRYPTO("aes");
508MODULE_ALIAS_CRYPTO("ecb(aes)");
509MODULE_ALIAS_CRYPTO("cbc(aes)");
510MODULE_ALIAS_CRYPTO("ctr(aes)");
511MODULE_ALIAS_CRYPTO("xts(aes)");
512MODULE_ALIAS_CRYPTO("aes-ppc-spe");
513