/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>

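/*
 * The assembler routines declared below each operate on
 * CAST5_PARALLEL_BLOCKS (16) blocks at a time.
 */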
#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

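/*
 * Wrappers around the glue_helper FPU management: the FPU/AVX state is only
 * taken when at least CAST5_PARALLEL_BLOCKS worth of data is pending, and it
 * stays enabled across walk iterations until cast5_fpu_end() is called.
 */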
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

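/*
 * ECB helper shared by ecb_encrypt() and ecb_decrypt(): walks the
 * scatterlists, processes as many 16-block batches as possible with the AVX
 * routines and falls back to the generic one-block CAST5 functions for the
 * tail.
 */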
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			/*
			 * Re-select the 16-way routine here: a previous,
			 * shorter walk chunk may have left fn pointing at
			 * the one-block fallback.
			 */
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers one block at a time */
		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

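/*
 * CBC encryption is inherently serial (each block depends on the previous
 * ciphertext block), so it is done one block at a time with the generic
 * CAST5 encrypt function.
 */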
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

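/*
 * CBC decryption of one walk chunk.  Blocks are processed back to front so
 * that 16-block batches can be decrypted with the AVX routine; the first
 * block of each batch (and of each single-block step) is then XORed with the
 * ciphertext block that precedes it.  The last ciphertext block is saved in
 * last_iv and becomes the IV for the next chunk.
 */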
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

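/* Top-level CBC decrypt: manages the blkcipher walk and the FPU state. */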
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

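/*
 * Handle the final, partial block in CTR mode: encrypt the counter block and
 * XOR as many keystream bytes as are left into the output.
 */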
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

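/*
 * CTR encryption/decryption of one walk chunk: full 16-block batches go
 * through the AVX routine (which also advances the counter); remaining whole
 * blocks are handled with the generic CAST5 encrypt function on the
 * big-endian counter block.
 */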
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

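/*
 * Top-level CTR handler, used for both encryption and decryption: whole
 * blocks are processed via __ctr_crypt(), any trailing partial block via
 * ctr_crypt_final().
 */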
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

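/*
 * The "__"-prefixed entries are internal, synchronous blkcipher
 * implementations (CRYPTO_ALG_INTERNAL) that expect the FPU to be usable;
 * the ablkcipher entries below wrap them via the ablk_helper/cryptd
 * machinery so they can be used from any context.
 */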
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
			.geniv		= "chainiv",
		},
	},
} };

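/*
 * Only register the algorithms when AVX is both supported by the CPU and
 * enabled by the OS (OSXSAVE set, with SSE and YMM state saving turned on
 * in XCR0).
 */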
static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");