/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

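/* Number of blocks processed by one call into the 16-way AVX assembly routines. */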
#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

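/*
 * kernel_fpu_begin()/end() is costly, so the FPU is only enabled once a
 * walk step provides at least CAST5_PARALLEL_BLOCKS blocks of data; smaller
 * amounts are handled by the scalar C implementation without touching the
 * FPU state.
 */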
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

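/*
 * ECB: process CAST5_PARALLEL_BLOCKS blocks per AVX call and fall back to
 * the scalar __cast5_encrypt()/__cast5_decrypt() routines for any tail
 * shorter than a full 16-block batch.
 */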
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

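/*
 * CBC encryption is inherently serial (each block depends on the previous
 * ciphertext block), so it is done one block at a time with the scalar
 * cipher and never uses the AVX path.
 */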
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

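/*
 * CBC decryption walks the data backwards so that 16-block batches can be
 * decrypted in parallel and then XORed with the preceding ciphertext block;
 * the last ciphertext block is saved up front to become the next IV.
 */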
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

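/*
 * Handle a final partial block in CTR mode: encrypt the counter block into
 * a keystream buffer and XOR only the remaining bytes into the destination.
 */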
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

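/*
 * CTR mode: the 16-way AVX routine consumes and updates the counter in
 * walk->iv; leftover full blocks are handled with the scalar cipher, with
 * the IV treated as a 64-bit big-endian counter.
 */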
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

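/*
 * The walk is done in CAST5_BLOCK_SIZE granularity; any trailing partial
 * block left after the block-wise loop is finished by ctr_crypt_final().
 */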
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

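/*
 * The first three "__*-cast5-avx" entries are the internal synchronous
 * blkcipher implementations; the last three are the asynchronous
 * ablk_helper/cryptd wrappers exposed as "ecb(cast5)", "cbc(cast5)" and
 * "ctr(cast5)".
 */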
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

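/*
 * Only register the algorithms if the CPU and kernel support the SSE and
 * YMM (AVX) xstate features needed by the assembly implementation.
 */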
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");