1/*
2 * Glue Code for 3-way parallel assembler optimized version of Twofish
3 *
4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
19 * USA
20 *
21 */
22
23#include <asm/processor.h>
24#include <linux/crypto.h>
25#include <linux/init.h>
26#include <linux/module.h>
27#include <linux/types.h>
28#include <crypto/algapi.h>
29#include <crypto/twofish.h>
30#include <crypto/b128ops.h>
31#include <asm/crypto/twofish.h>
32#include <asm/crypto/glue_helper.h>
33#include <crypto/lrw.h>
34#include <crypto/xts.h>
35
/*
 * Export the 3-way block primitives to other GPL modules.  Neither function
 * is defined in this file; presumably they are implemented in the
 * accompanying assembler source and declared by <asm/crypto/twofish.h> --
 * TODO confirm.
 */
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
38
39static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
40					const u8 *src)
41{
42	__twofish_enc_blk_3way(ctx, dst, src, false);
43}
44
45static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
46					    const u8 *src)
47{
48	__twofish_enc_blk_3way(ctx, dst, src, true);
49}
50
51void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
52{
53	u128 ivs[2];
54
55	ivs[0] = src[0];
56	ivs[1] = src[1];
57
58	twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
59
60	u128_xor(&dst[1], &dst[1], &ivs[0]);
61	u128_xor(&dst[2], &dst[2], &ivs[1]);
62}
63EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
64
65void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
66{
67	be128 ctrblk;
68
69	if (dst != src)
70		*dst = *src;
71
72	le128_to_be128(&ctrblk, iv);
73	le128_inc(iv);
74
75	twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
76	u128_xor(dst, dst, (u128 *)&ctrblk);
77}
78EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
79
80void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
81			      le128 *iv)
82{
83	be128 ctrblks[3];
84
85	if (dst != src) {
86		dst[0] = src[0];
87		dst[1] = src[1];
88		dst[2] = src[2];
89	}
90
91	le128_to_be128(&ctrblks[0], iv);
92	le128_inc(iv);
93	le128_to_be128(&ctrblks[1], iv);
94	le128_inc(iv);
95	le128_to_be128(&ctrblks[2], iv);
96	le128_inc(iv);
97
98	twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
99}
100EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
101
102static const struct common_glue_ctx twofish_enc = {
103	.num_funcs = 2,
104	.fpu_blocks_limit = -1,
105
106	.funcs = { {
107		.num_blocks = 3,
108		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
109	}, {
110		.num_blocks = 1,
111		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
112	} }
113};
114
115static const struct common_glue_ctx twofish_ctr = {
116	.num_funcs = 2,
117	.fpu_blocks_limit = -1,
118
119	.funcs = { {
120		.num_blocks = 3,
121		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
122	}, {
123		.num_blocks = 1,
124		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
125	} }
126};
127
128static const struct common_glue_ctx twofish_dec = {
129	.num_funcs = 2,
130	.fpu_blocks_limit = -1,
131
132	.funcs = { {
133		.num_blocks = 3,
134		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
135	}, {
136		.num_blocks = 1,
137		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
138	} }
139};
140
141static const struct common_glue_ctx twofish_dec_cbc = {
142	.num_funcs = 2,
143	.fpu_blocks_limit = -1,
144
145	.funcs = { {
146		.num_blocks = 3,
147		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
148	}, {
149		.num_blocks = 1,
150		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
151	} }
152};
153
154static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
155		       struct scatterlist *src, unsigned int nbytes)
156{
157	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
158}
159
160static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161		       struct scatterlist *src, unsigned int nbytes)
162{
163	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
164}
165
166static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167		       struct scatterlist *src, unsigned int nbytes)
168{
169	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
170				       dst, src, nbytes);
171}
172
173static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
174		       struct scatterlist *src, unsigned int nbytes)
175{
176	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
177				       nbytes);
178}
179
180static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
181		     struct scatterlist *src, unsigned int nbytes)
182{
183	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
184}
185
186static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
187{
188	const unsigned int bsize = TF_BLOCK_SIZE;
189	struct twofish_ctx *ctx = priv;
190	int i;
191
192	if (nbytes == 3 * bsize) {
193		twofish_enc_blk_3way(ctx, srcdst, srcdst);
194		return;
195	}
196
197	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
198		twofish_enc_blk(ctx, srcdst, srcdst);
199}
200
201static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
202{
203	const unsigned int bsize = TF_BLOCK_SIZE;
204	struct twofish_ctx *ctx = priv;
205	int i;
206
207	if (nbytes == 3 * bsize) {
208		twofish_dec_blk_3way(ctx, srcdst, srcdst);
209		return;
210	}
211
212	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
213		twofish_dec_blk(ctx, srcdst, srcdst);
214}
215
216int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
217		       unsigned int keylen)
218{
219	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
220	int err;
221
222	err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
223			       &tfm->crt_flags);
224	if (err)
225		return err;
226
227	return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
228}
229EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
230
231static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
232		       struct scatterlist *src, unsigned int nbytes)
233{
234	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
235	be128 buf[3];
236	struct lrw_crypt_req req = {
237		.tbuf = buf,
238		.tbuflen = sizeof(buf),
239
240		.table_ctx = &ctx->lrw_table,
241		.crypt_ctx = &ctx->twofish_ctx,
242		.crypt_fn = encrypt_callback,
243	};
244
245	return lrw_crypt(desc, dst, src, nbytes, &req);
246}
247
248static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
249		       struct scatterlist *src, unsigned int nbytes)
250{
251	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
252	be128 buf[3];
253	struct lrw_crypt_req req = {
254		.tbuf = buf,
255		.tbuflen = sizeof(buf),
256
257		.table_ctx = &ctx->lrw_table,
258		.crypt_ctx = &ctx->twofish_ctx,
259		.crypt_fn = decrypt_callback,
260	};
261
262	return lrw_crypt(desc, dst, src, nbytes, &req);
263}
264
265void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
266{
267	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
268
269	lrw_free_table(&ctx->lrw_table);
270}
271EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
272
273int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
274		       unsigned int keylen)
275{
276	struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
277	u32 *flags = &tfm->crt_flags;
278	int err;
279
280	/* key consists of keys of equal size concatenated, therefore
281	 * the length must be even
282	 */
283	if (keylen % 2) {
284		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
285		return -EINVAL;
286	}
287
288	/* first half of xts-key is for crypt */
289	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
290	if (err)
291		return err;
292
293	/* second half of xts-key is for tweak */
294	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
295				flags);
296}
297EXPORT_SYMBOL_GPL(xts_twofish_setkey);
298
299static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
300		       struct scatterlist *src, unsigned int nbytes)
301{
302	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
303	be128 buf[3];
304	struct xts_crypt_req req = {
305		.tbuf = buf,
306		.tbuflen = sizeof(buf),
307
308		.tweak_ctx = &ctx->tweak_ctx,
309		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
310		.crypt_ctx = &ctx->crypt_ctx,
311		.crypt_fn = encrypt_callback,
312	};
313
314	return xts_crypt(desc, dst, src, nbytes, &req);
315}
316
317static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
318		       struct scatterlist *src, unsigned int nbytes)
319{
320	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
321	be128 buf[3];
322	struct xts_crypt_req req = {
323		.tbuf = buf,
324		.tbuflen = sizeof(buf),
325
326		.tweak_ctx = &ctx->tweak_ctx,
327		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
328		.crypt_ctx = &ctx->crypt_ctx,
329		.crypt_fn = decrypt_callback,
330	};
331
332	return xts_crypt(desc, dst, src, nbytes, &req);
333}
334
335static struct crypto_alg tf_algs[5] = { {
336	.cra_name		= "ecb(twofish)",
337	.cra_driver_name	= "ecb-twofish-3way",
338	.cra_priority		= 300,
339	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
340	.cra_blocksize		= TF_BLOCK_SIZE,
341	.cra_ctxsize		= sizeof(struct twofish_ctx),
342	.cra_alignmask		= 0,
343	.cra_type		= &crypto_blkcipher_type,
344	.cra_module		= THIS_MODULE,
345	.cra_u = {
346		.blkcipher = {
347			.min_keysize	= TF_MIN_KEY_SIZE,
348			.max_keysize	= TF_MAX_KEY_SIZE,
349			.setkey		= twofish_setkey,
350			.encrypt	= ecb_encrypt,
351			.decrypt	= ecb_decrypt,
352		},
353	},
354}, {
355	.cra_name		= "cbc(twofish)",
356	.cra_driver_name	= "cbc-twofish-3way",
357	.cra_priority		= 300,
358	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
359	.cra_blocksize		= TF_BLOCK_SIZE,
360	.cra_ctxsize		= sizeof(struct twofish_ctx),
361	.cra_alignmask		= 0,
362	.cra_type		= &crypto_blkcipher_type,
363	.cra_module		= THIS_MODULE,
364	.cra_u = {
365		.blkcipher = {
366			.min_keysize	= TF_MIN_KEY_SIZE,
367			.max_keysize	= TF_MAX_KEY_SIZE,
368			.ivsize		= TF_BLOCK_SIZE,
369			.setkey		= twofish_setkey,
370			.encrypt	= cbc_encrypt,
371			.decrypt	= cbc_decrypt,
372		},
373	},
374}, {
375	.cra_name		= "ctr(twofish)",
376	.cra_driver_name	= "ctr-twofish-3way",
377	.cra_priority		= 300,
378	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
379	.cra_blocksize		= 1,
380	.cra_ctxsize		= sizeof(struct twofish_ctx),
381	.cra_alignmask		= 0,
382	.cra_type		= &crypto_blkcipher_type,
383	.cra_module		= THIS_MODULE,
384	.cra_u = {
385		.blkcipher = {
386			.min_keysize	= TF_MIN_KEY_SIZE,
387			.max_keysize	= TF_MAX_KEY_SIZE,
388			.ivsize		= TF_BLOCK_SIZE,
389			.setkey		= twofish_setkey,
390			.encrypt	= ctr_crypt,
391			.decrypt	= ctr_crypt,
392		},
393	},
394}, {
395	.cra_name		= "lrw(twofish)",
396	.cra_driver_name	= "lrw-twofish-3way",
397	.cra_priority		= 300,
398	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
399	.cra_blocksize		= TF_BLOCK_SIZE,
400	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
401	.cra_alignmask		= 0,
402	.cra_type		= &crypto_blkcipher_type,
403	.cra_module		= THIS_MODULE,
404	.cra_exit		= lrw_twofish_exit_tfm,
405	.cra_u = {
406		.blkcipher = {
407			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
408			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
409			.ivsize		= TF_BLOCK_SIZE,
410			.setkey		= lrw_twofish_setkey,
411			.encrypt	= lrw_encrypt,
412			.decrypt	= lrw_decrypt,
413		},
414	},
415}, {
416	.cra_name		= "xts(twofish)",
417	.cra_driver_name	= "xts-twofish-3way",
418	.cra_priority		= 300,
419	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
420	.cra_blocksize		= TF_BLOCK_SIZE,
421	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
422	.cra_alignmask		= 0,
423	.cra_type		= &crypto_blkcipher_type,
424	.cra_module		= THIS_MODULE,
425	.cra_u = {
426		.blkcipher = {
427			.min_keysize	= TF_MIN_KEY_SIZE * 2,
428			.max_keysize	= TF_MAX_KEY_SIZE * 2,
429			.ivsize		= TF_BLOCK_SIZE,
430			.setkey		= xts_twofish_setkey,
431			.encrypt	= xts_encrypt,
432			.decrypt	= xts_decrypt,
433		},
434	},
435} };
436
437static bool is_blacklisted_cpu(void)
438{
439	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
440		return false;
441
442	if (boot_cpu_data.x86 == 0x06 &&
443		(boot_cpu_data.x86_model == 0x1c ||
444		 boot_cpu_data.x86_model == 0x26 ||
445		 boot_cpu_data.x86_model == 0x36)) {
446		/*
447		 * On Atom, twofish-3way is slower than original assembler
448		 * implementation. Twofish-3way trades off some performance in
449		 * storing blocks in 64bit registers to allow three blocks to
450		 * be processed parallel. Parallel operation then allows gaining
451		 * more performance than was trade off, on out-of-order CPUs.
452		 * However Atom does not benefit from this parallellism and
453		 * should be blacklisted.
454		 */
455		return true;
456	}
457
458	if (boot_cpu_data.x86 == 0x0f) {
459		/*
460		 * On Pentium 4, twofish-3way is slower than original assembler
461		 * implementation because excessive uses of 64bit rotate and
462		 * left-shifts (which are really slow on P4) needed to store and
463		 * handle 128bit block in two 64bit registers.
464		 */
465		return true;
466	}
467
468	return false;
469}
470
/*
 * "force" module parameter: when non-zero, init() registers the algorithms
 * without consulting the CPU blacklist.  The permission argument is 0;
 * presumably that means no sysfs entry is created for it -- confirm against
 * the module_param() documentation.
 */
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
474
475static int __init init(void)
476{
477	if (!force && is_blacklisted_cpu()) {
478		printk(KERN_INFO
479			"twofish-x86_64-3way: performance on this CPU "
480			"would be suboptimal: disabling "
481			"twofish-x86_64-3way.\n");
482		return -ENODEV;
483	}
484
485	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
486}
487
488static void __exit fini(void)
489{
490	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
491}
492
/* Hook init() and fini() up as the module's load and unload handlers. */
module_init(init);
module_exit(fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
/*
 * Crypto aliases for this module; presumably they let the crypto core
 * auto-load it when one of these names is requested -- TODO confirm.
 */
MODULE_ALIAS_CRYPTO("twofish");
MODULE_ALIAS_CRYPTO("twofish-asm");
500