/*
 * Glue Code for assembler optimized version of Blowfish
 *
 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
 *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 * CTR part based on code (crypto/ctr.c) by:
 *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <asm/processor.h>
#include <crypto/blowfish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>

/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
				   bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);

/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
				      const u8 *src);

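/*
 * The 'xor' flag selects between the two behaviours of the encryption
 * routines: the plain wrappers below pass false, while the _xor wrappers
 * pass true and the CTR code relies on the result being XORed into *dst
 * rather than written over it.
 */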
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
	__blowfish_enc_blk(ctx, dst, src, false);
}

static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
					const u8 *src)
{
	__blowfish_enc_blk(ctx, dst, src, true);
}

static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					 const u8 *src)
{
	__blowfish_enc_blk_4way(ctx, dst, src, false);
}

static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
					      const u8 *src)
{
	__blowfish_enc_blk_4way(ctx, dst, src, true);
}

static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}

static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}

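/*
 * Walk the request and apply the supplied helpers: full batches of four
 * blocks go through fn_4way(), any remaining blocks are handled one at a
 * time with fn().
 */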
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		/* Process four block batch */
		if (nbytes >= bsize * 4) {
			do {
				fn_4way(ctx, wdst, wsrc);

				wsrc += bsize * 4;
				wdst += bsize * 4;
				nbytes -= bsize * 4;
			} while (nbytes >= bsize * 4);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
}

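/*
 * CBC encryption is inherently sequential (each block is chained to the
 * previous ciphertext block), so only the single-block routine is used.
 */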
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

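/*
 * CBC decryption runs from the last block towards the first so that it
 * works in place: the ciphertext copies needed for the chaining XOR are
 * saved in ivs[] before the 4-way decrypt overwrites them.
 */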
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ivs[4 - 1];
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process four block batch */
	if (nbytes >= bsize * 4) {
		do {
			nbytes -= bsize * 4 - bsize;
			src -= 4 - 1;
			dst -= 4 - 1;

			ivs[0] = src[0];
			ivs[1] = src[1];
			ivs[2] = src[2];

			blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);

			dst[1] ^= ivs[0];
			dst[2] ^= ivs[1];
			dst[3] ^= ivs[2];

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * 4);
	}

	/* Handle leftovers */
	for (;;) {
		blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

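/*
 * Handle the final partial block in CTR mode: encrypt the counter block
 * into a keystream buffer and XOR only the remaining bytes into the
 * destination.
 */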
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[BF_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	blowfish_enc_blk(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, BF_BLOCK_SIZE);
}

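/*
 * The counter is kept as a host-endian u64 and converted to big-endian
 * for each block.  The data is first copied to the destination, then the
 * XOR-ing cipher variants encrypt the counter blocks and combine the
 * keystream into the destination in place.
 */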
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
	__be64 ctrblocks[4];

	/* Process four block batch */
	if (nbytes >= bsize * 4) {
		do {
			if (dst != src) {
				dst[0] = src[0];
				dst[1] = src[1];
				dst[2] = src[2];
				dst[3] = src[3];
			}

			/* create ctrblks for parallel encrypt */
			ctrblocks[0] = cpu_to_be64(ctrblk++);
			ctrblocks[1] = cpu_to_be64(ctrblk++);
			ctrblocks[2] = cpu_to_be64(ctrblk++);
			ctrblocks[3] = cpu_to_be64(ctrblk++);

			blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
						  (u8 *)ctrblocks);

			src += 4;
			dst += 4;
		} while ((nbytes -= bsize * 4) >= bsize * 4);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		if (dst != src)
			*dst = *src;

		ctrblocks[0] = cpu_to_be64(ctrblk++);

		blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);

		src += 1;
		dst += 1;
	} while ((nbytes -= bsize) >= bsize);

done:
	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);

	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	if (walk.nbytes) {
		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

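/*
 * Four algorithms are registered: the plain "blowfish" cipher and the
 * "ecb", "cbc" and "ctr" blkcipher modes implemented above.
 */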
static struct crypto_alg bf_algs[4] = { {
	.cra_name		= "blowfish",
	.cra_driver_name	= "blowfish-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.cipher = {
			.cia_min_keysize	= BF_MIN_KEY_SIZE,
			.cia_max_keysize	= BF_MAX_KEY_SIZE,
			.cia_setkey		= blowfish_setkey,
			.cia_encrypt		= blowfish_encrypt,
			.cia_decrypt		= blowfish_decrypt,
		}
	}
}, {
	.cra_name		= "ecb(blowfish)",
	.cra_driver_name	= "ecb-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(blowfish)",
	.cra_driver_name	= "cbc-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.ivsize		= BF_BLOCK_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(blowfish)",
	.cra_driver_name	= "ctr-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.ivsize		= BF_BLOCK_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
} };

static bool is_blacklisted_cpu(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return false;

	if (boot_cpu_data.x86 == 0x0f) {
		/*
		 * On Pentium 4, blowfish-x86_64 is slower than the generic C
		 * implementation because it uses 64-bit rotates, which are
		 * really slow on P4. Therefore, blacklist P4s.
		 */
		return true;
	}

	return false;
}

static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

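/* Skip registration on blacklisted CPUs unless the 'force' parameter is set. */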
static int __init init(void)
{
	if (!force && is_blacklisted_cpu()) {
		printk(KERN_INFO
			"blowfish-x86_64: performance on this CPU "
			"would be suboptimal: disabling "
			"blowfish-x86_64.\n");
		return -ENODEV;
	}

	return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}

static void __exit fini(void)
{
	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}

module_init(init);
module_exit(fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("blowfish");
MODULE_ALIAS_CRYPTO("blowfish-asm");