1#include "threefish_api.h"
2
3void threefish_encrypt_256(struct threefish_key *key_ctx, u64 *input,
4			   u64 *output)
5{
6	u64 b0 = input[0], b1 = input[1],
7	    b2 = input[2], b3 = input[3];
8	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
9	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
10	    k4 = key_ctx->key[4];
11	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
12	    t2 = key_ctx->tweak[2];
13
14	b1 += k1 + t0;
15	b0 += b1 + k0;
16	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
17
18	b3 += k3;
19	b2 += b3 + k2 + t1;
20	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
21
22	b0 += b3;
23	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
24
25	b2 += b1;
26	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
27
28	b0 += b1;
29	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
30
31	b2 += b3;
32	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
33
34	b0 += b3;
35	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
36
37	b2 += b1;
38	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
39
40	b1 += k2 + t1;
41	b0 += b1 + k1;
42	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
43
44	b3 += k4 + 1;
45	b2 += b3 + k3 + t2;
46	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
47
48	b0 += b3;
49	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
50
51	b2 += b1;
52	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
53
54	b0 += b1;
55	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
56
57	b2 += b3;
58	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
59
60	b0 += b3;
61	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
62
63	b2 += b1;
64	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
65
66
67	b1 += k3 + t2;
68	b0 += b1 + k2;
69	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
70
71	b3 += k0 + 2;
72	b2 += b3 + k4 + t0;
73	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
74
75	b0 += b3;
76	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
77
78	b2 += b1;
79	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
80
81	b0 += b1;
82	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
83
84	b2 += b3;
85	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
86
87	b0 += b3;
88	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
89
90	b2 += b1;
91	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
92
93	b1 += k4 + t0;
94	b0 += b1 + k3;
95	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
96
97	b3 += k1 + 3;
98	b2 += b3 + k0 + t1;
99	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
100
101	b0 += b3;
102	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
103
104	b2 += b1;
105	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
106
107	b0 += b1;
108	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
109
110	b2 += b3;
111	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
112
113	b0 += b3;
114	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
115
116	b2 += b1;
117	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
118
119
120	b1 += k0 + t1;
121	b0 += b1 + k4;
122	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
123
124	b3 += k2 + 4;
125	b2 += b3 + k1 + t2;
126	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
127
128	b0 += b3;
129	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
130
131	b2 += b1;
132	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
133
134	b0 += b1;
135	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
136
137	b2 += b3;
138	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
139
140	b0 += b3;
141	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
142
143	b2 += b1;
144	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
145
146	b1 += k1 + t2;
147	b0 += b1 + k0;
148	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
149
150	b3 += k3 + 5;
151	b2 += b3 + k2 + t0;
152	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
153
154	b0 += b3;
155	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
156
157	b2 += b1;
158	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
159
160	b0 += b1;
161	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
162
163	b2 += b3;
164	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
165
166	b0 += b3;
167	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
168
169	b2 += b1;
170	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
171
172
173	b1 += k2 + t0;
174	b0 += b1 + k1;
175	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
176
177	b3 += k4 + 6;
178	b2 += b3 + k3 + t1;
179	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
180
181	b0 += b3;
182	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
183
184	b2 += b1;
185	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
186
187	b0 += b1;
188	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
189
190	b2 += b3;
191	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
192
193	b0 += b3;
194	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
195
196	b2 += b1;
197	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
198
199	b1 += k3 + t1;
200	b0 += b1 + k2;
201	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
202
203	b3 += k0 + 7;
204	b2 += b3 + k4 + t2;
205	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
206
207	b0 += b3;
208	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
209
210	b2 += b1;
211	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
212
213	b0 += b1;
214	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
215
216	b2 += b3;
217	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
218
219	b0 += b3;
220	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
221
222	b2 += b1;
223	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
224
225
226	b1 += k4 + t2;
227	b0 += b1 + k3;
228	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
229
230	b3 += k1 + 8;
231	b2 += b3 + k0 + t0;
232	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
233
234	b0 += b3;
235	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
236
237	b2 += b1;
238	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
239
240	b0 += b1;
241	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
242
243	b2 += b3;
244	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
245
246	b0 += b3;
247	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
248
249	b2 += b1;
250	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
251
252	b1 += k0 + t0;
253	b0 += b1 + k4;
254	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
255
256	b3 += k2 + 9;
257	b2 += b3 + k1 + t1;
258	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
259
260	b0 += b3;
261	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
262
263	b2 += b1;
264	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
265
266	b0 += b1;
267	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
268
269	b2 += b3;
270	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
271
272	b0 += b3;
273	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
274
275	b2 += b1;
276	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
277
278
279	b1 += k1 + t1;
280	b0 += b1 + k0;
281	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
282
283	b3 += k3 + 10;
284	b2 += b3 + k2 + t2;
285	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
286
287	b0 += b3;
288	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
289
290	b2 += b1;
291	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
292
293	b0 += b1;
294	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
295
296	b2 += b3;
297	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
298
299	b0 += b3;
300	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
301
302	b2 += b1;
303	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
304
305	b1 += k2 + t2;
306	b0 += b1 + k1;
307	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
308
309	b3 += k4 + 11;
310	b2 += b3 + k3 + t0;
311	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
312
313	b0 += b3;
314	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
315
316	b2 += b1;
317	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
318
319	b0 += b1;
320	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
321
322	b2 += b3;
323	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
324
325	b0 += b3;
326	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
327
328	b2 += b1;
329	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
330
331
332	b1 += k3 + t0;
333	b0 += b1 + k2;
334	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
335
336	b3 += k0 + 12;
337	b2 += b3 + k4 + t1;
338	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
339
340	b0 += b3;
341	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
342
343	b2 += b1;
344	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
345
346	b0 += b1;
347	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
348
349	b2 += b3;
350	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
351
352	b0 += b3;
353	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
354
355	b2 += b1;
356	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
357
358	b1 += k4 + t1;
359	b0 += b1 + k3;
360	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
361
362	b3 += k1 + 13;
363	b2 += b3 + k0 + t2;
364	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
365
366	b0 += b3;
367	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
368
369	b2 += b1;
370	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
371
372	b0 += b1;
373	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
374
375	b2 += b3;
376	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
377
378	b0 += b3;
379	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
380
381	b2 += b1;
382	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
383
384
385	b1 += k0 + t2;
386	b0 += b1 + k4;
387	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
388
389	b3 += k2 + 14;
390	b2 += b3 + k1 + t0;
391	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
392
393	b0 += b3;
394	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
395
396	b2 += b1;
397	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
398
399	b0 += b1;
400	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
401
402	b2 += b3;
403	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
404
405	b0 += b3;
406	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
407
408	b2 += b1;
409	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
410
411	b1 += k1 + t0;
412	b0 += b1 + k0;
413	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
414
415	b3 += k3 + 15;
416	b2 += b3 + k2 + t1;
417	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
418
419	b0 += b3;
420	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
421
422	b2 += b1;
423	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
424
425	b0 += b1;
426	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
427
428	b2 += b3;
429	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
430
431	b0 += b3;
432	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
433
434	b2 += b1;
435	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
436
437
438	b1 += k2 + t1;
439	b0 += b1 + k1;
440	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
441
442	b3 += k4 + 16;
443	b2 += b3 + k3 + t2;
444	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
445
446	b0 += b3;
447	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
448
449	b2 += b1;
450	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
451
452	b0 += b1;
453	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
454
455	b2 += b3;
456	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
457
458	b0 += b3;
459	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
460
461	b2 += b1;
462	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
463
464	b1 += k3 + t2;
465	b0 += b1 + k2;
466	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
467
468	b3 += k0 + 17;
469	b2 += b3 + k4 + t0;
470	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
471
472	b0 += b3;
473	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
474
475	b2 += b1;
476	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
477
478	b0 += b1;
479	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
480
481	b2 += b3;
482	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
483
484	b0 += b3;
485	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
486
487	b2 += b1;
488	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
489
490	output[0] = b0 + k3;
491	output[1] = b1 + k4 + t0;
492	output[2] = b2 + k0 + t1;
493	output[3] = b3 + k1 + 18;
494}
495
496void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
497			   u64 *output)
498{
499	u64 b0 = input[0], b1 = input[1],
500	    b2 = input[2], b3 = input[3];
501	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
502	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
503	    k4 = key_ctx->key[4];
504	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
505	    t2 = key_ctx->tweak[2];
506
507	u64 tmp;
508
509	b0 -= k3;
510	b1 -= k4 + t0;
511	b2 -= k0 + t1;
512	b3 -= k1 + 18;
513	tmp = b3 ^ b0;
514	b3 = (tmp >> 32) | (tmp << (64 - 32));
515	b0 -= b3;
516
517	tmp = b1 ^ b2;
518	b1 = (tmp >> 32) | (tmp << (64 - 32));
519	b2 -= b1;
520
521	tmp = b1 ^ b0;
522	b1 = (tmp >> 58) | (tmp << (64 - 58));
523	b0 -= b1;
524
525	tmp = b3 ^ b2;
526	b3 = (tmp >> 22) | (tmp << (64 - 22));
527	b2 -= b3;
528
529	tmp = b3 ^ b0;
530	b3 = (tmp >> 46) | (tmp << (64 - 46));
531	b0 -= b3;
532
533	tmp = b1 ^ b2;
534	b1 = (tmp >> 12) | (tmp << (64 - 12));
535	b2 -= b1;
536
537	tmp = b1 ^ b0;
538	b1 = (tmp >> 25) | (tmp << (64 - 25));
539	b0 -= b1 + k2;
540	b1 -= k3 + t2;
541
542	tmp = b3 ^ b2;
543	b3 = (tmp >> 33) | (tmp << (64 - 33));
544	b2 -= b3 + k4 + t0;
545	b3 -= k0 + 17;
546
547	tmp = b3 ^ b0;
548	b3 = (tmp >> 5) | (tmp << (64 - 5));
549	b0 -= b3;
550
551	tmp = b1 ^ b2;
552	b1 = (tmp >> 37) | (tmp << (64 - 37));
553	b2 -= b1;
554
555	tmp = b1 ^ b0;
556	b1 = (tmp >> 23) | (tmp << (64 - 23));
557	b0 -= b1;
558
559	tmp = b3 ^ b2;
560	b3 = (tmp >> 40) | (tmp << (64 - 40));
561	b2 -= b3;
562
563	tmp = b3 ^ b0;
564	b3 = (tmp >> 52) | (tmp << (64 - 52));
565	b0 -= b3;
566
567	tmp = b1 ^ b2;
568	b1 = (tmp >> 57) | (tmp << (64 - 57));
569	b2 -= b1;
570
571	tmp = b1 ^ b0;
572	b1 = (tmp >> 14) | (tmp << (64 - 14));
573	b0 -= b1 + k1;
574	b1 -= k2 + t1;
575
576	tmp = b3 ^ b2;
577	b3 = (tmp >> 16) | (tmp << (64 - 16));
578	b2 -= b3 + k3 + t2;
579	b3 -= k4 + 16;
580
581
582	tmp = b3 ^ b0;
583	b3 = (tmp >> 32) | (tmp << (64 - 32));
584	b0 -= b3;
585
586	tmp = b1 ^ b2;
587	b1 = (tmp >> 32) | (tmp << (64 - 32));
588	b2 -= b1;
589
590	tmp = b1 ^ b0;
591	b1 = (tmp >> 58) | (tmp << (64 - 58));
592	b0 -= b1;
593
594	tmp = b3 ^ b2;
595	b3 = (tmp >> 22) | (tmp << (64 - 22));
596	b2 -= b3;
597
598	tmp = b3 ^ b0;
599	b3 = (tmp >> 46) | (tmp << (64 - 46));
600	b0 -= b3;
601
602	tmp = b1 ^ b2;
603	b1 = (tmp >> 12) | (tmp << (64 - 12));
604	b2 -= b1;
605
606	tmp = b1 ^ b0;
607	b1 = (tmp >> 25) | (tmp << (64 - 25));
608	b0 -= b1 + k0;
609	b1 -= k1 + t0;
610
611	tmp = b3 ^ b2;
612	b3 = (tmp >> 33) | (tmp << (64 - 33));
613	b2 -= b3 + k2 + t1;
614	b3 -= k3 + 15;
615
616	tmp = b3 ^ b0;
617	b3 = (tmp >> 5) | (tmp << (64 - 5));
618	b0 -= b3;
619
620	tmp = b1 ^ b2;
621	b1 = (tmp >> 37) | (tmp << (64 - 37));
622	b2 -= b1;
623
624	tmp = b1 ^ b0;
625	b1 = (tmp >> 23) | (tmp << (64 - 23));
626	b0 -= b1;
627
628	tmp = b3 ^ b2;
629	b3 = (tmp >> 40) | (tmp << (64 - 40));
630	b2 -= b3;
631
632	tmp = b3 ^ b0;
633	b3 = (tmp >> 52) | (tmp << (64 - 52));
634	b0 -= b3;
635
636	tmp = b1 ^ b2;
637	b1 = (tmp >> 57) | (tmp << (64 - 57));
638	b2 -= b1;
639
640	tmp = b1 ^ b0;
641	b1 = (tmp >> 14) | (tmp << (64 - 14));
642	b0 -= b1 + k4;
643	b1 -= k0 + t2;
644
645	tmp = b3 ^ b2;
646	b3 = (tmp >> 16) | (tmp << (64 - 16));
647	b2 -= b3 + k1 + t0;
648	b3 -= k2 + 14;
649
650
651	tmp = b3 ^ b0;
652	b3 = (tmp >> 32) | (tmp << (64 - 32));
653	b0 -= b3;
654
655	tmp = b1 ^ b2;
656	b1 = (tmp >> 32) | (tmp << (64 - 32));
657	b2 -= b1;
658
659	tmp = b1 ^ b0;
660	b1 = (tmp >> 58) | (tmp << (64 - 58));
661	b0 -= b1;
662
663	tmp = b3 ^ b2;
664	b3 = (tmp >> 22) | (tmp << (64 - 22));
665	b2 -= b3;
666
667	tmp = b3 ^ b0;
668	b3 = (tmp >> 46) | (tmp << (64 - 46));
669	b0 -= b3;
670
671	tmp = b1 ^ b2;
672	b1 = (tmp >> 12) | (tmp << (64 - 12));
673	b2 -= b1;
674
675	tmp = b1 ^ b0;
676	b1 = (tmp >> 25) | (tmp << (64 - 25));
677	b0 -= b1 + k3;
678	b1 -= k4 + t1;
679
680	tmp = b3 ^ b2;
681	b3 = (tmp >> 33) | (tmp << (64 - 33));
682	b2 -= b3 + k0 + t2;
683	b3 -= k1 + 13;
684
685	tmp = b3 ^ b0;
686	b3 = (tmp >> 5) | (tmp << (64 - 5));
687	b0 -= b3;
688
689	tmp = b1 ^ b2;
690	b1 = (tmp >> 37) | (tmp << (64 - 37));
691	b2 -= b1;
692
693	tmp = b1 ^ b0;
694	b1 = (tmp >> 23) | (tmp << (64 - 23));
695	b0 -= b1;
696
697	tmp = b3 ^ b2;
698	b3 = (tmp >> 40) | (tmp << (64 - 40));
699	b2 -= b3;
700
701	tmp = b3 ^ b0;
702	b3 = (tmp >> 52) | (tmp << (64 - 52));
703	b0 -= b3;
704
705	tmp = b1 ^ b2;
706	b1 = (tmp >> 57) | (tmp << (64 - 57));
707	b2 -= b1;
708
709	tmp = b1 ^ b0;
710	b1 = (tmp >> 14) | (tmp << (64 - 14));
711	b0 -= b1 + k2;
712	b1 -= k3 + t0;
713
714	tmp = b3 ^ b2;
715	b3 = (tmp >> 16) | (tmp << (64 - 16));
716	b2 -= b3 + k4 + t1;
717	b3 -= k0 + 12;
718
719
720	tmp = b3 ^ b0;
721	b3 = (tmp >> 32) | (tmp << (64 - 32));
722	b0 -= b3;
723
724	tmp = b1 ^ b2;
725	b1 = (tmp >> 32) | (tmp << (64 - 32));
726	b2 -= b1;
727
728	tmp = b1 ^ b0;
729	b1 = (tmp >> 58) | (tmp << (64 - 58));
730	b0 -= b1;
731
732	tmp = b3 ^ b2;
733	b3 = (tmp >> 22) | (tmp << (64 - 22));
734	b2 -= b3;
735
736	tmp = b3 ^ b0;
737	b3 = (tmp >> 46) | (tmp << (64 - 46));
738	b0 -= b3;
739
740	tmp = b1 ^ b2;
741	b1 = (tmp >> 12) | (tmp << (64 - 12));
742	b2 -= b1;
743
744	tmp = b1 ^ b0;
745	b1 = (tmp >> 25) | (tmp << (64 - 25));
746	b0 -= b1 + k1;
747	b1 -= k2 + t2;
748
749	tmp = b3 ^ b2;
750	b3 = (tmp >> 33) | (tmp << (64 - 33));
751	b2 -= b3 + k3 + t0;
752	b3 -= k4 + 11;
753
754	tmp = b3 ^ b0;
755	b3 = (tmp >> 5) | (tmp << (64 - 5));
756	b0 -= b3;
757
758	tmp = b1 ^ b2;
759	b1 = (tmp >> 37) | (tmp << (64 - 37));
760	b2 -= b1;
761
762	tmp = b1 ^ b0;
763	b1 = (tmp >> 23) | (tmp << (64 - 23));
764	b0 -= b1;
765
766	tmp = b3 ^ b2;
767	b3 = (tmp >> 40) | (tmp << (64 - 40));
768	b2 -= b3;
769
770	tmp = b3 ^ b0;
771	b3 = (tmp >> 52) | (tmp << (64 - 52));
772	b0 -= b3;
773
774	tmp = b1 ^ b2;
775	b1 = (tmp >> 57) | (tmp << (64 - 57));
776	b2 -= b1;
777
778	tmp = b1 ^ b0;
779	b1 = (tmp >> 14) | (tmp << (64 - 14));
780	b0 -= b1 + k0;
781	b1 -= k1 + t1;
782
783	tmp = b3 ^ b2;
784	b3 = (tmp >> 16) | (tmp << (64 - 16));
785	b2 -= b3 + k2 + t2;
786	b3 -= k3 + 10;
787
788
789	tmp = b3 ^ b0;
790	b3 = (tmp >> 32) | (tmp << (64 - 32));
791	b0 -= b3;
792
793	tmp = b1 ^ b2;
794	b1 = (tmp >> 32) | (tmp << (64 - 32));
795	b2 -= b1;
796
797	tmp = b1 ^ b0;
798	b1 = (tmp >> 58) | (tmp << (64 - 58));
799	b0 -= b1;
800
801	tmp = b3 ^ b2;
802	b3 = (tmp >> 22) | (tmp << (64 - 22));
803	b2 -= b3;
804
805	tmp = b3 ^ b0;
806	b3 = (tmp >> 46) | (tmp << (64 - 46));
807	b0 -= b3;
808
809	tmp = b1 ^ b2;
810	b1 = (tmp >> 12) | (tmp << (64 - 12));
811	b2 -= b1;
812
813	tmp = b1 ^ b0;
814	b1 = (tmp >> 25) | (tmp << (64 - 25));
815	b0 -= b1 + k4;
816	b1 -= k0 + t0;
817
818	tmp = b3 ^ b2;
819	b3 = (tmp >> 33) | (tmp << (64 - 33));
820	b2 -= b3 + k1 + t1;
821	b3 -= k2 + 9;
822
823	tmp = b3 ^ b0;
824	b3 = (tmp >> 5) | (tmp << (64 - 5));
825	b0 -= b3;
826
827	tmp = b1 ^ b2;
828	b1 = (tmp >> 37) | (tmp << (64 - 37));
829	b2 -= b1;
830
831	tmp = b1 ^ b0;
832	b1 = (tmp >> 23) | (tmp << (64 - 23));
833	b0 -= b1;
834
835	tmp = b3 ^ b2;
836	b3 = (tmp >> 40) | (tmp << (64 - 40));
837	b2 -= b3;
838
839	tmp = b3 ^ b0;
840	b3 = (tmp >> 52) | (tmp << (64 - 52));
841	b0 -= b3;
842
843	tmp = b1 ^ b2;
844	b1 = (tmp >> 57) | (tmp << (64 - 57));
845	b2 -= b1;
846
847	tmp = b1 ^ b0;
848	b1 = (tmp >> 14) | (tmp << (64 - 14));
849	b0 -= b1 + k3;
850	b1 -= k4 + t2;
851
852	tmp = b3 ^ b2;
853	b3 = (tmp >> 16) | (tmp << (64 - 16));
854	b2 -= b3 + k0 + t0;
855	b3 -= k1 + 8;
856
857
858	tmp = b3 ^ b0;
859	b3 = (tmp >> 32) | (tmp << (64 - 32));
860	b0 -= b3;
861
862	tmp = b1 ^ b2;
863	b1 = (tmp >> 32) | (tmp << (64 - 32));
864	b2 -= b1;
865
866	tmp = b1 ^ b0;
867	b1 = (tmp >> 58) | (tmp << (64 - 58));
868	b0 -= b1;
869
870	tmp = b3 ^ b2;
871	b3 = (tmp >> 22) | (tmp << (64 - 22));
872	b2 -= b3;
873
874	tmp = b3 ^ b0;
875	b3 = (tmp >> 46) | (tmp << (64 - 46));
876	b0 -= b3;
877
878	tmp = b1 ^ b2;
879	b1 = (tmp >> 12) | (tmp << (64 - 12));
880	b2 -= b1;
881
882	tmp = b1 ^ b0;
883	b1 = (tmp >> 25) | (tmp << (64 - 25));
884	b0 -= b1 + k2;
885	b1 -= k3 + t1;
886
887	tmp = b3 ^ b2;
888	b3 = (tmp >> 33) | (tmp << (64 - 33));
889	b2 -= b3 + k4 + t2;
890	b3 -= k0 + 7;
891
892	tmp = b3 ^ b0;
893	b3 = (tmp >> 5) | (tmp << (64 - 5));
894	b0 -= b3;
895
896	tmp = b1 ^ b2;
897	b1 = (tmp >> 37) | (tmp << (64 - 37));
898	b2 -= b1;
899
900	tmp = b1 ^ b0;
901	b1 = (tmp >> 23) | (tmp << (64 - 23));
902	b0 -= b1;
903
904	tmp = b3 ^ b2;
905	b3 = (tmp >> 40) | (tmp << (64 - 40));
906	b2 -= b3;
907
908	tmp = b3 ^ b0;
909	b3 = (tmp >> 52) | (tmp << (64 - 52));
910	b0 -= b3;
911
912	tmp = b1 ^ b2;
913	b1 = (tmp >> 57) | (tmp << (64 - 57));
914	b2 -= b1;
915
916	tmp = b1 ^ b0;
917	b1 = (tmp >> 14) | (tmp << (64 - 14));
918	b0 -= b1 + k1;
919	b1 -= k2 + t0;
920
921	tmp = b3 ^ b2;
922	b3 = (tmp >> 16) | (tmp << (64 - 16));
923	b2 -= b3 + k3 + t1;
924	b3 -= k4 + 6;
925
926
927	tmp = b3 ^ b0;
928	b3 = (tmp >> 32) | (tmp << (64 - 32));
929	b0 -= b3;
930
931	tmp = b1 ^ b2;
932	b1 = (tmp >> 32) | (tmp << (64 - 32));
933	b2 -= b1;
934
935	tmp = b1 ^ b0;
936	b1 = (tmp >> 58) | (tmp << (64 - 58));
937	b0 -= b1;
938
939	tmp = b3 ^ b2;
940	b3 = (tmp >> 22) | (tmp << (64 - 22));
941	b2 -= b3;
942
943	tmp = b3 ^ b0;
944	b3 = (tmp >> 46) | (tmp << (64 - 46));
945	b0 -= b3;
946
947	tmp = b1 ^ b2;
948	b1 = (tmp >> 12) | (tmp << (64 - 12));
949	b2 -= b1;
950
951	tmp = b1 ^ b0;
952	b1 = (tmp >> 25) | (tmp << (64 - 25));
953	b0 -= b1 + k0;
954	b1 -= k1 + t2;
955
956	tmp = b3 ^ b2;
957	b3 = (tmp >> 33) | (tmp << (64 - 33));
958	b2 -= b3 + k2 + t0;
959	b3 -= k3 + 5;
960
961	tmp = b3 ^ b0;
962	b3 = (tmp >> 5) | (tmp << (64 - 5));
963	b0 -= b3;
964
965	tmp = b1 ^ b2;
966	b1 = (tmp >> 37) | (tmp << (64 - 37));
967	b2 -= b1;
968
969	tmp = b1 ^ b0;
970	b1 = (tmp >> 23) | (tmp << (64 - 23));
971	b0 -= b1;
972
973	tmp = b3 ^ b2;
974	b3 = (tmp >> 40) | (tmp << (64 - 40));
975	b2 -= b3;
976
977	tmp = b3 ^ b0;
978	b3 = (tmp >> 52) | (tmp << (64 - 52));
979	b0 -= b3;
980
981	tmp = b1 ^ b2;
982	b1 = (tmp >> 57) | (tmp << (64 - 57));
983	b2 -= b1;
984
985	tmp = b1 ^ b0;
986	b1 = (tmp >> 14) | (tmp << (64 - 14));
987	b0 -= b1 + k4;
988	b1 -= k0 + t1;
989
990	tmp = b3 ^ b2;
991	b3 = (tmp >> 16) | (tmp << (64 - 16));
992	b2 -= b3 + k1 + t2;
993	b3 -= k2 + 4;
994
995
996	tmp = b3 ^ b0;
997	b3 = (tmp >> 32) | (tmp << (64 - 32));
998	b0 -= b3;
999
1000	tmp = b1 ^ b2;
1001	b1 = (tmp >> 32) | (tmp << (64 - 32));
1002	b2 -= b1;
1003
1004	tmp = b1 ^ b0;
1005	b1 = (tmp >> 58) | (tmp << (64 - 58));
1006	b0 -= b1;
1007
1008	tmp = b3 ^ b2;
1009	b3 = (tmp >> 22) | (tmp << (64 - 22));
1010	b2 -= b3;
1011
1012	tmp = b3 ^ b0;
1013	b3 = (tmp >> 46) | (tmp << (64 - 46));
1014	b0 -= b3;
1015
1016	tmp = b1 ^ b2;
1017	b1 = (tmp >> 12) | (tmp << (64 - 12));
1018	b2 -= b1;
1019
1020	tmp = b1 ^ b0;
1021	b1 = (tmp >> 25) | (tmp << (64 - 25));
1022	b0 -= b1 + k3;
1023	b1 -= k4 + t0;
1024
1025	tmp = b3 ^ b2;
1026	b3 = (tmp >> 33) | (tmp << (64 - 33));
1027	b2 -= b3 + k0 + t1;
1028	b3 -= k1 + 3;
1029
1030	tmp = b3 ^ b0;
1031	b3 = (tmp >> 5) | (tmp << (64 - 5));
1032	b0 -= b3;
1033
1034	tmp = b1 ^ b2;
1035	b1 = (tmp >> 37) | (tmp << (64 - 37));
1036	b2 -= b1;
1037
1038	tmp = b1 ^ b0;
1039	b1 = (tmp >> 23) | (tmp << (64 - 23));
1040	b0 -= b1;
1041
1042	tmp = b3 ^ b2;
1043	b3 = (tmp >> 40) | (tmp << (64 - 40));
1044	b2 -= b3;
1045
1046	tmp = b3 ^ b0;
1047	b3 = (tmp >> 52) | (tmp << (64 - 52));
1048	b0 -= b3;
1049
1050	tmp = b1 ^ b2;
1051	b1 = (tmp >> 57) | (tmp << (64 - 57));
1052	b2 -= b1;
1053
1054	tmp = b1 ^ b0;
1055	b1 = (tmp >> 14) | (tmp << (64 - 14));
1056	b0 -= b1 + k2;
1057	b1 -= k3 + t2;
1058
1059	tmp = b3 ^ b2;
1060	b3 = (tmp >> 16) | (tmp << (64 - 16));
1061	b2 -= b3 + k4 + t0;
1062	b3 -= k0 + 2;
1063
1064
1065	tmp = b3 ^ b0;
1066	b3 = (tmp >> 32) | (tmp << (64 - 32));
1067	b0 -= b3;
1068
1069	tmp = b1 ^ b2;
1070	b1 = (tmp >> 32) | (tmp << (64 - 32));
1071	b2 -= b1;
1072
1073	tmp = b1 ^ b0;
1074	b1 = (tmp >> 58) | (tmp << (64 - 58));
1075	b0 -= b1;
1076
1077	tmp = b3 ^ b2;
1078	b3 = (tmp >> 22) | (tmp << (64 - 22));
1079	b2 -= b3;
1080
1081	tmp = b3 ^ b0;
1082	b3 = (tmp >> 46) | (tmp << (64 - 46));
1083	b0 -= b3;
1084
1085	tmp = b1 ^ b2;
1086	b1 = (tmp >> 12) | (tmp << (64 - 12));
1087	b2 -= b1;
1088
1089	tmp = b1 ^ b0;
1090	b1 = (tmp >> 25) | (tmp << (64 - 25));
1091	b0 -= b1 + k1;
1092	b1 -= k2 + t1;
1093
1094	tmp = b3 ^ b2;
1095	b3 = (tmp >> 33) | (tmp << (64 - 33));
1096	b2 -= b3 + k3 + t2;
1097	b3 -= k4 + 1;
1098
1099	tmp = b3 ^ b0;
1100	b3 = (tmp >> 5) | (tmp << (64 - 5));
1101	b0 -= b3;
1102
1103	tmp = b1 ^ b2;
1104	b1 = (tmp >> 37) | (tmp << (64 - 37));
1105	b2 -= b1;
1106
1107	tmp = b1 ^ b0;
1108	b1 = (tmp >> 23) | (tmp << (64 - 23));
1109	b0 -= b1;
1110
1111	tmp = b3 ^ b2;
1112	b3 = (tmp >> 40) | (tmp << (64 - 40));
1113	b2 -= b3;
1114
1115	tmp = b3 ^ b0;
1116	b3 = (tmp >> 52) | (tmp << (64 - 52));
1117	b0 -= b3;
1118
1119	tmp = b1 ^ b2;
1120	b1 = (tmp >> 57) | (tmp << (64 - 57));
1121	b2 -= b1;
1122
1123	tmp = b1 ^ b0;
1124	b1 = (tmp >> 14) | (tmp << (64 - 14));
1125	b0 -= b1 + k0;
1126	b1 -= k1 + t0;
1127
1128	tmp = b3 ^ b2;
1129	b3 = (tmp >> 16) | (tmp << (64 - 16));
1130	b2 -= b3 + k2 + t1;
1131	b3 -= k3;
1132
1133	output[0] = b0;
1134	output[1] = b1;
1135	output[2] = b2;
1136	output[3] = b3;
1137}
1138
1139void threefish_encrypt_512(struct threefish_key *key_ctx, u64 *input,
1140			   u64 *output)
1141{
1142	u64 b0 = input[0], b1 = input[1],
1143	    b2 = input[2], b3 = input[3],
1144	    b4 = input[4], b5 = input[5],
1145	    b6 = input[6], b7 = input[7];
1146	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
1147	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
1148	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
1149	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
1150	    k8 = key_ctx->key[8];
1151	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
1152	    t2 = key_ctx->tweak[2];
1153
1154	b1 += k1;
1155	b0 += b1 + k0;
1156	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1157
1158	b3 += k3;
1159	b2 += b3 + k2;
1160	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1161
1162	b5 += k5 + t0;
1163	b4 += b5 + k4;
1164	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1165
1166	b7 += k7;
1167	b6 += b7 + k6 + t1;
1168	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1169
1170	b2 += b1;
1171	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1172
1173	b4 += b7;
1174	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1175
1176	b6 += b5;
1177	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1178
1179	b0 += b3;
1180	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1181
1182	b4 += b1;
1183	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1184
1185	b6 += b3;
1186	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1187
1188	b0 += b5;
1189	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1190
1191	b2 += b7;
1192	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1193
1194	b6 += b1;
1195	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1196
1197	b0 += b7;
1198	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1199
1200	b2 += b5;
1201	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1202
1203	b4 += b3;
1204	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1205
1206	b1 += k2;
1207	b0 += b1 + k1;
1208	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1209
1210	b3 += k4;
1211	b2 += b3 + k3;
1212	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1213
1214	b5 += k6 + t1;
1215	b4 += b5 + k5;
1216	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1217
1218	b7 += k8 + 1;
1219	b6 += b7 + k7 + t2;
1220	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1221
1222	b2 += b1;
1223	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1224
1225	b4 += b7;
1226	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1227
1228	b6 += b5;
1229	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1230
1231	b0 += b3;
1232	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1233
1234	b4 += b1;
1235	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1236
1237	b6 += b3;
1238	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1239
1240	b0 += b5;
1241	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1242
1243	b2 += b7;
1244	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1245
1246	b6 += b1;
1247	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1248
1249	b0 += b7;
1250	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1251
1252	b2 += b5;
1253	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1254
1255	b4 += b3;
1256	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1257
1258	b1 += k3;
1259	b0 += b1 + k2;
1260	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1261
1262	b3 += k5;
1263	b2 += b3 + k4;
1264	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1265
1266	b5 += k7 + t2;
1267	b4 += b5 + k6;
1268	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1269
1270	b7 += k0 + 2;
1271	b6 += b7 + k8 + t0;
1272	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1273
1274	b2 += b1;
1275	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1276
1277	b4 += b7;
1278	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1279
1280	b6 += b5;
1281	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1282
1283	b0 += b3;
1284	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1285
1286	b4 += b1;
1287	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1288
1289	b6 += b3;
1290	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1291
1292	b0 += b5;
1293	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1294
1295	b2 += b7;
1296	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1297
1298	b6 += b1;
1299	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1300
1301	b0 += b7;
1302	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1303
1304	b2 += b5;
1305	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1306
1307	b4 += b3;
1308	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1309
1310	b1 += k4;
1311	b0 += b1 + k3;
1312	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1313
1314	b3 += k6;
1315	b2 += b3 + k5;
1316	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1317
1318	b5 += k8 + t0;
1319	b4 += b5 + k7;
1320	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1321
1322	b7 += k1 + 3;
1323	b6 += b7 + k0 + t1;
1324	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1325
1326	b2 += b1;
1327	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1328
1329	b4 += b7;
1330	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1331
1332	b6 += b5;
1333	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1334
1335	b0 += b3;
1336	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1337
1338	b4 += b1;
1339	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1340
1341	b6 += b3;
1342	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1343
1344	b0 += b5;
1345	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1346
1347	b2 += b7;
1348	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1349
1350	b6 += b1;
1351	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1352
1353	b0 += b7;
1354	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1355
1356	b2 += b5;
1357	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1358
1359	b4 += b3;
1360	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1361
1362	b1 += k5;
1363	b0 += b1 + k4;
1364	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1365
1366	b3 += k7;
1367	b2 += b3 + k6;
1368	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1369
1370	b5 += k0 + t1;
1371	b4 += b5 + k8;
1372	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1373
1374	b7 += k2 + 4;
1375	b6 += b7 + k1 + t2;
1376	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1377
1378	b2 += b1;
1379	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1380
1381	b4 += b7;
1382	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1383
1384	b6 += b5;
1385	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1386
1387	b0 += b3;
1388	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1389
1390	b4 += b1;
1391	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1392
1393	b6 += b3;
1394	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1395
1396	b0 += b5;
1397	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1398
1399	b2 += b7;
1400	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1401
1402	b6 += b1;
1403	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1404
1405	b0 += b7;
1406	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1407
1408	b2 += b5;
1409	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1410
1411	b4 += b3;
1412	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1413
1414	b1 += k6;
1415	b0 += b1 + k5;
1416	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1417
1418	b3 += k8;
1419	b2 += b3 + k7;
1420	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1421
1422	b5 += k1 + t2;
1423	b4 += b5 + k0;
1424	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1425
1426	b7 += k3 + 5;
1427	b6 += b7 + k2 + t0;
1428	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1429
1430	b2 += b1;
1431	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1432
1433	b4 += b7;
1434	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1435
1436	b6 += b5;
1437	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1438
1439	b0 += b3;
1440	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1441
1442	b4 += b1;
1443	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1444
1445	b6 += b3;
1446	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1447
1448	b0 += b5;
1449	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1450
1451	b2 += b7;
1452	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1453
1454	b6 += b1;
1455	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1456
1457	b0 += b7;
1458	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1459
1460	b2 += b5;
1461	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1462
1463	b4 += b3;
1464	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1465
1466	b1 += k7;
1467	b0 += b1 + k6;
1468	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1469
1470	b3 += k0;
1471	b2 += b3 + k8;
1472	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1473
1474	b5 += k2 + t0;
1475	b4 += b5 + k1;
1476	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1477
1478	b7 += k4 + 6;
1479	b6 += b7 + k3 + t1;
1480	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1481
1482	b2 += b1;
1483	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1484
1485	b4 += b7;
1486	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1487
1488	b6 += b5;
1489	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1490
1491	b0 += b3;
1492	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1493
1494	b4 += b1;
1495	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1496
1497	b6 += b3;
1498	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1499
1500	b0 += b5;
1501	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1502
1503	b2 += b7;
1504	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1505
1506	b6 += b1;
1507	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1508
1509	b0 += b7;
1510	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1511
1512	b2 += b5;
1513	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1514
1515	b4 += b3;
1516	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1517
1518	b1 += k8;
1519	b0 += b1 + k7;
1520	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1521
1522	b3 += k1;
1523	b2 += b3 + k0;
1524	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1525
1526	b5 += k3 + t1;
1527	b4 += b5 + k2;
1528	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1529
1530	b7 += k5 + 7;
1531	b6 += b7 + k4 + t2;
1532	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1533
1534	b2 += b1;
1535	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1536
1537	b4 += b7;
1538	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1539
1540	b6 += b5;
1541	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1542
1543	b0 += b3;
1544	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1545
1546	b4 += b1;
1547	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1548
1549	b6 += b3;
1550	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1551
1552	b0 += b5;
1553	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1554
1555	b2 += b7;
1556	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1557
1558	b6 += b1;
1559	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1560
1561	b0 += b7;
1562	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1563
1564	b2 += b5;
1565	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1566
1567	b4 += b3;
1568	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1569
1570	b1 += k0;
1571	b0 += b1 + k8;
1572	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1573
1574	b3 += k2;
1575	b2 += b3 + k1;
1576	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1577
1578	b5 += k4 + t2;
1579	b4 += b5 + k3;
1580	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1581
1582	b7 += k6 + 8;
1583	b6 += b7 + k5 + t0;
1584	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1585
1586	b2 += b1;
1587	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1588
1589	b4 += b7;
1590	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1591
1592	b6 += b5;
1593	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1594
1595	b0 += b3;
1596	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1597
1598	b4 += b1;
1599	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1600
1601	b6 += b3;
1602	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1603
1604	b0 += b5;
1605	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1606
1607	b2 += b7;
1608	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1609
1610	b6 += b1;
1611	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1612
1613	b0 += b7;
1614	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1615
1616	b2 += b5;
1617	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1618
1619	b4 += b3;
1620	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1621
1622	b1 += k1;
1623	b0 += b1 + k0;
1624	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1625
1626	b3 += k3;
1627	b2 += b3 + k2;
1628	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1629
1630	b5 += k5 + t0;
1631	b4 += b5 + k4;
1632	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1633
1634	b7 += k7 + 9;
1635	b6 += b7 + k6 + t1;
1636	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1637
1638	b2 += b1;
1639	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1640
1641	b4 += b7;
1642	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1643
1644	b6 += b5;
1645	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1646
1647	b0 += b3;
1648	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1649
1650	b4 += b1;
1651	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1652
1653	b6 += b3;
1654	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1655
1656	b0 += b5;
1657	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1658
1659	b2 += b7;
1660	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1661
1662	b6 += b1;
1663	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1664
1665	b0 += b7;
1666	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1667
1668	b2 += b5;
1669	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1670
1671	b4 += b3;
1672	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1673
1674	b1 += k2;
1675	b0 += b1 + k1;
1676	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1677
1678	b3 += k4;
1679	b2 += b3 + k3;
1680	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1681
1682	b5 += k6 + t1;
1683	b4 += b5 + k5;
1684	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1685
1686	b7 += k8 + 10;
1687	b6 += b7 + k7 + t2;
1688	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1689
1690	b2 += b1;
1691	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1692
1693	b4 += b7;
1694	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1695
1696	b6 += b5;
1697	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1698
1699	b0 += b3;
1700	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1701
1702	b4 += b1;
1703	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1704
1705	b6 += b3;
1706	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1707
1708	b0 += b5;
1709	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1710
1711	b2 += b7;
1712	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1713
1714	b6 += b1;
1715	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1716
1717	b0 += b7;
1718	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1719
1720	b2 += b5;
1721	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1722
1723	b4 += b3;
1724	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1725
1726	b1 += k3;
1727	b0 += b1 + k2;
1728	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1729
1730	b3 += k5;
1731	b2 += b3 + k4;
1732	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1733
1734	b5 += k7 + t2;
1735	b4 += b5 + k6;
1736	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1737
1738	b7 += k0 + 11;
1739	b6 += b7 + k8 + t0;
1740	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1741
1742	b2 += b1;
1743	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1744
1745	b4 += b7;
1746	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1747
1748	b6 += b5;
1749	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1750
1751	b0 += b3;
1752	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1753
1754	b4 += b1;
1755	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1756
1757	b6 += b3;
1758	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1759
1760	b0 += b5;
1761	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1762
1763	b2 += b7;
1764	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1765
1766	b6 += b1;
1767	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1768
1769	b0 += b7;
1770	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1771
1772	b2 += b5;
1773	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1774
1775	b4 += b3;
1776	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1777
1778	b1 += k4;
1779	b0 += b1 + k3;
1780	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1781
1782	b3 += k6;
1783	b2 += b3 + k5;
1784	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1785
1786	b5 += k8 + t0;
1787	b4 += b5 + k7;
1788	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1789
1790	b7 += k1 + 12;
1791	b6 += b7 + k0 + t1;
1792	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1793
1794	b2 += b1;
1795	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1796
1797	b4 += b7;
1798	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1799
1800	b6 += b5;
1801	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1802
1803	b0 += b3;
1804	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1805
1806	b4 += b1;
1807	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1808
1809	b6 += b3;
1810	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1811
1812	b0 += b5;
1813	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1814
1815	b2 += b7;
1816	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1817
1818	b6 += b1;
1819	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1820
1821	b0 += b7;
1822	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1823
1824	b2 += b5;
1825	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1826
1827	b4 += b3;
1828	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1829
1830	b1 += k5;
1831	b0 += b1 + k4;
1832	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1833
1834	b3 += k7;
1835	b2 += b3 + k6;
1836	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1837
1838	b5 += k0 + t1;
1839	b4 += b5 + k8;
1840	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1841
1842	b7 += k2 + 13;
1843	b6 += b7 + k1 + t2;
1844	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1845
1846	b2 += b1;
1847	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1848
1849	b4 += b7;
1850	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1851
1852	b6 += b5;
1853	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1854
1855	b0 += b3;
1856	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1857
1858	b4 += b1;
1859	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1860
1861	b6 += b3;
1862	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1863
1864	b0 += b5;
1865	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1866
1867	b2 += b7;
1868	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1869
1870	b6 += b1;
1871	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1872
1873	b0 += b7;
1874	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1875
1876	b2 += b5;
1877	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1878
1879	b4 += b3;
1880	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1881
1882	b1 += k6;
1883	b0 += b1 + k5;
1884	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1885
1886	b3 += k8;
1887	b2 += b3 + k7;
1888	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1889
1890	b5 += k1 + t2;
1891	b4 += b5 + k0;
1892	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1893
1894	b7 += k3 + 14;
1895	b6 += b7 + k2 + t0;
1896	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1897
1898	b2 += b1;
1899	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1900
1901	b4 += b7;
1902	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1903
1904	b6 += b5;
1905	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1906
1907	b0 += b3;
1908	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1909
1910	b4 += b1;
1911	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1912
1913	b6 += b3;
1914	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1915
1916	b0 += b5;
1917	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1918
1919	b2 += b7;
1920	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1921
1922	b6 += b1;
1923	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1924
1925	b0 += b7;
1926	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1927
1928	b2 += b5;
1929	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1930
1931	b4 += b3;
1932	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1933
1934	b1 += k7;
1935	b0 += b1 + k6;
1936	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1937
1938	b3 += k0;
1939	b2 += b3 + k8;
1940	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1941
1942	b5 += k2 + t0;
1943	b4 += b5 + k1;
1944	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1945
1946	b7 += k4 + 15;
1947	b6 += b7 + k3 + t1;
1948	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1949
1950	b2 += b1;
1951	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1952
1953	b4 += b7;
1954	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1955
1956	b6 += b5;
1957	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1958
1959	b0 += b3;
1960	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1961
1962	b4 += b1;
1963	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1964
1965	b6 += b3;
1966	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1967
1968	b0 += b5;
1969	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1970
1971	b2 += b7;
1972	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1973
1974	b6 += b1;
1975	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1976
1977	b0 += b7;
1978	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1979
1980	b2 += b5;
1981	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1982
1983	b4 += b3;
1984	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1985
1986	b1 += k8;
1987	b0 += b1 + k7;
1988	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1989
1990	b3 += k1;
1991	b2 += b3 + k0;
1992	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1993
1994	b5 += k3 + t1;
1995	b4 += b5 + k2;
1996	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1997
1998	b7 += k5 + 16;
1999	b6 += b7 + k4 + t2;
2000	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
2001
2002	b2 += b1;
2003	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
2004
2005	b4 += b7;
2006	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
2007
2008	b6 += b5;
2009	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
2010
2011	b0 += b3;
2012	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
2013
2014	b4 += b1;
2015	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
2016
2017	b6 += b3;
2018	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
2019
2020	b0 += b5;
2021	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
2022
2023	b2 += b7;
2024	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
2025
2026	b6 += b1;
2027	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
2028
2029	b0 += b7;
2030	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
2031
2032	b2 += b5;
2033	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
2034
2035	b4 += b3;
2036	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
2037
2038	b1 += k0;
2039	b0 += b1 + k8;
2040	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
2041
2042	b3 += k2;
2043	b2 += b3 + k1;
2044	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
2045
2046	b5 += k4 + t2;
2047	b4 += b5 + k3;
2048	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
2049
2050	b7 += k6 + 17;
2051	b6 += b7 + k5 + t0;
2052	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
2053
2054	b2 += b1;
2055	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
2056
2057	b4 += b7;
2058	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
2059
2060	b6 += b5;
2061	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
2062
2063	b0 += b3;
2064	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
2065
2066	b4 += b1;
2067	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
2068
2069	b6 += b3;
2070	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
2071
2072	b0 += b5;
2073	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
2074
2075	b2 += b7;
2076	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
2077
2078	b6 += b1;
2079	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
2080
2081	b0 += b7;
2082	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
2083
2084	b2 += b5;
2085	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
2086
2087	b4 += b3;
2088	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
2089
2090	output[0] = b0 + k0;
2091	output[1] = b1 + k1;
2092	output[2] = b2 + k2;
2093	output[3] = b3 + k3;
2094	output[4] = b4 + k4;
2095	output[5] = b5 + k5 + t0;
2096	output[6] = b6 + k6 + t1;
2097	output[7] = b7 + k7 + 18;
2098}
2099
2100void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
2101			   u64 *output)
2102{
2103	u64 b0 = input[0], b1 = input[1],
2104	    b2 = input[2], b3 = input[3],
2105	    b4 = input[4], b5 = input[5],
2106	    b6 = input[6], b7 = input[7];
2107	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
2108	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
2109	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
2110	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
2111	    k8 = key_ctx->key[8];
2112	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
2113	    t2 = key_ctx->tweak[2];
2114
2115	u64 tmp;
2116
2117	b0 -= k0;
2118	b1 -= k1;
2119	b2 -= k2;
2120	b3 -= k3;
2121	b4 -= k4;
2122	b5 -= k5 + t0;
2123	b6 -= k6 + t1;
2124	b7 -= k7 + 18;
2125
2126	tmp = b3 ^ b4;
2127	b3 = (tmp >> 22) | (tmp << (64 - 22));
2128	b4 -= b3;
2129
2130	tmp = b5 ^ b2;
2131	b5 = (tmp >> 56) | (tmp << (64 - 56));
2132	b2 -= b5;
2133
2134	tmp = b7 ^ b0;
2135	b7 = (tmp >> 35) | (tmp << (64 - 35));
2136	b0 -= b7;
2137
2138	tmp = b1 ^ b6;
2139	b1 = (tmp >> 8) | (tmp << (64 - 8));
2140	b6 -= b1;
2141
2142	tmp = b7 ^ b2;
2143	b7 = (tmp >> 43) | (tmp << (64 - 43));
2144	b2 -= b7;
2145
2146	tmp = b5 ^ b0;
2147	b5 = (tmp >> 39) | (tmp << (64 - 39));
2148	b0 -= b5;
2149
2150	tmp = b3 ^ b6;
2151	b3 = (tmp >> 29) | (tmp << (64 - 29));
2152	b6 -= b3;
2153
2154	tmp = b1 ^ b4;
2155	b1 = (tmp >> 25) | (tmp << (64 - 25));
2156	b4 -= b1;
2157
2158	tmp = b3 ^ b0;
2159	b3 = (tmp >> 17) | (tmp << (64 - 17));
2160	b0 -= b3;
2161
2162	tmp = b5 ^ b6;
2163	b5 = (tmp >> 10) | (tmp << (64 - 10));
2164	b6 -= b5;
2165
2166	tmp = b7 ^ b4;
2167	b7 = (tmp >> 50) | (tmp << (64 - 50));
2168	b4 -= b7;
2169
2170	tmp = b1 ^ b2;
2171	b1 = (tmp >> 13) | (tmp << (64 - 13));
2172	b2 -= b1;
2173
2174	tmp = b7 ^ b6;
2175	b7 = (tmp >> 24) | (tmp << (64 - 24));
2176	b6 -= b7 + k5 + t0;
2177	b7 -= k6 + 17;
2178
2179	tmp = b5 ^ b4;
2180	b5 = (tmp >> 34) | (tmp << (64 - 34));
2181	b4 -= b5 + k3;
2182	b5 -= k4 + t2;
2183
2184	tmp = b3 ^ b2;
2185	b3 = (tmp >> 30) | (tmp << (64 - 30));
2186	b2 -= b3 + k1;
2187	b3 -= k2;
2188
2189	tmp = b1 ^ b0;
2190	b1 = (tmp >> 39) | (tmp << (64 - 39));
2191	b0 -= b1 + k8;
2192	b1 -= k0;
2193
2194	tmp = b3 ^ b4;
2195	b3 = (tmp >> 56) | (tmp << (64 - 56));
2196	b4 -= b3;
2197
2198	tmp = b5 ^ b2;
2199	b5 = (tmp >> 54) | (tmp << (64 - 54));
2200	b2 -= b5;
2201
2202	tmp = b7 ^ b0;
2203	b7 = (tmp >> 9) | (tmp << (64 - 9));
2204	b0 -= b7;
2205
2206	tmp = b1 ^ b6;
2207	b1 = (tmp >> 44) | (tmp << (64 - 44));
2208	b6 -= b1;
2209
2210	tmp = b7 ^ b2;
2211	b7 = (tmp >> 39) | (tmp << (64 - 39));
2212	b2 -= b7;
2213
2214	tmp = b5 ^ b0;
2215	b5 = (tmp >> 36) | (tmp << (64 - 36));
2216	b0 -= b5;
2217
2218	tmp = b3 ^ b6;
2219	b3 = (tmp >> 49) | (tmp << (64 - 49));
2220	b6 -= b3;
2221
2222	tmp = b1 ^ b4;
2223	b1 = (tmp >> 17) | (tmp << (64 - 17));
2224	b4 -= b1;
2225
2226	tmp = b3 ^ b0;
2227	b3 = (tmp >> 42) | (tmp << (64 - 42));
2228	b0 -= b3;
2229
2230	tmp = b5 ^ b6;
2231	b5 = (tmp >> 14) | (tmp << (64 - 14));
2232	b6 -= b5;
2233
2234	tmp = b7 ^ b4;
2235	b7 = (tmp >> 27) | (tmp << (64 - 27));
2236	b4 -= b7;
2237
2238	tmp = b1 ^ b2;
2239	b1 = (tmp >> 33) | (tmp << (64 - 33));
2240	b2 -= b1;
2241
2242	tmp = b7 ^ b6;
2243	b7 = (tmp >> 37) | (tmp << (64 - 37));
2244	b6 -= b7 + k4 + t2;
2245	b7 -= k5 + 16;
2246
2247	tmp = b5 ^ b4;
2248	b5 = (tmp >> 19) | (tmp << (64 - 19));
2249	b4 -= b5 + k2;
2250	b5 -= k3 + t1;
2251
2252	tmp = b3 ^ b2;
2253	b3 = (tmp >> 36) | (tmp << (64 - 36));
2254	b2 -= b3 + k0;
2255	b3 -= k1;
2256
2257	tmp = b1 ^ b0;
2258	b1 = (tmp >> 46) | (tmp << (64 - 46));
2259	b0 -= b1 + k7;
2260	b1 -= k8;
2261
2262	tmp = b3 ^ b4;
2263	b3 = (tmp >> 22) | (tmp << (64 - 22));
2264	b4 -= b3;
2265
2266	tmp = b5 ^ b2;
2267	b5 = (tmp >> 56) | (tmp << (64 - 56));
2268	b2 -= b5;
2269
2270	tmp = b7 ^ b0;
2271	b7 = (tmp >> 35) | (tmp << (64 - 35));
2272	b0 -= b7;
2273
2274	tmp = b1 ^ b6;
2275	b1 = (tmp >> 8) | (tmp << (64 - 8));
2276	b6 -= b1;
2277
2278	tmp = b7 ^ b2;
2279	b7 = (tmp >> 43) | (tmp << (64 - 43));
2280	b2 -= b7;
2281
2282	tmp = b5 ^ b0;
2283	b5 = (tmp >> 39) | (tmp << (64 - 39));
2284	b0 -= b5;
2285
2286	tmp = b3 ^ b6;
2287	b3 = (tmp >> 29) | (tmp << (64 - 29));
2288	b6 -= b3;
2289
2290	tmp = b1 ^ b4;
2291	b1 = (tmp >> 25) | (tmp << (64 - 25));
2292	b4 -= b1;
2293
2294	tmp = b3 ^ b0;
2295	b3 = (tmp >> 17) | (tmp << (64 - 17));
2296	b0 -= b3;
2297
2298	tmp = b5 ^ b6;
2299	b5 = (tmp >> 10) | (tmp << (64 - 10));
2300	b6 -= b5;
2301
2302	tmp = b7 ^ b4;
2303	b7 = (tmp >> 50) | (tmp << (64 - 50));
2304	b4 -= b7;
2305
2306	tmp = b1 ^ b2;
2307	b1 = (tmp >> 13) | (tmp << (64 - 13));
2308	b2 -= b1;
2309
2310	tmp = b7 ^ b6;
2311	b7 = (tmp >> 24) | (tmp << (64 - 24));
2312	b6 -= b7 + k3 + t1;
2313	b7 -= k4 + 15;
2314
2315	tmp = b5 ^ b4;
2316	b5 = (tmp >> 34) | (tmp << (64 - 34));
2317	b4 -= b5 + k1;
2318	b5 -= k2 + t0;
2319
2320	tmp = b3 ^ b2;
2321	b3 = (tmp >> 30) | (tmp << (64 - 30));
2322	b2 -= b3 + k8;
2323	b3 -= k0;
2324
2325	tmp = b1 ^ b0;
2326	b1 = (tmp >> 39) | (tmp << (64 - 39));
2327	b0 -= b1 + k6;
2328	b1 -= k7;
2329
2330	tmp = b3 ^ b4;
2331	b3 = (tmp >> 56) | (tmp << (64 - 56));
2332	b4 -= b3;
2333
2334	tmp = b5 ^ b2;
2335	b5 = (tmp >> 54) | (tmp << (64 - 54));
2336	b2 -= b5;
2337
2338	tmp = b7 ^ b0;
2339	b7 = (tmp >> 9) | (tmp << (64 - 9));
2340	b0 -= b7;
2341
2342	tmp = b1 ^ b6;
2343	b1 = (tmp >> 44) | (tmp << (64 - 44));
2344	b6 -= b1;
2345
2346	tmp = b7 ^ b2;
2347	b7 = (tmp >> 39) | (tmp << (64 - 39));
2348	b2 -= b7;
2349
2350	tmp = b5 ^ b0;
2351	b5 = (tmp >> 36) | (tmp << (64 - 36));
2352	b0 -= b5;
2353
2354	tmp = b3 ^ b6;
2355	b3 = (tmp >> 49) | (tmp << (64 - 49));
2356	b6 -= b3;
2357
2358	tmp = b1 ^ b4;
2359	b1 = (tmp >> 17) | (tmp << (64 - 17));
2360	b4 -= b1;
2361
2362	tmp = b3 ^ b0;
2363	b3 = (tmp >> 42) | (tmp << (64 - 42));
2364	b0 -= b3;
2365
2366	tmp = b5 ^ b6;
2367	b5 = (tmp >> 14) | (tmp << (64 - 14));
2368	b6 -= b5;
2369
2370	tmp = b7 ^ b4;
2371	b7 = (tmp >> 27) | (tmp << (64 - 27));
2372	b4 -= b7;
2373
2374	tmp = b1 ^ b2;
2375	b1 = (tmp >> 33) | (tmp << (64 - 33));
2376	b2 -= b1;
2377
2378	tmp = b7 ^ b6;
2379	b7 = (tmp >> 37) | (tmp << (64 - 37));
2380	b6 -= b7 + k2 + t0;
2381	b7 -= k3 + 14;
2382
2383	tmp = b5 ^ b4;
2384	b5 = (tmp >> 19) | (tmp << (64 - 19));
2385	b4 -= b5 + k0;
2386	b5 -= k1 + t2;
2387
2388	tmp = b3 ^ b2;
2389	b3 = (tmp >> 36) | (tmp << (64 - 36));
2390	b2 -= b3 + k7;
2391	b3 -= k8;
2392
2393	tmp = b1 ^ b0;
2394	b1 = (tmp >> 46) | (tmp << (64 - 46));
2395	b0 -= b1 + k5;
2396	b1 -= k6;
2397
2398	tmp = b3 ^ b4;
2399	b3 = (tmp >> 22) | (tmp << (64 - 22));
2400	b4 -= b3;
2401
2402	tmp = b5 ^ b2;
2403	b5 = (tmp >> 56) | (tmp << (64 - 56));
2404	b2 -= b5;
2405
2406	tmp = b7 ^ b0;
2407	b7 = (tmp >> 35) | (tmp << (64 - 35));
2408	b0 -= b7;
2409
2410	tmp = b1 ^ b6;
2411	b1 = (tmp >> 8) | (tmp << (64 - 8));
2412	b6 -= b1;
2413
2414	tmp = b7 ^ b2;
2415	b7 = (tmp >> 43) | (tmp << (64 - 43));
2416	b2 -= b7;
2417
2418	tmp = b5 ^ b0;
2419	b5 = (tmp >> 39) | (tmp << (64 - 39));
2420	b0 -= b5;
2421
2422	tmp = b3 ^ b6;
2423	b3 = (tmp >> 29) | (tmp << (64 - 29));
2424	b6 -= b3;
2425
2426	tmp = b1 ^ b4;
2427	b1 = (tmp >> 25) | (tmp << (64 - 25));
2428	b4 -= b1;
2429
2430	tmp = b3 ^ b0;
2431	b3 = (tmp >> 17) | (tmp << (64 - 17));
2432	b0 -= b3;
2433
2434	tmp = b5 ^ b6;
2435	b5 = (tmp >> 10) | (tmp << (64 - 10));
2436	b6 -= b5;
2437
2438	tmp = b7 ^ b4;
2439	b7 = (tmp >> 50) | (tmp << (64 - 50));
2440	b4 -= b7;
2441
2442	tmp = b1 ^ b2;
2443	b1 = (tmp >> 13) | (tmp << (64 - 13));
2444	b2 -= b1;
2445
2446	tmp = b7 ^ b6;
2447	b7 = (tmp >> 24) | (tmp << (64 - 24));
2448	b6 -= b7 + k1 + t2;
2449	b7 -= k2 + 13;
2450
2451	tmp = b5 ^ b4;
2452	b5 = (tmp >> 34) | (tmp << (64 - 34));
2453	b4 -= b5 + k8;
2454	b5 -= k0 + t1;
2455
2456	tmp = b3 ^ b2;
2457	b3 = (tmp >> 30) | (tmp << (64 - 30));
2458	b2 -= b3 + k6;
2459	b3 -= k7;
2460
2461	tmp = b1 ^ b0;
2462	b1 = (tmp >> 39) | (tmp << (64 - 39));
2463	b0 -= b1 + k4;
2464	b1 -= k5;
2465
2466	tmp = b3 ^ b4;
2467	b3 = (tmp >> 56) | (tmp << (64 - 56));
2468	b4 -= b3;
2469
2470	tmp = b5 ^ b2;
2471	b5 = (tmp >> 54) | (tmp << (64 - 54));
2472	b2 -= b5;
2473
2474	tmp = b7 ^ b0;
2475	b7 = (tmp >> 9) | (tmp << (64 - 9));
2476	b0 -= b7;
2477
2478	tmp = b1 ^ b6;
2479	b1 = (tmp >> 44) | (tmp << (64 - 44));
2480	b6 -= b1;
2481
2482	tmp = b7 ^ b2;
2483	b7 = (tmp >> 39) | (tmp << (64 - 39));
2484	b2 -= b7;
2485
2486	tmp = b5 ^ b0;
2487	b5 = (tmp >> 36) | (tmp << (64 - 36));
2488	b0 -= b5;
2489
2490	tmp = b3 ^ b6;
2491	b3 = (tmp >> 49) | (tmp << (64 - 49));
2492	b6 -= b3;
2493
2494	tmp = b1 ^ b4;
2495	b1 = (tmp >> 17) | (tmp << (64 - 17));
2496	b4 -= b1;
2497
2498	tmp = b3 ^ b0;
2499	b3 = (tmp >> 42) | (tmp << (64 - 42));
2500	b0 -= b3;
2501
2502	tmp = b5 ^ b6;
2503	b5 = (tmp >> 14) | (tmp << (64 - 14));
2504	b6 -= b5;
2505
2506	tmp = b7 ^ b4;
2507	b7 = (tmp >> 27) | (tmp << (64 - 27));
2508	b4 -= b7;
2509
2510	tmp = b1 ^ b2;
2511	b1 = (tmp >> 33) | (tmp << (64 - 33));
2512	b2 -= b1;
2513
2514	tmp = b7 ^ b6;
2515	b7 = (tmp >> 37) | (tmp << (64 - 37));
2516	b6 -= b7 + k0 + t1;
2517	b7 -= k1 + 12;
2518
2519	tmp = b5 ^ b4;
2520	b5 = (tmp >> 19) | (tmp << (64 - 19));
2521	b4 -= b5 + k7;
2522	b5 -= k8 + t0;
2523
2524	tmp = b3 ^ b2;
2525	b3 = (tmp >> 36) | (tmp << (64 - 36));
2526	b2 -= b3 + k5;
2527	b3 -= k6;
2528
2529	tmp = b1 ^ b0;
2530	b1 = (tmp >> 46) | (tmp << (64 - 46));
2531	b0 -= b1 + k3;
2532	b1 -= k4;
2533
2534	tmp = b3 ^ b4;
2535	b3 = (tmp >> 22) | (tmp << (64 - 22));
2536	b4 -= b3;
2537
2538	tmp = b5 ^ b2;
2539	b5 = (tmp >> 56) | (tmp << (64 - 56));
2540	b2 -= b5;
2541
2542	tmp = b7 ^ b0;
2543	b7 = (tmp >> 35) | (tmp << (64 - 35));
2544	b0 -= b7;
2545
2546	tmp = b1 ^ b6;
2547	b1 = (tmp >> 8) | (tmp << (64 - 8));
2548	b6 -= b1;
2549
2550	tmp = b7 ^ b2;
2551	b7 = (tmp >> 43) | (tmp << (64 - 43));
2552	b2 -= b7;
2553
2554	tmp = b5 ^ b0;
2555	b5 = (tmp >> 39) | (tmp << (64 - 39));
2556	b0 -= b5;
2557
2558	tmp = b3 ^ b6;
2559	b3 = (tmp >> 29) | (tmp << (64 - 29));
2560	b6 -= b3;
2561
2562	tmp = b1 ^ b4;
2563	b1 = (tmp >> 25) | (tmp << (64 - 25));
2564	b4 -= b1;
2565
2566	tmp = b3 ^ b0;
2567	b3 = (tmp >> 17) | (tmp << (64 - 17));
2568	b0 -= b3;
2569
2570	tmp = b5 ^ b6;
2571	b5 = (tmp >> 10) | (tmp << (64 - 10));
2572	b6 -= b5;
2573
2574	tmp = b7 ^ b4;
2575	b7 = (tmp >> 50) | (tmp << (64 - 50));
2576	b4 -= b7;
2577
2578	tmp = b1 ^ b2;
2579	b1 = (tmp >> 13) | (tmp << (64 - 13));
2580	b2 -= b1;
2581
2582	tmp = b7 ^ b6;
2583	b7 = (tmp >> 24) | (tmp << (64 - 24));
2584	b6 -= b7 + k8 + t0;
2585	b7 -= k0 + 11;
2586
2587	tmp = b5 ^ b4;
2588	b5 = (tmp >> 34) | (tmp << (64 - 34));
2589	b4 -= b5 + k6;
2590	b5 -= k7 + t2;
2591
2592	tmp = b3 ^ b2;
2593	b3 = (tmp >> 30) | (tmp << (64 - 30));
2594	b2 -= b3 + k4;
2595	b3 -= k5;
2596
2597	tmp = b1 ^ b0;
2598	b1 = (tmp >> 39) | (tmp << (64 - 39));
2599	b0 -= b1 + k2;
2600	b1 -= k3;
2601
2602	tmp = b3 ^ b4;
2603	b3 = (tmp >> 56) | (tmp << (64 - 56));
2604	b4 -= b3;
2605
2606	tmp = b5 ^ b2;
2607	b5 = (tmp >> 54) | (tmp << (64 - 54));
2608	b2 -= b5;
2609
2610	tmp = b7 ^ b0;
2611	b7 = (tmp >> 9) | (tmp << (64 - 9));
2612	b0 -= b7;
2613
2614	tmp = b1 ^ b6;
2615	b1 = (tmp >> 44) | (tmp << (64 - 44));
2616	b6 -= b1;
2617
2618	tmp = b7 ^ b2;
2619	b7 = (tmp >> 39) | (tmp << (64 - 39));
2620	b2 -= b7;
2621
2622	tmp = b5 ^ b0;
2623	b5 = (tmp >> 36) | (tmp << (64 - 36));
2624	b0 -= b5;
2625
2626	tmp = b3 ^ b6;
2627	b3 = (tmp >> 49) | (tmp << (64 - 49));
2628	b6 -= b3;
2629
2630	tmp = b1 ^ b4;
2631	b1 = (tmp >> 17) | (tmp << (64 - 17));
2632	b4 -= b1;
2633
2634	tmp = b3 ^ b0;
2635	b3 = (tmp >> 42) | (tmp << (64 - 42));
2636	b0 -= b3;
2637
2638	tmp = b5 ^ b6;
2639	b5 = (tmp >> 14) | (tmp << (64 - 14));
2640	b6 -= b5;
2641
2642	tmp = b7 ^ b4;
2643	b7 = (tmp >> 27) | (tmp << (64 - 27));
2644	b4 -= b7;
2645
2646	tmp = b1 ^ b2;
2647	b1 = (tmp >> 33) | (tmp << (64 - 33));
2648	b2 -= b1;
2649
2650	tmp = b7 ^ b6;
2651	b7 = (tmp >> 37) | (tmp << (64 - 37));
2652	b6 -= b7 + k7 + t2;
2653	b7 -= k8 + 10;
2654
2655	tmp = b5 ^ b4;
2656	b5 = (tmp >> 19) | (tmp << (64 - 19));
2657	b4 -= b5 + k5;
2658	b5 -= k6 + t1;
2659
2660	tmp = b3 ^ b2;
2661	b3 = (tmp >> 36) | (tmp << (64 - 36));
2662	b2 -= b3 + k3;
2663	b3 -= k4;
2664
2665	tmp = b1 ^ b0;
2666	b1 = (tmp >> 46) | (tmp << (64 - 46));
2667	b0 -= b1 + k1;
2668	b1 -= k2;
2669
2670	tmp = b3 ^ b4;
2671	b3 = (tmp >> 22) | (tmp << (64 - 22));
2672	b4 -= b3;
2673
2674	tmp = b5 ^ b2;
2675	b5 = (tmp >> 56) | (tmp << (64 - 56));
2676	b2 -= b5;
2677
2678	tmp = b7 ^ b0;
2679	b7 = (tmp >> 35) | (tmp << (64 - 35));
2680	b0 -= b7;
2681
2682	tmp = b1 ^ b6;
2683	b1 = (tmp >> 8) | (tmp << (64 - 8));
2684	b6 -= b1;
2685
2686	tmp = b7 ^ b2;
2687	b7 = (tmp >> 43) | (tmp << (64 - 43));
2688	b2 -= b7;
2689
2690	tmp = b5 ^ b0;
2691	b5 = (tmp >> 39) | (tmp << (64 - 39));
2692	b0 -= b5;
2693
2694	tmp = b3 ^ b6;
2695	b3 = (tmp >> 29) | (tmp << (64 - 29));
2696	b6 -= b3;
2697
2698	tmp = b1 ^ b4;
2699	b1 = (tmp >> 25) | (tmp << (64 - 25));
2700	b4 -= b1;
2701
2702	tmp = b3 ^ b0;
2703	b3 = (tmp >> 17) | (tmp << (64 - 17));
2704	b0 -= b3;
2705
2706	tmp = b5 ^ b6;
2707	b5 = (tmp >> 10) | (tmp << (64 - 10));
2708	b6 -= b5;
2709
2710	tmp = b7 ^ b4;
2711	b7 = (tmp >> 50) | (tmp << (64 - 50));
2712	b4 -= b7;
2713
2714	tmp = b1 ^ b2;
2715	b1 = (tmp >> 13) | (tmp << (64 - 13));
2716	b2 -= b1;
2717
2718	tmp = b7 ^ b6;
2719	b7 = (tmp >> 24) | (tmp << (64 - 24));
2720	b6 -= b7 + k6 + t1;
2721	b7 -= k7 + 9;
2722
2723	tmp = b5 ^ b4;
2724	b5 = (tmp >> 34) | (tmp << (64 - 34));
2725	b4 -= b5 + k4;
2726	b5 -= k5 + t0;
2727
2728	tmp = b3 ^ b2;
2729	b3 = (tmp >> 30) | (tmp << (64 - 30));
2730	b2 -= b3 + k2;
2731	b3 -= k3;
2732
2733	tmp = b1 ^ b0;
2734	b1 = (tmp >> 39) | (tmp << (64 - 39));
2735	b0 -= b1 + k0;
2736	b1 -= k1;
2737
2738	tmp = b3 ^ b4;
2739	b3 = (tmp >> 56) | (tmp << (64 - 56));
2740	b4 -= b3;
2741
2742	tmp = b5 ^ b2;
2743	b5 = (tmp >> 54) | (tmp << (64 - 54));
2744	b2 -= b5;
2745
2746	tmp = b7 ^ b0;
2747	b7 = (tmp >> 9) | (tmp << (64 - 9));
2748	b0 -= b7;
2749
2750	tmp = b1 ^ b6;
2751	b1 = (tmp >> 44) | (tmp << (64 - 44));
2752	b6 -= b1;
2753
2754	tmp = b7 ^ b2;
2755	b7 = (tmp >> 39) | (tmp << (64 - 39));
2756	b2 -= b7;
2757
2758	tmp = b5 ^ b0;
2759	b5 = (tmp >> 36) | (tmp << (64 - 36));
2760	b0 -= b5;
2761
2762	tmp = b3 ^ b6;
2763	b3 = (tmp >> 49) | (tmp << (64 - 49));
2764	b6 -= b3;
2765
2766	tmp = b1 ^ b4;
2767	b1 = (tmp >> 17) | (tmp << (64 - 17));
2768	b4 -= b1;
2769
2770	tmp = b3 ^ b0;
2771	b3 = (tmp >> 42) | (tmp << (64 - 42));
2772	b0 -= b3;
2773
2774	tmp = b5 ^ b6;
2775	b5 = (tmp >> 14) | (tmp << (64 - 14));
2776	b6 -= b5;
2777
2778	tmp = b7 ^ b4;
2779	b7 = (tmp >> 27) | (tmp << (64 - 27));
2780	b4 -= b7;
2781
2782	tmp = b1 ^ b2;
2783	b1 = (tmp >> 33) | (tmp << (64 - 33));
2784	b2 -= b1;
2785
2786	tmp = b7 ^ b6;
2787	b7 = (tmp >> 37) | (tmp << (64 - 37));
2788	b6 -= b7 + k5 + t0;
2789	b7 -= k6 + 8;
2790
2791	tmp = b5 ^ b4;
2792	b5 = (tmp >> 19) | (tmp << (64 - 19));
2793	b4 -= b5 + k3;
2794	b5 -= k4 + t2;
2795
2796	tmp = b3 ^ b2;
2797	b3 = (tmp >> 36) | (tmp << (64 - 36));
2798	b2 -= b3 + k1;
2799	b3 -= k2;
2800
2801	tmp = b1 ^ b0;
2802	b1 = (tmp >> 46) | (tmp << (64 - 46));
2803	b0 -= b1 + k8;
2804	b1 -= k0;
2805
2806	tmp = b3 ^ b4;
2807	b3 = (tmp >> 22) | (tmp << (64 - 22));
2808	b4 -= b3;
2809
2810	tmp = b5 ^ b2;
2811	b5 = (tmp >> 56) | (tmp << (64 - 56));
2812	b2 -= b5;
2813
2814	tmp = b7 ^ b0;
2815	b7 = (tmp >> 35) | (tmp << (64 - 35));
2816	b0 -= b7;
2817
2818	tmp = b1 ^ b6;
2819	b1 = (tmp >> 8) | (tmp << (64 - 8));
2820	b6 -= b1;
2821
2822	tmp = b7 ^ b2;
2823	b7 = (tmp >> 43) | (tmp << (64 - 43));
2824	b2 -= b7;
2825
2826	tmp = b5 ^ b0;
2827	b5 = (tmp >> 39) | (tmp << (64 - 39));
2828	b0 -= b5;
2829
2830	tmp = b3 ^ b6;
2831	b3 = (tmp >> 29) | (tmp << (64 - 29));
2832	b6 -= b3;
2833
2834	tmp = b1 ^ b4;
2835	b1 = (tmp >> 25) | (tmp << (64 - 25));
2836	b4 -= b1;
2837
2838	tmp = b3 ^ b0;
2839	b3 = (tmp >> 17) | (tmp << (64 - 17));
2840	b0 -= b3;
2841
2842	tmp = b5 ^ b6;
2843	b5 = (tmp >> 10) | (tmp << (64 - 10));
2844	b6 -= b5;
2845
2846	tmp = b7 ^ b4;
2847	b7 = (tmp >> 50) | (tmp << (64 - 50));
2848	b4 -= b7;
2849
2850	tmp = b1 ^ b2;
2851	b1 = (tmp >> 13) | (tmp << (64 - 13));
2852	b2 -= b1;
2853
2854	tmp = b7 ^ b6;
2855	b7 = (tmp >> 24) | (tmp << (64 - 24));
2856	b6 -= b7 + k4 + t2;
2857	b7 -= k5 + 7;
2858
2859	tmp = b5 ^ b4;
2860	b5 = (tmp >> 34) | (tmp << (64 - 34));
2861	b4 -= b5 + k2;
2862	b5 -= k3 + t1;
2863
2864	tmp = b3 ^ b2;
2865	b3 = (tmp >> 30) | (tmp << (64 - 30));
2866	b2 -= b3 + k0;
2867	b3 -= k1;
2868
2869	tmp = b1 ^ b0;
2870	b1 = (tmp >> 39) | (tmp << (64 - 39));
2871	b0 -= b1 + k7;
2872	b1 -= k8;
2873
2874	tmp = b3 ^ b4;
2875	b3 = (tmp >> 56) | (tmp << (64 - 56));
2876	b4 -= b3;
2877
2878	tmp = b5 ^ b2;
2879	b5 = (tmp >> 54) | (tmp << (64 - 54));
2880	b2 -= b5;
2881
2882	tmp = b7 ^ b0;
2883	b7 = (tmp >> 9) | (tmp << (64 - 9));
2884	b0 -= b7;
2885
2886	tmp = b1 ^ b6;
2887	b1 = (tmp >> 44) | (tmp << (64 - 44));
2888	b6 -= b1;
2889
2890	tmp = b7 ^ b2;
2891	b7 = (tmp >> 39) | (tmp << (64 - 39));
2892	b2 -= b7;
2893
2894	tmp = b5 ^ b0;
2895	b5 = (tmp >> 36) | (tmp << (64 - 36));
2896	b0 -= b5;
2897
2898	tmp = b3 ^ b6;
2899	b3 = (tmp >> 49) | (tmp << (64 - 49));
2900	b6 -= b3;
2901
2902	tmp = b1 ^ b4;
2903	b1 = (tmp >> 17) | (tmp << (64 - 17));
2904	b4 -= b1;
2905
2906	tmp = b3 ^ b0;
2907	b3 = (tmp >> 42) | (tmp << (64 - 42));
2908	b0 -= b3;
2909
2910	tmp = b5 ^ b6;
2911	b5 = (tmp >> 14) | (tmp << (64 - 14));
2912	b6 -= b5;
2913
2914	tmp = b7 ^ b4;
2915	b7 = (tmp >> 27) | (tmp << (64 - 27));
2916	b4 -= b7;
2917
2918	tmp = b1 ^ b2;
2919	b1 = (tmp >> 33) | (tmp << (64 - 33));
2920	b2 -= b1;
2921
2922	tmp = b7 ^ b6;
2923	b7 = (tmp >> 37) | (tmp << (64 - 37));
2924	b6 -= b7 + k3 + t1;
2925	b7 -= k4 + 6;
2926
2927	tmp = b5 ^ b4;
2928	b5 = (tmp >> 19) | (tmp << (64 - 19));
2929	b4 -= b5 + k1;
2930	b5 -= k2 + t0;
2931
2932	tmp = b3 ^ b2;
2933	b3 = (tmp >> 36) | (tmp << (64 - 36));
2934	b2 -= b3 + k8;
2935	b3 -= k0;
2936
2937	tmp = b1 ^ b0;
2938	b1 = (tmp >> 46) | (tmp << (64 - 46));
2939	b0 -= b1 + k6;
2940	b1 -= k7;
2941
2942	tmp = b3 ^ b4;
2943	b3 = (tmp >> 22) | (tmp << (64 - 22));
2944	b4 -= b3;
2945
2946	tmp = b5 ^ b2;
2947	b5 = (tmp >> 56) | (tmp << (64 - 56));
2948	b2 -= b5;
2949
2950	tmp = b7 ^ b0;
2951	b7 = (tmp >> 35) | (tmp << (64 - 35));
2952	b0 -= b7;
2953
2954	tmp = b1 ^ b6;
2955	b1 = (tmp >> 8) | (tmp << (64 - 8));
2956	b6 -= b1;
2957
2958	tmp = b7 ^ b2;
2959	b7 = (tmp >> 43) | (tmp << (64 - 43));
2960	b2 -= b7;
2961
2962	tmp = b5 ^ b0;
2963	b5 = (tmp >> 39) | (tmp << (64 - 39));
2964	b0 -= b5;
2965
2966	tmp = b3 ^ b6;
2967	b3 = (tmp >> 29) | (tmp << (64 - 29));
2968	b6 -= b3;
2969
2970	tmp = b1 ^ b4;
2971	b1 = (tmp >> 25) | (tmp << (64 - 25));
2972	b4 -= b1;
2973
2974	tmp = b3 ^ b0;
2975	b3 = (tmp >> 17) | (tmp << (64 - 17));
2976	b0 -= b3;
2977
2978	tmp = b5 ^ b6;
2979	b5 = (tmp >> 10) | (tmp << (64 - 10));
2980	b6 -= b5;
2981
2982	tmp = b7 ^ b4;
2983	b7 = (tmp >> 50) | (tmp << (64 - 50));
2984	b4 -= b7;
2985
2986	tmp = b1 ^ b2;
2987	b1 = (tmp >> 13) | (tmp << (64 - 13));
2988	b2 -= b1;
2989
2990	tmp = b7 ^ b6;
2991	b7 = (tmp >> 24) | (tmp << (64 - 24));
2992	b6 -= b7 + k2 + t0;
2993	b7 -= k3 + 5;
2994
2995	tmp = b5 ^ b4;
2996	b5 = (tmp >> 34) | (tmp << (64 - 34));
2997	b4 -= b5 + k0;
2998	b5 -= k1 + t2;
2999
3000	tmp = b3 ^ b2;
3001	b3 = (tmp >> 30) | (tmp << (64 - 30));
3002	b2 -= b3 + k7;
3003	b3 -= k8;
3004
3005	tmp = b1 ^ b0;
3006	b1 = (tmp >> 39) | (tmp << (64 - 39));
3007	b0 -= b1 + k5;
3008	b1 -= k6;
3009
3010	tmp = b3 ^ b4;
3011	b3 = (tmp >> 56) | (tmp << (64 - 56));
3012	b4 -= b3;
3013
3014	tmp = b5 ^ b2;
3015	b5 = (tmp >> 54) | (tmp << (64 - 54));
3016	b2 -= b5;
3017
3018	tmp = b7 ^ b0;
3019	b7 = (tmp >> 9) | (tmp << (64 - 9));
3020	b0 -= b7;
3021
3022	tmp = b1 ^ b6;
3023	b1 = (tmp >> 44) | (tmp << (64 - 44));
3024	b6 -= b1;
3025
3026	tmp = b7 ^ b2;
3027	b7 = (tmp >> 39) | (tmp << (64 - 39));
3028	b2 -= b7;
3029
3030	tmp = b5 ^ b0;
3031	b5 = (tmp >> 36) | (tmp << (64 - 36));
3032	b0 -= b5;
3033
3034	tmp = b3 ^ b6;
3035	b3 = (tmp >> 49) | (tmp << (64 - 49));
3036	b6 -= b3;
3037
3038	tmp = b1 ^ b4;
3039	b1 = (tmp >> 17) | (tmp << (64 - 17));
3040	b4 -= b1;
3041
3042	tmp = b3 ^ b0;
3043	b3 = (tmp >> 42) | (tmp << (64 - 42));
3044	b0 -= b3;
3045
3046	tmp = b5 ^ b6;
3047	b5 = (tmp >> 14) | (tmp << (64 - 14));
3048	b6 -= b5;
3049
3050	tmp = b7 ^ b4;
3051	b7 = (tmp >> 27) | (tmp << (64 - 27));
3052	b4 -= b7;
3053
3054	tmp = b1 ^ b2;
3055	b1 = (tmp >> 33) | (tmp << (64 - 33));
3056	b2 -= b1;
3057
3058	tmp = b7 ^ b6;
3059	b7 = (tmp >> 37) | (tmp << (64 - 37));
3060	b6 -= b7 + k1 + t2;
3061	b7 -= k2 + 4;
3062
3063	tmp = b5 ^ b4;
3064	b5 = (tmp >> 19) | (tmp << (64 - 19));
3065	b4 -= b5 + k8;
3066	b5 -= k0 + t1;
3067
3068	tmp = b3 ^ b2;
3069	b3 = (tmp >> 36) | (tmp << (64 - 36));
3070	b2 -= b3 + k6;
3071	b3 -= k7;
3072
3073	tmp = b1 ^ b0;
3074	b1 = (tmp >> 46) | (tmp << (64 - 46));
3075	b0 -= b1 + k4;
3076	b1 -= k5;
3077
3078	tmp = b3 ^ b4;
3079	b3 = (tmp >> 22) | (tmp << (64 - 22));
3080	b4 -= b3;
3081
3082	tmp = b5 ^ b2;
3083	b5 = (tmp >> 56) | (tmp << (64 - 56));
3084	b2 -= b5;
3085
3086	tmp = b7 ^ b0;
3087	b7 = (tmp >> 35) | (tmp << (64 - 35));
3088	b0 -= b7;
3089
3090	tmp = b1 ^ b6;
3091	b1 = (tmp >> 8) | (tmp << (64 - 8));
3092	b6 -= b1;
3093
3094	tmp = b7 ^ b2;
3095	b7 = (tmp >> 43) | (tmp << (64 - 43));
3096	b2 -= b7;
3097
3098	tmp = b5 ^ b0;
3099	b5 = (tmp >> 39) | (tmp << (64 - 39));
3100	b0 -= b5;
3101
3102	tmp = b3 ^ b6;
3103	b3 = (tmp >> 29) | (tmp << (64 - 29));
3104	b6 -= b3;
3105
3106	tmp = b1 ^ b4;
3107	b1 = (tmp >> 25) | (tmp << (64 - 25));
3108	b4 -= b1;
3109
3110	tmp = b3 ^ b0;
3111	b3 = (tmp >> 17) | (tmp << (64 - 17));
3112	b0 -= b3;
3113
3114	tmp = b5 ^ b6;
3115	b5 = (tmp >> 10) | (tmp << (64 - 10));
3116	b6 -= b5;
3117
3118	tmp = b7 ^ b4;
3119	b7 = (tmp >> 50) | (tmp << (64 - 50));
3120	b4 -= b7;
3121
3122	tmp = b1 ^ b2;
3123	b1 = (tmp >> 13) | (tmp << (64 - 13));
3124	b2 -= b1;
3125
3126	tmp = b7 ^ b6;
3127	b7 = (tmp >> 24) | (tmp << (64 - 24));
3128	b6 -= b7 + k0 + t1;
3129	b7 -= k1 + 3;
3130
3131	tmp = b5 ^ b4;
3132	b5 = (tmp >> 34) | (tmp << (64 - 34));
3133	b4 -= b5 + k7;
3134	b5 -= k8 + t0;
3135
3136	tmp = b3 ^ b2;
3137	b3 = (tmp >> 30) | (tmp << (64 - 30));
3138	b2 -= b3 + k5;
3139	b3 -= k6;
3140
3141	tmp = b1 ^ b0;
3142	b1 = (tmp >> 39) | (tmp << (64 - 39));
3143	b0 -= b1 + k3;
3144	b1 -= k4;
3145
3146	tmp = b3 ^ b4;
3147	b3 = (tmp >> 56) | (tmp << (64 - 56));
3148	b4 -= b3;
3149
3150	tmp = b5 ^ b2;
3151	b5 = (tmp >> 54) | (tmp << (64 - 54));
3152	b2 -= b5;
3153
3154	tmp = b7 ^ b0;
3155	b7 = (tmp >> 9) | (tmp << (64 - 9));
3156	b0 -= b7;
3157
3158	tmp = b1 ^ b6;
3159	b1 = (tmp >> 44) | (tmp << (64 - 44));
3160	b6 -= b1;
3161
3162	tmp = b7 ^ b2;
3163	b7 = (tmp >> 39) | (tmp << (64 - 39));
3164	b2 -= b7;
3165
3166	tmp = b5 ^ b0;
3167	b5 = (tmp >> 36) | (tmp << (64 - 36));
3168	b0 -= b5;
3169
3170	tmp = b3 ^ b6;
3171	b3 = (tmp >> 49) | (tmp << (64 - 49));
3172	b6 -= b3;
3173
3174	tmp = b1 ^ b4;
3175	b1 = (tmp >> 17) | (tmp << (64 - 17));
3176	b4 -= b1;
3177
3178	tmp = b3 ^ b0;
3179	b3 = (tmp >> 42) | (tmp << (64 - 42));
3180	b0 -= b3;
3181
3182	tmp = b5 ^ b6;
3183	b5 = (tmp >> 14) | (tmp << (64 - 14));
3184	b6 -= b5;
3185
3186	tmp = b7 ^ b4;
3187	b7 = (tmp >> 27) | (tmp << (64 - 27));
3188	b4 -= b7;
3189
3190	tmp = b1 ^ b2;
3191	b1 = (tmp >> 33) | (tmp << (64 - 33));
3192	b2 -= b1;
3193
3194	tmp = b7 ^ b6;
3195	b7 = (tmp >> 37) | (tmp << (64 - 37));
3196	b6 -= b7 + k8 + t0;
3197	b7 -= k0 + 2;
3198
3199	tmp = b5 ^ b4;
3200	b5 = (tmp >> 19) | (tmp << (64 - 19));
3201	b4 -= b5 + k6;
3202	b5 -= k7 + t2;
3203
3204	tmp = b3 ^ b2;
3205	b3 = (tmp >> 36) | (tmp << (64 - 36));
3206	b2 -= b3 + k4;
3207	b3 -= k5;
3208
3209	tmp = b1 ^ b0;
3210	b1 = (tmp >> 46) | (tmp << (64 - 46));
3211	b0 -= b1 + k2;
3212	b1 -= k3;
3213
3214	tmp = b3 ^ b4;
3215	b3 = (tmp >> 22) | (tmp << (64 - 22));
3216	b4 -= b3;
3217
3218	tmp = b5 ^ b2;
3219	b5 = (tmp >> 56) | (tmp << (64 - 56));
3220	b2 -= b5;
3221
3222	tmp = b7 ^ b0;
3223	b7 = (tmp >> 35) | (tmp << (64 - 35));
3224	b0 -= b7;
3225
3226	tmp = b1 ^ b6;
3227	b1 = (tmp >> 8) | (tmp << (64 - 8));
3228	b6 -= b1;
3229
3230	tmp = b7 ^ b2;
3231	b7 = (tmp >> 43) | (tmp << (64 - 43));
3232	b2 -= b7;
3233
3234	tmp = b5 ^ b0;
3235	b5 = (tmp >> 39) | (tmp << (64 - 39));
3236	b0 -= b5;
3237
3238	tmp = b3 ^ b6;
3239	b3 = (tmp >> 29) | (tmp << (64 - 29));
3240	b6 -= b3;
3241
3242	tmp = b1 ^ b4;
3243	b1 = (tmp >> 25) | (tmp << (64 - 25));
3244	b4 -= b1;
3245
3246	tmp = b3 ^ b0;
3247	b3 = (tmp >> 17) | (tmp << (64 - 17));
3248	b0 -= b3;
3249
3250	tmp = b5 ^ b6;
3251	b5 = (tmp >> 10) | (tmp << (64 - 10));
3252	b6 -= b5;
3253
3254	tmp = b7 ^ b4;
3255	b7 = (tmp >> 50) | (tmp << (64 - 50));
3256	b4 -= b7;
3257
3258	tmp = b1 ^ b2;
3259	b1 = (tmp >> 13) | (tmp << (64 - 13));
3260	b2 -= b1;
3261
3262	tmp = b7 ^ b6;
3263	b7 = (tmp >> 24) | (tmp << (64 - 24));
3264	b6 -= b7 + k7 + t2;
3265	b7 -= k8 + 1;
3266
3267	tmp = b5 ^ b4;
3268	b5 = (tmp >> 34) | (tmp << (64 - 34));
3269	b4 -= b5 + k5;
3270	b5 -= k6 + t1;
3271
3272	tmp = b3 ^ b2;
3273	b3 = (tmp >> 30) | (tmp << (64 - 30));
3274	b2 -= b3 + k3;
3275	b3 -= k4;
3276
3277	tmp = b1 ^ b0;
3278	b1 = (tmp >> 39) | (tmp << (64 - 39));
3279	b0 -= b1 + k1;
3280	b1 -= k2;
3281
3282	tmp = b3 ^ b4;
3283	b3 = (tmp >> 56) | (tmp << (64 - 56));
3284	b4 -= b3;
3285
3286	tmp = b5 ^ b2;
3287	b5 = (tmp >> 54) | (tmp << (64 - 54));
3288	b2 -= b5;
3289
3290	tmp = b7 ^ b0;
3291	b7 = (tmp >> 9) | (tmp << (64 - 9));
3292	b0 -= b7;
3293
3294	tmp = b1 ^ b6;
3295	b1 = (tmp >> 44) | (tmp << (64 - 44));
3296	b6 -= b1;
3297
3298	tmp = b7 ^ b2;
3299	b7 = (tmp >> 39) | (tmp << (64 - 39));
3300	b2 -= b7;
3301
3302	tmp = b5 ^ b0;
3303	b5 = (tmp >> 36) | (tmp << (64 - 36));
3304	b0 -= b5;
3305
3306	tmp = b3 ^ b6;
3307	b3 = (tmp >> 49) | (tmp << (64 - 49));
3308	b6 -= b3;
3309
3310	tmp = b1 ^ b4;
3311	b1 = (tmp >> 17) | (tmp << (64 - 17));
3312	b4 -= b1;
3313
3314	tmp = b3 ^ b0;
3315	b3 = (tmp >> 42) | (tmp << (64 - 42));
3316	b0 -= b3;
3317
3318	tmp = b5 ^ b6;
3319	b5 = (tmp >> 14) | (tmp << (64 - 14));
3320	b6 -= b5;
3321
3322	tmp = b7 ^ b4;
3323	b7 = (tmp >> 27) | (tmp << (64 - 27));
3324	b4 -= b7;
3325
3326	tmp = b1 ^ b2;
3327	b1 = (tmp >> 33) | (tmp << (64 - 33));
3328	b2 -= b1;
3329
3330	tmp = b7 ^ b6;
3331	b7 = (tmp >> 37) | (tmp << (64 - 37));
3332	b6 -= b7 + k6 + t1;
3333	b7 -= k7;
3334
3335	tmp = b5 ^ b4;
3336	b5 = (tmp >> 19) | (tmp << (64 - 19));
3337	b4 -= b5 + k4;
3338	b5 -= k5 + t0;
3339
3340	tmp = b3 ^ b2;
3341	b3 = (tmp >> 36) | (tmp << (64 - 36));
3342	b2 -= b3 + k2;
3343	b3 -= k3;
3344
3345	tmp = b1 ^ b0;
3346	b1 = (tmp >> 46) | (tmp << (64 - 46));
3347	b0 -= b1 + k0;
3348	b1 -= k1;
3349
3350	output[0] = b0;
3351	output[1] = b1;
3352	output[2] = b2;
3353	output[3] = b3;
3354
3355	output[7] = b7;
3356	output[6] = b6;
3357	output[5] = b5;
3358	output[4] = b4;
3359}
3360
3361void threefish_encrypt_1024(struct threefish_key *key_ctx, u64 *input,
3362			    u64 *output)
3363{
3364	u64 b0 = input[0], b1 = input[1],
3365	    b2 = input[2], b3 = input[3],
3366	    b4 = input[4], b5 = input[5],
3367	    b6 = input[6], b7 = input[7],
3368	    b8 = input[8], b9 = input[9],
3369	    b10 = input[10], b11 = input[11],
3370	    b12 = input[12], b13 = input[13],
3371	    b14 = input[14], b15 = input[15];
3372	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
3373	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
3374	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
3375	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
3376	    k8 = key_ctx->key[8], k9 = key_ctx->key[9],
3377	    k10 = key_ctx->key[10], k11 = key_ctx->key[11],
3378	    k12 = key_ctx->key[12], k13 = key_ctx->key[13],
3379	    k14 = key_ctx->key[14], k15 = key_ctx->key[15],
3380	    k16 = key_ctx->key[16];
3381	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
3382	    t2 = key_ctx->tweak[2];
3383
3384	b1 += k1;
3385	b0 += b1 + k0;
3386	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
3387
3388	b3 += k3;
3389	b2 += b3 + k2;
3390	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
3391
3392	b5 += k5;
3393	b4 += b5 + k4;
3394	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
3395
3396	b7 += k7;
3397	b6 += b7 + k6;
3398	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
3399
3400	b9 += k9;
3401	b8 += b9 + k8;
3402	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
3403
3404	b11 += k11;
3405	b10 += b11 + k10;
3406	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
3407
3408	b13 += k13 + t0;
3409	b12 += b13 + k12;
3410	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
3411
3412	b15 += k15;
3413	b14 += b15 + k14 + t1;
3414	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
3415
3416	b0 += b9;
3417	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
3418
3419	b2 += b13;
3420	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
3421
3422	b6 += b11;
3423	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
3424
3425	b4 += b15;
3426	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
3427
3428	b10 += b7;
3429	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
3430
3431	b12 += b3;
3432	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
3433
3434	b14 += b5;
3435	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
3436
3437	b8 += b1;
3438	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
3439
3440	b0 += b7;
3441	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
3442
3443	b2 += b5;
3444	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
3445
3446	b4 += b3;
3447	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
3448
3449	b6 += b1;
3450	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
3451
3452	b12 += b15;
3453	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
3454
3455	b14 += b13;
3456	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
3457
3458	b8 += b11;
3459	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
3460
3461	b10 += b9;
3462	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
3463
3464	b0 += b15;
3465	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
3466
3467	b2 += b11;
3468	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
3469
3470	b6 += b13;
3471	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
3472
3473	b4 += b9;
3474	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
3475
3476	b14 += b1;
3477	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
3478
3479	b8 += b5;
3480	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
3481
3482	b10 += b3;
3483	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
3484
3485	b12 += b7;
3486	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
3487
3488	b1 += k2;
3489	b0 += b1 + k1;
3490	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
3491
3492	b3 += k4;
3493	b2 += b3 + k3;
3494	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
3495
3496	b5 += k6;
3497	b4 += b5 + k5;
3498	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
3499
3500	b7 += k8;
3501	b6 += b7 + k7;
3502	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
3503
3504	b9 += k10;
3505	b8 += b9 + k9;
3506	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
3507
3508	b11 += k12;
3509	b10 += b11 + k11;
3510	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
3511
3512	b13 += k14 + t1;
3513	b12 += b13 + k13;
3514	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
3515
3516	b15 += k16 + 1;
3517	b14 += b15 + k15 + t2;
3518	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
3519
3520	b0 += b9;
3521	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
3522
3523	b2 += b13;
3524	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
3525
3526	b6 += b11;
3527	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
3528
3529	b4 += b15;
3530	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
3531
3532	b10 += b7;
3533	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
3534
3535	b12 += b3;
3536	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
3537
3538	b14 += b5;
3539	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
3540
3541	b8 += b1;
3542	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
3543
3544	b0 += b7;
3545	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
3546
3547	b2 += b5;
3548	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
3549
3550	b4 += b3;
3551	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
3552
3553	b6 += b1;
3554	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
3555
3556	b12 += b15;
3557	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
3558
3559	b14 += b13;
3560	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
3561
3562	b8 += b11;
3563	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
3564
3565	b10 += b9;
3566	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
3567
3568	b0 += b15;
3569	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
3570
3571	b2 += b11;
3572	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
3573
3574	b6 += b13;
3575	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
3576
3577	b4 += b9;
3578	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
3579
3580	b14 += b1;
3581	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
3582
3583	b8 += b5;
3584	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
3585
3586	b10 += b3;
3587	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
3588
3589	b12 += b7;
3590	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
3591
3592	b1 += k3;
3593	b0 += b1 + k2;
3594	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
3595
3596	b3 += k5;
3597	b2 += b3 + k4;
3598	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
3599
3600	b5 += k7;
3601	b4 += b5 + k6;
3602	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
3603
3604	b7 += k9;
3605	b6 += b7 + k8;
3606	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
3607
3608	b9 += k11;
3609	b8 += b9 + k10;
3610	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
3611
3612	b11 += k13;
3613	b10 += b11 + k12;
3614	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
3615
3616	b13 += k15 + t2;
3617	b12 += b13 + k14;
3618	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
3619
3620	b15 += k0 + 2;
3621	b14 += b15 + k16 + t0;
3622	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
3623
3624	b0 += b9;
3625	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
3626
3627	b2 += b13;
3628	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
3629
3630	b6 += b11;
3631	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
3632
3633	b4 += b15;
3634	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
3635
3636	b10 += b7;
3637	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
3638
3639	b12 += b3;
3640	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
3641
3642	b14 += b5;
3643	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
3644
3645	b8 += b1;
3646	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
3647
3648	b0 += b7;
3649	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
3650
3651	b2 += b5;
3652	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
3653
3654	b4 += b3;
3655	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
3656
3657	b6 += b1;
3658	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
3659
3660	b12 += b15;
3661	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
3662
3663	b14 += b13;
3664	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
3665
3666	b8 += b11;
3667	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
3668
3669	b10 += b9;
3670	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
3671
3672	b0 += b15;
3673	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
3674
3675	b2 += b11;
3676	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
3677
3678	b6 += b13;
3679	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
3680
3681	b4 += b9;
3682	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
3683
3684	b14 += b1;
3685	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
3686
3687	b8 += b5;
3688	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
3689
3690	b10 += b3;
3691	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
3692
3693	b12 += b7;
3694	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
3695
3696	b1 += k4;
3697	b0 += b1 + k3;
3698	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
3699
3700	b3 += k6;
3701	b2 += b3 + k5;
3702	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
3703
3704	b5 += k8;
3705	b4 += b5 + k7;
3706	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
3707
3708	b7 += k10;
3709	b6 += b7 + k9;
3710	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
3711
3712	b9 += k12;
3713	b8 += b9 + k11;
3714	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
3715
3716	b11 += k14;
3717	b10 += b11 + k13;
3718	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
3719
3720	b13 += k16 + t0;
3721	b12 += b13 + k15;
3722	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
3723
3724	b15 += k1 + 3;
3725	b14 += b15 + k0 + t1;
3726	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
3727
3728	b0 += b9;
3729	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
3730
3731	b2 += b13;
3732	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
3733
3734	b6 += b11;
3735	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
3736
3737	b4 += b15;
3738	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
3739
3740	b10 += b7;
3741	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
3742
3743	b12 += b3;
3744	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
3745
3746	b14 += b5;
3747	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
3748
3749	b8 += b1;
3750	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
3751
3752	b0 += b7;
3753	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
3754
3755	b2 += b5;
3756	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
3757
3758	b4 += b3;
3759	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
3760
3761	b6 += b1;
3762	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
3763
3764	b12 += b15;
3765	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
3766
3767	b14 += b13;
3768	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
3769
3770	b8 += b11;
3771	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
3772
3773	b10 += b9;
3774	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
3775
3776	b0 += b15;
3777	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
3778
3779	b2 += b11;
3780	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
3781
3782	b6 += b13;
3783	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
3784
3785	b4 += b9;
3786	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
3787
3788	b14 += b1;
3789	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
3790
3791	b8 += b5;
3792	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
3793
3794	b10 += b3;
3795	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
3796
3797	b12 += b7;
3798	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
3799
3800	b1 += k5;
3801	b0 += b1 + k4;
3802	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
3803
3804	b3 += k7;
3805	b2 += b3 + k6;
3806	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
3807
3808	b5 += k9;
3809	b4 += b5 + k8;
3810	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
3811
3812	b7 += k11;
3813	b6 += b7 + k10;
3814	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
3815
3816	b9 += k13;
3817	b8 += b9 + k12;
3818	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
3819
3820	b11 += k15;
3821	b10 += b11 + k14;
3822	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
3823
3824	b13 += k0 + t1;
3825	b12 += b13 + k16;
3826	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
3827
3828	b15 += k2 + 4;
3829	b14 += b15 + k1 + t2;
3830	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
3831
3832	b0 += b9;
3833	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
3834
3835	b2 += b13;
3836	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
3837
3838	b6 += b11;
3839	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
3840
3841	b4 += b15;
3842	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
3843
3844	b10 += b7;
3845	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
3846
3847	b12 += b3;
3848	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
3849
3850	b14 += b5;
3851	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
3852
3853	b8 += b1;
3854	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
3855
3856	b0 += b7;
3857	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
3858
3859	b2 += b5;
3860	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
3861
3862	b4 += b3;
3863	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
3864
3865	b6 += b1;
3866	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
3867
3868	b12 += b15;
3869	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
3870
3871	b14 += b13;
3872	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
3873
3874	b8 += b11;
3875	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
3876
3877	b10 += b9;
3878	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
3879
3880	b0 += b15;
3881	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
3882
3883	b2 += b11;
3884	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
3885
3886	b6 += b13;
3887	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
3888
3889	b4 += b9;
3890	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
3891
3892	b14 += b1;
3893	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
3894
3895	b8 += b5;
3896	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
3897
3898	b10 += b3;
3899	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
3900
3901	b12 += b7;
3902	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
3903
3904	b1 += k6;
3905	b0 += b1 + k5;
3906	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
3907
3908	b3 += k8;
3909	b2 += b3 + k7;
3910	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
3911
3912	b5 += k10;
3913	b4 += b5 + k9;
3914	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
3915
3916	b7 += k12;
3917	b6 += b7 + k11;
3918	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
3919
3920	b9 += k14;
3921	b8 += b9 + k13;
3922	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
3923
3924	b11 += k16;
3925	b10 += b11 + k15;
3926	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
3927
3928	b13 += k1 + t2;
3929	b12 += b13 + k0;
3930	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
3931
3932	b15 += k3 + 5;
3933	b14 += b15 + k2 + t0;
3934	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
3935
3936	b0 += b9;
3937	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
3938
3939	b2 += b13;
3940	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
3941
3942	b6 += b11;
3943	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
3944
3945	b4 += b15;
3946	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
3947
3948	b10 += b7;
3949	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
3950
3951	b12 += b3;
3952	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
3953
3954	b14 += b5;
3955	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
3956
3957	b8 += b1;
3958	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
3959
3960	b0 += b7;
3961	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
3962
3963	b2 += b5;
3964	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
3965
3966	b4 += b3;
3967	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
3968
3969	b6 += b1;
3970	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
3971
3972	b12 += b15;
3973	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
3974
3975	b14 += b13;
3976	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
3977
3978	b8 += b11;
3979	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
3980
3981	b10 += b9;
3982	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
3983
3984	b0 += b15;
3985	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
3986
3987	b2 += b11;
3988	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
3989
3990	b6 += b13;
3991	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
3992
3993	b4 += b9;
3994	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
3995
3996	b14 += b1;
3997	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
3998
3999	b8 += b5;
4000	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4001
4002	b10 += b3;
4003	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4004
4005	b12 += b7;
4006	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4007
4008	b1 += k7;
4009	b0 += b1 + k6;
4010	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4011
4012	b3 += k9;
4013	b2 += b3 + k8;
4014	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4015
4016	b5 += k11;
4017	b4 += b5 + k10;
4018	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4019
4020	b7 += k13;
4021	b6 += b7 + k12;
4022	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4023
4024	b9 += k15;
4025	b8 += b9 + k14;
4026	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4027
4028	b11 += k0;
4029	b10 += b11 + k16;
4030	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4031
4032	b13 += k2 + t0;
4033	b12 += b13 + k1;
4034	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4035
4036	b15 += k4 + 6;
4037	b14 += b15 + k3 + t1;
4038	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4039
4040	b0 += b9;
4041	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4042
4043	b2 += b13;
4044	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4045
4046	b6 += b11;
4047	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4048
4049	b4 += b15;
4050	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4051
4052	b10 += b7;
4053	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4054
4055	b12 += b3;
4056	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4057
4058	b14 += b5;
4059	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4060
4061	b8 += b1;
4062	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4063
4064	b0 += b7;
4065	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4066
4067	b2 += b5;
4068	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4069
4070	b4 += b3;
4071	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4072
4073	b6 += b1;
4074	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4075
4076	b12 += b15;
4077	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4078
4079	b14 += b13;
4080	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4081
4082	b8 += b11;
4083	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4084
4085	b10 += b9;
4086	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4087
4088	b0 += b15;
4089	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4090
4091	b2 += b11;
4092	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4093
4094	b6 += b13;
4095	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4096
4097	b4 += b9;
4098	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4099
4100	b14 += b1;
4101	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4102
4103	b8 += b5;
4104	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4105
4106	b10 += b3;
4107	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4108
4109	b12 += b7;
4110	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4111
4112	b1 += k8;
4113	b0 += b1 + k7;
4114	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4115
4116	b3 += k10;
4117	b2 += b3 + k9;
4118	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4119
4120	b5 += k12;
4121	b4 += b5 + k11;
4122	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4123
4124	b7 += k14;
4125	b6 += b7 + k13;
4126	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4127
4128	b9 += k16;
4129	b8 += b9 + k15;
4130	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4131
4132	b11 += k1;
4133	b10 += b11 + k0;
4134	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4135
4136	b13 += k3 + t1;
4137	b12 += b13 + k2;
4138	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4139
4140	b15 += k5 + 7;
4141	b14 += b15 + k4 + t2;
4142	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4143
4144	b0 += b9;
4145	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4146
4147	b2 += b13;
4148	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4149
4150	b6 += b11;
4151	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4152
4153	b4 += b15;
4154	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4155
4156	b10 += b7;
4157	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4158
4159	b12 += b3;
4160	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4161
4162	b14 += b5;
4163	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4164
4165	b8 += b1;
4166	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4167
4168	b0 += b7;
4169	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4170
4171	b2 += b5;
4172	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4173
4174	b4 += b3;
4175	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4176
4177	b6 += b1;
4178	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4179
4180	b12 += b15;
4181	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4182
4183	b14 += b13;
4184	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4185
4186	b8 += b11;
4187	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4188
4189	b10 += b9;
4190	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4191
4192	b0 += b15;
4193	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4194
4195	b2 += b11;
4196	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4197
4198	b6 += b13;
4199	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4200
4201	b4 += b9;
4202	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4203
4204	b14 += b1;
4205	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4206
4207	b8 += b5;
4208	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4209
4210	b10 += b3;
4211	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4212
4213	b12 += b7;
4214	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4215
4216	b1 += k9;
4217	b0 += b1 + k8;
4218	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4219
4220	b3 += k11;
4221	b2 += b3 + k10;
4222	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4223
4224	b5 += k13;
4225	b4 += b5 + k12;
4226	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4227
4228	b7 += k15;
4229	b6 += b7 + k14;
4230	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4231
4232	b9 += k0;
4233	b8 += b9 + k16;
4234	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4235
4236	b11 += k2;
4237	b10 += b11 + k1;
4238	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4239
4240	b13 += k4 + t2;
4241	b12 += b13 + k3;
4242	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4243
4244	b15 += k6 + 8;
4245	b14 += b15 + k5 + t0;
4246	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4247
4248	b0 += b9;
4249	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4250
4251	b2 += b13;
4252	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4253
4254	b6 += b11;
4255	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4256
4257	b4 += b15;
4258	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4259
4260	b10 += b7;
4261	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4262
4263	b12 += b3;
4264	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4265
4266	b14 += b5;
4267	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4268
4269	b8 += b1;
4270	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4271
4272	b0 += b7;
4273	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4274
4275	b2 += b5;
4276	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4277
4278	b4 += b3;
4279	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4280
4281	b6 += b1;
4282	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4283
4284	b12 += b15;
4285	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4286
4287	b14 += b13;
4288	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4289
4290	b8 += b11;
4291	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4292
4293	b10 += b9;
4294	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4295
4296	b0 += b15;
4297	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4298
4299	b2 += b11;
4300	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4301
4302	b6 += b13;
4303	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4304
4305	b4 += b9;
4306	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4307
4308	b14 += b1;
4309	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4310
4311	b8 += b5;
4312	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4313
4314	b10 += b3;
4315	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4316
4317	b12 += b7;
4318	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4319
4320	b1 += k10;
4321	b0 += b1 + k9;
4322	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4323
4324	b3 += k12;
4325	b2 += b3 + k11;
4326	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4327
4328	b5 += k14;
4329	b4 += b5 + k13;
4330	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4331
4332	b7 += k16;
4333	b6 += b7 + k15;
4334	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4335
4336	b9 += k1;
4337	b8 += b9 + k0;
4338	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4339
4340	b11 += k3;
4341	b10 += b11 + k2;
4342	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4343
4344	b13 += k5 + t0;
4345	b12 += b13 + k4;
4346	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4347
4348	b15 += k7 + 9;
4349	b14 += b15 + k6 + t1;
4350	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4351
4352	b0 += b9;
4353	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4354
4355	b2 += b13;
4356	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4357
4358	b6 += b11;
4359	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4360
4361	b4 += b15;
4362	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4363
4364	b10 += b7;
4365	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4366
4367	b12 += b3;
4368	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4369
4370	b14 += b5;
4371	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4372
4373	b8 += b1;
4374	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4375
4376	b0 += b7;
4377	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4378
4379	b2 += b5;
4380	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4381
4382	b4 += b3;
4383	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4384
4385	b6 += b1;
4386	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4387
4388	b12 += b15;
4389	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4390
4391	b14 += b13;
4392	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4393
4394	b8 += b11;
4395	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4396
4397	b10 += b9;
4398	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4399
4400	b0 += b15;
4401	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4402
4403	b2 += b11;
4404	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4405
4406	b6 += b13;
4407	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4408
4409	b4 += b9;
4410	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4411
4412	b14 += b1;
4413	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4414
4415	b8 += b5;
4416	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4417
4418	b10 += b3;
4419	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4420
4421	b12 += b7;
4422	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4423
4424	b1 += k11;
4425	b0 += b1 + k10;
4426	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4427
4428	b3 += k13;
4429	b2 += b3 + k12;
4430	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4431
4432	b5 += k15;
4433	b4 += b5 + k14;
4434	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4435
4436	b7 += k0;
4437	b6 += b7 + k16;
4438	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4439
4440	b9 += k2;
4441	b8 += b9 + k1;
4442	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4443
4444	b11 += k4;
4445	b10 += b11 + k3;
4446	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4447
4448	b13 += k6 + t1;
4449	b12 += b13 + k5;
4450	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4451
4452	b15 += k8 + 10;
4453	b14 += b15 + k7 + t2;
4454	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4455
4456	b0 += b9;
4457	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4458
4459	b2 += b13;
4460	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4461
4462	b6 += b11;
4463	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4464
4465	b4 += b15;
4466	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4467
4468	b10 += b7;
4469	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4470
4471	b12 += b3;
4472	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4473
4474	b14 += b5;
4475	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4476
4477	b8 += b1;
4478	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4479
4480	b0 += b7;
4481	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4482
4483	b2 += b5;
4484	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4485
4486	b4 += b3;
4487	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4488
4489	b6 += b1;
4490	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4491
4492	b12 += b15;
4493	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4494
4495	b14 += b13;
4496	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4497
4498	b8 += b11;
4499	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4500
4501	b10 += b9;
4502	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4503
4504	b0 += b15;
4505	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4506
4507	b2 += b11;
4508	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4509
4510	b6 += b13;
4511	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4512
4513	b4 += b9;
4514	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4515
4516	b14 += b1;
4517	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4518
4519	b8 += b5;
4520	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4521
4522	b10 += b3;
4523	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4524
4525	b12 += b7;
4526	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4527
4528	b1 += k12;
4529	b0 += b1 + k11;
4530	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4531
4532	b3 += k14;
4533	b2 += b3 + k13;
4534	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4535
4536	b5 += k16;
4537	b4 += b5 + k15;
4538	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4539
4540	b7 += k1;
4541	b6 += b7 + k0;
4542	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4543
4544	b9 += k3;
4545	b8 += b9 + k2;
4546	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4547
4548	b11 += k5;
4549	b10 += b11 + k4;
4550	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4551
4552	b13 += k7 + t2;
4553	b12 += b13 + k6;
4554	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4555
4556	b15 += k9 + 11;
4557	b14 += b15 + k8 + t0;
4558	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4559
4560	b0 += b9;
4561	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4562
4563	b2 += b13;
4564	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4565
4566	b6 += b11;
4567	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4568
4569	b4 += b15;
4570	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4571
4572	b10 += b7;
4573	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4574
4575	b12 += b3;
4576	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4577
4578	b14 += b5;
4579	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4580
4581	b8 += b1;
4582	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4583
4584	b0 += b7;
4585	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4586
4587	b2 += b5;
4588	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4589
4590	b4 += b3;
4591	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4592
4593	b6 += b1;
4594	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4595
4596	b12 += b15;
4597	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4598
4599	b14 += b13;
4600	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4601
4602	b8 += b11;
4603	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4604
4605	b10 += b9;
4606	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4607
4608	b0 += b15;
4609	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4610
4611	b2 += b11;
4612	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4613
4614	b6 += b13;
4615	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4616
4617	b4 += b9;
4618	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4619
4620	b14 += b1;
4621	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4622
4623	b8 += b5;
4624	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4625
4626	b10 += b3;
4627	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4628
4629	b12 += b7;
4630	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4631
4632	b1 += k13;
4633	b0 += b1 + k12;
4634	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4635
4636	b3 += k15;
4637	b2 += b3 + k14;
4638	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4639
4640	b5 += k0;
4641	b4 += b5 + k16;
4642	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4643
4644	b7 += k2;
4645	b6 += b7 + k1;
4646	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4647
4648	b9 += k4;
4649	b8 += b9 + k3;
4650	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4651
4652	b11 += k6;
4653	b10 += b11 + k5;
4654	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4655
4656	b13 += k8 + t0;
4657	b12 += b13 + k7;
4658	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4659
4660	b15 += k10 + 12;
4661	b14 += b15 + k9 + t1;
4662	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4663
4664	b0 += b9;
4665	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4666
4667	b2 += b13;
4668	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4669
4670	b6 += b11;
4671	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4672
4673	b4 += b15;
4674	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4675
4676	b10 += b7;
4677	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4678
4679	b12 += b3;
4680	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4681
4682	b14 += b5;
4683	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4684
4685	b8 += b1;
4686	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4687
4688	b0 += b7;
4689	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4690
4691	b2 += b5;
4692	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4693
4694	b4 += b3;
4695	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4696
4697	b6 += b1;
4698	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4699
4700	b12 += b15;
4701	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4702
4703	b14 += b13;
4704	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4705
4706	b8 += b11;
4707	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4708
4709	b10 += b9;
4710	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4711
4712	b0 += b15;
4713	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4714
4715	b2 += b11;
4716	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4717
4718	b6 += b13;
4719	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4720
4721	b4 += b9;
4722	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4723
4724	b14 += b1;
4725	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4726
4727	b8 += b5;
4728	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4729
4730	b10 += b3;
4731	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4732
4733	b12 += b7;
4734	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4735
4736	b1 += k14;
4737	b0 += b1 + k13;
4738	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4739
4740	b3 += k16;
4741	b2 += b3 + k15;
4742	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4743
4744	b5 += k1;
4745	b4 += b5 + k0;
4746	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4747
4748	b7 += k3;
4749	b6 += b7 + k2;
4750	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4751
4752	b9 += k5;
4753	b8 += b9 + k4;
4754	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4755
4756	b11 += k7;
4757	b10 += b11 + k6;
4758	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4759
4760	b13 += k9 + t1;
4761	b12 += b13 + k8;
4762	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4763
4764	b15 += k11 + 13;
4765	b14 += b15 + k10 + t2;
4766	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4767
4768	b0 += b9;
4769	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4770
4771	b2 += b13;
4772	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4773
4774	b6 += b11;
4775	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4776
4777	b4 += b15;
4778	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4779
4780	b10 += b7;
4781	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4782
4783	b12 += b3;
4784	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4785
4786	b14 += b5;
4787	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4788
4789	b8 += b1;
4790	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4791
4792	b0 += b7;
4793	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4794
4795	b2 += b5;
4796	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4797
4798	b4 += b3;
4799	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4800
4801	b6 += b1;
4802	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4803
4804	b12 += b15;
4805	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4806
4807	b14 += b13;
4808	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4809
4810	b8 += b11;
4811	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4812
4813	b10 += b9;
4814	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4815
4816	b0 += b15;
4817	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4818
4819	b2 += b11;
4820	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4821
4822	b6 += b13;
4823	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4824
4825	b4 += b9;
4826	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4827
4828	b14 += b1;
4829	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4830
4831	b8 += b5;
4832	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4833
4834	b10 += b3;
4835	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4836
4837	b12 += b7;
4838	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4839
4840	b1 += k15;
4841	b0 += b1 + k14;
4842	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4843
4844	b3 += k0;
4845	b2 += b3 + k16;
4846	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4847
4848	b5 += k2;
4849	b4 += b5 + k1;
4850	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4851
4852	b7 += k4;
4853	b6 += b7 + k3;
4854	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4855
4856	b9 += k6;
4857	b8 += b9 + k5;
4858	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4859
4860	b11 += k8;
4861	b10 += b11 + k7;
4862	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4863
4864	b13 += k10 + t2;
4865	b12 += b13 + k9;
4866	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4867
4868	b15 += k12 + 14;
4869	b14 += b15 + k11 + t0;
4870	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4871
4872	b0 += b9;
4873	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4874
4875	b2 += b13;
4876	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4877
4878	b6 += b11;
4879	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4880
4881	b4 += b15;
4882	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4883
4884	b10 += b7;
4885	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4886
4887	b12 += b3;
4888	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4889
4890	b14 += b5;
4891	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4892
4893	b8 += b1;
4894	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4895
4896	b0 += b7;
4897	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4898
4899	b2 += b5;
4900	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4901
4902	b4 += b3;
4903	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4904
4905	b6 += b1;
4906	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4907
4908	b12 += b15;
4909	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4910
4911	b14 += b13;
4912	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4913
4914	b8 += b11;
4915	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4916
4917	b10 += b9;
4918	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4919
4920	b0 += b15;
4921	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4922
4923	b2 += b11;
4924	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4925
4926	b6 += b13;
4927	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4928
4929	b4 += b9;
4930	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4931
4932	b14 += b1;
4933	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4934
4935	b8 += b5;
4936	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4937
4938	b10 += b3;
4939	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4940
4941	b12 += b7;
4942	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4943
4944	b1 += k16;
4945	b0 += b1 + k15;
4946	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4947
4948	b3 += k1;
4949	b2 += b3 + k0;
4950	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4951
4952	b5 += k3;
4953	b4 += b5 + k2;
4954	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4955
4956	b7 += k5;
4957	b6 += b7 + k4;
4958	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4959
4960	b9 += k7;
4961	b8 += b9 + k6;
4962	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4963
4964	b11 += k9;
4965	b10 += b11 + k8;
4966	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4967
4968	b13 += k11 + t0;
4969	b12 += b13 + k10;
4970	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4971
4972	b15 += k13 + 15;
4973	b14 += b15 + k12 + t1;
4974	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4975
4976	b0 += b9;
4977	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4978
4979	b2 += b13;
4980	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4981
4982	b6 += b11;
4983	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4984
4985	b4 += b15;
4986	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4987
4988	b10 += b7;
4989	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4990
4991	b12 += b3;
4992	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4993
4994	b14 += b5;
4995	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4996
4997	b8 += b1;
4998	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4999
5000	b0 += b7;
5001	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
5002
5003	b2 += b5;
5004	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
5005
5006	b4 += b3;
5007	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
5008
5009	b6 += b1;
5010	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
5011
5012	b12 += b15;
5013	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
5014
5015	b14 += b13;
5016	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
5017
5018	b8 += b11;
5019	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
5020
5021	b10 += b9;
5022	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
5023
5024	b0 += b15;
5025	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
5026
5027	b2 += b11;
5028	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
5029
5030	b6 += b13;
5031	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
5032
5033	b4 += b9;
5034	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
5035
5036	b14 += b1;
5037	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
5038
5039	b8 += b5;
5040	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
5041
5042	b10 += b3;
5043	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
5044
5045	b12 += b7;
5046	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
5047
5048	b1 += k0;
5049	b0 += b1 + k16;
5050	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
5051
5052	b3 += k2;
5053	b2 += b3 + k1;
5054	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
5055
5056	b5 += k4;
5057	b4 += b5 + k3;
5058	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
5059
5060	b7 += k6;
5061	b6 += b7 + k5;
5062	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
5063
5064	b9 += k8;
5065	b8 += b9 + k7;
5066	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
5067
5068	b11 += k10;
5069	b10 += b11 + k9;
5070	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
5071
5072	b13 += k12 + t1;
5073	b12 += b13 + k11;
5074	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
5075
5076	b15 += k14 + 16;
5077	b14 += b15 + k13 + t2;
5078	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
5079
5080	b0 += b9;
5081	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
5082
5083	b2 += b13;
5084	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
5085
5086	b6 += b11;
5087	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
5088
5089	b4 += b15;
5090	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
5091
5092	b10 += b7;
5093	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
5094
5095	b12 += b3;
5096	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
5097
5098	b14 += b5;
5099	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
5100
5101	b8 += b1;
5102	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
5103
5104	b0 += b7;
5105	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
5106
5107	b2 += b5;
5108	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
5109
5110	b4 += b3;
5111	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
5112
5113	b6 += b1;
5114	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
5115
5116	b12 += b15;
5117	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
5118
5119	b14 += b13;
5120	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
5121
5122	b8 += b11;
5123	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
5124
5125	b10 += b9;
5126	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
5127
5128	b0 += b15;
5129	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
5130
5131	b2 += b11;
5132	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
5133
5134	b6 += b13;
5135	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
5136
5137	b4 += b9;
5138	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
5139
5140	b14 += b1;
5141	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
5142
5143	b8 += b5;
5144	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
5145
5146	b10 += b3;
5147	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
5148
5149	b12 += b7;
5150	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
5151
5152	b1 += k1;
5153	b0 += b1 + k0;
5154	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
5155
5156	b3 += k3;
5157	b2 += b3 + k2;
5158	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
5159
5160	b5 += k5;
5161	b4 += b5 + k4;
5162	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
5163
5164	b7 += k7;
5165	b6 += b7 + k6;
5166	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
5167
5168	b9 += k9;
5169	b8 += b9 + k8;
5170	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
5171
5172	b11 += k11;
5173	b10 += b11 + k10;
5174	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
5175
5176	b13 += k13 + t2;
5177	b12 += b13 + k12;
5178	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
5179
5180	b15 += k15 + 17;
5181	b14 += b15 + k14 + t0;
5182	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
5183
5184	b0 += b9;
5185	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
5186
5187	b2 += b13;
5188	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
5189
5190	b6 += b11;
5191	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
5192
5193	b4 += b15;
5194	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
5195
5196	b10 += b7;
5197	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
5198
5199	b12 += b3;
5200	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
5201
5202	b14 += b5;
5203	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
5204
5205	b8 += b1;
5206	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
5207
5208	b0 += b7;
5209	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
5210
5211	b2 += b5;
5212	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
5213
5214	b4 += b3;
5215	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
5216
5217	b6 += b1;
5218	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
5219
5220	b12 += b15;
5221	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
5222
5223	b14 += b13;
5224	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
5225
5226	b8 += b11;
5227	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
5228
5229	b10 += b9;
5230	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
5231
5232	b0 += b15;
5233	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
5234
5235	b2 += b11;
5236	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
5237
5238	b6 += b13;
5239	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
5240
5241	b4 += b9;
5242	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
5243
5244	b14 += b1;
5245	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
5246
5247	b8 += b5;
5248	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
5249
5250	b10 += b3;
5251	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
5252
5253	b12 += b7;
5254	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
5255
5256	b1 += k2;
5257	b0 += b1 + k1;
5258	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
5259
5260	b3 += k4;
5261	b2 += b3 + k3;
5262	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
5263
5264	b5 += k6;
5265	b4 += b5 + k5;
5266	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
5267
5268	b7 += k8;
5269	b6 += b7 + k7;
5270	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
5271
5272	b9 += k10;
5273	b8 += b9 + k9;
5274	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
5275
5276	b11 += k12;
5277	b10 += b11 + k11;
5278	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
5279
5280	b13 += k14 + t0;
5281	b12 += b13 + k13;
5282	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
5283
5284	b15 += k16 + 18;
5285	b14 += b15 + k15 + t1;
5286	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
5287
5288	b0 += b9;
5289	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
5290
5291	b2 += b13;
5292	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
5293
5294	b6 += b11;
5295	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
5296
5297	b4 += b15;
5298	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
5299
5300	b10 += b7;
5301	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
5302
5303	b12 += b3;
5304	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
5305
5306	b14 += b5;
5307	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
5308
5309	b8 += b1;
5310	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
5311
5312	b0 += b7;
5313	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
5314
5315	b2 += b5;
5316	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
5317
5318	b4 += b3;
5319	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
5320
5321	b6 += b1;
5322	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
5323
5324	b12 += b15;
5325	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
5326
5327	b14 += b13;
5328	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
5329
5330	b8 += b11;
5331	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
5332
5333	b10 += b9;
5334	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
5335
5336	b0 += b15;
5337	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
5338
5339	b2 += b11;
5340	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
5341
5342	b6 += b13;
5343	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
5344
5345	b4 += b9;
5346	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
5347
5348	b14 += b1;
5349	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
5350
5351	b8 += b5;
5352	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
5353
5354	b10 += b3;
5355	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
5356
5357	b12 += b7;
5358	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
5359
5360	b1 += k3;
5361	b0 += b1 + k2;
5362	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
5363
5364	b3 += k5;
5365	b2 += b3 + k4;
5366	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
5367
5368	b5 += k7;
5369	b4 += b5 + k6;
5370	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
5371
5372	b7 += k9;
5373	b6 += b7 + k8;
5374	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
5375
5376	b9 += k11;
5377	b8 += b9 + k10;
5378	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
5379
5380	b11 += k13;
5381	b10 += b11 + k12;
5382	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
5383
5384	b13 += k15 + t1;
5385	b12 += b13 + k14;
5386	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
5387
5388	b15 += k0 + 19;
5389	b14 += b15 + k16 + t2;
5390	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
5391
5392	b0 += b9;
5393	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
5394
5395	b2 += b13;
5396	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
5397
5398	b6 += b11;
5399	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
5400
5401	b4 += b15;
5402	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
5403
5404	b10 += b7;
5405	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
5406
5407	b12 += b3;
5408	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
5409
5410	b14 += b5;
5411	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
5412
5413	b8 += b1;
5414	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
5415
5416	b0 += b7;
5417	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
5418
5419	b2 += b5;
5420	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
5421
5422	b4 += b3;
5423	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
5424
5425	b6 += b1;
5426	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
5427
5428	b12 += b15;
5429	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
5430
5431	b14 += b13;
5432	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
5433
5434	b8 += b11;
5435	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
5436
5437	b10 += b9;
5438	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
5439
5440	b0 += b15;
5441	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
5442
5443	b2 += b11;
5444	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
5445
5446	b6 += b13;
5447	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
5448
5449	b4 += b9;
5450	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
5451
5452	b14 += b1;
5453	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
5454
5455	b8 += b5;
5456	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
5457
5458	b10 += b3;
5459	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
5460
5461	b12 += b7;
5462	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
5463
5464	output[0] = b0 + k3;
5465	output[1] = b1 + k4;
5466	output[2] = b2 + k5;
5467	output[3] = b3 + k6;
5468	output[4] = b4 + k7;
5469	output[5] = b5 + k8;
5470	output[6] = b6 + k9;
5471	output[7] = b7 + k10;
5472	output[8] = b8 + k11;
5473	output[9] = b9 + k12;
5474	output[10] = b10 + k13;
5475	output[11] = b11 + k14;
5476	output[12] = b12 + k15;
5477	output[13] = b13 + k16 + t2;
5478	output[14] = b14 + k0 + t0;
5479	output[15] = b15 + k1 + 20;
5480}
5481
5482void threefish_decrypt_1024(struct threefish_key *key_ctx, u64 *input,
5483			    u64 *output)
5484{
5485	u64 b0 = input[0], b1 = input[1],
5486	    b2 = input[2], b3 = input[3],
5487	    b4 = input[4], b5 = input[5],
5488	    b6 = input[6], b7 = input[7],
5489	    b8 = input[8], b9 = input[9],
5490	    b10 = input[10], b11 = input[11],
5491	    b12 = input[12], b13 = input[13],
5492	    b14 = input[14], b15 = input[15];
5493	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
5494	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
5495	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
5496	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
5497	    k8 = key_ctx->key[8], k9 = key_ctx->key[9],
5498	    k10 = key_ctx->key[10], k11 = key_ctx->key[11],
5499	    k12 = key_ctx->key[12], k13 = key_ctx->key[13],
5500	    k14 = key_ctx->key[14], k15 = key_ctx->key[15],
5501	    k16 = key_ctx->key[16];
5502	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
5503	    t2 = key_ctx->tweak[2];
5504	u64 tmp;
5505
5506	b0 -= k3;
5507	b1 -= k4;
5508	b2 -= k5;
5509	b3 -= k6;
5510	b4 -= k7;
5511	b5 -= k8;
5512	b6 -= k9;
5513	b7 -= k10;
5514	b8 -= k11;
5515	b9 -= k12;
5516	b10 -= k13;
5517	b11 -= k14;
5518	b12 -= k15;
5519	b13 -= k16 + t2;
5520	b14 -= k0 + t0;
5521	b15 -= k1 + 20;
5522	tmp = b7 ^ b12;
5523	b7 = (tmp >> 20) | (tmp << (64 - 20));
5524	b12 -= b7;
5525
5526	tmp = b3 ^ b10;
5527	b3 = (tmp >> 37) | (tmp << (64 - 37));
5528	b10 -= b3;
5529
5530	tmp = b5 ^ b8;
5531	b5 = (tmp >> 31) | (tmp << (64 - 31));
5532	b8 -= b5;
5533
5534	tmp = b1 ^ b14;
5535	b1 = (tmp >> 23) | (tmp << (64 - 23));
5536	b14 -= b1;
5537
5538	tmp = b9 ^ b4;
5539	b9 = (tmp >> 52) | (tmp << (64 - 52));
5540	b4 -= b9;
5541
5542	tmp = b13 ^ b6;
5543	b13 = (tmp >> 35) | (tmp << (64 - 35));
5544	b6 -= b13;
5545
5546	tmp = b11 ^ b2;
5547	b11 = (tmp >> 48) | (tmp << (64 - 48));
5548	b2 -= b11;
5549
5550	tmp = b15 ^ b0;
5551	b15 = (tmp >> 9) | (tmp << (64 - 9));
5552	b0 -= b15;
5553
5554	tmp = b9 ^ b10;
5555	b9 = (tmp >> 25) | (tmp << (64 - 25));
5556	b10 -= b9;
5557
5558	tmp = b11 ^ b8;
5559	b11 = (tmp >> 44) | (tmp << (64 - 44));
5560	b8 -= b11;
5561
5562	tmp = b13 ^ b14;
5563	b13 = (tmp >> 42) | (tmp << (64 - 42));
5564	b14 -= b13;
5565
5566	tmp = b15 ^ b12;
5567	b15 = (tmp >> 19) | (tmp << (64 - 19));
5568	b12 -= b15;
5569
5570	tmp = b1 ^ b6;
5571	b1 = (tmp >> 46) | (tmp << (64 - 46));
5572	b6 -= b1;
5573
5574	tmp = b3 ^ b4;
5575	b3 = (tmp >> 47) | (tmp << (64 - 47));
5576	b4 -= b3;
5577
5578	tmp = b5 ^ b2;
5579	b5 = (tmp >> 44) | (tmp << (64 - 44));
5580	b2 -= b5;
5581
5582	tmp = b7 ^ b0;
5583	b7 = (tmp >> 31) | (tmp << (64 - 31));
5584	b0 -= b7;
5585
5586	tmp = b1 ^ b8;
5587	b1 = (tmp >> 41) | (tmp << (64 - 41));
5588	b8 -= b1;
5589
5590	tmp = b5 ^ b14;
5591	b5 = (tmp >> 42) | (tmp << (64 - 42));
5592	b14 -= b5;
5593
5594	tmp = b3 ^ b12;
5595	b3 = (tmp >> 53) | (tmp << (64 - 53));
5596	b12 -= b3;
5597
5598	tmp = b7 ^ b10;
5599	b7 = (tmp >> 4) | (tmp << (64 - 4));
5600	b10 -= b7;
5601
5602	tmp = b15 ^ b4;
5603	b15 = (tmp >> 51) | (tmp << (64 - 51));
5604	b4 -= b15;
5605
5606	tmp = b11 ^ b6;
5607	b11 = (tmp >> 56) | (tmp << (64 - 56));
5608	b6 -= b11;
5609
5610	tmp = b13 ^ b2;
5611	b13 = (tmp >> 34) | (tmp << (64 - 34));
5612	b2 -= b13;
5613
5614	tmp = b9 ^ b0;
5615	b9 = (tmp >> 16) | (tmp << (64 - 16));
5616	b0 -= b9;
5617
5618	tmp = b15 ^ b14;
5619	b15 = (tmp >> 30) | (tmp << (64 - 30));
5620	b14 -= b15 + k16 + t2;
5621	b15 -= k0 + 19;
5622
5623	tmp = b13 ^ b12;
5624	b13 = (tmp >> 44) | (tmp << (64 - 44));
5625	b12 -= b13 + k14;
5626	b13 -= k15 + t1;
5627
5628	tmp = b11 ^ b10;
5629	b11 = (tmp >> 47) | (tmp << (64 - 47));
5630	b10 -= b11 + k12;
5631	b11 -= k13;
5632
5633	tmp = b9 ^ b8;
5634	b9 = (tmp >> 12) | (tmp << (64 - 12));
5635	b8 -= b9 + k10;
5636	b9 -= k11;
5637
5638	tmp = b7 ^ b6;
5639	b7 = (tmp >> 31) | (tmp << (64 - 31));
5640	b6 -= b7 + k8;
5641	b7 -= k9;
5642
5643	tmp = b5 ^ b4;
5644	b5 = (tmp >> 37) | (tmp << (64 - 37));
5645	b4 -= b5 + k6;
5646	b5 -= k7;
5647
5648	tmp = b3 ^ b2;
5649	b3 = (tmp >> 9) | (tmp << (64 - 9));
5650	b2 -= b3 + k4;
5651	b3 -= k5;
5652
5653	tmp = b1 ^ b0;
5654	b1 = (tmp >> 41) | (tmp << (64 - 41));
5655	b0 -= b1 + k2;
5656	b1 -= k3;
5657
5658	tmp = b7 ^ b12;
5659	b7 = (tmp >> 25) | (tmp << (64 - 25));
5660	b12 -= b7;
5661
5662	tmp = b3 ^ b10;
5663	b3 = (tmp >> 16) | (tmp << (64 - 16));
5664	b10 -= b3;
5665
5666	tmp = b5 ^ b8;
5667	b5 = (tmp >> 28) | (tmp << (64 - 28));
5668	b8 -= b5;
5669
5670	tmp = b1 ^ b14;
5671	b1 = (tmp >> 47) | (tmp << (64 - 47));
5672	b14 -= b1;
5673
5674	tmp = b9 ^ b4;
5675	b9 = (tmp >> 41) | (tmp << (64 - 41));
5676	b4 -= b9;
5677
5678	tmp = b13 ^ b6;
5679	b13 = (tmp >> 48) | (tmp << (64 - 48));
5680	b6 -= b13;
5681
5682	tmp = b11 ^ b2;
5683	b11 = (tmp >> 20) | (tmp << (64 - 20));
5684	b2 -= b11;
5685
5686	tmp = b15 ^ b0;
5687	b15 = (tmp >> 5) | (tmp << (64 - 5));
5688	b0 -= b15;
5689
5690	tmp = b9 ^ b10;
5691	b9 = (tmp >> 17) | (tmp << (64 - 17));
5692	b10 -= b9;
5693
5694	tmp = b11 ^ b8;
5695	b11 = (tmp >> 59) | (tmp << (64 - 59));
5696	b8 -= b11;
5697
5698	tmp = b13 ^ b14;
5699	b13 = (tmp >> 41) | (tmp << (64 - 41));
5700	b14 -= b13;
5701
5702	tmp = b15 ^ b12;
5703	b15 = (tmp >> 34) | (tmp << (64 - 34));
5704	b12 -= b15;
5705
5706	tmp = b1 ^ b6;
5707	b1 = (tmp >> 13) | (tmp << (64 - 13));
5708	b6 -= b1;
5709
5710	tmp = b3 ^ b4;
5711	b3 = (tmp >> 51) | (tmp << (64 - 51));
5712	b4 -= b3;
5713
5714	tmp = b5 ^ b2;
5715	b5 = (tmp >> 4) | (tmp << (64 - 4));
5716	b2 -= b5;
5717
5718	tmp = b7 ^ b0;
5719	b7 = (tmp >> 33) | (tmp << (64 - 33));
5720	b0 -= b7;
5721
5722	tmp = b1 ^ b8;
5723	b1 = (tmp >> 52) | (tmp << (64 - 52));
5724	b8 -= b1;
5725
5726	tmp = b5 ^ b14;
5727	b5 = (tmp >> 23) | (tmp << (64 - 23));
5728	b14 -= b5;
5729
5730	tmp = b3 ^ b12;
5731	b3 = (tmp >> 18) | (tmp << (64 - 18));
5732	b12 -= b3;
5733
5734	tmp = b7 ^ b10;
5735	b7 = (tmp >> 49) | (tmp << (64 - 49));
5736	b10 -= b7;
5737
5738	tmp = b15 ^ b4;
5739	b15 = (tmp >> 55) | (tmp << (64 - 55));
5740	b4 -= b15;
5741
5742	tmp = b11 ^ b6;
5743	b11 = (tmp >> 10) | (tmp << (64 - 10));
5744	b6 -= b11;
5745
5746	tmp = b13 ^ b2;
5747	b13 = (tmp >> 19) | (tmp << (64 - 19));
5748	b2 -= b13;
5749
5750	tmp = b9 ^ b0;
5751	b9 = (tmp >> 38) | (tmp << (64 - 38));
5752	b0 -= b9;
5753
5754	tmp = b15 ^ b14;
5755	b15 = (tmp >> 37) | (tmp << (64 - 37));
5756	b14 -= b15 + k15 + t1;
5757	b15 -= k16 + 18;
5758
5759	tmp = b13 ^ b12;
5760	b13 = (tmp >> 22) | (tmp << (64 - 22));
5761	b12 -= b13 + k13;
5762	b13 -= k14 + t0;
5763
5764	tmp = b11 ^ b10;
5765	b11 = (tmp >> 17) | (tmp << (64 - 17));
5766	b10 -= b11 + k11;
5767	b11 -= k12;
5768
5769	tmp = b9 ^ b8;
5770	b9 = (tmp >> 8) | (tmp << (64 - 8));
5771	b8 -= b9 + k9;
5772	b9 -= k10;
5773
5774	tmp = b7 ^ b6;
5775	b7 = (tmp >> 47) | (tmp << (64 - 47));
5776	b6 -= b7 + k7;
5777	b7 -= k8;
5778
5779	tmp = b5 ^ b4;
5780	b5 = (tmp >> 8) | (tmp << (64 - 8));
5781	b4 -= b5 + k5;
5782	b5 -= k6;
5783
5784	tmp = b3 ^ b2;
5785	b3 = (tmp >> 13) | (tmp << (64 - 13));
5786	b2 -= b3 + k3;
5787	b3 -= k4;
5788
5789	tmp = b1 ^ b0;
5790	b1 = (tmp >> 24) | (tmp << (64 - 24));
5791	b0 -= b1 + k1;
5792	b1 -= k2;
5793
5794	tmp = b7 ^ b12;
5795	b7 = (tmp >> 20) | (tmp << (64 - 20));
5796	b12 -= b7;
5797
5798	tmp = b3 ^ b10;
5799	b3 = (tmp >> 37) | (tmp << (64 - 37));
5800	b10 -= b3;
5801
5802	tmp = b5 ^ b8;
5803	b5 = (tmp >> 31) | (tmp << (64 - 31));
5804	b8 -= b5;
5805
5806	tmp = b1 ^ b14;
5807	b1 = (tmp >> 23) | (tmp << (64 - 23));
5808	b14 -= b1;
5809
5810	tmp = b9 ^ b4;
5811	b9 = (tmp >> 52) | (tmp << (64 - 52));
5812	b4 -= b9;
5813
5814	tmp = b13 ^ b6;
5815	b13 = (tmp >> 35) | (tmp << (64 - 35));
5816	b6 -= b13;
5817
5818	tmp = b11 ^ b2;
5819	b11 = (tmp >> 48) | (tmp << (64 - 48));
5820	b2 -= b11;
5821
5822	tmp = b15 ^ b0;
5823	b15 = (tmp >> 9) | (tmp << (64 - 9));
5824	b0 -= b15;
5825
5826	tmp = b9 ^ b10;
5827	b9 = (tmp >> 25) | (tmp << (64 - 25));
5828	b10 -= b9;
5829
5830	tmp = b11 ^ b8;
5831	b11 = (tmp >> 44) | (tmp << (64 - 44));
5832	b8 -= b11;
5833
5834	tmp = b13 ^ b14;
5835	b13 = (tmp >> 42) | (tmp << (64 - 42));
5836	b14 -= b13;
5837
5838	tmp = b15 ^ b12;
5839	b15 = (tmp >> 19) | (tmp << (64 - 19));
5840	b12 -= b15;
5841
5842	tmp = b1 ^ b6;
5843	b1 = (tmp >> 46) | (tmp << (64 - 46));
5844	b6 -= b1;
5845
5846	tmp = b3 ^ b4;
5847	b3 = (tmp >> 47) | (tmp << (64 - 47));
5848	b4 -= b3;
5849
5850	tmp = b5 ^ b2;
5851	b5 = (tmp >> 44) | (tmp << (64 - 44));
5852	b2 -= b5;
5853
5854	tmp = b7 ^ b0;
5855	b7 = (tmp >> 31) | (tmp << (64 - 31));
5856	b0 -= b7;
5857
5858	tmp = b1 ^ b8;
5859	b1 = (tmp >> 41) | (tmp << (64 - 41));
5860	b8 -= b1;
5861
5862	tmp = b5 ^ b14;
5863	b5 = (tmp >> 42) | (tmp << (64 - 42));
5864	b14 -= b5;
5865
5866	tmp = b3 ^ b12;
5867	b3 = (tmp >> 53) | (tmp << (64 - 53));
5868	b12 -= b3;
5869
5870	tmp = b7 ^ b10;
5871	b7 = (tmp >> 4) | (tmp << (64 - 4));
5872	b10 -= b7;
5873
5874	tmp = b15 ^ b4;
5875	b15 = (tmp >> 51) | (tmp << (64 - 51));
5876	b4 -= b15;
5877
5878	tmp = b11 ^ b6;
5879	b11 = (tmp >> 56) | (tmp << (64 - 56));
5880	b6 -= b11;
5881
5882	tmp = b13 ^ b2;
5883	b13 = (tmp >> 34) | (tmp << (64 - 34));
5884	b2 -= b13;
5885
5886	tmp = b9 ^ b0;
5887	b9 = (tmp >> 16) | (tmp << (64 - 16));
5888	b0 -= b9;
5889
5890	tmp = b15 ^ b14;
5891	b15 = (tmp >> 30) | (tmp << (64 - 30));
5892	b14 -= b15 + k14 + t0;
5893	b15 -= k15 + 17;
5894
5895	tmp = b13 ^ b12;
5896	b13 = (tmp >> 44) | (tmp << (64 - 44));
5897	b12 -= b13 + k12;
5898	b13 -= k13 + t2;
5899
5900	tmp = b11 ^ b10;
5901	b11 = (tmp >> 47) | (tmp << (64 - 47));
5902	b10 -= b11 + k10;
5903	b11 -= k11;
5904
5905	tmp = b9 ^ b8;
5906	b9 = (tmp >> 12) | (tmp << (64 - 12));
5907	b8 -= b9 + k8;
5908	b9 -= k9;
5909
5910	tmp = b7 ^ b6;
5911	b7 = (tmp >> 31) | (tmp << (64 - 31));
5912	b6 -= b7 + k6;
5913	b7 -= k7;
5914
5915	tmp = b5 ^ b4;
5916	b5 = (tmp >> 37) | (tmp << (64 - 37));
5917	b4 -= b5 + k4;
5918	b5 -= k5;
5919
5920	tmp = b3 ^ b2;
5921	b3 = (tmp >> 9) | (tmp << (64 - 9));
5922	b2 -= b3 + k2;
5923	b3 -= k3;
5924
5925	tmp = b1 ^ b0;
5926	b1 = (tmp >> 41) | (tmp << (64 - 41));
5927	b0 -= b1 + k0;
5928	b1 -= k1;
5929
5930	tmp = b7 ^ b12;
5931	b7 = (tmp >> 25) | (tmp << (64 - 25));
5932	b12 -= b7;
5933
5934	tmp = b3 ^ b10;
5935	b3 = (tmp >> 16) | (tmp << (64 - 16));
5936	b10 -= b3;
5937
5938	tmp = b5 ^ b8;
5939	b5 = (tmp >> 28) | (tmp << (64 - 28));
5940	b8 -= b5;
5941
5942	tmp = b1 ^ b14;
5943	b1 = (tmp >> 47) | (tmp << (64 - 47));
5944	b14 -= b1;
5945
5946	tmp = b9 ^ b4;
5947	b9 = (tmp >> 41) | (tmp << (64 - 41));
5948	b4 -= b9;
5949
5950	tmp = b13 ^ b6;
5951	b13 = (tmp >> 48) | (tmp << (64 - 48));
5952	b6 -= b13;
5953
5954	tmp = b11 ^ b2;
5955	b11 = (tmp >> 20) | (tmp << (64 - 20));
5956	b2 -= b11;
5957
5958	tmp = b15 ^ b0;
5959	b15 = (tmp >> 5) | (tmp << (64 - 5));
5960	b0 -= b15;
5961
5962	tmp = b9 ^ b10;
5963	b9 = (tmp >> 17) | (tmp << (64 - 17));
5964	b10 -= b9;
5965
5966	tmp = b11 ^ b8;
5967	b11 = (tmp >> 59) | (tmp << (64 - 59));
5968	b8 -= b11;
5969
5970	tmp = b13 ^ b14;
5971	b13 = (tmp >> 41) | (tmp << (64 - 41));
5972	b14 -= b13;
5973
5974	tmp = b15 ^ b12;
5975	b15 = (tmp >> 34) | (tmp << (64 - 34));
5976	b12 -= b15;
5977
5978	tmp = b1 ^ b6;
5979	b1 = (tmp >> 13) | (tmp << (64 - 13));
5980	b6 -= b1;
5981
5982	tmp = b3 ^ b4;
5983	b3 = (tmp >> 51) | (tmp << (64 - 51));
5984	b4 -= b3;
5985
5986	tmp = b5 ^ b2;
5987	b5 = (tmp >> 4) | (tmp << (64 - 4));
5988	b2 -= b5;
5989
5990	tmp = b7 ^ b0;
5991	b7 = (tmp >> 33) | (tmp << (64 - 33));
5992	b0 -= b7;
5993
5994	tmp = b1 ^ b8;
5995	b1 = (tmp >> 52) | (tmp << (64 - 52));
5996	b8 -= b1;
5997
5998	tmp = b5 ^ b14;
5999	b5 = (tmp >> 23) | (tmp << (64 - 23));
6000	b14 -= b5;
6001
6002	tmp = b3 ^ b12;
6003	b3 = (tmp >> 18) | (tmp << (64 - 18));
6004	b12 -= b3;
6005
6006	tmp = b7 ^ b10;
6007	b7 = (tmp >> 49) | (tmp << (64 - 49));
6008	b10 -= b7;
6009
6010	tmp = b15 ^ b4;
6011	b15 = (tmp >> 55) | (tmp << (64 - 55));
6012	b4 -= b15;
6013
6014	tmp = b11 ^ b6;
6015	b11 = (tmp >> 10) | (tmp << (64 - 10));
6016	b6 -= b11;
6017
6018	tmp = b13 ^ b2;
6019	b13 = (tmp >> 19) | (tmp << (64 - 19));
6020	b2 -= b13;
6021
6022	tmp = b9 ^ b0;
6023	b9 = (tmp >> 38) | (tmp << (64 - 38));
6024	b0 -= b9;
6025
6026	tmp = b15 ^ b14;
6027	b15 = (tmp >> 37) | (tmp << (64 - 37));
6028	b14 -= b15 + k13 + t2;
6029	b15 -= k14 + 16;
6030
6031	tmp = b13 ^ b12;
6032	b13 = (tmp >> 22) | (tmp << (64 - 22));
6033	b12 -= b13 + k11;
6034	b13 -= k12 + t1;
6035
6036	tmp = b11 ^ b10;
6037	b11 = (tmp >> 17) | (tmp << (64 - 17));
6038	b10 -= b11 + k9;
6039	b11 -= k10;
6040
6041	tmp = b9 ^ b8;
6042	b9 = (tmp >> 8) | (tmp << (64 - 8));
6043	b8 -= b9 + k7;
6044	b9 -= k8;
6045
6046	tmp = b7 ^ b6;
6047	b7 = (tmp >> 47) | (tmp << (64 - 47));
6048	b6 -= b7 + k5;
6049	b7 -= k6;
6050
6051	tmp = b5 ^ b4;
6052	b5 = (tmp >> 8) | (tmp << (64 - 8));
6053	b4 -= b5 + k3;
6054	b5 -= k4;
6055
6056	tmp = b3 ^ b2;
6057	b3 = (tmp >> 13) | (tmp << (64 - 13));
6058	b2 -= b3 + k1;
6059	b3 -= k2;
6060
6061	tmp = b1 ^ b0;
6062	b1 = (tmp >> 24) | (tmp << (64 - 24));
6063	b0 -= b1 + k16;
6064	b1 -= k0;
6065
6066	tmp = b7 ^ b12;
6067	b7 = (tmp >> 20) | (tmp << (64 - 20));
6068	b12 -= b7;
6069
6070	tmp = b3 ^ b10;
6071	b3 = (tmp >> 37) | (tmp << (64 - 37));
6072	b10 -= b3;
6073
6074	tmp = b5 ^ b8;
6075	b5 = (tmp >> 31) | (tmp << (64 - 31));
6076	b8 -= b5;
6077
6078	tmp = b1 ^ b14;
6079	b1 = (tmp >> 23) | (tmp << (64 - 23));
6080	b14 -= b1;
6081
6082	tmp = b9 ^ b4;
6083	b9 = (tmp >> 52) | (tmp << (64 - 52));
6084	b4 -= b9;
6085
6086	tmp = b13 ^ b6;
6087	b13 = (tmp >> 35) | (tmp << (64 - 35));
6088	b6 -= b13;
6089
6090	tmp = b11 ^ b2;
6091	b11 = (tmp >> 48) | (tmp << (64 - 48));
6092	b2 -= b11;
6093
6094	tmp = b15 ^ b0;
6095	b15 = (tmp >> 9) | (tmp << (64 - 9));
6096	b0 -= b15;
6097
6098	tmp = b9 ^ b10;
6099	b9 = (tmp >> 25) | (tmp << (64 - 25));
6100	b10 -= b9;
6101
6102	tmp = b11 ^ b8;
6103	b11 = (tmp >> 44) | (tmp << (64 - 44));
6104	b8 -= b11;
6105
6106	tmp = b13 ^ b14;
6107	b13 = (tmp >> 42) | (tmp << (64 - 42));
6108	b14 -= b13;
6109
6110	tmp = b15 ^ b12;
6111	b15 = (tmp >> 19) | (tmp << (64 - 19));
6112	b12 -= b15;
6113
6114	tmp = b1 ^ b6;
6115	b1 = (tmp >> 46) | (tmp << (64 - 46));
6116	b6 -= b1;
6117
6118	tmp = b3 ^ b4;
6119	b3 = (tmp >> 47) | (tmp << (64 - 47));
6120	b4 -= b3;
6121
6122	tmp = b5 ^ b2;
6123	b5 = (tmp >> 44) | (tmp << (64 - 44));
6124	b2 -= b5;
6125
6126	tmp = b7 ^ b0;
6127	b7 = (tmp >> 31) | (tmp << (64 - 31));
6128	b0 -= b7;
6129
6130	tmp = b1 ^ b8;
6131	b1 = (tmp >> 41) | (tmp << (64 - 41));
6132	b8 -= b1;
6133
6134	tmp = b5 ^ b14;
6135	b5 = (tmp >> 42) | (tmp << (64 - 42));
6136	b14 -= b5;
6137
6138	tmp = b3 ^ b12;
6139	b3 = (tmp >> 53) | (tmp << (64 - 53));
6140	b12 -= b3;
6141
6142	tmp = b7 ^ b10;
6143	b7 = (tmp >> 4) | (tmp << (64 - 4));
6144	b10 -= b7;
6145
6146	tmp = b15 ^ b4;
6147	b15 = (tmp >> 51) | (tmp << (64 - 51));
6148	b4 -= b15;
6149
6150	tmp = b11 ^ b6;
6151	b11 = (tmp >> 56) | (tmp << (64 - 56));
6152	b6 -= b11;
6153
6154	tmp = b13 ^ b2;
6155	b13 = (tmp >> 34) | (tmp << (64 - 34));
6156	b2 -= b13;
6157
6158	tmp = b9 ^ b0;
6159	b9 = (tmp >> 16) | (tmp << (64 - 16));
6160	b0 -= b9;
6161
6162	tmp = b15 ^ b14;
6163	b15 = (tmp >> 30) | (tmp << (64 - 30));
6164	b14 -= b15 + k12 + t1;
6165	b15 -= k13 + 15;
6166
6167	tmp = b13 ^ b12;
6168	b13 = (tmp >> 44) | (tmp << (64 - 44));
6169	b12 -= b13 + k10;
6170	b13 -= k11 + t0;
6171
6172	tmp = b11 ^ b10;
6173	b11 = (tmp >> 47) | (tmp << (64 - 47));
6174	b10 -= b11 + k8;
6175	b11 -= k9;
6176
6177	tmp = b9 ^ b8;
6178	b9 = (tmp >> 12) | (tmp << (64 - 12));
6179	b8 -= b9 + k6;
6180	b9 -= k7;
6181
6182	tmp = b7 ^ b6;
6183	b7 = (tmp >> 31) | (tmp << (64 - 31));
6184	b6 -= b7 + k4;
6185	b7 -= k5;
6186
6187	tmp = b5 ^ b4;
6188	b5 = (tmp >> 37) | (tmp << (64 - 37));
6189	b4 -= b5 + k2;
6190	b5 -= k3;
6191
6192	tmp = b3 ^ b2;
6193	b3 = (tmp >> 9) | (tmp << (64 - 9));
6194	b2 -= b3 + k0;
6195	b3 -= k1;
6196
6197	tmp = b1 ^ b0;
6198	b1 = (tmp >> 41) | (tmp << (64 - 41));
6199	b0 -= b1 + k15;
6200	b1 -= k16;
6201
6202	tmp = b7 ^ b12;
6203	b7 = (tmp >> 25) | (tmp << (64 - 25));
6204	b12 -= b7;
6205
6206	tmp = b3 ^ b10;
6207	b3 = (tmp >> 16) | (tmp << (64 - 16));
6208	b10 -= b3;
6209
6210	tmp = b5 ^ b8;
6211	b5 = (tmp >> 28) | (tmp << (64 - 28));
6212	b8 -= b5;
6213
6214	tmp = b1 ^ b14;
6215	b1 = (tmp >> 47) | (tmp << (64 - 47));
6216	b14 -= b1;
6217
6218	tmp = b9 ^ b4;
6219	b9 = (tmp >> 41) | (tmp << (64 - 41));
6220	b4 -= b9;
6221
6222	tmp = b13 ^ b6;
6223	b13 = (tmp >> 48) | (tmp << (64 - 48));
6224	b6 -= b13;
6225
6226	tmp = b11 ^ b2;
6227	b11 = (tmp >> 20) | (tmp << (64 - 20));
6228	b2 -= b11;
6229
6230	tmp = b15 ^ b0;
6231	b15 = (tmp >> 5) | (tmp << (64 - 5));
6232	b0 -= b15;
6233
6234	tmp = b9 ^ b10;
6235	b9 = (tmp >> 17) | (tmp << (64 - 17));
6236	b10 -= b9;
6237
6238	tmp = b11 ^ b8;
6239	b11 = (tmp >> 59) | (tmp << (64 - 59));
6240	b8 -= b11;
6241
6242	tmp = b13 ^ b14;
6243	b13 = (tmp >> 41) | (tmp << (64 - 41));
6244	b14 -= b13;
6245
6246	tmp = b15 ^ b12;
6247	b15 = (tmp >> 34) | (tmp << (64 - 34));
6248	b12 -= b15;
6249
6250	tmp = b1 ^ b6;
6251	b1 = (tmp >> 13) | (tmp << (64 - 13));
6252	b6 -= b1;
6253
6254	tmp = b3 ^ b4;
6255	b3 = (tmp >> 51) | (tmp << (64 - 51));
6256	b4 -= b3;
6257
6258	tmp = b5 ^ b2;
6259	b5 = (tmp >> 4) | (tmp << (64 - 4));
6260	b2 -= b5;
6261
6262	tmp = b7 ^ b0;
6263	b7 = (tmp >> 33) | (tmp << (64 - 33));
6264	b0 -= b7;
6265
6266	tmp = b1 ^ b8;
6267	b1 = (tmp >> 52) | (tmp << (64 - 52));
6268	b8 -= b1;
6269
6270	tmp = b5 ^ b14;
6271	b5 = (tmp >> 23) | (tmp << (64 - 23));
6272	b14 -= b5;
6273
6274	tmp = b3 ^ b12;
6275	b3 = (tmp >> 18) | (tmp << (64 - 18));
6276	b12 -= b3;
6277
6278	tmp = b7 ^ b10;
6279	b7 = (tmp >> 49) | (tmp << (64 - 49));
6280	b10 -= b7;
6281
6282	tmp = b15 ^ b4;
6283	b15 = (tmp >> 55) | (tmp << (64 - 55));
6284	b4 -= b15;
6285
6286	tmp = b11 ^ b6;
6287	b11 = (tmp >> 10) | (tmp << (64 - 10));
6288	b6 -= b11;
6289
6290	tmp = b13 ^ b2;
6291	b13 = (tmp >> 19) | (tmp << (64 - 19));
6292	b2 -= b13;
6293
6294	tmp = b9 ^ b0;
6295	b9 = (tmp >> 38) | (tmp << (64 - 38));
6296	b0 -= b9;
6297
6298	tmp = b15 ^ b14;
6299	b15 = (tmp >> 37) | (tmp << (64 - 37));
6300	b14 -= b15 + k11 + t0;
6301	b15 -= k12 + 14;
6302
6303	tmp = b13 ^ b12;
6304	b13 = (tmp >> 22) | (tmp << (64 - 22));
6305	b12 -= b13 + k9;
6306	b13 -= k10 + t2;
6307
6308	tmp = b11 ^ b10;
6309	b11 = (tmp >> 17) | (tmp << (64 - 17));
6310	b10 -= b11 + k7;
6311	b11 -= k8;
6312
6313	tmp = b9 ^ b8;
6314	b9 = (tmp >> 8) | (tmp << (64 - 8));
6315	b8 -= b9 + k5;
6316	b9 -= k6;
6317
6318	tmp = b7 ^ b6;
6319	b7 = (tmp >> 47) | (tmp << (64 - 47));
6320	b6 -= b7 + k3;
6321	b7 -= k4;
6322
6323	tmp = b5 ^ b4;
6324	b5 = (tmp >> 8) | (tmp << (64 - 8));
6325	b4 -= b5 + k1;
6326	b5 -= k2;
6327
6328	tmp = b3 ^ b2;
6329	b3 = (tmp >> 13) | (tmp << (64 - 13));
6330	b2 -= b3 + k16;
6331	b3 -= k0;
6332
6333	tmp = b1 ^ b0;
6334	b1 = (tmp >> 24) | (tmp << (64 - 24));
6335	b0 -= b1 + k14;
6336	b1 -= k15;
6337
6338	tmp = b7 ^ b12;
6339	b7 = (tmp >> 20) | (tmp << (64 - 20));
6340	b12 -= b7;
6341
6342	tmp = b3 ^ b10;
6343	b3 = (tmp >> 37) | (tmp << (64 - 37));
6344	b10 -= b3;
6345
6346	tmp = b5 ^ b8;
6347	b5 = (tmp >> 31) | (tmp << (64 - 31));
6348	b8 -= b5;
6349
6350	tmp = b1 ^ b14;
6351	b1 = (tmp >> 23) | (tmp << (64 - 23));
6352	b14 -= b1;
6353
6354	tmp = b9 ^ b4;
6355	b9 = (tmp >> 52) | (tmp << (64 - 52));
6356	b4 -= b9;
6357
6358	tmp = b13 ^ b6;
6359	b13 = (tmp >> 35) | (tmp << (64 - 35));
6360	b6 -= b13;
6361
6362	tmp = b11 ^ b2;
6363	b11 = (tmp >> 48) | (tmp << (64 - 48));
6364	b2 -= b11;
6365
6366	tmp = b15 ^ b0;
6367	b15 = (tmp >> 9) | (tmp << (64 - 9));
6368	b0 -= b15;
6369
6370	tmp = b9 ^ b10;
6371	b9 = (tmp >> 25) | (tmp << (64 - 25));
6372	b10 -= b9;
6373
6374	tmp = b11 ^ b8;
6375	b11 = (tmp >> 44) | (tmp << (64 - 44));
6376	b8 -= b11;
6377
6378	tmp = b13 ^ b14;
6379	b13 = (tmp >> 42) | (tmp << (64 - 42));
6380	b14 -= b13;
6381
6382	tmp = b15 ^ b12;
6383	b15 = (tmp >> 19) | (tmp << (64 - 19));
6384	b12 -= b15;
6385
6386	tmp = b1 ^ b6;
6387	b1 = (tmp >> 46) | (tmp << (64 - 46));
6388	b6 -= b1;
6389
6390	tmp = b3 ^ b4;
6391	b3 = (tmp >> 47) | (tmp << (64 - 47));
6392	b4 -= b3;
6393
6394	tmp = b5 ^ b2;
6395	b5 = (tmp >> 44) | (tmp << (64 - 44));
6396	b2 -= b5;
6397
6398	tmp = b7 ^ b0;
6399	b7 = (tmp >> 31) | (tmp << (64 - 31));
6400	b0 -= b7;
6401
6402	tmp = b1 ^ b8;
6403	b1 = (tmp >> 41) | (tmp << (64 - 41));
6404	b8 -= b1;
6405
6406	tmp = b5 ^ b14;
6407	b5 = (tmp >> 42) | (tmp << (64 - 42));
6408	b14 -= b5;
6409
6410	tmp = b3 ^ b12;
6411	b3 = (tmp >> 53) | (tmp << (64 - 53));
6412	b12 -= b3;
6413
6414	tmp = b7 ^ b10;
6415	b7 = (tmp >> 4) | (tmp << (64 - 4));
6416	b10 -= b7;
6417
6418	tmp = b15 ^ b4;
6419	b15 = (tmp >> 51) | (tmp << (64 - 51));
6420	b4 -= b15;
6421
6422	tmp = b11 ^ b6;
6423	b11 = (tmp >> 56) | (tmp << (64 - 56));
6424	b6 -= b11;
6425
6426	tmp = b13 ^ b2;
6427	b13 = (tmp >> 34) | (tmp << (64 - 34));
6428	b2 -= b13;
6429
6430	tmp = b9 ^ b0;
6431	b9 = (tmp >> 16) | (tmp << (64 - 16));
6432	b0 -= b9;
6433
6434	tmp = b15 ^ b14;
6435	b15 = (tmp >> 30) | (tmp << (64 - 30));
6436	b14 -= b15 + k10 + t2;
6437	b15 -= k11 + 13;
6438
6439	tmp = b13 ^ b12;
6440	b13 = (tmp >> 44) | (tmp << (64 - 44));
6441	b12 -= b13 + k8;
6442	b13 -= k9 + t1;
6443
6444	tmp = b11 ^ b10;
6445	b11 = (tmp >> 47) | (tmp << (64 - 47));
6446	b10 -= b11 + k6;
6447	b11 -= k7;
6448
6449	tmp = b9 ^ b8;
6450	b9 = (tmp >> 12) | (tmp << (64 - 12));
6451	b8 -= b9 + k4;
6452	b9 -= k5;
6453
6454	tmp = b7 ^ b6;
6455	b7 = (tmp >> 31) | (tmp << (64 - 31));
6456	b6 -= b7 + k2;
6457	b7 -= k3;
6458
6459	tmp = b5 ^ b4;
6460	b5 = (tmp >> 37) | (tmp << (64 - 37));
6461	b4 -= b5 + k0;
6462	b5 -= k1;
6463
6464	tmp = b3 ^ b2;
6465	b3 = (tmp >> 9) | (tmp << (64 - 9));
6466	b2 -= b3 + k15;
6467	b3 -= k16;
6468
6469	tmp = b1 ^ b0;
6470	b1 = (tmp >> 41) | (tmp << (64 - 41));
6471	b0 -= b1 + k13;
6472	b1 -= k14;
6473
6474	tmp = b7 ^ b12;
6475	b7 = (tmp >> 25) | (tmp << (64 - 25));
6476	b12 -= b7;
6477
6478	tmp = b3 ^ b10;
6479	b3 = (tmp >> 16) | (tmp << (64 - 16));
6480	b10 -= b3;
6481
6482	tmp = b5 ^ b8;
6483	b5 = (tmp >> 28) | (tmp << (64 - 28));
6484	b8 -= b5;
6485
6486	tmp = b1 ^ b14;
6487	b1 = (tmp >> 47) | (tmp << (64 - 47));
6488	b14 -= b1;
6489
6490	tmp = b9 ^ b4;
6491	b9 = (tmp >> 41) | (tmp << (64 - 41));
6492	b4 -= b9;
6493
6494	tmp = b13 ^ b6;
6495	b13 = (tmp >> 48) | (tmp << (64 - 48));
6496	b6 -= b13;
6497
6498	tmp = b11 ^ b2;
6499	b11 = (tmp >> 20) | (tmp << (64 - 20));
6500	b2 -= b11;
6501
6502	tmp = b15 ^ b0;
6503	b15 = (tmp >> 5) | (tmp << (64 - 5));
6504	b0 -= b15;
6505
6506	tmp = b9 ^ b10;
6507	b9 = (tmp >> 17) | (tmp << (64 - 17));
6508	b10 -= b9;
6509
6510	tmp = b11 ^ b8;
6511	b11 = (tmp >> 59) | (tmp << (64 - 59));
6512	b8 -= b11;
6513
6514	tmp = b13 ^ b14;
6515	b13 = (tmp >> 41) | (tmp << (64 - 41));
6516	b14 -= b13;
6517
6518	tmp = b15 ^ b12;
6519	b15 = (tmp >> 34) | (tmp << (64 - 34));
6520	b12 -= b15;
6521
6522	tmp = b1 ^ b6;
6523	b1 = (tmp >> 13) | (tmp << (64 - 13));
6524	b6 -= b1;
6525
6526	tmp = b3 ^ b4;
6527	b3 = (tmp >> 51) | (tmp << (64 - 51));
6528	b4 -= b3;
6529
6530	tmp = b5 ^ b2;
6531	b5 = (tmp >> 4) | (tmp << (64 - 4));
6532	b2 -= b5;
6533
6534	tmp = b7 ^ b0;
6535	b7 = (tmp >> 33) | (tmp << (64 - 33));
6536	b0 -= b7;
6537
6538	tmp = b1 ^ b8;
6539	b1 = (tmp >> 52) | (tmp << (64 - 52));
6540	b8 -= b1;
6541
6542	tmp = b5 ^ b14;
6543	b5 = (tmp >> 23) | (tmp << (64 - 23));
6544	b14 -= b5;
6545
6546	tmp = b3 ^ b12;
6547	b3 = (tmp >> 18) | (tmp << (64 - 18));
6548	b12 -= b3;
6549
6550	tmp = b7 ^ b10;
6551	b7 = (tmp >> 49) | (tmp << (64 - 49));
6552	b10 -= b7;
6553
6554	tmp = b15 ^ b4;
6555	b15 = (tmp >> 55) | (tmp << (64 - 55));
6556	b4 -= b15;
6557
6558	tmp = b11 ^ b6;
6559	b11 = (tmp >> 10) | (tmp << (64 - 10));
6560	b6 -= b11;
6561
6562	tmp = b13 ^ b2;
6563	b13 = (tmp >> 19) | (tmp << (64 - 19));
6564	b2 -= b13;
6565
6566	tmp = b9 ^ b0;
6567	b9 = (tmp >> 38) | (tmp << (64 - 38));
6568	b0 -= b9;
6569
6570	tmp = b15 ^ b14;
6571	b15 = (tmp >> 37) | (tmp << (64 - 37));
6572	b14 -= b15 + k9 + t1;
6573	b15 -= k10 + 12;
6574
6575	tmp = b13 ^ b12;
6576	b13 = (tmp >> 22) | (tmp << (64 - 22));
6577	b12 -= b13 + k7;
6578	b13 -= k8 + t0;
6579
6580	tmp = b11 ^ b10;
6581	b11 = (tmp >> 17) | (tmp << (64 - 17));
6582	b10 -= b11 + k5;
6583	b11 -= k6;
6584
6585	tmp = b9 ^ b8;
6586	b9 = (tmp >> 8) | (tmp << (64 - 8));
6587	b8 -= b9 + k3;
6588	b9 -= k4;
6589
6590	tmp = b7 ^ b6;
6591	b7 = (tmp >> 47) | (tmp << (64 - 47));
6592	b6 -= b7 + k1;
6593	b7 -= k2;
6594
6595	tmp = b5 ^ b4;
6596	b5 = (tmp >> 8) | (tmp << (64 - 8));
6597	b4 -= b5 + k16;
6598	b5 -= k0;
6599
6600	tmp = b3 ^ b2;
6601	b3 = (tmp >> 13) | (tmp << (64 - 13));
6602	b2 -= b3 + k14;
6603	b3 -= k15;
6604
6605	tmp = b1 ^ b0;
6606	b1 = (tmp >> 24) | (tmp << (64 - 24));
6607	b0 -= b1 + k12;
6608	b1 -= k13;
6609
6610	tmp = b7 ^ b12;
6611	b7 = (tmp >> 20) | (tmp << (64 - 20));
6612	b12 -= b7;
6613
6614	tmp = b3 ^ b10;
6615	b3 = (tmp >> 37) | (tmp << (64 - 37));
6616	b10 -= b3;
6617
6618	tmp = b5 ^ b8;
6619	b5 = (tmp >> 31) | (tmp << (64 - 31));
6620	b8 -= b5;
6621
6622	tmp = b1 ^ b14;
6623	b1 = (tmp >> 23) | (tmp << (64 - 23));
6624	b14 -= b1;
6625
6626	tmp = b9 ^ b4;
6627	b9 = (tmp >> 52) | (tmp << (64 - 52));
6628	b4 -= b9;
6629
6630	tmp = b13 ^ b6;
6631	b13 = (tmp >> 35) | (tmp << (64 - 35));
6632	b6 -= b13;
6633
6634	tmp = b11 ^ b2;
6635	b11 = (tmp >> 48) | (tmp << (64 - 48));
6636	b2 -= b11;
6637
6638	tmp = b15 ^ b0;
6639	b15 = (tmp >> 9) | (tmp << (64 - 9));
6640	b0 -= b15;
6641
6642	tmp = b9 ^ b10;
6643	b9 = (tmp >> 25) | (tmp << (64 - 25));
6644	b10 -= b9;
6645
6646	tmp = b11 ^ b8;
6647	b11 = (tmp >> 44) | (tmp << (64 - 44));
6648	b8 -= b11;
6649
6650	tmp = b13 ^ b14;
6651	b13 = (tmp >> 42) | (tmp << (64 - 42));
6652	b14 -= b13;
6653
6654	tmp = b15 ^ b12;
6655	b15 = (tmp >> 19) | (tmp << (64 - 19));
6656	b12 -= b15;
6657
6658	tmp = b1 ^ b6;
6659	b1 = (tmp >> 46) | (tmp << (64 - 46));
6660	b6 -= b1;
6661
6662	tmp = b3 ^ b4;
6663	b3 = (tmp >> 47) | (tmp << (64 - 47));
6664	b4 -= b3;
6665
6666	tmp = b5 ^ b2;
6667	b5 = (tmp >> 44) | (tmp << (64 - 44));
6668	b2 -= b5;
6669
6670	tmp = b7 ^ b0;
6671	b7 = (tmp >> 31) | (tmp << (64 - 31));
6672	b0 -= b7;
6673
6674	tmp = b1 ^ b8;
6675	b1 = (tmp >> 41) | (tmp << (64 - 41));
6676	b8 -= b1;
6677
6678	tmp = b5 ^ b14;
6679	b5 = (tmp >> 42) | (tmp << (64 - 42));
6680	b14 -= b5;
6681
6682	tmp = b3 ^ b12;
6683	b3 = (tmp >> 53) | (tmp << (64 - 53));
6684	b12 -= b3;
6685
6686	tmp = b7 ^ b10;
6687	b7 = (tmp >> 4) | (tmp << (64 - 4));
6688	b10 -= b7;
6689
6690	tmp = b15 ^ b4;
6691	b15 = (tmp >> 51) | (tmp << (64 - 51));
6692	b4 -= b15;
6693
6694	tmp = b11 ^ b6;
6695	b11 = (tmp >> 56) | (tmp << (64 - 56));
6696	b6 -= b11;
6697
6698	tmp = b13 ^ b2;
6699	b13 = (tmp >> 34) | (tmp << (64 - 34));
6700	b2 -= b13;
6701
6702	tmp = b9 ^ b0;
6703	b9 = (tmp >> 16) | (tmp << (64 - 16));
6704	b0 -= b9;
6705
6706	tmp = b15 ^ b14;
6707	b15 = (tmp >> 30) | (tmp << (64 - 30));
6708	b14 -= b15 + k8 + t0;
6709	b15 -= k9 + 11;
6710
6711	tmp = b13 ^ b12;
6712	b13 = (tmp >> 44) | (tmp << (64 - 44));
6713	b12 -= b13 + k6;
6714	b13 -= k7 + t2;
6715
6716	tmp = b11 ^ b10;
6717	b11 = (tmp >> 47) | (tmp << (64 - 47));
6718	b10 -= b11 + k4;
6719	b11 -= k5;
6720
6721	tmp = b9 ^ b8;
6722	b9 = (tmp >> 12) | (tmp << (64 - 12));
6723	b8 -= b9 + k2;
6724	b9 -= k3;
6725
6726	tmp = b7 ^ b6;
6727	b7 = (tmp >> 31) | (tmp << (64 - 31));
6728	b6 -= b7 + k0;
6729	b7 -= k1;
6730
6731	tmp = b5 ^ b4;
6732	b5 = (tmp >> 37) | (tmp << (64 - 37));
6733	b4 -= b5 + k15;
6734	b5 -= k16;
6735
6736	tmp = b3 ^ b2;
6737	b3 = (tmp >> 9) | (tmp << (64 - 9));
6738	b2 -= b3 + k13;
6739	b3 -= k14;
6740
6741	tmp = b1 ^ b0;
6742	b1 = (tmp >> 41) | (tmp << (64 - 41));
6743	b0 -= b1 + k11;
6744	b1 -= k12;
6745
6746	tmp = b7 ^ b12;
6747	b7 = (tmp >> 25) | (tmp << (64 - 25));
6748	b12 -= b7;
6749
6750	tmp = b3 ^ b10;
6751	b3 = (tmp >> 16) | (tmp << (64 - 16));
6752	b10 -= b3;
6753
6754	tmp = b5 ^ b8;
6755	b5 = (tmp >> 28) | (tmp << (64 - 28));
6756	b8 -= b5;
6757
6758	tmp = b1 ^ b14;
6759	b1 = (tmp >> 47) | (tmp << (64 - 47));
6760	b14 -= b1;
6761
6762	tmp = b9 ^ b4;
6763	b9 = (tmp >> 41) | (tmp << (64 - 41));
6764	b4 -= b9;
6765
6766	tmp = b13 ^ b6;
6767	b13 = (tmp >> 48) | (tmp << (64 - 48));
6768	b6 -= b13;
6769
6770	tmp = b11 ^ b2;
6771	b11 = (tmp >> 20) | (tmp << (64 - 20));
6772	b2 -= b11;
6773
6774	tmp = b15 ^ b0;
6775	b15 = (tmp >> 5) | (tmp << (64 - 5));
6776	b0 -= b15;
6777
6778	tmp = b9 ^ b10;
6779	b9 = (tmp >> 17) | (tmp << (64 - 17));
6780	b10 -= b9;
6781
6782	tmp = b11 ^ b8;
6783	b11 = (tmp >> 59) | (tmp << (64 - 59));
6784	b8 -= b11;
6785
6786	tmp = b13 ^ b14;
6787	b13 = (tmp >> 41) | (tmp << (64 - 41));
6788	b14 -= b13;
6789
6790	tmp = b15 ^ b12;
6791	b15 = (tmp >> 34) | (tmp << (64 - 34));
6792	b12 -= b15;
6793
6794	tmp = b1 ^ b6;
6795	b1 = (tmp >> 13) | (tmp << (64 - 13));
6796	b6 -= b1;
6797
6798	tmp = b3 ^ b4;
6799	b3 = (tmp >> 51) | (tmp << (64 - 51));
6800	b4 -= b3;
6801
6802	tmp = b5 ^ b2;
6803	b5 = (tmp >> 4) | (tmp << (64 - 4));
6804	b2 -= b5;
6805
6806	tmp = b7 ^ b0;
6807	b7 = (tmp >> 33) | (tmp << (64 - 33));
6808	b0 -= b7;
6809
6810	tmp = b1 ^ b8;
6811	b1 = (tmp >> 52) | (tmp << (64 - 52));
6812	b8 -= b1;
6813
6814	tmp = b5 ^ b14;
6815	b5 = (tmp >> 23) | (tmp << (64 - 23));
6816	b14 -= b5;
6817
6818	tmp = b3 ^ b12;
6819	b3 = (tmp >> 18) | (tmp << (64 - 18));
6820	b12 -= b3;
6821
6822	tmp = b7 ^ b10;
6823	b7 = (tmp >> 49) | (tmp << (64 - 49));
6824	b10 -= b7;
6825
6826	tmp = b15 ^ b4;
6827	b15 = (tmp >> 55) | (tmp << (64 - 55));
6828	b4 -= b15;
6829
6830	tmp = b11 ^ b6;
6831	b11 = (tmp >> 10) | (tmp << (64 - 10));
6832	b6 -= b11;
6833
6834	tmp = b13 ^ b2;
6835	b13 = (tmp >> 19) | (tmp << (64 - 19));
6836	b2 -= b13;
6837
6838	tmp = b9 ^ b0;
6839	b9 = (tmp >> 38) | (tmp << (64 - 38));
6840	b0 -= b9;
6841
6842	tmp = b15 ^ b14;
6843	b15 = (tmp >> 37) | (tmp << (64 - 37));
6844	b14 -= b15 + k7 + t2;
6845	b15 -= k8 + 10;
6846
6847	tmp = b13 ^ b12;
6848	b13 = (tmp >> 22) | (tmp << (64 - 22));
6849	b12 -= b13 + k5;
6850	b13 -= k6 + t1;
6851
6852	tmp = b11 ^ b10;
6853	b11 = (tmp >> 17) | (tmp << (64 - 17));
6854	b10 -= b11 + k3;
6855	b11 -= k4;
6856
6857	tmp = b9 ^ b8;
6858	b9 = (tmp >> 8) | (tmp << (64 - 8));
6859	b8 -= b9 + k1;
6860	b9 -= k2;
6861
6862	tmp = b7 ^ b6;
6863	b7 = (tmp >> 47) | (tmp << (64 - 47));
6864	b6 -= b7 + k16;
6865	b7 -= k0;
6866
6867	tmp = b5 ^ b4;
6868	b5 = (tmp >> 8) | (tmp << (64 - 8));
6869	b4 -= b5 + k14;
6870	b5 -= k15;
6871
6872	tmp = b3 ^ b2;
6873	b3 = (tmp >> 13) | (tmp << (64 - 13));
6874	b2 -= b3 + k12;
6875	b3 -= k13;
6876
6877	tmp = b1 ^ b0;
6878	b1 = (tmp >> 24) | (tmp << (64 - 24));
6879	b0 -= b1 + k10;
6880	b1 -= k11;
6881
6882	tmp = b7 ^ b12;
6883	b7 = (tmp >> 20) | (tmp << (64 - 20));
6884	b12 -= b7;
6885
6886	tmp = b3 ^ b10;
6887	b3 = (tmp >> 37) | (tmp << (64 - 37));
6888	b10 -= b3;
6889
6890	tmp = b5 ^ b8;
6891	b5 = (tmp >> 31) | (tmp << (64 - 31));
6892	b8 -= b5;
6893
6894	tmp = b1 ^ b14;
6895	b1 = (tmp >> 23) | (tmp << (64 - 23));
6896	b14 -= b1;
6897
6898	tmp = b9 ^ b4;
6899	b9 = (tmp >> 52) | (tmp << (64 - 52));
6900	b4 -= b9;
6901
6902	tmp = b13 ^ b6;
6903	b13 = (tmp >> 35) | (tmp << (64 - 35));
6904	b6 -= b13;
6905
6906	tmp = b11 ^ b2;
6907	b11 = (tmp >> 48) | (tmp << (64 - 48));
6908	b2 -= b11;
6909
6910	tmp = b15 ^ b0;
6911	b15 = (tmp >> 9) | (tmp << (64 - 9));
6912	b0 -= b15;
6913
6914	tmp = b9 ^ b10;
6915	b9 = (tmp >> 25) | (tmp << (64 - 25));
6916	b10 -= b9;
6917
6918	tmp = b11 ^ b8;
6919	b11 = (tmp >> 44) | (tmp << (64 - 44));
6920	b8 -= b11;
6921
6922	tmp = b13 ^ b14;
6923	b13 = (tmp >> 42) | (tmp << (64 - 42));
6924	b14 -= b13;
6925
6926	tmp = b15 ^ b12;
6927	b15 = (tmp >> 19) | (tmp << (64 - 19));
6928	b12 -= b15;
6929
6930	tmp = b1 ^ b6;
6931	b1 = (tmp >> 46) | (tmp << (64 - 46));
6932	b6 -= b1;
6933
6934	tmp = b3 ^ b4;
6935	b3 = (tmp >> 47) | (tmp << (64 - 47));
6936	b4 -= b3;
6937
6938	tmp = b5 ^ b2;
6939	b5 = (tmp >> 44) | (tmp << (64 - 44));
6940	b2 -= b5;
6941
6942	tmp = b7 ^ b0;
6943	b7 = (tmp >> 31) | (tmp << (64 - 31));
6944	b0 -= b7;
6945
6946	tmp = b1 ^ b8;
6947	b1 = (tmp >> 41) | (tmp << (64 - 41));
6948	b8 -= b1;
6949
6950	tmp = b5 ^ b14;
6951	b5 = (tmp >> 42) | (tmp << (64 - 42));
6952	b14 -= b5;
6953
6954	tmp = b3 ^ b12;
6955	b3 = (tmp >> 53) | (tmp << (64 - 53));
6956	b12 -= b3;
6957
6958	tmp = b7 ^ b10;
6959	b7 = (tmp >> 4) | (tmp << (64 - 4));
6960	b10 -= b7;
6961
6962	tmp = b15 ^ b4;
6963	b15 = (tmp >> 51) | (tmp << (64 - 51));
6964	b4 -= b15;
6965
6966	tmp = b11 ^ b6;
6967	b11 = (tmp >> 56) | (tmp << (64 - 56));
6968	b6 -= b11;
6969
6970	tmp = b13 ^ b2;
6971	b13 = (tmp >> 34) | (tmp << (64 - 34));
6972	b2 -= b13;
6973
6974	tmp = b9 ^ b0;
6975	b9 = (tmp >> 16) | (tmp << (64 - 16));
6976	b0 -= b9;
6977
6978	tmp = b15 ^ b14;
6979	b15 = (tmp >> 30) | (tmp << (64 - 30));
6980	b14 -= b15 + k6 + t1;
6981	b15 -= k7 + 9;
6982
6983	tmp = b13 ^ b12;
6984	b13 = (tmp >> 44) | (tmp << (64 - 44));
6985	b12 -= b13 + k4;
6986	b13 -= k5 + t0;
6987
6988	tmp = b11 ^ b10;
6989	b11 = (tmp >> 47) | (tmp << (64 - 47));
6990	b10 -= b11 + k2;
6991	b11 -= k3;
6992
6993	tmp = b9 ^ b8;
6994	b9 = (tmp >> 12) | (tmp << (64 - 12));
6995	b8 -= b9 + k0;
6996	b9 -= k1;
6997
6998	tmp = b7 ^ b6;
6999	b7 = (tmp >> 31) | (tmp << (64 - 31));
7000	b6 -= b7 + k15;
7001	b7 -= k16;
7002
7003	tmp = b5 ^ b4;
7004	b5 = (tmp >> 37) | (tmp << (64 - 37));
7005	b4 -= b5 + k13;
7006	b5 -= k14;
7007
7008	tmp = b3 ^ b2;
7009	b3 = (tmp >> 9) | (tmp << (64 - 9));
7010	b2 -= b3 + k11;
7011	b3 -= k12;
7012
7013	tmp = b1 ^ b0;
7014	b1 = (tmp >> 41) | (tmp << (64 - 41));
7015	b0 -= b1 + k9;
7016	b1 -= k10;
7017
7018	tmp = b7 ^ b12;
7019	b7 = (tmp >> 25) | (tmp << (64 - 25));
7020	b12 -= b7;
7021
7022	tmp = b3 ^ b10;
7023	b3 = (tmp >> 16) | (tmp << (64 - 16));
7024	b10 -= b3;
7025
7026	tmp = b5 ^ b8;
7027	b5 = (tmp >> 28) | (tmp << (64 - 28));
7028	b8 -= b5;
7029
7030	tmp = b1 ^ b14;
7031	b1 = (tmp >> 47) | (tmp << (64 - 47));
7032	b14 -= b1;
7033
7034	tmp = b9 ^ b4;
7035	b9 = (tmp >> 41) | (tmp << (64 - 41));
7036	b4 -= b9;
7037
7038	tmp = b13 ^ b6;
7039	b13 = (tmp >> 48) | (tmp << (64 - 48));
7040	b6 -= b13;
7041
7042	tmp = b11 ^ b2;
7043	b11 = (tmp >> 20) | (tmp << (64 - 20));
7044	b2 -= b11;
7045
7046	tmp = b15 ^ b0;
7047	b15 = (tmp >> 5) | (tmp << (64 - 5));
7048	b0 -= b15;
7049
7050	tmp = b9 ^ b10;
7051	b9 = (tmp >> 17) | (tmp << (64 - 17));
7052	b10 -= b9;
7053
7054	tmp = b11 ^ b8;
7055	b11 = (tmp >> 59) | (tmp << (64 - 59));
7056	b8 -= b11;
7057
7058	tmp = b13 ^ b14;
7059	b13 = (tmp >> 41) | (tmp << (64 - 41));
7060	b14 -= b13;
7061
7062	tmp = b15 ^ b12;
7063	b15 = (tmp >> 34) | (tmp << (64 - 34));
7064	b12 -= b15;
7065
7066	tmp = b1 ^ b6;
7067	b1 = (tmp >> 13) | (tmp << (64 - 13));
7068	b6 -= b1;
7069
7070	tmp = b3 ^ b4;
7071	b3 = (tmp >> 51) | (tmp << (64 - 51));
7072	b4 -= b3;
7073
7074	tmp = b5 ^ b2;
7075	b5 = (tmp >> 4) | (tmp << (64 - 4));
7076	b2 -= b5;
7077
7078	tmp = b7 ^ b0;
7079	b7 = (tmp >> 33) | (tmp << (64 - 33));
7080	b0 -= b7;
7081
7082	tmp = b1 ^ b8;
7083	b1 = (tmp >> 52) | (tmp << (64 - 52));
7084	b8 -= b1;
7085
7086	tmp = b5 ^ b14;
7087	b5 = (tmp >> 23) | (tmp << (64 - 23));
7088	b14 -= b5;
7089
7090	tmp = b3 ^ b12;
7091	b3 = (tmp >> 18) | (tmp << (64 - 18));
7092	b12 -= b3;
7093
7094	tmp = b7 ^ b10;
7095	b7 = (tmp >> 49) | (tmp << (64 - 49));
7096	b10 -= b7;
7097
7098	tmp = b15 ^ b4;
7099	b15 = (tmp >> 55) | (tmp << (64 - 55));
7100	b4 -= b15;
7101
7102	tmp = b11 ^ b6;
7103	b11 = (tmp >> 10) | (tmp << (64 - 10));
7104	b6 -= b11;
7105
7106	tmp = b13 ^ b2;
7107	b13 = (tmp >> 19) | (tmp << (64 - 19));
7108	b2 -= b13;
7109
7110	tmp = b9 ^ b0;
7111	b9 = (tmp >> 38) | (tmp << (64 - 38));
7112	b0 -= b9;
7113
7114	tmp = b15 ^ b14;
7115	b15 = (tmp >> 37) | (tmp << (64 - 37));
7116	b14 -= b15 + k5 + t0;
7117	b15 -= k6 + 8;
7118
7119	tmp = b13 ^ b12;
7120	b13 = (tmp >> 22) | (tmp << (64 - 22));
7121	b12 -= b13 + k3;
7122	b13 -= k4 + t2;
7123
7124	tmp = b11 ^ b10;
7125	b11 = (tmp >> 17) | (tmp << (64 - 17));
7126	b10 -= b11 + k1;
7127	b11 -= k2;
7128
7129	tmp = b9 ^ b8;
7130	b9 = (tmp >> 8) | (tmp << (64 - 8));
7131	b8 -= b9 + k16;
7132	b9 -= k0;
7133
7134	tmp = b7 ^ b6;
7135	b7 = (tmp >> 47) | (tmp << (64 - 47));
7136	b6 -= b7 + k14;
7137	b7 -= k15;
7138
7139	tmp = b5 ^ b4;
7140	b5 = (tmp >> 8) | (tmp << (64 - 8));
7141	b4 -= b5 + k12;
7142	b5 -= k13;
7143
7144	tmp = b3 ^ b2;
7145	b3 = (tmp >> 13) | (tmp << (64 - 13));
7146	b2 -= b3 + k10;
7147	b3 -= k11;
7148
7149	tmp = b1 ^ b0;
7150	b1 = (tmp >> 24) | (tmp << (64 - 24));
7151	b0 -= b1 + k8;
7152	b1 -= k9;
7153
7154	tmp = b7 ^ b12;
7155	b7 = (tmp >> 20) | (tmp << (64 - 20));
7156	b12 -= b7;
7157
7158	tmp = b3 ^ b10;
7159	b3 = (tmp >> 37) | (tmp << (64 - 37));
7160	b10 -= b3;
7161
7162	tmp = b5 ^ b8;
7163	b5 = (tmp >> 31) | (tmp << (64 - 31));
7164	b8 -= b5;
7165
7166	tmp = b1 ^ b14;
7167	b1 = (tmp >> 23) | (tmp << (64 - 23));
7168	b14 -= b1;
7169
7170	tmp = b9 ^ b4;
7171	b9 = (tmp >> 52) | (tmp << (64 - 52));
7172	b4 -= b9;
7173
7174	tmp = b13 ^ b6;
7175	b13 = (tmp >> 35) | (tmp << (64 - 35));
7176	b6 -= b13;
7177
7178	tmp = b11 ^ b2;
7179	b11 = (tmp >> 48) | (tmp << (64 - 48));
7180	b2 -= b11;
7181
7182	tmp = b15 ^ b0;
7183	b15 = (tmp >> 9) | (tmp << (64 - 9));
7184	b0 -= b15;
7185
7186	tmp = b9 ^ b10;
7187	b9 = (tmp >> 25) | (tmp << (64 - 25));
7188	b10 -= b9;
7189
7190	tmp = b11 ^ b8;
7191	b11 = (tmp >> 44) | (tmp << (64 - 44));
7192	b8 -= b11;
7193
7194	tmp = b13 ^ b14;
7195	b13 = (tmp >> 42) | (tmp << (64 - 42));
7196	b14 -= b13;
7197
7198	tmp = b15 ^ b12;
7199	b15 = (tmp >> 19) | (tmp << (64 - 19));
7200	b12 -= b15;
7201
7202	tmp = b1 ^ b6;
7203	b1 = (tmp >> 46) | (tmp << (64 - 46));
7204	b6 -= b1;
7205
7206	tmp = b3 ^ b4;
7207	b3 = (tmp >> 47) | (tmp << (64 - 47));
7208	b4 -= b3;
7209
7210	tmp = b5 ^ b2;
7211	b5 = (tmp >> 44) | (tmp << (64 - 44));
7212	b2 -= b5;
7213
7214	tmp = b7 ^ b0;
7215	b7 = (tmp >> 31) | (tmp << (64 - 31));
7216	b0 -= b7;
7217
7218	tmp = b1 ^ b8;
7219	b1 = (tmp >> 41) | (tmp << (64 - 41));
7220	b8 -= b1;
7221
7222	tmp = b5 ^ b14;
7223	b5 = (tmp >> 42) | (tmp << (64 - 42));
7224	b14 -= b5;
7225
7226	tmp = b3 ^ b12;
7227	b3 = (tmp >> 53) | (tmp << (64 - 53));
7228	b12 -= b3;
7229
7230	tmp = b7 ^ b10;
7231	b7 = (tmp >> 4) | (tmp << (64 - 4));
7232	b10 -= b7;
7233
7234	tmp = b15 ^ b4;
7235	b15 = (tmp >> 51) | (tmp << (64 - 51));
7236	b4 -= b15;
7237
7238	tmp = b11 ^ b6;
7239	b11 = (tmp >> 56) | (tmp << (64 - 56));
7240	b6 -= b11;
7241
7242	tmp = b13 ^ b2;
7243	b13 = (tmp >> 34) | (tmp << (64 - 34));
7244	b2 -= b13;
7245
7246	tmp = b9 ^ b0;
7247	b9 = (tmp >> 16) | (tmp << (64 - 16));
7248	b0 -= b9;
7249
7250	tmp = b15 ^ b14;
7251	b15 = (tmp >> 30) | (tmp << (64 - 30));
7252	b14 -= b15 + k4 + t2;
7253	b15 -= k5 + 7;
7254
7255	tmp = b13 ^ b12;
7256	b13 = (tmp >> 44) | (tmp << (64 - 44));
7257	b12 -= b13 + k2;
7258	b13 -= k3 + t1;
7259
7260	tmp = b11 ^ b10;
7261	b11 = (tmp >> 47) | (tmp << (64 - 47));
7262	b10 -= b11 + k0;
7263	b11 -= k1;
7264
7265	tmp = b9 ^ b8;
7266	b9 = (tmp >> 12) | (tmp << (64 - 12));
7267	b8 -= b9 + k15;
7268	b9 -= k16;
7269
7270	tmp = b7 ^ b6;
7271	b7 = (tmp >> 31) | (tmp << (64 - 31));
7272	b6 -= b7 + k13;
7273	b7 -= k14;
7274
7275	tmp = b5 ^ b4;
7276	b5 = (tmp >> 37) | (tmp << (64 - 37));
7277	b4 -= b5 + k11;
7278	b5 -= k12;
7279
7280	tmp = b3 ^ b2;
7281	b3 = (tmp >> 9) | (tmp << (64 - 9));
7282	b2 -= b3 + k9;
7283	b3 -= k10;
7284
7285	tmp = b1 ^ b0;
7286	b1 = (tmp >> 41) | (tmp << (64 - 41));
7287	b0 -= b1 + k7;
7288	b1 -= k8;
7289
7290	tmp = b7 ^ b12;
7291	b7 = (tmp >> 25) | (tmp << (64 - 25));
7292	b12 -= b7;
7293
7294	tmp = b3 ^ b10;
7295	b3 = (tmp >> 16) | (tmp << (64 - 16));
7296	b10 -= b3;
7297
7298	tmp = b5 ^ b8;
7299	b5 = (tmp >> 28) | (tmp << (64 - 28));
7300	b8 -= b5;
7301
7302	tmp = b1 ^ b14;
7303	b1 = (tmp >> 47) | (tmp << (64 - 47));
7304	b14 -= b1;
7305
7306	tmp = b9 ^ b4;
7307	b9 = (tmp >> 41) | (tmp << (64 - 41));
7308	b4 -= b9;
7309
7310	tmp = b13 ^ b6;
7311	b13 = (tmp >> 48) | (tmp << (64 - 48));
7312	b6 -= b13;
7313
7314	tmp = b11 ^ b2;
7315	b11 = (tmp >> 20) | (tmp << (64 - 20));
7316	b2 -= b11;
7317
7318	tmp = b15 ^ b0;
7319	b15 = (tmp >> 5) | (tmp << (64 - 5));
7320	b0 -= b15;
7321
7322	tmp = b9 ^ b10;
7323	b9 = (tmp >> 17) | (tmp << (64 - 17));
7324	b10 -= b9;
7325
7326	tmp = b11 ^ b8;
7327	b11 = (tmp >> 59) | (tmp << (64 - 59));
7328	b8 -= b11;
7329
7330	tmp = b13 ^ b14;
7331	b13 = (tmp >> 41) | (tmp << (64 - 41));
7332	b14 -= b13;
7333
7334	tmp = b15 ^ b12;
7335	b15 = (tmp >> 34) | (tmp << (64 - 34));
7336	b12 -= b15;
7337
7338	tmp = b1 ^ b6;
7339	b1 = (tmp >> 13) | (tmp << (64 - 13));
7340	b6 -= b1;
7341
7342	tmp = b3 ^ b4;
7343	b3 = (tmp >> 51) | (tmp << (64 - 51));
7344	b4 -= b3;
7345
7346	tmp = b5 ^ b2;
7347	b5 = (tmp >> 4) | (tmp << (64 - 4));
7348	b2 -= b5;
7349
7350	tmp = b7 ^ b0;
7351	b7 = (tmp >> 33) | (tmp << (64 - 33));
7352	b0 -= b7;
7353
7354	tmp = b1 ^ b8;
7355	b1 = (tmp >> 52) | (tmp << (64 - 52));
7356	b8 -= b1;
7357
7358	tmp = b5 ^ b14;
7359	b5 = (tmp >> 23) | (tmp << (64 - 23));
7360	b14 -= b5;
7361
7362	tmp = b3 ^ b12;
7363	b3 = (tmp >> 18) | (tmp << (64 - 18));
7364	b12 -= b3;
7365
7366	tmp = b7 ^ b10;
7367	b7 = (tmp >> 49) | (tmp << (64 - 49));
7368	b10 -= b7;
7369
7370	tmp = b15 ^ b4;
7371	b15 = (tmp >> 55) | (tmp << (64 - 55));
7372	b4 -= b15;
7373
7374	tmp = b11 ^ b6;
7375	b11 = (tmp >> 10) | (tmp << (64 - 10));
7376	b6 -= b11;
7377
7378	tmp = b13 ^ b2;
7379	b13 = (tmp >> 19) | (tmp << (64 - 19));
7380	b2 -= b13;
7381
7382	tmp = b9 ^ b0;
7383	b9 = (tmp >> 38) | (tmp << (64 - 38));
7384	b0 -= b9;
7385
7386	tmp = b15 ^ b14;
7387	b15 = (tmp >> 37) | (tmp << (64 - 37));
7388	b14 -= b15 + k3 + t1;
7389	b15 -= k4 + 6;
7390
7391	tmp = b13 ^ b12;
7392	b13 = (tmp >> 22) | (tmp << (64 - 22));
7393	b12 -= b13 + k1;
7394	b13 -= k2 + t0;
7395
7396	tmp = b11 ^ b10;
7397	b11 = (tmp >> 17) | (tmp << (64 - 17));
7398	b10 -= b11 + k16;
7399	b11 -= k0;
7400
7401	tmp = b9 ^ b8;
7402	b9 = (tmp >> 8) | (tmp << (64 - 8));
7403	b8 -= b9 + k14;
7404	b9 -= k15;
7405
7406	tmp = b7 ^ b6;
7407	b7 = (tmp >> 47) | (tmp << (64 - 47));
7408	b6 -= b7 + k12;
7409	b7 -= k13;
7410
7411	tmp = b5 ^ b4;
7412	b5 = (tmp >> 8) | (tmp << (64 - 8));
7413	b4 -= b5 + k10;
7414	b5 -= k11;
7415
7416	tmp = b3 ^ b2;
7417	b3 = (tmp >> 13) | (tmp << (64 - 13));
7418	b2 -= b3 + k8;
7419	b3 -= k9;
7420
7421	tmp = b1 ^ b0;
7422	b1 = (tmp >> 24) | (tmp << (64 - 24));
7423	b0 -= b1 + k6;
7424	b1 -= k7;
7425
7426	tmp = b7 ^ b12;
7427	b7 = (tmp >> 20) | (tmp << (64 - 20));
7428	b12 -= b7;
7429
7430	tmp = b3 ^ b10;
7431	b3 = (tmp >> 37) | (tmp << (64 - 37));
7432	b10 -= b3;
7433
7434	tmp = b5 ^ b8;
7435	b5 = (tmp >> 31) | (tmp << (64 - 31));
7436	b8 -= b5;
7437
7438	tmp = b1 ^ b14;
7439	b1 = (tmp >> 23) | (tmp << (64 - 23));
7440	b14 -= b1;
7441
7442	tmp = b9 ^ b4;
7443	b9 = (tmp >> 52) | (tmp << (64 - 52));
7444	b4 -= b9;
7445
7446	tmp = b13 ^ b6;
7447	b13 = (tmp >> 35) | (tmp << (64 - 35));
7448	b6 -= b13;
7449
7450	tmp = b11 ^ b2;
7451	b11 = (tmp >> 48) | (tmp << (64 - 48));
7452	b2 -= b11;
7453
7454	tmp = b15 ^ b0;
7455	b15 = (tmp >> 9) | (tmp << (64 - 9));
7456	b0 -= b15;
7457
7458	tmp = b9 ^ b10;
7459	b9 = (tmp >> 25) | (tmp << (64 - 25));
7460	b10 -= b9;
7461
7462	tmp = b11 ^ b8;
7463	b11 = (tmp >> 44) | (tmp << (64 - 44));
7464	b8 -= b11;
7465
7466	tmp = b13 ^ b14;
7467	b13 = (tmp >> 42) | (tmp << (64 - 42));
7468	b14 -= b13;
7469
7470	tmp = b15 ^ b12;
7471	b15 = (tmp >> 19) | (tmp << (64 - 19));
7472	b12 -= b15;
7473
7474	tmp = b1 ^ b6;
7475	b1 = (tmp >> 46) | (tmp << (64 - 46));
7476	b6 -= b1;
7477
7478	tmp = b3 ^ b4;
7479	b3 = (tmp >> 47) | (tmp << (64 - 47));
7480	b4 -= b3;
7481
7482	tmp = b5 ^ b2;
7483	b5 = (tmp >> 44) | (tmp << (64 - 44));
7484	b2 -= b5;
7485
7486	tmp = b7 ^ b0;
7487	b7 = (tmp >> 31) | (tmp << (64 - 31));
7488	b0 -= b7;
7489
7490	tmp = b1 ^ b8;
7491	b1 = (tmp >> 41) | (tmp << (64 - 41));
7492	b8 -= b1;
7493
7494	tmp = b5 ^ b14;
7495	b5 = (tmp >> 42) | (tmp << (64 - 42));
7496	b14 -= b5;
7497
7498	tmp = b3 ^ b12;
7499	b3 = (tmp >> 53) | (tmp << (64 - 53));
7500	b12 -= b3;
7501
7502	tmp = b7 ^ b10;
7503	b7 = (tmp >> 4) | (tmp << (64 - 4));
7504	b10 -= b7;
7505
7506	tmp = b15 ^ b4;
7507	b15 = (tmp >> 51) | (tmp << (64 - 51));
7508	b4 -= b15;
7509
7510	tmp = b11 ^ b6;
7511	b11 = (tmp >> 56) | (tmp << (64 - 56));
7512	b6 -= b11;
7513
7514	tmp = b13 ^ b2;
7515	b13 = (tmp >> 34) | (tmp << (64 - 34));
7516	b2 -= b13;
7517
7518	tmp = b9 ^ b0;
7519	b9 = (tmp >> 16) | (tmp << (64 - 16));
7520	b0 -= b9;
7521
7522	tmp = b15 ^ b14;
7523	b15 = (tmp >> 30) | (tmp << (64 - 30));
7524	b14 -= b15 + k2 + t0;
7525	b15 -= k3 + 5;
7526
7527	tmp = b13 ^ b12;
7528	b13 = (tmp >> 44) | (tmp << (64 - 44));
7529	b12 -= b13 + k0;
7530	b13 -= k1 + t2;
7531
7532	tmp = b11 ^ b10;
7533	b11 = (tmp >> 47) | (tmp << (64 - 47));
7534	b10 -= b11 + k15;
7535	b11 -= k16;
7536
7537	tmp = b9 ^ b8;
7538	b9 = (tmp >> 12) | (tmp << (64 - 12));
7539	b8 -= b9 + k13;
7540	b9 -= k14;
7541
7542	tmp = b7 ^ b6;
7543	b7 = (tmp >> 31) | (tmp << (64 - 31));
7544	b6 -= b7 + k11;
7545	b7 -= k12;
7546
7547	tmp = b5 ^ b4;
7548	b5 = (tmp >> 37) | (tmp << (64 - 37));
7549	b4 -= b5 + k9;
7550	b5 -= k10;
7551
7552	tmp = b3 ^ b2;
7553	b3 = (tmp >> 9) | (tmp << (64 - 9));
7554	b2 -= b3 + k7;
7555	b3 -= k8;
7556
7557	tmp = b1 ^ b0;
7558	b1 = (tmp >> 41) | (tmp << (64 - 41));
7559	b0 -= b1 + k5;
7560	b1 -= k6;
7561
7562	tmp = b7 ^ b12;
7563	b7 = (tmp >> 25) | (tmp << (64 - 25));
7564	b12 -= b7;
7565
7566	tmp = b3 ^ b10;
7567	b3 = (tmp >> 16) | (tmp << (64 - 16));
7568	b10 -= b3;
7569
7570	tmp = b5 ^ b8;
7571	b5 = (tmp >> 28) | (tmp << (64 - 28));
7572	b8 -= b5;
7573
7574	tmp = b1 ^ b14;
7575	b1 = (tmp >> 47) | (tmp << (64 - 47));
7576	b14 -= b1;
7577
7578	tmp = b9 ^ b4;
7579	b9 = (tmp >> 41) | (tmp << (64 - 41));
7580	b4 -= b9;
7581
7582	tmp = b13 ^ b6;
7583	b13 = (tmp >> 48) | (tmp << (64 - 48));
7584	b6 -= b13;
7585
7586	tmp = b11 ^ b2;
7587	b11 = (tmp >> 20) | (tmp << (64 - 20));
7588	b2 -= b11;
7589
7590	tmp = b15 ^ b0;
7591	b15 = (tmp >> 5) | (tmp << (64 - 5));
7592	b0 -= b15;
7593
7594	tmp = b9 ^ b10;
7595	b9 = (tmp >> 17) | (tmp << (64 - 17));
7596	b10 -= b9;
7597
7598	tmp = b11 ^ b8;
7599	b11 = (tmp >> 59) | (tmp << (64 - 59));
7600	b8 -= b11;
7601
7602	tmp = b13 ^ b14;
7603	b13 = (tmp >> 41) | (tmp << (64 - 41));
7604	b14 -= b13;
7605
7606	tmp = b15 ^ b12;
7607	b15 = (tmp >> 34) | (tmp << (64 - 34));
7608	b12 -= b15;
7609
7610	tmp = b1 ^ b6;
7611	b1 = (tmp >> 13) | (tmp << (64 - 13));
7612	b6 -= b1;
7613
7614	tmp = b3 ^ b4;
7615	b3 = (tmp >> 51) | (tmp << (64 - 51));
7616	b4 -= b3;
7617
7618	tmp = b5 ^ b2;
7619	b5 = (tmp >> 4) | (tmp << (64 - 4));
7620	b2 -= b5;
7621
7622	tmp = b7 ^ b0;
7623	b7 = (tmp >> 33) | (tmp << (64 - 33));
7624	b0 -= b7;
7625
7626	tmp = b1 ^ b8;
7627	b1 = (tmp >> 52) | (tmp << (64 - 52));
7628	b8 -= b1;
7629
7630	tmp = b5 ^ b14;
7631	b5 = (tmp >> 23) | (tmp << (64 - 23));
7632	b14 -= b5;
7633
7634	tmp = b3 ^ b12;
7635	b3 = (tmp >> 18) | (tmp << (64 - 18));
7636	b12 -= b3;
7637
7638	tmp = b7 ^ b10;
7639	b7 = (tmp >> 49) | (tmp << (64 - 49));
7640	b10 -= b7;
7641
7642	tmp = b15 ^ b4;
7643	b15 = (tmp >> 55) | (tmp << (64 - 55));
7644	b4 -= b15;
7645
7646	tmp = b11 ^ b6;
7647	b11 = (tmp >> 10) | (tmp << (64 - 10));
7648	b6 -= b11;
7649
7650	tmp = b13 ^ b2;
7651	b13 = (tmp >> 19) | (tmp << (64 - 19));
7652	b2 -= b13;
7653
7654	tmp = b9 ^ b0;
7655	b9 = (tmp >> 38) | (tmp << (64 - 38));
7656	b0 -= b9;
7657
7658	tmp = b15 ^ b14;
7659	b15 = (tmp >> 37) | (tmp << (64 - 37));
7660	b14 -= b15 + k1 + t2;
7661	b15 -= k2 + 4;
7662
7663	tmp = b13 ^ b12;
7664	b13 = (tmp >> 22) | (tmp << (64 - 22));
7665	b12 -= b13 + k16;
7666	b13 -= k0 + t1;
7667
7668	tmp = b11 ^ b10;
7669	b11 = (tmp >> 17) | (tmp << (64 - 17));
7670	b10 -= b11 + k14;
7671	b11 -= k15;
7672
7673	tmp = b9 ^ b8;
7674	b9 = (tmp >> 8) | (tmp << (64 - 8));
7675	b8 -= b9 + k12;
7676	b9 -= k13;
7677
7678	tmp = b7 ^ b6;
7679	b7 = (tmp >> 47) | (tmp << (64 - 47));
7680	b6 -= b7 + k10;
7681	b7 -= k11;
7682
7683	tmp = b5 ^ b4;
7684	b5 = (tmp >> 8) | (tmp << (64 - 8));
7685	b4 -= b5 + k8;
7686	b5 -= k9;
7687
7688	tmp = b3 ^ b2;
7689	b3 = (tmp >> 13) | (tmp << (64 - 13));
7690	b2 -= b3 + k6;
7691	b3 -= k7;
7692
7693	tmp = b1 ^ b0;
7694	b1 = (tmp >> 24) | (tmp << (64 - 24));
7695	b0 -= b1 + k4;
7696	b1 -= k5;
7697
7698	tmp = b7 ^ b12;
7699	b7 = (tmp >> 20) | (tmp << (64 - 20));
7700	b12 -= b7;
7701
7702	tmp = b3 ^ b10;
7703	b3 = (tmp >> 37) | (tmp << (64 - 37));
7704	b10 -= b3;
7705
7706	tmp = b5 ^ b8;
7707	b5 = (tmp >> 31) | (tmp << (64 - 31));
7708	b8 -= b5;
7709
7710	tmp = b1 ^ b14;
7711	b1 = (tmp >> 23) | (tmp << (64 - 23));
7712	b14 -= b1;
7713
7714	tmp = b9 ^ b4;
7715	b9 = (tmp >> 52) | (tmp << (64 - 52));
7716	b4 -= b9;
7717
7718	tmp = b13 ^ b6;
7719	b13 = (tmp >> 35) | (tmp << (64 - 35));
7720	b6 -= b13;
7721
7722	tmp = b11 ^ b2;
7723	b11 = (tmp >> 48) | (tmp << (64 - 48));
7724	b2 -= b11;
7725
7726	tmp = b15 ^ b0;
7727	b15 = (tmp >> 9) | (tmp << (64 - 9));
7728	b0 -= b15;
7729
7730	tmp = b9 ^ b10;
7731	b9 = (tmp >> 25) | (tmp << (64 - 25));
7732	b10 -= b9;
7733
7734	tmp = b11 ^ b8;
7735	b11 = (tmp >> 44) | (tmp << (64 - 44));
7736	b8 -= b11;
7737
7738	tmp = b13 ^ b14;
7739	b13 = (tmp >> 42) | (tmp << (64 - 42));
7740	b14 -= b13;
7741
7742	tmp = b15 ^ b12;
7743	b15 = (tmp >> 19) | (tmp << (64 - 19));
7744	b12 -= b15;
7745
7746	tmp = b1 ^ b6;
7747	b1 = (tmp >> 46) | (tmp << (64 - 46));
7748	b6 -= b1;
7749
7750	tmp = b3 ^ b4;
7751	b3 = (tmp >> 47) | (tmp << (64 - 47));
7752	b4 -= b3;
7753
7754	tmp = b5 ^ b2;
7755	b5 = (tmp >> 44) | (tmp << (64 - 44));
7756	b2 -= b5;
7757
7758	tmp = b7 ^ b0;
7759	b7 = (tmp >> 31) | (tmp << (64 - 31));
7760	b0 -= b7;
7761
7762	tmp = b1 ^ b8;
7763	b1 = (tmp >> 41) | (tmp << (64 - 41));
7764	b8 -= b1;
7765
7766	tmp = b5 ^ b14;
7767	b5 = (tmp >> 42) | (tmp << (64 - 42));
7768	b14 -= b5;
7769
7770	tmp = b3 ^ b12;
7771	b3 = (tmp >> 53) | (tmp << (64 - 53));
7772	b12 -= b3;
7773
7774	tmp = b7 ^ b10;
7775	b7 = (tmp >> 4) | (tmp << (64 - 4));
7776	b10 -= b7;
7777
7778	tmp = b15 ^ b4;
7779	b15 = (tmp >> 51) | (tmp << (64 - 51));
7780	b4 -= b15;
7781
7782	tmp = b11 ^ b6;
7783	b11 = (tmp >> 56) | (tmp << (64 - 56));
7784	b6 -= b11;
7785
7786	tmp = b13 ^ b2;
7787	b13 = (tmp >> 34) | (tmp << (64 - 34));
7788	b2 -= b13;
7789
7790	tmp = b9 ^ b0;
7791	b9 = (tmp >> 16) | (tmp << (64 - 16));
7792	b0 -= b9;
7793
7794	tmp = b15 ^ b14;
7795	b15 = (tmp >> 30) | (tmp << (64 - 30));
7796	b14 -= b15 + k0 + t1;
7797	b15 -= k1 + 3;
7798
7799	tmp = b13 ^ b12;
7800	b13 = (tmp >> 44) | (tmp << (64 - 44));
7801	b12 -= b13 + k15;
7802	b13 -= k16 + t0;
7803
7804	tmp = b11 ^ b10;
7805	b11 = (tmp >> 47) | (tmp << (64 - 47));
7806	b10 -= b11 + k13;
7807	b11 -= k14;
7808
7809	tmp = b9 ^ b8;
7810	b9 = (tmp >> 12) | (tmp << (64 - 12));
7811	b8 -= b9 + k11;
7812	b9 -= k12;
7813
7814	tmp = b7 ^ b6;
7815	b7 = (tmp >> 31) | (tmp << (64 - 31));
7816	b6 -= b7 + k9;
7817	b7 -= k10;
7818
7819	tmp = b5 ^ b4;
7820	b5 = (tmp >> 37) | (tmp << (64 - 37));
7821	b4 -= b5 + k7;
7822	b5 -= k8;
7823
7824	tmp = b3 ^ b2;
7825	b3 = (tmp >> 9) | (tmp << (64 - 9));
7826	b2 -= b3 + k5;
7827	b3 -= k6;
7828
7829	tmp = b1 ^ b0;
7830	b1 = (tmp >> 41) | (tmp << (64 - 41));
7831	b0 -= b1 + k3;
7832	b1 -= k4;
7833
7834	tmp = b7 ^ b12;
7835	b7 = (tmp >> 25) | (tmp << (64 - 25));
7836	b12 -= b7;
7837
7838	tmp = b3 ^ b10;
7839	b3 = (tmp >> 16) | (tmp << (64 - 16));
7840	b10 -= b3;
7841
7842	tmp = b5 ^ b8;
7843	b5 = (tmp >> 28) | (tmp << (64 - 28));
7844	b8 -= b5;
7845
7846	tmp = b1 ^ b14;
7847	b1 = (tmp >> 47) | (tmp << (64 - 47));
7848	b14 -= b1;
7849
7850	tmp = b9 ^ b4;
7851	b9 = (tmp >> 41) | (tmp << (64 - 41));
7852	b4 -= b9;
7853
7854	tmp = b13 ^ b6;
7855	b13 = (tmp >> 48) | (tmp << (64 - 48));
7856	b6 -= b13;
7857
7858	tmp = b11 ^ b2;
7859	b11 = (tmp >> 20) | (tmp << (64 - 20));
7860	b2 -= b11;
7861
7862	tmp = b15 ^ b0;
7863	b15 = (tmp >> 5) | (tmp << (64 - 5));
7864	b0 -= b15;
7865
7866	tmp = b9 ^ b10;
7867	b9 = (tmp >> 17) | (tmp << (64 - 17));
7868	b10 -= b9;
7869
7870	tmp = b11 ^ b8;
7871	b11 = (tmp >> 59) | (tmp << (64 - 59));
7872	b8 -= b11;
7873
7874	tmp = b13 ^ b14;
7875	b13 = (tmp >> 41) | (tmp << (64 - 41));
7876	b14 -= b13;
7877
7878	tmp = b15 ^ b12;
7879	b15 = (tmp >> 34) | (tmp << (64 - 34));
7880	b12 -= b15;
7881
7882	tmp = b1 ^ b6;
7883	b1 = (tmp >> 13) | (tmp << (64 - 13));
7884	b6 -= b1;
7885
7886	tmp = b3 ^ b4;
7887	b3 = (tmp >> 51) | (tmp << (64 - 51));
7888	b4 -= b3;
7889
7890	tmp = b5 ^ b2;
7891	b5 = (tmp >> 4) | (tmp << (64 - 4));
7892	b2 -= b5;
7893
7894	tmp = b7 ^ b0;
7895	b7 = (tmp >> 33) | (tmp << (64 - 33));
7896	b0 -= b7;
7897
7898	tmp = b1 ^ b8;
7899	b1 = (tmp >> 52) | (tmp << (64 - 52));
7900	b8 -= b1;
7901
7902	tmp = b5 ^ b14;
7903	b5 = (tmp >> 23) | (tmp << (64 - 23));
7904	b14 -= b5;
7905
7906	tmp = b3 ^ b12;
7907	b3 = (tmp >> 18) | (tmp << (64 - 18));
7908	b12 -= b3;
7909
7910	tmp = b7 ^ b10;
7911	b7 = (tmp >> 49) | (tmp << (64 - 49));
7912	b10 -= b7;
7913
7914	tmp = b15 ^ b4;
7915	b15 = (tmp >> 55) | (tmp << (64 - 55));
7916	b4 -= b15;
7917
7918	tmp = b11 ^ b6;
7919	b11 = (tmp >> 10) | (tmp << (64 - 10));
7920	b6 -= b11;
7921
7922	tmp = b13 ^ b2;
7923	b13 = (tmp >> 19) | (tmp << (64 - 19));
7924	b2 -= b13;
7925
7926	tmp = b9 ^ b0;
7927	b9 = (tmp >> 38) | (tmp << (64 - 38));
7928	b0 -= b9;
7929
7930	tmp = b15 ^ b14;
7931	b15 = (tmp >> 37) | (tmp << (64 - 37));
7932	b14 -= b15 + k16 + t0;
7933	b15 -= k0 + 2;
7934
7935	tmp = b13 ^ b12;
7936	b13 = (tmp >> 22) | (tmp << (64 - 22));
7937	b12 -= b13 + k14;
7938	b13 -= k15 + t2;
7939
7940	tmp = b11 ^ b10;
7941	b11 = (tmp >> 17) | (tmp << (64 - 17));
7942	b10 -= b11 + k12;
7943	b11 -= k13;
7944
7945	tmp = b9 ^ b8;
7946	b9 = (tmp >> 8) | (tmp << (64 - 8));
7947	b8 -= b9 + k10;
7948	b9 -= k11;
7949
7950	tmp = b7 ^ b6;
7951	b7 = (tmp >> 47) | (tmp << (64 - 47));
7952	b6 -= b7 + k8;
7953	b7 -= k9;
7954
7955	tmp = b5 ^ b4;
7956	b5 = (tmp >> 8) | (tmp << (64 - 8));
7957	b4 -= b5 + k6;
7958	b5 -= k7;
7959
7960	tmp = b3 ^ b2;
7961	b3 = (tmp >> 13) | (tmp << (64 - 13));
7962	b2 -= b3 + k4;
7963	b3 -= k5;
7964
7965	tmp = b1 ^ b0;
7966	b1 = (tmp >> 24) | (tmp << (64 - 24));
7967	b0 -= b1 + k2;
7968	b1 -= k3;
7969
7970	tmp = b7 ^ b12;
7971	b7 = (tmp >> 20) | (tmp << (64 - 20));
7972	b12 -= b7;
7973
7974	tmp = b3 ^ b10;
7975	b3 = (tmp >> 37) | (tmp << (64 - 37));
7976	b10 -= b3;
7977
7978	tmp = b5 ^ b8;
7979	b5 = (tmp >> 31) | (tmp << (64 - 31));
7980	b8 -= b5;
7981
7982	tmp = b1 ^ b14;
7983	b1 = (tmp >> 23) | (tmp << (64 - 23));
7984	b14 -= b1;
7985
7986	tmp = b9 ^ b4;
7987	b9 = (tmp >> 52) | (tmp << (64 - 52));
7988	b4 -= b9;
7989
7990	tmp = b13 ^ b6;
7991	b13 = (tmp >> 35) | (tmp << (64 - 35));
7992	b6 -= b13;
7993
7994	tmp = b11 ^ b2;
7995	b11 = (tmp >> 48) | (tmp << (64 - 48));
7996	b2 -= b11;
7997
7998	tmp = b15 ^ b0;
7999	b15 = (tmp >> 9) | (tmp << (64 - 9));
8000	b0 -= b15;
8001
8002	tmp = b9 ^ b10;
8003	b9 = (tmp >> 25) | (tmp << (64 - 25));
8004	b10 -= b9;
8005
8006	tmp = b11 ^ b8;
8007	b11 = (tmp >> 44) | (tmp << (64 - 44));
8008	b8 -= b11;
8009
8010	tmp = b13 ^ b14;
8011	b13 = (tmp >> 42) | (tmp << (64 - 42));
8012	b14 -= b13;
8013
8014	tmp = b15 ^ b12;
8015	b15 = (tmp >> 19) | (tmp << (64 - 19));
8016	b12 -= b15;
8017
8018	tmp = b1 ^ b6;
8019	b1 = (tmp >> 46) | (tmp << (64 - 46));
8020	b6 -= b1;
8021
8022	tmp = b3 ^ b4;
8023	b3 = (tmp >> 47) | (tmp << (64 - 47));
8024	b4 -= b3;
8025
8026	tmp = b5 ^ b2;
8027	b5 = (tmp >> 44) | (tmp << (64 - 44));
8028	b2 -= b5;
8029
8030	tmp = b7 ^ b0;
8031	b7 = (tmp >> 31) | (tmp << (64 - 31));
8032	b0 -= b7;
8033
8034	tmp = b1 ^ b8;
8035	b1 = (tmp >> 41) | (tmp << (64 - 41));
8036	b8 -= b1;
8037
8038	tmp = b5 ^ b14;
8039	b5 = (tmp >> 42) | (tmp << (64 - 42));
8040	b14 -= b5;
8041
8042	tmp = b3 ^ b12;
8043	b3 = (tmp >> 53) | (tmp << (64 - 53));
8044	b12 -= b3;
8045
8046	tmp = b7 ^ b10;
8047	b7 = (tmp >> 4) | (tmp << (64 - 4));
8048	b10 -= b7;
8049
8050	tmp = b15 ^ b4;
8051	b15 = (tmp >> 51) | (tmp << (64 - 51));
8052	b4 -= b15;
8053
8054	tmp = b11 ^ b6;
8055	b11 = (tmp >> 56) | (tmp << (64 - 56));
8056	b6 -= b11;
8057
8058	tmp = b13 ^ b2;
8059	b13 = (tmp >> 34) | (tmp << (64 - 34));
8060	b2 -= b13;
8061
8062	tmp = b9 ^ b0;
8063	b9 = (tmp >> 16) | (tmp << (64 - 16));
8064	b0 -= b9;
8065
8066	tmp = b15 ^ b14;
8067	b15 = (tmp >> 30) | (tmp << (64 - 30));
8068	b14 -= b15 + k15 + t2;
8069	b15 -= k16 + 1;
8070
8071	tmp = b13 ^ b12;
8072	b13 = (tmp >> 44) | (tmp << (64 - 44));
8073	b12 -= b13 + k13;
8074	b13 -= k14 + t1;
8075
8076	tmp = b11 ^ b10;
8077	b11 = (tmp >> 47) | (tmp << (64 - 47));
8078	b10 -= b11 + k11;
8079	b11 -= k12;
8080
8081	tmp = b9 ^ b8;
8082	b9 = (tmp >> 12) | (tmp << (64 - 12));
8083	b8 -= b9 + k9;
8084	b9 -= k10;
8085
8086	tmp = b7 ^ b6;
8087	b7 = (tmp >> 31) | (tmp << (64 - 31));
8088	b6 -= b7 + k7;
8089	b7 -= k8;
8090
8091	tmp = b5 ^ b4;
8092	b5 = (tmp >> 37) | (tmp << (64 - 37));
8093	b4 -= b5 + k5;
8094	b5 -= k6;
8095
8096	tmp = b3 ^ b2;
8097	b3 = (tmp >> 9) | (tmp << (64 - 9));
8098	b2 -= b3 + k3;
8099	b3 -= k4;
8100
8101	tmp = b1 ^ b0;
8102	b1 = (tmp >> 41) | (tmp << (64 - 41));
8103	b0 -= b1 + k1;
8104	b1 -= k2;
8105
8106	tmp = b7 ^ b12;
8107	b7 = (tmp >> 25) | (tmp << (64 - 25));
8108	b12 -= b7;
8109
8110	tmp = b3 ^ b10;
8111	b3 = (tmp >> 16) | (tmp << (64 - 16));
8112	b10 -= b3;
8113
8114	tmp = b5 ^ b8;
8115	b5 = (tmp >> 28) | (tmp << (64 - 28));
8116	b8 -= b5;
8117
8118	tmp = b1 ^ b14;
8119	b1 = (tmp >> 47) | (tmp << (64 - 47));
8120	b14 -= b1;
8121
8122	tmp = b9 ^ b4;
8123	b9 = (tmp >> 41) | (tmp << (64 - 41));
8124	b4 -= b9;
8125
8126	tmp = b13 ^ b6;
8127	b13 = (tmp >> 48) | (tmp << (64 - 48));
8128	b6 -= b13;
8129
8130	tmp = b11 ^ b2;
8131	b11 = (tmp >> 20) | (tmp << (64 - 20));
8132	b2 -= b11;
8133
8134	tmp = b15 ^ b0;
8135	b15 = (tmp >> 5) | (tmp << (64 - 5));
8136	b0 -= b15;
8137
8138	tmp = b9 ^ b10;
8139	b9 = (tmp >> 17) | (tmp << (64 - 17));
8140	b10 -= b9;
8141
8142	tmp = b11 ^ b8;
8143	b11 = (tmp >> 59) | (tmp << (64 - 59));
8144	b8 -= b11;
8145
8146	tmp = b13 ^ b14;
8147	b13 = (tmp >> 41) | (tmp << (64 - 41));
8148	b14 -= b13;
8149
8150	tmp = b15 ^ b12;
8151	b15 = (tmp >> 34) | (tmp << (64 - 34));
8152	b12 -= b15;
8153
8154	tmp = b1 ^ b6;
8155	b1 = (tmp >> 13) | (tmp << (64 - 13));
8156	b6 -= b1;
8157
8158	tmp = b3 ^ b4;
8159	b3 = (tmp >> 51) | (tmp << (64 - 51));
8160	b4 -= b3;
8161
8162	tmp = b5 ^ b2;
8163	b5 = (tmp >> 4) | (tmp << (64 - 4));
8164	b2 -= b5;
8165
8166	tmp = b7 ^ b0;
8167	b7 = (tmp >> 33) | (tmp << (64 - 33));
8168	b0 -= b7;
8169
8170	tmp = b1 ^ b8;
8171	b1 = (tmp >> 52) | (tmp << (64 - 52));
8172	b8 -= b1;
8173
8174	tmp = b5 ^ b14;
8175	b5 = (tmp >> 23) | (tmp << (64 - 23));
8176	b14 -= b5;
8177
8178	tmp = b3 ^ b12;
8179	b3 = (tmp >> 18) | (tmp << (64 - 18));
8180	b12 -= b3;
8181
8182	tmp = b7 ^ b10;
8183	b7 = (tmp >> 49) | (tmp << (64 - 49));
8184	b10 -= b7;
8185
8186	tmp = b15 ^ b4;
8187	b15 = (tmp >> 55) | (tmp << (64 - 55));
8188	b4 -= b15;
8189
8190	tmp = b11 ^ b6;
8191	b11 = (tmp >> 10) | (tmp << (64 - 10));
8192	b6 -= b11;
8193
8194	tmp = b13 ^ b2;
8195	b13 = (tmp >> 19) | (tmp << (64 - 19));
8196	b2 -= b13;
8197
8198	tmp = b9 ^ b0;
8199	b9 = (tmp >> 38) | (tmp << (64 - 38));
8200	b0 -= b9;
8201
8202	tmp = b15 ^ b14;
8203	b15 = (tmp >> 37) | (tmp << (64 - 37));
8204	b14 -= b15 + k14 + t1;
8205	b15 -= k15;
8206
8207	tmp = b13 ^ b12;
8208	b13 = (tmp >> 22) | (tmp << (64 - 22));
8209	b12 -= b13 + k12;
8210	b13 -= k13 + t0;
8211
8212	tmp = b11 ^ b10;
8213	b11 = (tmp >> 17) | (tmp << (64 - 17));
8214	b10 -= b11 + k10;
8215	b11 -= k11;
8216
8217	tmp = b9 ^ b8;
8218	b9 = (tmp >> 8) | (tmp << (64 - 8));
8219	b8 -= b9 + k8;
8220	b9 -= k9;
8221
8222	tmp = b7 ^ b6;
8223	b7 = (tmp >> 47) | (tmp << (64 - 47));
8224	b6 -= b7 + k6;
8225	b7 -= k7;
8226
8227	tmp = b5 ^ b4;
8228	b5 = (tmp >> 8) | (tmp << (64 - 8));
8229	b4 -= b5 + k4;
8230	b5 -= k5;
8231
8232	tmp = b3 ^ b2;
8233	b3 = (tmp >> 13) | (tmp << (64 - 13));
8234	b2 -= b3 + k2;
8235	b3 -= k3;
8236
8237	tmp = b1 ^ b0;
8238	b1 = (tmp >> 24) | (tmp << (64 - 24));
8239	b0 -= b1 + k0;
8240	b1 -= k1;
8241
8242	output[15] = b15;
8243	output[14] = b14;
8244	output[13] = b13;
8245	output[12] = b12;
8246	output[11] = b11;
8247	output[10] = b10;
8248	output[9] = b9;
8249	output[8] = b8;
8250	output[7] = b7;
8251	output[6] = b6;
8252	output[5] = b5;
8253	output[4] = b4;
8254	output[3] = b3;
8255	output[2] = b2;
8256	output[1] = b1;
8257	output[0] = b0;
8258}
8259