root/arch/x86/crypto/twofish-x86_64-asm_64-3way.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * Twofish Cipher 3-way parallel algorithm (x86_64)
   4  *
   5  * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
   6  */
   7 
   8 #include <linux/linkage.h>
   9 
  10 .file "twofish-x86_64-asm-3way.S"
  11 .text
  12 
  13 /* structure of crypto context */
  14 #define s0      0       /* byte offset of lookup table 0 (read as 4-byte entries indexed by one byte) */
  15 #define s1      1024    /* byte offset of lookup table 1 */
  16 #define s2      2048    /* byte offset of lookup table 2 */
  17 #define s3      3072    /* byte offset of lookup table 3 */
  18 #define w       4096    /* 32-bit words XORed into the blocks by inpack3/outunpack3 (presumably the whitening keys) */
  19 #define k       4128    /* 32-bit words added in the round-end macros, two per round (presumably the round subkeys) */
  20 
  21 /**********************************************************************
  22   3-way twofish
  23  **********************************************************************/
  24 #define CTX %rdi        /* crypto context pointer; base register for every s/w/k access */
  25 #define RIO %rdx        /* pointer used by the pack/unpack macros; same register as RT0 */
  26 
  27 #define RAB0 %rax       /* 64-bit working value of block 0 (two 32-bit words) */
  28 #define RAB1 %rbx       /* 64-bit working value of block 1 */
  29 #define RAB2 %rcx       /* 64-bit working value of block 2 */
  30 
  31 #define RAB0d %eax
  32 #define RAB1d %ebx
  33 #define RAB2d %ecx
  34 
  35 #define RAB0bh %ah      /* bits 15:8 of RABn, used as a table index by do16bit_ror */
  36 #define RAB1bh %bh
  37 #define RAB2bh %ch
  38 
  39 #define RAB0bl %al      /* bits 7:0 of RABn, used as a table index by do16bit_ror */
  40 #define RAB1bl %bl
  41 #define RAB2bl %cl
  42 
  43 #define CD0 0x0(%rsp)   /* stack slots written by push_cd(): slot for block 0 */
  44 #define CD1 0x8(%rsp)   /* slot for block 1 */
  45 #define CD2 0x10(%rsp)  /* slot for block 2 */
  46 
  47 # used only before/after all rounds
  48 #define RCD0 %r8        /* RCDn share %r8-%r10 with RXn, hence the push_cd()/pop_cd() pair */
  49 #define RCD1 %r9
  50 #define RCD2 %r10
  51 
  52 # used only during rounds
  53 #define RX0 %r8         /* per-block accumulator for the first table-lookup result */
  54 #define RX1 %r9
  55 #define RX2 %r10
  56 
  57 #define RX0d %r8d
  58 #define RX1d %r9d
  59 #define RX2d %r10d
  60 
  61 #define RY0 %r11        /* per-block accumulator for the second table-lookup result */
  62 #define RY1 %r12        /* %r12/%r13 are pushed/popped by both entry points */
  63 #define RY2 %r13
  64 
  65 #define RY0d %r11d
  66 #define RY1d %r12d
  67 #define RY2d %r13d
  68 
  69 #define RT0 %rdx        /* scratch: table index and swap temporary (aliases RIO) */
  70 #define RT1 %rsi        /* scratch: table index; also receives the xor flag in the encrypt entry point */
  71 
  72 #define RT0d %edx
  73 #define RT1d %esi
  74 
  75 #define RT1bl %sil
  76 
  77 #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
             /* Two table lookups driven by the low 16 bits of ab: */ \
             /*   dst = (dst op1 T0[ab bits 7:0]) op2 T1[ab bits 15:8] */ \
             /* where T0/T1 are byte offsets into CTX and entries are 32 bits wide. */ \
             /* ab is rotated right by rot bits once the index bytes are extracted. */ \
             /* tmp1/tmp2 are clobbered; tmp2 may be the same register as dst. */ \
  78         movzbl ab ## bl,                tmp2 ## d; /* tmp2 = bits 7:0 of ab */ \
  79         movzbl ab ## bh,                tmp1 ## d; /* tmp1 = bits 15:8 of ab */ \
  80         rorq $(rot),                    ab; /* bring the next bytes into position */ \
  81         op1##l T0(CTX, tmp2, 4),        dst ## d; /* index is scaled by 4: 32-bit entries */ \
  82         op2##l T1(CTX, tmp1, 4),        dst ## d;
  83 
  84 #define swap_ab_with_cd(ab, cd, tmp)    \
             /* Exchange the 64-bit values of ab and cd through tmp. */ \
             /* tmp is clobbered and ends up equal to the new ab. */ \
  85         movq cd, tmp;                   \
  86         movq ab, cd;                    \
  87         movq tmp, ab;
  88 
  89 /*
  90  * Combined G1 & G2 function. Reordered with help of rotates to have moves
  91  * at beginning.
  92  */
  93 #define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
             /* For each of the three blocks n = 0..2 (registers ab##n, x##n, y##n): */ \
             /*   x = Tx0[l0] ^ Tx1[l1] ^ Tx2[l2] ^ Tx3[l3], l0..l3 = bytes of the low dword of ab */ \
             /*   y = Ty1[h0] ^ Ty2[h1] ^ Ty3[h2] ^ Ty0[h3], h0..h3 = bytes of the high dword of ab */ \
             /* The rotates per block are 32+48+32+16 = 128 bits, so ab is back in its */ \
             /* original orientation when it is exchanged with the stack slot cd##n. */ \
             /* On exit ab##n holds the former cd##n value. Clobbers RT0 and RT1. */ \
  94         /* G1,1 && G2,1 */ \
  95         do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
  96         do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
  97         \
  98         do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
  99         do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
 100         \
 101         do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
 102         do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
 103         \
 104         /* G1,2 && G2,2 */ \
 105         do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
 106         do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
 107         swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
 108         \
 109         do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
 110         do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
 111         swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
 112         \
 113         do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
 114         do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
 115         swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
 116 
 117 #define enc_round_end(ab, x, y, n) \
             /* Mix x and y, add the two k words of round n, and fold them into ab. */ \
             /* With lo/hi the low/high 32 bits of ab on entry: */ \
             /*   X = x + y + k[2n],  Y = x + 2*y + k[2n+1]   (32-bit wrapping adds) */ \
             /*   ab = ((rol32(hi, 1) ^ Y) << 32) | ror32(lo ^ X, 1) */ \
             /* x and y are clobbered. */ \
 118         addl y ## d,                    x ## d; /* x = x + y */ \
 119         addl x ## d,                    y ## d; /* y = x + 2*y */ \
 120         addl k+4*(2*(n))(CTX),          x ## d; \
 121         xorl ab ## d,                   x ## d; /* x ^= lo */ \
 122         addl k+4*(2*(n)+1)(CTX),        y ## d; \
 123         shrq $32,                       ab; /* ab = hi */ \
 124         roll $1,                        ab ## d; \
 125         xorl y ## d,                    ab ## d; \
 126         shlq $32,                       ab; /* new high half in place, low 32 bits now zero */ \
 127         rorl $1,                        x ## d; /* 32-bit op, so bits 63:32 of x are zero */ \
 128         orq x,                          ab; /* merge the new low half */
 129 
 130 #define dec_round_end(ba, x, y, n) \
             /* Decryption counterpart of enc_round_end. */ \
             /* With lo/hi the low/high 32 bits of ba on entry: */ \
             /*   X = x + y + k[2n],  Y = x + 2*y + k[2n+1]   (32-bit wrapping adds) */ \
             /*   ba = ((rol32(hi, 1) ^ X) << 32) | ror32(lo ^ Y, 1) */ \
             /* x and y are clobbered. */ \
 131         addl y ## d,                    x ## d; /* x = x + y */ \
 132         addl x ## d,                    y ## d; /* y = x + 2*y */ \
 133         addl k+4*(2*(n))(CTX),          x ## d; \
 134         addl k+4*(2*(n)+1)(CTX),        y ## d; \
 135         xorl ba ## d,                   y ## d; /* y ^= lo */ \
 136         shrq $32,                       ba; /* ba = hi */ \
 137         roll $1,                        ba ## d; \
 138         xorl x ## d,                    ba ## d; \
 139         shlq $32,                       ba; /* new high half in place, low 32 bits now zero */ \
 140         rorl $1,                        y ## d; /* 32-bit op, so bits 63:32 of y are zero */ \
 141         orq y,                          ba; /* merge the new low half */
 142 
 143 #define encrypt_round3(ab, cd, n) \
             /* One encryption round n on all three blocks: run the table lookups, */ \
             /* which also exchange ab##i with the stack slot cd##i, then apply */ \
             /* enc_round_end to the value now held in the ab##i registers. */ \
 144         g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
 145         \
 146         enc_round_end(ab ## 0, RX0, RY0, n); \
 147         enc_round_end(ab ## 1, RX1, RY1, n); \
 148         enc_round_end(ab ## 2, RX2, RY2, n);
 149 
 150 #define decrypt_round3(ba, dc, n) \
             /* One decryption round n on all three blocks. Compared with */ \
             /* encrypt_round3 the table order is rotated and RY/RX are passed in */ \
             /* swapped positions, because the register halves are exchanged */ \
             /* (see the rorq $32 in inpack_dec3). */ \
 151         g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
 152         \
 153         dec_round_end(ba ## 0, RX0, RY0, n); \
 154         dec_round_end(ba ## 1, RX1, RY1, n); \
 155         dec_round_end(ba ## 2, RX2, RY2, n);
 156 
 157 #define encrypt_cycle3(ab, cd, n) \
             /* Cycle n = rounds 2n and 2n+1, in that order. After the two rounds */ \
             /* the registers and stack slots hold the same halves as on entry. */ \
             /* n is parenthesized so that an expression argument evaluates correctly. */ \
 158         encrypt_round3(ab, cd, (n)*2); \
 159         encrypt_round3(ab, cd, ((n)*2)+1);
 160 
 161 #define decrypt_cycle3(ba, dc, n) \
             /* Cycle n for decryption = rounds 2n+1 and 2n, in that (reversed) order. */ \
             /* n is parenthesized so that an expression argument evaluates correctly. */ \
 162         decrypt_round3(ba, dc, ((n)*2)+1); \
 163         decrypt_round3(ba, dc, ((n)*2));
 164 
 165 #define push_cd()       \
             /* Spill RCD0..RCD2 so that they become the CD0..CD2 stack slots: */ \
             /* RCD0 is pushed last and therefore sits at 0(%rsp). */ \
 166         pushq RCD2;     \
 167         pushq RCD1;     \
 168         pushq RCD0;
 169 
 170 #define pop_cd()        \
             /* Reload RCD0..RCD2 from the CD0..CD2 stack slots and release them. */ \
 171         popq RCD0;      \
 172         popq RCD1;      \
 173         popq RCD2;
 174 
 175 #define inpack3(in, n, xy, m) \
             /* Load one 64-bit half of each of three consecutive 16-byte blocks */ \
             /* from in (32-bit word index n within a block) into xy##0..xy##2 and */ \
             /* XOR each with the 64 bits at w[m], w[m+1] in the context. */ \
             /* m is parenthesized, like n, so expression arguments evaluate correctly. */ \
 176         movq 4*(n)(in),                 xy ## 0; \
 177         xorq w+4*(m)(CTX),              xy ## 0; \
 178         \
 179         movq 4*(4+(n))(in),             xy ## 1; \
 180         xorq w+4*(m)(CTX),              xy ## 1; \
 181         \
 182         movq 4*(8+(n))(in),             xy ## 2; \
 183         xorq w+4*(m)(CTX),              xy ## 2;
 184 
 185 #define outunpack3(op, out, n, xy, m) \
             /* XOR xy##0..xy##2 with the 64 bits at w[m], w[m+1] in the context, */ \
             /* then write each to 32-bit word index n of three consecutive 16-byte */ \
             /* blocks at out. op selects the store: mov overwrites the destination, */ \
             /* xor combines the result with what is already there. */ \
             /* m is parenthesized, like n, so expression arguments evaluate correctly. */ \
 186         xorq w+4*(m)(CTX),              xy ## 0; \
 187         op ## q xy ## 0,                4*(n)(out); \
 188         \
 189         xorq w+4*(m)(CTX),              xy ## 1; \
 190         op ## q xy ## 1,                4*(4+(n))(out); \
 191         \
 192         xorq w+4*(m)(CTX),              xy ## 2; \
 193         op ## q xy ## 2,                4*(8+(n))(out);
 194 
 195 #define inpack_enc3() \
             /* Load three blocks from RIO for encryption: bytes 0-7 of each block */ \
             /* go to RABn (XORed with w[0..1]), bytes 8-15 to RCDn (XORed with w[2..3]). */ \
 196         inpack3(RIO, 0, RAB, 0); \
 197         inpack3(RIO, 2, RCD, 2);
 198 
 199 #define outunpack_enc3(op) \
             /* Store three encrypted blocks to RIO using op (mov or xor). The halves */ \
             /* are written crosswise: RABn ^ w[6..7] goes to bytes 8-15 of each block, */ \
             /* RCDn ^ w[4..5] goes to bytes 0-7. */ \
 200         outunpack3(op, RIO, 2, RAB, 6); \
 201         outunpack3(op, RIO, 0, RCD, 4);
 202 
 203 #define inpack_dec3() \
             /* Load three blocks from RIO for decryption: bytes 0-7 ^ w[4..5] into */ \
             /* RABn, bytes 8-15 ^ w[6..7] into RCDn, then exchange the two 32-bit */ \
             /* halves of every register (rotate by 32). */ \
 204         inpack3(RIO, 0, RAB, 4); \
 205         rorq $32,                       RAB0; \
 206         rorq $32,                       RAB1; \
 207         rorq $32,                       RAB2; \
 208         inpack3(RIO, 2, RCD, 6); \
 209         rorq $32,                       RCD0; \
 210         rorq $32,                       RCD1; \
 211         rorq $32,                       RCD2;
 212 
 213 #define outunpack_dec3() \
             /* Store three decrypted blocks to RIO: exchange the 32-bit halves of */ \
             /* each register back, then write RCDn ^ w[0..1] to bytes 0-7 and */ \
             /* RABn ^ w[2..3] to bytes 8-15 of each block. Always overwrites (mov). */ \
 214         rorq $32,                       RCD0; \
 215         rorq $32,                       RCD1; \
 216         rorq $32,                       RCD2; \
 217         outunpack3(mov, RIO, 0, RCD, 0); \
 218         rorq $32,                       RAB0; \
 219         rorq $32,                       RAB1; \
 220         rorq $32,                       RAB2; \
 221         outunpack3(mov, RIO, 2, RAB, 2);
 222 
 223 ENTRY(__twofish_enc_blk_3way)
 224         /* input:
 225          *      %rdi: ctx, CTX
 226          *      %rsi: dst
 227          *      %rdx: src, RIO
 228          *      %rcx: bool, if true: xor output
 229          */
             /*
              * Encrypts three consecutive 16-byte blocks read from src and either
              * stores them to dst or XORs them into dst, depending on the bool.
              * %rsi, %rdx and %rcx are reused as RT1, RT0 and RAB2 during the
              * rounds, so dst and the bool are kept on the stack and src is fully
              * consumed by inpack_enc3() before the first round.
              */
 230         pushq %r13; /* %r13, %r12, %rbx serve as RY2, RY1, RAB1 below */
 231         pushq %r12;
 232         pushq %rbx;
 233 
 234         pushq %rcx; /* bool xor */
 235         pushq %rsi; /* dst */
 236 
 237         inpack_enc3();
 238 
 239         push_cd(); /* RCD0..2 move to the CD0..2 stack slots; %r8-%r10 become RX0..2 */
 240         encrypt_cycle3(RAB, CD, 0);
 241         encrypt_cycle3(RAB, CD, 1);
 242         encrypt_cycle3(RAB, CD, 2);
 243         encrypt_cycle3(RAB, CD, 3);
 244         encrypt_cycle3(RAB, CD, 4);
 245         encrypt_cycle3(RAB, CD, 5);
 246         encrypt_cycle3(RAB, CD, 6);
 247         encrypt_cycle3(RAB, CD, 7);
 248         pop_cd();
 249 
 250         popq RIO; /* dst */
 251         popq RT1; /* bool xor */
 252 
 253         testb RT1bl, RT1bl; /* only the low byte of the flag is examined */
 254         jnz .L__enc_xor3;
 255 
 256         outunpack_enc3(mov); /* flag clear: overwrite dst */
 257 
 258         popq %rbx;
 259         popq %r12;
 260         popq %r13;
 261         ret;
 262 
 263 .L__enc_xor3:
 264         outunpack_enc3(xor); /* flag set: XOR the result into dst */
 265 
 266         popq %rbx;
 267         popq %r12;
 268         popq %r13;
 269         ret;
 270 ENDPROC(__twofish_enc_blk_3way)
 271 
 272 ENTRY(twofish_dec_blk_3way)
 273         /* input:
 274          *      %rdi: ctx, CTX
 275          *      %rsi: dst
 276          *      %rdx: src, RIO
 277          */
             /*
              * Decrypts three consecutive 16-byte blocks read from src and stores
              * them to dst. %rsi and %rdx are reused as RT1 and RT0 during the
              * rounds, so dst is kept on the stack and src is fully consumed by
              * inpack_dec3() before the first round.
              */
 278         pushq %r13; /* %r13, %r12, %rbx serve as RY2, RY1, RAB1 below */
 279         pushq %r12;
 280         pushq %rbx;
 281 
 282         pushq %rsi; /* dst */
 283 
 284         inpack_dec3();
 285 
 286         push_cd(); /* RCD0..2 move to the CD0..2 stack slots; %r8-%r10 become RX0..2 */
 287         decrypt_cycle3(RAB, CD, 7); /* cycles run in the reverse order of encryption */
 288         decrypt_cycle3(RAB, CD, 6);
 289         decrypt_cycle3(RAB, CD, 5);
 290         decrypt_cycle3(RAB, CD, 4);
 291         decrypt_cycle3(RAB, CD, 3);
 292         decrypt_cycle3(RAB, CD, 2);
 293         decrypt_cycle3(RAB, CD, 1);
 294         decrypt_cycle3(RAB, CD, 0);
 295         pop_cd();
 296 
 297         popq RIO; /* dst */
 298 
 299         outunpack_dec3();
 300 
 301         popq %rbx;
 302         popq %r12;
 303         popq %r13;
 304         ret;
 305 ENDPROC(twofish_dec_blk_3way)

/* [<][>][^][v][top][bottom][index][help] */