arch/arm/crypto/sha1-armv7-neon.S

/* SPDX-License-Identifier: GPL-2.0-or-later */
/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
 *
 * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

.syntax unified
.fpu neon

.text


/* Context structure */

#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16
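
/*
 * These offsets index five consecutive 32-bit chaining words at the start
 * of the context, i.e. roughly (a sketch; the glue code is expected to
 * pass a pointer whose first member is the digest state array):
 *
 *      struct {
 *              u32 h0, h1, h2, h3, h4;
 *      };
 *
 * Only these 20 bytes are accessed through RSTATE below.
 */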


/* Constants */

#define K1  0x5A827999
#define K2  0x6ED9EBA1
#define K3  0x8F1BBCDC
#define K4  0xCA62C1D6
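
/*
 * These are the four standard SHA-1 round constants from FIPS 180.  Each
 * one is replicated across all four 32-bit lanes of a NEON quadword below,
 * so a single vadd.u32 can fold the constant into four message words at
 * once when building the W+K schedule.
 */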
.align 4
.LK_VEC:
.LK1:   .long K1, K1, K1, K1
.LK2:   .long K2, K2, K2, K2
.LK3:   .long K3, K3, K3, K3
.LK4:   .long K4, K4, K4, K4


/* Register macros */

#define RSTATE r0
#define RDATA r1
#define RNBLKS r2
#define ROLDSTACK r3
#define RWK lr

#define _a r4
#define _b r5
#define _c r6
#define _d r7
#define _e r8

#define RT0 r9
#define RT1 r10
#define RT2 r11
#define RT3 r12

#define W0 q0
#define W1 q7
#define W2 q2
#define W3 q3
#define W4 q4
#define W5 q6
#define W6 q5
#define W7 q1

#define tmp0 q8
#define tmp1 q9
#define tmp2 q10
#define tmp3 q11

#define qK1 q12
#define qK2 q13
#define qK3 q14
#define qK4 q15

#ifdef CONFIG_CPU_BIG_ENDIAN
#define ARM_LE(code...)
#else
#define ARM_LE(code...)         code
#endif
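
/*
 * ARM_LE() emits its argument only on little-endian builds.  SHA-1 message
 * words are defined big-endian, so the loads below need a vrev32.8 byte
 * swap on little-endian CPUs, whereas big-endian kernels can use the words
 * exactly as loaded.
 */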

/* Round function macros. */

#define WK_offs(i) (((i) & 15) * 4)
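
/*
 * WK_offs(i) addresses a 16-entry ring buffer of precomputed W[i]+K words
 * kept on the re-aligned stack (slot i mod 16, 4 bytes per slot).  The
 * precalc macros refill slots ahead of the round macros that consume them,
 * so only 16 rounds' worth of schedule is ever live at once.
 */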

#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        ldr RT3, [sp, WK_offs(i)]; \
                pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        bic RT0, d, b; \
        add e, e, a, ror #(32 - 5); \
        and RT1, c, b; \
                pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add RT0, RT0, RT3; \
        add e, e, RT1; \
        ror b, #(32 - 30); \
                pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT0;
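
/*
 * In C terms, _R_F1 computes the following (rol32() being the usual
 * 32-bit rotate; the round constant is already folded into WK[] by the
 * precalc macros):
 *
 *      e += rol32(a, 5) + ((b & c) | (~b & d)) + WK[i & 15];
 *      b  = rol32(b, 30);
 *
 * The choose function is split into bic (d & ~b) and and (c & b) so the
 * scalar ALU work can interleave with the NEON precalc hooks.
 */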

#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        ldr RT3, [sp, WK_offs(i)]; \
                pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        eor RT0, d, b; \
        add e, e, a, ror #(32 - 5); \
        eor RT0, RT0, c; \
                pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT3; \
        ror b, #(32 - 30); \
                pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT0; \

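
/*
 * _R_F2 is the parity round; in C terms:
 *
 *      e += rol32(a, 5) + (b ^ c ^ d) + WK[i & 15];
 *      b  = rol32(b, 30);
 */
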
#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        ldr RT3, [sp, WK_offs(i)]; \
                pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        eor RT0, b, c; \
        and RT1, b, c; \
        add e, e, a, ror #(32 - 5); \
                pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        and RT0, RT0, d; \
        add RT1, RT1, RT3; \
        add e, e, RT0; \
        ror b, #(32 - 30); \
                pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT1;
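
/*
 * _R_F3 is the majority round.  It relies on the identity
 *
 *      maj(b, c, d) = (b & c) | (b & d) | (c & d)
 *                   = (b & c) + ((b ^ c) & d)
 *
 * (the two terms never share a set bit, so OR equals ADD), letting the
 * halves accumulate into e through independent adds:
 *
 *      e += rol32(a, 5) + (b & c) + ((b ^ c) & d) + WK[i & 15];
 *      b  = rol32(b, 30);
 */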

#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
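
/*
 * Rounds 60-79 use the same parity function as rounds 20-39; only the
 * round constant differs, and that is already baked into WK[], so _R_F4
 * simply forwards to _R_F2.
 */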

#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
           W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

#define R(a,b,c,d,e,f,i) \
        _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

#define dummy(...)


/* Input expansion macros. */

/********* Precalc macros for rounds 0-15 *************************************/

#define W_PRECALC_00_15() \
        add       RWK, sp, #(WK_offs(0));                       \
        \
        vld1.32   {W0, W7}, [RDATA]!;                           \
 ARM_LE(vrev32.8  W0, W0;       )       /* big => little */     \
        vld1.32   {W6, W5}, [RDATA]!;                           \
        vadd.u32  tmp0, W0, curK;                               \
 ARM_LE(vrev32.8  W7, W7;       )       /* big => little */     \
 ARM_LE(vrev32.8  W6, W6;       )       /* big => little */     \
        vadd.u32  tmp1, W7, curK;                               \
 ARM_LE(vrev32.8  W5, W5;       )       /* big => little */     \
        vadd.u32  tmp2, W6, curK;                               \
        vst1.32   {tmp0, tmp1}, [RWK]!;                         \
        vadd.u32  tmp3, W5, curK;                               \
        vst1.32   {tmp2, tmp3}, [RWK];                          \

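
/*
 * In C terms, W_PRECALC_00_15() and the split-out WPRECALC_00_15_* steps
 * below amount to (a sketch; data points at the 64-byte input block):
 *
 *      for (i = 0; i < 16; i++)
 *              WK[i] = be32_to_cpu(data[i]) + K1;
 *
 * handled four words per quadword, with the byte swap elided on big-endian
 * builds.  The split-out single-instruction variants do the same work for
 * the next block, interleaved into the F4 rounds of the current one.
 */
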
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vld1.32   {W0, W7}, [RDATA]!;                           \

#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        add       RWK, sp, #(WK_offs(0));                       \

#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W0, W0;       )       /* big => little */     \

#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vld1.32   {W6, W5}, [RDATA]!;                           \

#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp0, W0, curK;                               \

#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W7, W7;       )       /* big => little */     \

#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W6, W6;       )       /* big => little */     \

#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp1, W7, curK;                               \

#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W5, W5;       )       /* big => little */     \

#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp2, W6, curK;                               \

#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp0, tmp1}, [RWK]!;                         \

#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp3, W5, curK;                               \

#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp2, tmp3}, [RWK];                          \


/********* Precalc macros for rounds 16-31 ************************************/

#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp0, tmp0;                   \
        vext.8    W, W_m16, W_m12, #8;          \

#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        add       RWK, sp, #(WK_offs(i));       \
        vext.8    tmp0, W_m04, tmp0, #4;        \

#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp0, tmp0, W_m16;            \
        veor.32   W, W, W_m08;                  \

#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp1, tmp1;                   \
        veor      W, W, tmp0;                   \

#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshl.u32  tmp0, W, #1;                  \

#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vext.8    tmp1, tmp1, W, #(16-12);      \
        vshr.u32  W, W, #31;                    \

#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vorr      tmp0, tmp0, W;                \
        vshr.u32  W, tmp1, #30;                 \

#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshl.u32  tmp1, tmp1, #2;               \

#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp0, tmp0, W;                \

#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      W, tmp0, tmp1;                \

#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp0, W, curK;                \

#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp0}, [RWK];
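
/*
 * Rounds 16-31 use the standard message expansion
 *
 *      W[i] = rol32(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
 *
 * computed four lanes at a time.  Lane 3 would need the W[i] produced in
 * lane 0 of the same vector, so that term is zeroed first and patched in
 * afterwards through the tmp1 path: rol32() distributes over XOR, which
 * turns the missing term into an extra rotate-by-2 of lane 0's
 * pre-rotation value.
 */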


/********* Precalc macros for rounds 32-79 ************************************/

#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor W, W_m28; \

#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vext.8 tmp0, W_m08, W_m04, #8; \

#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor W, W_m16; \

#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor W, tmp0; \

#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        add RWK, sp, #(WK_offs(i&~3)); \

#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshl.u32 tmp1, W, #2; \

#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshr.u32 tmp0, W, #30; \

#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vorr W, tmp0, tmp1; \

#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32 tmp0, W, curK; \

#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32 {tmp0}, [RWK];
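
/*
 * Rounds 32-79 use the equivalent recurrence (obtained by unrolling the
 * W[i-3] form once, which is why it only applies from i = 32 on):
 *
 *      W[i] = rol32(W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32], 2)
 *
 * None of the inputs fall within the vector being produced, so all four
 * lanes can be computed independently; vext builds the W[i-6] term from
 * the W_m08/W_m04 pair.
 */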


/*
 * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
 *
 * unsigned int
 * sha1_transform_neon (void *ctx, const unsigned char *data,
 *                      unsigned int nblks)
 */
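
/*
 * A sketch of the expected call pattern from the C glue code (hypothetical
 * variable names; in-kernel NEON use must be bracketed by
 * kernel_neon_begin()/kernel_neon_end()):
 *
 *      kernel_neon_begin();
 *      sha1_transform_neon(sctx->state, data, nblocks);
 *      kernel_neon_end();
 */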
.align 3
ENTRY(sha1_transform_neon)
  /* input:
   *    r0: ctx, CTX
   *    r1: data (64*nblks bytes)
   *    r2: nblks
   */

  cmp RNBLKS, #0;
  beq .Ldo_nothing;

  push {r4-r12, lr};
  /*vpush {q4-q7};*/

  adr RT3, .LK_VEC;

  mov ROLDSTACK, sp;

  /* Align stack. */
  sub RT0, sp, #(16*4);
  and RT0, #(~(16-1));
  mov sp, RT0;
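
  /*
   * The 16*4 = 64 bytes reserved here hold the WK_offs() ring buffer of
   * W+K words; rounding sp down to a 16-byte boundary keeps the NEON
   * stores to that buffer aligned.
   */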

  vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */

  /* Get the values of the chaining variables. */
  ldm RSTATE, {_a-_e};

  vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */

#undef curK
#define curK qK1
  /* Precalc 0-15. */
  W_PRECALC_00_15();

.Loop:
  /* Transform 0-15 + Precalc 16-31. */
  _R( _a, _b, _c, _d, _e, F1,  0,
      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
      W4, W5, W6, W7, W0, _, _, _ );
  _R( _e, _a, _b, _c, _d, F1,  1,
      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
      W4, W5, W6, W7, W0, _, _, _ );
  _R( _d, _e, _a, _b, _c, F1,  2,
      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
      W4, W5, W6, W7, W0, _, _, _ );
  _R( _c, _d, _e, _a, _b, F1,  3,
      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
      W4, W5, W6, W7, W0, _, _, _ );

#undef curK
#define curK qK2
  _R( _b, _c, _d, _e, _a, F1,  4,
      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
      W3, W4, W5, W6, W7, _, _, _ );
  _R( _a, _b, _c, _d, _e, F1,  5,
      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
      W3, W4, W5, W6, W7, _, _, _ );
  _R( _e, _a, _b, _c, _d, F1,  6,
      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
      W3, W4, W5, W6, W7, _, _, _ );
  _R( _d, _e, _a, _b, _c, F1,  7,
      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
      W3, W4, W5, W6, W7, _, _, _ );

  _R( _c, _d, _e, _a, _b, F1,  8,
      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
      W2, W3, W4, W5, W6, _, _, _ );
  _R( _b, _c, _d, _e, _a, F1,  9,
      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
      W2, W3, W4, W5, W6, _, _, _ );
  _R( _a, _b, _c, _d, _e, F1, 10,
      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
      W2, W3, W4, W5, W6, _, _, _ );
  _R( _e, _a, _b, _c, _d, F1, 11,
      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
      W2, W3, W4, W5, W6, _, _, _ );

  _R( _d, _e, _a, _b, _c, F1, 12,
      WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
      W1, W2, W3, W4, W5, _, _, _ );
  _R( _c, _d, _e, _a, _b, F1, 13,
      WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
      W1, W2, W3, W4, W5, _, _, _ );
  _R( _b, _c, _d, _e, _a, F1, 14,
      WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
      W1, W2, W3, W4, W5, _, _, _ );
  _R( _a, _b, _c, _d, _e, F1, 15,
      WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
      W1, W2, W3, W4, W5, _, _, _ );

  /* Transform 16-63 + Precalc 32-79. */
  _R( _e, _a, _b, _c, _d, F1, 16,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
      W0, W1, W2, W3, W4, W5, W6, W7);
  _R( _d, _e, _a, _b, _c, F1, 17,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
      W0, W1, W2, W3, W4, W5, W6, W7);
  _R( _c, _d, _e, _a, _b, F1, 18,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 32,
      W0, W1, W2, W3, W4, W5, W6, W7);
  _R( _b, _c, _d, _e, _a, F1, 19,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 32,
      W0, W1, W2, W3, W4, W5, W6, W7);

  _R( _a, _b, _c, _d, _e, F2, 20,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
      W7, W0, W1, W2, W3, W4, W5, W6);
  _R( _e, _a, _b, _c, _d, F2, 21,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
      W7, W0, W1, W2, W3, W4, W5, W6);
  _R( _d, _e, _a, _b, _c, F2, 22,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 36,
      W7, W0, W1, W2, W3, W4, W5, W6);
  _R( _c, _d, _e, _a, _b, F2, 23,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 36,
      W7, W0, W1, W2, W3, W4, W5, W6);

#undef curK
#define curK qK3
  _R( _b, _c, _d, _e, _a, F2, 24,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
      W6, W7, W0, W1, W2, W3, W4, W5);
  _R( _a, _b, _c, _d, _e, F2, 25,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
      W6, W7, W0, W1, W2, W3, W4, W5);
  _R( _e, _a, _b, _c, _d, F2, 26,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 40,
      W6, W7, W0, W1, W2, W3, W4, W5);
  _R( _d, _e, _a, _b, _c, F2, 27,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 40,
      W6, W7, W0, W1, W2, W3, W4, W5);

  _R( _c, _d, _e, _a, _b, F2, 28,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
      W5, W6, W7, W0, W1, W2, W3, W4);
  _R( _b, _c, _d, _e, _a, F2, 29,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
      W5, W6, W7, W0, W1, W2, W3, W4);
  _R( _a, _b, _c, _d, _e, F2, 30,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 44,
      W5, W6, W7, W0, W1, W2, W3, W4);
  _R( _e, _a, _b, _c, _d, F2, 31,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 44,
      W5, W6, W7, W0, W1, W2, W3, W4);

  _R( _d, _e, _a, _b, _c, F2, 32,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
      W4, W5, W6, W7, W0, W1, W2, W3);
  _R( _c, _d, _e, _a, _b, F2, 33,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
      W4, W5, W6, W7, W0, W1, W2, W3);
  _R( _b, _c, _d, _e, _a, F2, 34,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 48,
      W4, W5, W6, W7, W0, W1, W2, W3);
  _R( _a, _b, _c, _d, _e, F2, 35,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 48,
      W4, W5, W6, W7, W0, W1, W2, W3);

  _R( _e, _a, _b, _c, _d, F2, 36,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
      W3, W4, W5, W6, W7, W0, W1, W2);
  _R( _d, _e, _a, _b, _c, F2, 37,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
      W3, W4, W5, W6, W7, W0, W1, W2);
  _R( _c, _d, _e, _a, _b, F2, 38,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 52,
      W3, W4, W5, W6, W7, W0, W1, W2);
  _R( _b, _c, _d, _e, _a, F2, 39,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 52,
      W3, W4, W5, W6, W7, W0, W1, W2);

  _R( _a, _b, _c, _d, _e, F3, 40,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
      W2, W3, W4, W5, W6, W7, W0, W1);
  _R( _e, _a, _b, _c, _d, F3, 41,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
      W2, W3, W4, W5, W6, W7, W0, W1);
  _R( _d, _e, _a, _b, _c, F3, 42,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 56,
      W2, W3, W4, W5, W6, W7, W0, W1);
  _R( _c, _d, _e, _a, _b, F3, 43,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 56,
      W2, W3, W4, W5, W6, W7, W0, W1);

#undef curK
#define curK qK4
  _R( _b, _c, _d, _e, _a, F3, 44,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
      W1, W2, W3, W4, W5, W6, W7, W0);
  _R( _a, _b, _c, _d, _e, F3, 45,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
      W1, W2, W3, W4, W5, W6, W7, W0);
  _R( _e, _a, _b, _c, _d, F3, 46,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 60,
      W1, W2, W3, W4, W5, W6, W7, W0);
  _R( _d, _e, _a, _b, _c, F3, 47,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 60,
      W1, W2, W3, W4, W5, W6, W7, W0);

  _R( _c, _d, _e, _a, _b, F3, 48,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
      W0, W1, W2, W3, W4, W5, W6, W7);
  _R( _b, _c, _d, _e, _a, F3, 49,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
      W0, W1, W2, W3, W4, W5, W6, W7);
  _R( _a, _b, _c, _d, _e, F3, 50,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 64,
      W0, W1, W2, W3, W4, W5, W6, W7);
  _R( _e, _a, _b, _c, _d, F3, 51,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 64,
      W0, W1, W2, W3, W4, W5, W6, W7);

  _R( _d, _e, _a, _b, _c, F3, 52,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
      W7, W0, W1, W2, W3, W4, W5, W6);
  _R( _c, _d, _e, _a, _b, F3, 53,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
      W7, W0, W1, W2, W3, W4, W5, W6);
  _R( _b, _c, _d, _e, _a, F3, 54,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 68,
      W7, W0, W1, W2, W3, W4, W5, W6);
  _R( _a, _b, _c, _d, _e, F3, 55,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 68,
      W7, W0, W1, W2, W3, W4, W5, W6);

  _R( _e, _a, _b, _c, _d, F3, 56,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
      W6, W7, W0, W1, W2, W3, W4, W5);
  _R( _d, _e, _a, _b, _c, F3, 57,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
      W6, W7, W0, W1, W2, W3, W4, W5);
  _R( _c, _d, _e, _a, _b, F3, 58,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 72,
      W6, W7, W0, W1, W2, W3, W4, W5);
  _R( _b, _c, _d, _e, _a, F3, 59,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 72,
      W6, W7, W0, W1, W2, W3, W4, W5);

  subs RNBLKS, #1;

  _R( _a, _b, _c, _d, _e, F4, 60,
      WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
      W5, W6, W7, W0, W1, W2, W3, W4);
  _R( _e, _a, _b, _c, _d, F4, 61,
      WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
      W5, W6, W7, W0, W1, W2, W3, W4);
  _R( _d, _e, _a, _b, _c, F4, 62,
      WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 76,
      W5, W6, W7, W0, W1, W2, W3, W4);
  _R( _c, _d, _e, _a, _b, F4, 63,
      WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 76,
      W5, W6, W7, W0, W1, W2, W3, W4);

  beq .Lend;

  /* Transform 64-79 + Precalc 0-15 of next block. */
#undef curK
#define curK qK1
  _R( _b, _c, _d, _e, _a, F4, 64,
      WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _a, _b, _c, _d, _e, F4, 65,
      WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _e, _a, _b, _c, _d, F4, 66,
      WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _d, _e, _a, _b, _c, F4, 67,
      WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );

  _R( _c, _d, _e, _a, _b, F4, 68,
      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _b, _c, _d, _e, _a, F4, 69,
      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _a, _b, _c, _d, _e, F4, 70,
      WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _e, _a, _b, _c, _d, F4, 71,
      WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );

  _R( _d, _e, _a, _b, _c, F4, 72,
      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _c, _d, _e, _a, _b, F4, 73,
      dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _b, _c, _d, _e, _a, F4, 74,
      WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _a, _b, _c, _d, _e, F4, 75,
      WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );

  _R( _e, _a, _b, _c, _d, F4, 76,
      WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _d, _e, _a, _b, _c, F4, 77,
      WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _c, _d, _e, _a, _b, F4, 78,
      WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
  _R( _b, _c, _d, _e, _a, F4, 79,
      WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );

  /* Update the chaining variables. */
  ldm RSTATE, {RT0-RT3};
  add _a, RT0;
  ldr RT0, [RSTATE, #state_h4];
  add _b, RT1;
  add _c, RT2;
  add _d, RT3;
  add _e, RT0;
  stm RSTATE, {_a-_e};
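
  /*
   * Davies-Meyer feed-forward; in C terms:
   *
   *      state[0] += a; state[1] += b; state[2] += c;
   *      state[3] += d; state[4] += e;
   *
   * h4 is loaded separately because RT0-RT3 only cover h0-h3.
   */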

  b .Loop;

.Lend:
  /* Transform 64-79 */
  R( _b, _c, _d, _e, _a, F4, 64 );
  R( _a, _b, _c, _d, _e, F4, 65 );
  R( _e, _a, _b, _c, _d, F4, 66 );
  R( _d, _e, _a, _b, _c, F4, 67 );
  R( _c, _d, _e, _a, _b, F4, 68 );
  R( _b, _c, _d, _e, _a, F4, 69 );
  R( _a, _b, _c, _d, _e, F4, 70 );
  R( _e, _a, _b, _c, _d, F4, 71 );
  R( _d, _e, _a, _b, _c, F4, 72 );
  R( _c, _d, _e, _a, _b, F4, 73 );
  R( _b, _c, _d, _e, _a, F4, 74 );
  R( _a, _b, _c, _d, _e, F4, 75 );
  R( _e, _a, _b, _c, _d, F4, 76 );
  R( _d, _e, _a, _b, _c, F4, 77 );
  R( _c, _d, _e, _a, _b, F4, 78 );
  R( _b, _c, _d, _e, _a, F4, 79 );

  mov sp, ROLDSTACK;

  /* Update the chaining variables. */
  ldm RSTATE, {RT0-RT3};
  add _a, RT0;
  ldr RT0, [RSTATE, #state_h4];
  add _b, RT1;
  add _c, RT2;
  add _d, RT3;
  /*vpop {q4-q7};*/
  add _e, RT0;
  stm RSTATE, {_a-_e};

  pop {r4-r12, pc};

.Ldo_nothing:
  bx lr
ENDPROC(sha1_transform_neon)
