root/arch/arm64/crypto/nh-neon-core.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
   4  *
   5  * Copyright 2018 Google LLC
   6  *
   7  * Author: Eric Biggers <ebiggers@google.com>
   8  */
   9 
  10 #include <linux/linkage.h>
  11 
  12         KEY             .req    x0
  13         MESSAGE         .req    x1
  14         MESSAGE_LEN     .req    x2
  15         HASH            .req    x3
  16 
  17         PASS0_SUMS      .req    v0
  18         PASS1_SUMS      .req    v1
  19         PASS2_SUMS      .req    v2
  20         PASS3_SUMS      .req    v3
  21         K0              .req    v4
  22         K1              .req    v5
  23         K2              .req    v6
  24         K3              .req    v7
  25         T0              .req    v8
  26         T1              .req    v9
  27         T2              .req    v10
  28         T3              .req    v11
  29         T4              .req    v12
  30         T5              .req    v13
  31         T6              .req    v14
  32         T7              .req    v15
  33 
  34 .macro _nh_stride       k0, k1, k2, k3
  35 
  36         // Load next message stride
  37         ld1             {T3.16b}, [MESSAGE], #16
  38 
  39         // Load next key stride
  40         ld1             {\k3\().4s}, [KEY], #16
  41 
  42         // Add message words to key words
  43         add             T0.4s, T3.4s, \k0\().4s
  44         add             T1.4s, T3.4s, \k1\().4s
  45         add             T2.4s, T3.4s, \k2\().4s
  46         add             T3.4s, T3.4s, \k3\().4s
  47 
  48         // Multiply 32x32 => 64 and accumulate
  49         mov             T4.d[0], T0.d[1]
  50         mov             T5.d[0], T1.d[1]
  51         mov             T6.d[0], T2.d[1]
  52         mov             T7.d[0], T3.d[1]
  53         umlal           PASS0_SUMS.2d, T0.2s, T4.2s
  54         umlal           PASS1_SUMS.2d, T1.2s, T5.2s
  55         umlal           PASS2_SUMS.2d, T2.2s, T6.2s
  56         umlal           PASS3_SUMS.2d, T3.2s, T7.2s
  57 .endm
  58 
  59 /*
  60  * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
  61  *              u8 hash[NH_HASH_BYTES])
  62  *
  63  * It's guaranteed that message_len % 16 == 0.
  64  */
  65 ENTRY(nh_neon)
  66 
  67         ld1             {K0.4s,K1.4s}, [KEY], #32
  68           movi          PASS0_SUMS.2d, #0
  69           movi          PASS1_SUMS.2d, #0
  70         ld1             {K2.4s}, [KEY], #16
  71           movi          PASS2_SUMS.2d, #0
  72           movi          PASS3_SUMS.2d, #0
  73 
  74         subs            MESSAGE_LEN, MESSAGE_LEN, #64
  75         blt             .Lloop4_done
  76 .Lloop4:
  77         _nh_stride      K0, K1, K2, K3
  78         _nh_stride      K1, K2, K3, K0
  79         _nh_stride      K2, K3, K0, K1
  80         _nh_stride      K3, K0, K1, K2
  81         subs            MESSAGE_LEN, MESSAGE_LEN, #64
  82         bge             .Lloop4
  83 
  84 .Lloop4_done:
  85         ands            MESSAGE_LEN, MESSAGE_LEN, #63
  86         beq             .Ldone
  87         _nh_stride      K0, K1, K2, K3
  88 
  89         subs            MESSAGE_LEN, MESSAGE_LEN, #16
  90         beq             .Ldone
  91         _nh_stride      K1, K2, K3, K0
  92 
  93         subs            MESSAGE_LEN, MESSAGE_LEN, #16
  94         beq             .Ldone
  95         _nh_stride      K2, K3, K0, K1
  96 
  97 .Ldone:
  98         // Sum the accumulators for each pass, then store the sums to 'hash'
  99         addp            T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
 100         addp            T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
 101         st1             {T0.16b,T1.16b}, [HASH]
 102         ret
 103 ENDPROC(nh_neon)

/* [<][>][^][v][top][bottom][index][help] */