root/arch/x86/crypto/nh-sse2-x86_64.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
   4  *
   5  * Copyright 2018 Google LLC
   6  *
   7  * Author: Eric Biggers <ebiggers@google.com>
   8  */
   9 
  10 #include <linux/linkage.h>
  11 
  12 #define         PASS0_SUMS      %xmm0   // pass 0 accumulator (two 64-bit lanes)
  13 #define         PASS1_SUMS      %xmm1   // pass 1 accumulator
  14 #define         PASS2_SUMS      %xmm2   // pass 2 accumulator
  15 #define         PASS3_SUMS      %xmm3   // pass 3 accumulator
        // K0-K3: the four key strides currently held in registers. Their
        // roles rotate from one _nh_stride invocation to the next.
  16 #define         K0              %xmm4
  17 #define         K1              %xmm5
  18 #define         K2              %xmm6
  19 #define         K3              %xmm7
        // T0-T7: scratch. _nh_stride uses T1-T7; the final reduction in
        // nh_sse2 uses T0 and T1.
  20 #define         T0              %xmm8
  21 #define         T1              %xmm9
  22 #define         T2              %xmm10
  23 #define         T3              %xmm11
  24 #define         T4              %xmm12
  25 #define         T5              %xmm13
  26 #define         T6              %xmm14
  27 #define         T7              %xmm15
        // The four arguments of nh_sse2(key, message, message_len, hash),
        // in the first four integer argument registers.
  28 #define         KEY             %rdi
  29 #define         MESSAGE         %rsi
  30 #define         MESSAGE_LEN     %rdx
  31 #define         HASH            %rcx
  32 
  33 .macro _nh_stride       k0, k1, k2, k3, offset
  34 
        /*
         * Process one 16-byte message stride for all four passes.
         *
         * \k0..\k3 name the XMM registers holding the key strides for
         * passes 0..3.  On entry \k0, \k1 and \k2 must already contain key
         * data; \k3 is loaded here from \offset(KEY).  \offset is also the
         * byte offset of the message stride relative to MESSAGE.
         *
         * Adds the results into PASS0_SUMS..PASS3_SUMS.  Overwrites \k0
         * (left holding pass 0's products), \k3 and T1-T7.  KEY and MESSAGE
         * are not advanced; the caller does that.
         */
  35         // Load next message stride
  36         movdqu          \offset(MESSAGE), T1
  37 
  38         // Load next key stride
  39         movdqu          \offset(KEY), \k3
  40 
  41         // Add message words to key words
        // (packed 32-bit adds: \k0, T1, T2, T3 = message + key for pass 0..3)
  42         movdqa          T1, T2
  43         movdqa          T1, T3
  44         paddd           T1, \k0    // reuse k0 to avoid a move
  45         paddd           \k1, T1
  46         paddd           \k2, T2
  47         paddd           \k3, T3
  48 
  49         // Multiply 32x32 => 64 and accumulate
        // pmuludq multiplies dwords 0 and 2 of its operands.  pshufd $0x10
        // places source dwords 0,1 there and pshufd $0x32 places source
        // dwords 2,3 there, so each product register ends up holding
        // (w0 * w2, w1 * w3) as two 64-bit values, where w0..w3 are the four
        // 32-bit sums computed above.
  50         pshufd          $0x10, \k0, T4
  51         pshufd          $0x32, \k0, \k0
  52         pshufd          $0x10, T1, T5
  53         pshufd          $0x32, T1, T1
  54         pshufd          $0x10, T2, T6
  55         pshufd          $0x32, T2, T2
  56         pshufd          $0x10, T3, T7
  57         pshufd          $0x32, T3, T3
  58         pmuludq         T4, \k0
  59         pmuludq         T5, T1
  60         pmuludq         T6, T2
  61         pmuludq         T7, T3
        // Accumulate both 64-bit products of each pass into its running sums
  62         paddq           \k0, PASS0_SUMS
  63         paddq           T1, PASS1_SUMS
  64         paddq           T2, PASS2_SUMS
  65         paddq           T3, PASS3_SUMS
  66 .endm
  67 
  68 /*
  69  * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
  70  *              u8 hash[NH_HASH_BYTES])
  71  *
  72  * It's guaranteed that message_len % 16 == 0.
      *
      * Runs four passes over the message in parallel and stores one 64-bit
      * sum per pass to 'hash' (32 bytes written, pass 0 first).  For the
      * message stride at byte offset 16*i, pass n uses the key stride at byte
      * offset 16*(i+n), so 0x30 + message_len bytes of 'key' are read.
      *
      * With message_len == 0 no stride is processed and 32 zero bytes are
      * stored.
      *
      * Modifies KEY and MESSAGE_LEN (and MESSAGE when the main loop runs),
      * the flags, and the XMM registers named by the defines above.
  73  */
  74 ENTRY(nh_sse2)
  75 
        // Preload the key strides for passes 0..2 of the first message
        // stride.  KEY is then moved 0x30 ahead, so that \offset(KEY) inside
        // _nh_stride addresses the key stride needed by pass 3.
  76         movdqu          0x00(KEY), K0
  77         movdqu          0x10(KEY), K1
  78         movdqu          0x20(KEY), K2
  79         add             $0x30, KEY
        // Zero the four accumulators
  80         pxor            PASS0_SUMS, PASS0_SUMS
  81         pxor            PASS1_SUMS, PASS1_SUMS
  82         pxor            PASS2_SUMS, PASS2_SUMS
  83         pxor            PASS3_SUMS, PASS3_SUMS
  84 
        // Main loop: 4 strides (0x40 bytes) per iteration.  MESSAGE_LEN is
        // kept biased by -0x40 so the sub that updates it also yields the
        // loop condition.
        // NOTE(review): jl/jge are signed comparisons on a size_t;
        // presumably message_len is always far below 2^63 - confirm
        // against callers.
  85         sub             $0x40, MESSAGE_LEN
  86         jl              .Lloop4_done
  87 .Lloop4:
        // Each invocation consumes the key stride in its first register
        // argument and refills its fourth one, so the arguments rotate by
        // one position; after four invocations K0..K3 are back in their
        // original roles.
  88         _nh_stride      K0, K1, K2, K3, 0x00
  89         _nh_stride      K1, K2, K3, K0, 0x10
  90         _nh_stride      K2, K3, K0, K1, 0x20
  91         _nh_stride      K3, K0, K1, K2, 0x30
  92         add             $0x40, KEY
  93         add             $0x40, MESSAGE
  94         sub             $0x40, MESSAGE_LEN
  95         jge             .Lloop4
  96 
  97 .Lloop4_done:
        // MESSAGE_LEN is now (bytes remaining - 0x40).  Its low 6 bits equal
        // the bytes remaining, which is 0x00, 0x10, 0x20 or 0x30 given the
        // message_len % 16 == 0 guarantee.  Handle up to three tail strides,
        // continuing the same register rotation as the loop.
  98         and             $0x3f, MESSAGE_LEN
  99         jz              .Ldone
 100         _nh_stride      K0, K1, K2, K3, 0x00
 101 
 102         sub             $0x10, MESSAGE_LEN
 103         jz              .Ldone
 104         _nh_stride      K1, K2, K3, K0, 0x10
 105 
 106         sub             $0x10, MESSAGE_LEN
 107         jz              .Ldone
 108         _nh_stride      K2, K3, K0, K1, 0x20
 109 
 110 .Ldone:
 111         // Sum the accumulators for each pass, then store the sums to 'hash'
        // (SUM_A / SUM_B below are the low / high 64-bit halves of an
        // accumulator register)
 112         movdqa          PASS0_SUMS, T0
 113         movdqa          PASS2_SUMS, T1
 114         punpcklqdq      PASS1_SUMS, T0          // => (PASS0_SUM_A PASS1_SUM_A)
 115         punpcklqdq      PASS3_SUMS, T1          // => (PASS2_SUM_A PASS3_SUM_A)
 116         punpckhqdq      PASS1_SUMS, PASS0_SUMS  // => (PASS0_SUM_B PASS1_SUM_B)
 117         punpckhqdq      PASS3_SUMS, PASS2_SUMS  // => (PASS2_SUM_B PASS3_SUM_B)
 118         paddq           PASS0_SUMS, T0          // => (pass 0 total, pass 1 total)
 119         paddq           PASS2_SUMS, T1          // => (pass 2 total, pass 3 total)
 120         movdqu          T0, 0x00(HASH)
 121         movdqu          T1, 0x10(HASH)
 122         ret
 123 ENDPROC(nh_sse2)

/* [<][>][^][v][top][bottom][index][help] */