1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 #include <linux/linkage.h>
  17 #include <asm/inst.h>
  18 #include <asm/frame.h>
  19 
  20 .section        .rodata.cst16.bswap_mask, "aM", @progbits, 16
  21 .align 16
  22 .Lbswap_mask:
  23         .octa 0x000102030405060708090a0b0c0d0e0f
  24 
  25 #define DATA    %xmm0
  26 #define SHASH   %xmm1
  27 #define T1      %xmm2
  28 #define T2      %xmm3
  29 #define T3      %xmm4
  30 #define BSWAP   %xmm5
  31 #define IN1     %xmm6
  32 
  33 .text
  34 
  35 
  36 
  37 
  38 
  39 
  40 
  41 
  42 
  43 
  44 
  45 
  46 
  47 __clmul_gf128mul_ble:
  48         movaps DATA, T1
  49         pshufd $0b01001110, DATA, T2
  50         pshufd $0b01001110, SHASH, T3
  51         pxor DATA, T2
  52         pxor SHASH, T3
  53 
  54         PCLMULQDQ 0x00 SHASH DATA       # DATA = a0 * b0
  55         PCLMULQDQ 0x11 SHASH T1         # T1 = a1 * b1
  56         PCLMULQDQ 0x00 T3 T2            # T2 = (a1 + a0) * (b1 + b0)
  57         pxor DATA, T2
  58         pxor T1, T2                     # T2 = a0 * b1 + a1 * b0
  59 
  60         movaps T2, T3
  61         pslldq $8, T3
  62         psrldq $8, T2
  63         pxor T3, DATA
  64         pxor T2, T1                     # <T1:DATA> is result of
  65 
  66 
  67 
  68         movaps DATA, T3
  69         psllq $1, T3
  70         pxor DATA, T3
  71         psllq $5, T3
  72         pxor DATA, T3
  73         psllq $57, T3
  74         movaps T3, T2
  75         pslldq $8, T2
  76         psrldq $8, T3
  77         pxor T2, DATA
  78         pxor T3, T1
  79 
  80 
  81         movaps DATA, T2
  82         psrlq $5, T2
  83         pxor DATA, T2
  84         psrlq $1, T2
  85         pxor DATA, T2
  86         psrlq $1, T2
  87         pxor T2, T1
  88         pxor T1, DATA
  89         ret
  90 ENDPROC(__clmul_gf128mul_ble)
  91 
  92 
  93 ENTRY(clmul_ghash_mul)
  94         FRAME_BEGIN
  95         movups (%rdi), DATA
  96         movups (%rsi), SHASH
  97         movaps .Lbswap_mask, BSWAP
  98         PSHUFB_XMM BSWAP DATA
  99         call __clmul_gf128mul_ble
 100         PSHUFB_XMM BSWAP DATA
 101         movups DATA, (%rdi)
 102         FRAME_END
 103         ret
 104 ENDPROC(clmul_ghash_mul)
 105 
 106 
 107 
 108 
 109 
 110 ENTRY(clmul_ghash_update)
 111         FRAME_BEGIN
 112         cmp $16, %rdx
 113         jb .Lupdate_just_ret    # check length
 114         movaps .Lbswap_mask, BSWAP
 115         movups (%rdi), DATA
 116         movups (%rcx), SHASH
 117         PSHUFB_XMM BSWAP DATA
 118 .align 4
 119 .Lupdate_loop:
 120         movups (%rsi), IN1
 121         PSHUFB_XMM BSWAP IN1
 122         pxor IN1, DATA
 123         call __clmul_gf128mul_ble
 124         sub $16, %rdx
 125         add $16, %rsi
 126         cmp $16, %rdx
 127         jge .Lupdate_loop
 128         PSHUFB_XMM BSWAP DATA
 129         movups DATA, (%rdi)
 130 .Lupdate_just_ret:
 131         FRAME_END
 132         ret
 133 ENDPROC(clmul_ghash_update)