root/arch/x86/crypto/ghash-clmulni-intel_asm.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
   4  * instructions. This file contains accelerated part of ghash
   5  * implementation. More information about PCLMULQDQ can be found at:
   6  *
   7  * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
   8  *
   9  * Copyright (c) 2009 Intel Corp.
  10  *   Author: Huang Ying <ying.huang@intel.com>
  11  *           Vinodh Gopal
  12  *           Erdinc Ozturk
  13  *           Deniz Karakoyunlu
  14  */
  15 
  16 #include <linux/linkage.h>
  17 #include <asm/inst.h>
  18 #include <asm/frame.h>
  19 
  /*
   * Shuffle control used with PSHUFB: destination byte i is taken from
   * source byte 15 - i, so the 16 bytes of an XMM register are reversed.
   * The constant is 16-byte aligned because it is loaded with movaps.
   */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.balign 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* Register roles shared by the routines in this file. */
#define DATA	%xmm0	/* operand1 on entry to the multiply, product on exit */
#define SHASH	%xmm1	/* operand2: hash_key << 1 mod poly */
#define T1	%xmm2	/* scratch */
#define T2	%xmm3	/* scratch */
#define T3	%xmm4	/* scratch */
#define BSWAP	%xmm5	/* holds .Lbswap_mask */
#define IN1	%xmm6	/* input block loaded from the source buffer */

.text
  34 
  /*
   * __clmul_gf128mul_ble - carry-less multiply and reduce (internal ABI)
   *
   * In:
   *	DATA	operand1
   *	SHASH	operand2, hash_key << 1 mod poly
   * Out:
   *	DATA	operand1 * operand2 mod poly
   * Clobbers:
   *	T1, T2, T3
   *
   * Notation used below: a = DATA and b = SHASH on entry, a1/b1 are their
   * high and a0/b0 their low 64-bit halves, and "+" means XOR.
   */
__clmul_gf128mul_ble:
	/* Build the inputs of the three 64x64 carry-less multiplications. */
	pshufd $0x4e, DATA, T2		# T2 = a with its 64-bit halves swapped
	pshufd $0x4e, SHASH, T3		# T3 = b with its 64-bit halves swapped
	movaps DATA, T1			# T1 = a, consumed by the high product
	pxor SHASH, T3			# low half of T3 = b1 + b0
	pxor DATA, T2			# low half of T2 = a1 + a0

	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	pxor T1, T2
	pxor DATA, T2			# T2 = a0 * b1 + a1 * b0

	/* Fold the middle term into the 256-bit value <T1:DATA>. */
	movaps T2, T3
	psrldq $8, T2			# T2 = middle term shifted right 64 bits
	pslldq $8, T3			# T3 = middle term shifted left 64 bits
	pxor T2, T1
	pxor T3, DATA			# <T1:DATA> is the carry-less product

	/*
	 * First phase of the reduction.  With x = DATA, every 64-bit lane
	 * of T3 becomes (x << 63) + (x << 62) + (x << 57); that 128-bit
	 * term is then XORed into <T1:DATA> shifted left by 64 bits.
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	psrldq $8, T3			# high lane of the term, moved to the low lane
	pslldq $8, T2			# low lane of the term, moved to the high lane
	pxor T3, T1
	pxor T2, DATA

	/*
	 * Second phase of the reduction.  With x = DATA as updated above,
	 * every 64-bit lane of T2 becomes (x >> 1) + (x >> 2) + (x >> 7).
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA			# DATA = reduced product
	ret
ENDPROC(__clmul_gf128mul_ble)
  91 
  /*
   * void clmul_ghash_mul(char *dst, const u128 *shash)
   *
   * In:
   *	%rdi	dst, 16 bytes, read and then overwritten (no alignment needed)
   *	%rsi	shash, 16 bytes, read only (no alignment needed)
   *
   * Loads the 16 bytes at dst, reverses their byte order, multiplies them
   * by *shash with __clmul_gf128mul_ble, reverses the byte order of the
   * product and stores it back to dst.
   *
   * Clobbers: DATA, SHASH, T1, T2, T3, BSWAP.
   */
ENTRY(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	/* RIP-relative load: no absolute relocation, position independent. */
	movaps .Lbswap_mask(%rip), BSWAP
	PSHUFB_XMM BSWAP DATA
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
	FRAME_END
	ret
ENDPROC(clmul_ghash_mul)
 105 
 /*
  * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
  *			   const u128 *shash);
  *
  * In:
  *	%rdi	dst, 16-byte running hash value, read and then overwritten
  *	%rsi	src, input bytes
  *	%edx	srclen, number of input bytes
  *	%rcx	shash, 16 bytes, read only
  *
  * For every complete 16-byte block of src: byte-reverse the block, XOR it
  * into the running value and multiply the result by *shash.  The running
  * value is byte-reversed when loaded from dst and again before it is
  * stored back.  If srclen is below 16, dst is left untouched; a trailing
  * partial block (srclen % 16 bytes) is never read.
  *
  * srclen is a 32-bit unsigned argument, so only %edx is examined (the
  * upper half of %rdx is not defined by the calling convention) and all
  * length comparisons are unsigned.
  *
  * Clobbers: DATA, SHASH, T1, T2, T3, BSWAP, IN1, %rsi, %rdx, flags.
  */
ENTRY(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %edx
	jb .Lupdate_just_ret	# less than one full block: nothing to do
	/* RIP-relative load: no absolute relocation, position independent. */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
	/* Invariant: at least 16 bytes remain, so the subtraction cannot wrap. */
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop	# unsigned, matching the entry check
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	ret
ENDPROC(clmul_ghash_update)

/* [<][>][^][v][top][bottom][index][help] */