/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_ENTRY(func)         ENTRY(neon_ ## func)
#define AES_ENDPROC(func)       ENDPROC(neon_ ## func)

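        /*
         * AES_ENTRY()/AES_ENDPROC() give every routine emitted by the
         * shared mode code in aes-modes.S (included at the bottom of
         * this file) a neon_ symbol prefix.  The register aliases below
         * name the fixed NEON registers that code uses for the XTS
         * tweak mask, the CBC IV and the CTR counter.
         */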
        xtsmask         .req    v7
        cbciv           .req    v7
        vctr            .req    v4

        .macro          xts_reload_mask, tmp
        xts_load_mask   \tmp
        .endm

        /* special case for the neon-bs driver calling into this one for CTS */
        .macro          xts_cts_skip_tw, reg, lbl
        tbnz            \reg, #1, \lbl
        .endm

        /* multiply by polynomial 'x' in GF(2^8) */
        .macro          mul_by_x, out, in, temp, const
        sshr            \temp, \in, #7
        shl             \out, \in, #1
        and             \temp, \temp, \const
        eor             \out, \out, \temp
        .endm
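
        /*
         * In mul_by_x above, the arithmetic shift right by 7 turns the
         * top bit of each byte into an all-ones/all-zeroes mask, which
         * then selects the AES reduction constant 0x1b held in \const.
         * Per byte this is the usual xtime operation; an illustrative C
         * sketch (not part of the kernel sources):
         *
         *      out = (u8)(in << 1) ^ ((in & 0x80) ? 0x1b : 0x00);
         */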

        /* multiply by polynomial 'x^2' in GF(2^8) */
        .macro          mul_by_x2, out, in, temp, const
        ushr            \temp, \in, #6
        shl             \out, \in, #2
        pmul            \temp, \temp, \const
        eor             \out, \out, \temp
        .endm
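
        /*
         * For mul_by_x2, the two bits shifted out of each byte (the x^8
         * and x^9 terms) must be reduced by 0x1b and 0x1b << 1
         * respectively, which is exactly the carryless product of the
         * byte's top two bits with 0x1b, hence the pmul.  Roughly, per
         * byte (illustrative C, clmul() standing for a carryless
         * multiply):
         *
         *      out = (u8)(in << 2) ^ clmul(in >> 6, 0x1b);
         */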

        /* preload the entire Sbox */
        .macro          prepare, sbox, shiftrows, temp
        movi            v12.16b, #0x1b
        ldr_l           q13, \shiftrows, \temp
        ldr_l           q14, .Lror32by8, \temp
        adr_l           \temp, \sbox
        ld1             {v16.16b-v19.16b}, [\temp], #64
        ld1             {v20.16b-v23.16b}, [\temp], #64
        ld1             {v24.16b-v27.16b}, [\temp], #64
        ld1             {v28.16b-v31.16b}, [\temp]
        .endm
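
        /*
         * After 'prepare' the fixed registers hold:
         *   v12       - the reduction constant 0x1b in every byte
         *   v13       - the ShiftRows (or Reverse ShiftRows) permutation
         *   v14       - the rotate-each-32-bit-word-right-by-8 permutation
         *   v16-v31   - the full 256-byte S-box (or inverse S-box)
         * so the block macros below never need to re-fetch the S-box.
         */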

        /* do preload for encryption */
        .macro          enc_prepare, ignore0, ignore1, temp
        prepare         crypto_aes_sbox, .LForward_ShiftRows, \temp
        .endm

        .macro          enc_switch_key, ignore0, ignore1, temp
        /* do nothing */
        .endm

        /* do preload for decryption */
        .macro          dec_prepare, ignore0, ignore1, temp
        prepare         crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
        .endm

        /* apply SubBytes transformation using the preloaded Sbox */
        .macro          sub_bytes, in
        sub             v9.16b, \in\().16b, v15.16b
        tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b
        sub             v10.16b, v9.16b, v15.16b
        tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v11.16b, v10.16b, v15.16b
        tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b
        tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
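
        /*
         * tbl/tbx index at most 64 table bytes at a time, so the
         * 256-byte S-box lookup is split into four steps: the tbl covers
         * entries 0-63 (out-of-range lanes are zeroed), and each tbx
         * covers the next 64 entries using indices rebased by multiples
         * of 0x40 (v15, set to 0x40 by the do_block macros), leaving
         * lanes that were already translated untouched.
         */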

        /* apply MixColumns transformation */
        .macro          mix_columns, in, enc
        .if             \enc == 0
        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        mul_by_x2       v8.16b, \in\().16b, v9.16b, v12.16b
        eor             \in\().16b, \in\().16b, v8.16b
        rev32           v8.8h, v8.8h
        eor             \in\().16b, \in\().16b, v8.16b
        .endif

        mul_by_x        v9.16b, \in\().16b, v8.16b, v12.16b
        rev32           v8.8h, \in\().8h
        eor             v8.16b, v8.16b, v9.16b
        eor             \in\().16b, \in\().16b, v8.16b
        tbl             \in\().16b, {\in\().16b}, v14.16b
        eor             \in\().16b, \in\().16b, v8.16b
        .endm
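
        /*
         * MixColumns multiplies each column by the circulant matrix
         * (2 3 1 1); here that is built from one mul_by_x, a rotation of
         * each column by two bytes (rev32 on .8h data) and a rotation by
         * one byte (tbl with the .Lror32by8 index vector in v14).  The
         * inverse transform relies on the identity
         * (e b d 9) = (2 3 1 1) x (5 0 4 0): the state is first
         * multiplied by the circulant matrix (5 0 4 0) (one mul_by_x2
         * plus a two-byte rotation) and then run through the forward
         * MixColumns sequence.
         */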

        .macro          do_block, enc, in, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        movi            v15.16b, #0x40
        tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
        sub_bytes       \in
        subs            \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16
        beq             2222f
        mix_columns     \in, \enc
        b               1111b
2222:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        .endm
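
        /*
         * One full block: the loop body adds the current round key, then
         * performs ShiftRows (tbl with v13), SubBytes and, on every pass
         * except the last, MixColumns; the final round key is applied
         * after the loop.  Decryption reuses the same skeleton, with the
         * inverse S-box and Reverse ShiftRows loaded by dec_prepare and
         * \enc == 0 selecting the inverse MixColumns variant.
         */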

        .macro          encrypt_block, in, rounds, rk, rkp, i
        do_block        1, \in, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block, in, rounds, rk, rkp, i
        do_block        0, \in, \rounds, \rk, \rkp, \i
        .endm

        /*
         * Interleaved versions: functionally equivalent to the
         * ones above, but applied to AES states in parallel.
         */

        .macro          sub_bytes_4x, in0, in1, in2, in3
        sub             v8.16b, \in0\().16b, v15.16b
        tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
        sub             v9.16b, \in1\().16b, v15.16b
        tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
        sub             v10.16b, \in2\().16b, v15.16b
        tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
        sub             v11.16b, \in3\().16b, v15.16b
        tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
        tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
        tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v8.16b, v8.16b, v15.16b
        tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
        sub             v9.16b, v9.16b, v15.16b
        tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
        sub             v10.16b, v10.16b, v15.16b
        tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b
        sub             v11.16b, v11.16b, v15.16b
        tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
        sub             v8.16b, v8.16b, v15.16b
        tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
        sub             v9.16b, v9.16b, v15.16b
        tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
        sub             v10.16b, v10.16b, v15.16b
        tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
        sub             v11.16b, v11.16b, v15.16b
        tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
        tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
        tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
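
        /*
         * Same four-step tbl/tbx lookup as sub_bytes above, applied to
         * four independent states with the instructions interleaved.
         */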

        .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
        sshr            \tmp0\().16b, \in0\().16b, #7
        shl             \out0\().16b, \in0\().16b, #1
        sshr            \tmp1\().16b, \in1\().16b, #7
        and             \tmp0\().16b, \tmp0\().16b, \const\().16b
        shl             \out1\().16b, \in1\().16b, #1
        and             \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm

        .macro          mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
        ushr            \tmp0\().16b, \in0\().16b, #6
        shl             \out0\().16b, \in0\().16b, #2
        ushr            \tmp1\().16b, \in1\().16b, #6
        pmul            \tmp0\().16b, \tmp0\().16b, \const\().16b
        shl             \out1\().16b, \in1\().16b, #2
        pmul            \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm

        .macro          mix_columns_2x, in0, in1, enc
        .if             \enc == 0
        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        mul_by_x2_2x    v8, v9, \in0, \in1, v10, v11, v12
        eor             \in0\().16b, \in0\().16b, v8.16b
        rev32           v8.8h, v8.8h
        eor             \in1\().16b, \in1\().16b, v9.16b
        rev32           v9.8h, v9.8h
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        .endif

        mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v12
        rev32           v10.8h, \in0\().8h
        rev32           v11.8h, \in1\().8h
        eor             v10.16b, v10.16b, v8.16b
        eor             v11.16b, v11.16b, v9.16b
        eor             \in0\().16b, \in0\().16b, v10.16b
        eor             \in1\().16b, \in1\().16b, v11.16b
        tbl             \in0\().16b, {\in0\().16b}, v14.16b
        tbl             \in1\().16b, {\in1\().16b}, v14.16b
        eor             \in0\().16b, \in0\().16b, v10.16b
        eor             \in1\().16b, \in1\().16b, v11.16b
        .endm

        .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        movi            v15.16b, #0x40
        tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
        tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
        tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
        sub_bytes_4x    \in0, \in1, \in2, \in3
        subs            \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16
        beq             2222f
        mix_columns_2x  \in0, \in1, \enc
        mix_columns_2x  \in2, \in3, \enc
        b               1111b
2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        .endm

        .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm

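        /*
         * aes-modes.S supplies the actual mode routines (ECB, CBC, CTR,
         * XTS, ...), built on the encrypt_block/decrypt_block and 4-way
         * macros defined above and emitted with a neon_ prefix via
         * AES_ENTRY().
         */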
#include "aes-modes.S"

        .section        ".rodata", "a"
        .align          4
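        /*
         * Permutation index vectors for tbl: the ShiftRows and Reverse
         * ShiftRows byte shuffles used on the column-major AES state,
         * and a rotate-right-by-one-byte within each 32-bit word used by
         * the MixColumns code above.
         */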
.LForward_ShiftRows:
        .octa           0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
        .octa           0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
        .octa           0x0c0f0e0d080b0a090407060500030201
