/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

        .text
        .align          4

#ifndef MAX_STRIDE
#define MAX_STRIDE      4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
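
/*
 * ST4() and ST5() expand their arguments only when the interleave factor
 * in use is 4 or 5, respectively, allowing the shared code below to emit
 * either a 4-way or a 5-way interleaved instruction sequence depending on
 * MAX_STRIDE.
 */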

aes_encrypt_block4x:
        encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
        ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
        decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
        ret
ENDPROC(aes_decrypt_block4x)

#if MAX_STRIDE == 5
aes_encrypt_block5x:
        encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
        ret
ENDPROC(aes_encrypt_block5x)

aes_decrypt_block5x:
        decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
        ret
ENDPROC(aes_decrypt_block5x)
#endif

        /*
         * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks)
         * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks)
         */
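
        /*
         * For orientation, a minimal C model of the ECB entry points above
         * (illustrative sketch only, not part of the build; aes_encrypt_1blk
         * is a hypothetical stand-in for one application of the
         * encrypt_block macro):
         *
         *      void ecb_encrypt_model(u8 out[], u8 const in[], u8 const rk[],
         *                             int rounds, int blocks)
         *      {
         *              while (blocks--) {
         *                      aes_encrypt_1blk(out, in, rk, rounds);
         *                      out += 16;
         *                      in += 16;
         *              }
         *      }
         */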

AES_ENTRY(aes_ecb_encrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        enc_prepare     w3, x2, x5

.LecbencloopNx:
        subs            w4, w4, #MAX_STRIDE
        bmi             .Lecbenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
ST4(    bl              aes_encrypt_block4x             )
ST5(    ld1             {v4.16b}, [x1], #16             )
ST5(    bl              aes_encrypt_block5x             )
        st1             {v0.16b-v3.16b}, [x0], #64
ST5(    st1             {v4.16b}, [x0], #16             )
        b               .LecbencloopNx
.Lecbenc1x:
        adds            w4, w4, #MAX_STRIDE
        beq             .Lecbencout
.Lecbencloop:
        ld1             {v0.16b}, [x1], #16             /* get next pt block */
        encrypt_block   v0, w3, x2, x5, w6
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lecbencloop
.Lecbencout:
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_ecb_encrypt)


AES_ENTRY(aes_ecb_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        dec_prepare     w3, x2, x5

.LecbdecloopNx:
        subs            w4, w4, #MAX_STRIDE
        bmi             .Lecbdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
ST4(    bl              aes_decrypt_block4x             )
ST5(    ld1             {v4.16b}, [x1], #16             )
ST5(    bl              aes_decrypt_block5x             )
        st1             {v0.16b-v3.16b}, [x0], #64
ST5(    st1             {v4.16b}, [x0], #16             )
        b               .LecbdecloopNx
.Lecbdec1x:
        adds            w4, w4, #MAX_STRIDE
        beq             .Lecbdecout
.Lecbdecloop:
        ld1             {v0.16b}, [x1], #16             /* get next ct block */
        decrypt_block   v0, w3, x2, x5, w6
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lecbdecloop
.Lecbdecout:
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_ecb_decrypt)


        /*
         * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 iv[])
         * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 iv[])
         * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
         *                       int rounds, int blocks, u8 iv[],
         *                       u32 const rk2[]);
         * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
         *                       int rounds, int blocks, u8 iv[],
         *                       u32 const rk2[]);
         */
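
        /*
         * Sketch in C of the CBC chaining implemented below (illustrative
         * only; aes_encrypt_1blk is a hypothetical stand-in for one
         * application of the encrypt_block macro). The ESSIV variants
         * differ only in deriving the starting IV first, by encrypting the
         * caller's IV with the second, AES-256 key schedule rk2:
         *
         *      void cbc_encrypt_model(u8 out[], u8 const in[], u8 const rk[],
         *                             int rounds, int blocks, u8 iv[])
         *      {
         *              u8 const *prev = iv;
         *
         *              while (blocks--) {
         *                      for (int i = 0; i < 16; i++)
         *                              out[i] = in[i] ^ prev[i];
         *                      aes_encrypt_1blk(out, out, rk, rounds);
         *                      prev = out;     /* ct is the next iv */
         *                      out += 16;
         *                      in += 16;
         *              }
         *              memcpy(iv, prev, 16);   /* return the final iv */
         *      }
         */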

AES_ENTRY(aes_essiv_cbc_encrypt)
        ld1             {v4.16b}, [x5]                  /* get iv */

        mov             w8, #14                         /* AES-256: 14 rounds */
        enc_prepare     w8, x6, x7
        encrypt_block   v4, w8, x6, x7, w9
        enc_switch_key  w3, x2, x6
        b               .Lcbcencloop4x

AES_ENTRY(aes_cbc_encrypt)
        ld1             {v4.16b}, [x5]                  /* get iv */
        enc_prepare     w3, x2, x6

.Lcbcencloop4x:
        subs            w4, w4, #4
        bmi             .Lcbcenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
        encrypt_block   v0, w3, x2, x6, w7
        eor             v1.16b, v1.16b, v0.16b
        encrypt_block   v1, w3, x2, x6, w7
        eor             v2.16b, v2.16b, v1.16b
        encrypt_block   v2, w3, x2, x6, w7
        eor             v3.16b, v3.16b, v2.16b
        encrypt_block   v3, w3, x2, x6, w7
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v3.16b
        b               .Lcbcencloop4x
.Lcbcenc1x:
        adds            w4, w4, #4
        beq             .Lcbcencout
.Lcbcencloop:
        ld1             {v0.16b}, [x1], #16             /* get next pt block */
        eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
        encrypt_block   v4, w3, x2, x6, w7
        st1             {v4.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lcbcencloop
.Lcbcencout:
        st1             {v4.16b}, [x5]                  /* return iv */
        ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENDPROC(aes_essiv_cbc_encrypt)

AES_ENTRY(aes_essiv_cbc_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        ld1             {cbciv.16b}, [x5]               /* get iv */

        mov             w8, #14                         /* AES-256: 14 rounds */
        enc_prepare     w8, x6, x7
        encrypt_block   cbciv, w8, x6, x7, w9
        b               .Lessivcbcdecstart

AES_ENTRY(aes_cbc_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        ld1             {cbciv.16b}, [x5]               /* get iv */
.Lessivcbcdecstart:
        dec_prepare     w3, x2, x6

.LcbcdecloopNx:
        subs            w4, w4, #MAX_STRIDE
        bmi             .Lcbcdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
#if MAX_STRIDE == 5
        ld1             {v4.16b}, [x1], #16             /* get 1 ct block */
        mov             v5.16b, v0.16b
        mov             v6.16b, v1.16b
        mov             v7.16b, v2.16b
        bl              aes_decrypt_block5x
        sub             x1, x1, #32
        eor             v0.16b, v0.16b, cbciv.16b
        eor             v1.16b, v1.16b, v5.16b
        ld1             {v5.16b}, [x1], #16             /* reload 1 ct block */
        ld1             {cbciv.16b}, [x1], #16          /* reload 1 ct block */
        eor             v2.16b, v2.16b, v6.16b
        eor             v3.16b, v3.16b, v7.16b
        eor             v4.16b, v4.16b, v5.16b
#else
        mov             v4.16b, v0.16b
        mov             v5.16b, v1.16b
        mov             v6.16b, v2.16b
        bl              aes_decrypt_block4x
        sub             x1, x1, #16
        eor             v0.16b, v0.16b, cbciv.16b
        eor             v1.16b, v1.16b, v4.16b
        ld1             {cbciv.16b}, [x1], #16          /* reload 1 ct block */
        eor             v2.16b, v2.16b, v5.16b
        eor             v3.16b, v3.16b, v6.16b
#endif
        st1             {v0.16b-v3.16b}, [x0], #64
ST5(    st1             {v4.16b}, [x0], #16             )
        b               .LcbcdecloopNx
.Lcbcdec1x:
        adds            w4, w4, #MAX_STRIDE
        beq             .Lcbcdecout
.Lcbcdecloop:
        ld1             {v1.16b}, [x1], #16             /* get next ct block */
        mov             v0.16b, v1.16b                  /* ...and copy to v0 */
        decrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, cbciv.16b       /* xor with iv => pt */
        mov             cbciv.16b, v1.16b               /* ct is next iv */
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lcbcdecloop
.Lcbcdecout:
        st1             {cbciv.16b}, [x5]               /* return iv */
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_cbc_decrypt)
AES_ENDPROC(aes_essiv_cbc_decrypt)


        /*
         * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
         *                     int rounds, int bytes, u8 const iv[])
         * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
         *                     int rounds, int bytes, u8 const iv[])
         */
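
        /*
         * These routines handle the last two blocks of a CBC message via
         * ciphertext stealing, in the CS3 ordering where the final two
         * ciphertext blocks are emitted swapped. A sketch of the encrypt
         * path, with d = bytes - 16 bytes in the final, possibly partial
         * block (0 < d <= 16) and E() one application of encrypt_block:
         *
         *      Cn-1 = E(Pn-1 ^ IV)
         *      Cn   = E(pad0(Pn) ^ Cn-1)
         *      out  = Cn || Cn-1[0..d-1]
         *
         * where pad0() zero-pads Pn to 16 bytes. The shifting, padding and
         * swapping are done with tbl/tbx lookups into .Lcts_permute_table
         * and overlapping loads and stores.
         */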

AES_ENTRY(aes_cbc_cts_encrypt)
        adr_l           x8, .Lcts_permute_table
        sub             x4, x4, #16
        add             x9, x8, #32
        add             x8, x8, x4
        sub             x9, x9, x4
        ld1             {v3.16b}, [x8]
        ld1             {v4.16b}, [x9]

        ld1             {v0.16b}, [x1], x4              /* overlapping loads */
        ld1             {v1.16b}, [x1]

        ld1             {v5.16b}, [x5]                  /* get iv */
        enc_prepare     w3, x2, x6

        eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
        tbl             v1.16b, {v1.16b}, v4.16b
        encrypt_block   v0, w3, x2, x6, w7

        eor             v1.16b, v1.16b, v0.16b
        tbl             v0.16b, {v0.16b}, v3.16b
        encrypt_block   v1, w3, x2, x6, w7

        add             x4, x0, x4
        st1             {v0.16b}, [x4]                  /* overlapping stores */
        st1             {v1.16b}, [x0]
        ret
AES_ENDPROC(aes_cbc_cts_encrypt)

AES_ENTRY(aes_cbc_cts_decrypt)
        adr_l           x8, .Lcts_permute_table
        sub             x4, x4, #16
        add             x9, x8, #32
        add             x8, x8, x4
        sub             x9, x9, x4
        ld1             {v3.16b}, [x8]
        ld1             {v4.16b}, [x9]

        ld1             {v0.16b}, [x1], x4              /* overlapping loads */
        ld1             {v1.16b}, [x1]

        ld1             {v5.16b}, [x5]                  /* get iv */
        dec_prepare     w3, x2, x6

        decrypt_block   v0, w3, x2, x6, w7
        tbl             v2.16b, {v0.16b}, v3.16b
        eor             v2.16b, v2.16b, v1.16b

        tbx             v0.16b, {v1.16b}, v4.16b
        decrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, v5.16b          /* xor with iv */

        add             x4, x0, x4
        st1             {v2.16b}, [x4]                  /* overlapping stores */
        st1             {v0.16b}, [x0]
        ret
AES_ENDPROC(aes_cbc_cts_decrypt)

        .section        ".rodata", "a"
        .align          6
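/*
 * Out-of-range (0xff) indices make tbl emit a zero byte (and tbx leave the
 * destination byte unchanged), so the 16-byte windows read at offsets d and
 * 32 - d into this table act as masks that shift a d-byte partial block
 * into place and zero-pad it.
 */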
.Lcts_permute_table:
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
        .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .previous


        /*
         * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 ctr[])
         */
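
        /*
         * Sketch in C of the CTR walk implemented below (illustrative only;
         * aes_encrypt_1blk is a hypothetical stand-in for one application
         * of the encrypt_block macro, and be128_inc for a 128-bit
         * big-endian increment). The asm keeps the byte-swapped low counter
         * half in a GP register and falls back to the single-block loop
         * when the low 32 bits could overflow within this call:
         *
         *      void ctr_encrypt_model(u8 out[], u8 const in[], u8 const rk[],
         *                             int rounds, int blocks, u8 ctr[16])
         *      {
         *              u8 ks[16];
         *
         *              while (blocks--) {
         *                      aes_encrypt_1blk(ks, ctr, rk, rounds);
         *                      for (int i = 0; i < 16; i++)
         *                              out[i] = in[i] ^ ks[i];
         *                      be128_inc(ctr);
         *                      out += 16;
         *                      in += 16;
         *              }
         *      }
         */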

AES_ENTRY(aes_ctr_encrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        enc_prepare     w3, x2, x6
        ld1             {vctr.16b}, [x5]

        umov            x6, vctr.d[1]           /* keep swabbed ctr in reg */
        rev             x6, x6
        cmn             w6, w4                  /* 32 bit overflow? */
        bcs             .Lctrloop
.LctrloopNx:
        subs            w4, w4, #MAX_STRIDE
        bmi             .Lctr1x
        add             w7, w6, #1
        mov             v0.16b, vctr.16b
        add             w8, w6, #2
        mov             v1.16b, vctr.16b
        add             w9, w6, #3
        mov             v2.16b, vctr.16b
        rev             w7, w7
        mov             v3.16b, vctr.16b
        rev             w8, w8
ST5(    mov             v4.16b, vctr.16b                )
        mov             v1.s[3], w7
        rev             w9, w9
ST5(    add             w10, w6, #4                     )
        mov             v2.s[3], w8
ST5(    rev             w10, w10                        )
        mov             v3.s[3], w9
ST5(    mov             v4.s[3], w10                    )
        ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
ST4(    bl              aes_encrypt_block4x             )
ST5(    bl              aes_encrypt_block5x             )
        eor             v0.16b, v5.16b, v0.16b
ST4(    ld1             {v5.16b}, [x1], #16             )
        eor             v1.16b, v6.16b, v1.16b
ST5(    ld1             {v5.16b-v6.16b}, [x1], #32      )
        eor             v2.16b, v7.16b, v2.16b
        eor             v3.16b, v5.16b, v3.16b
ST5(    eor             v4.16b, v6.16b, v4.16b          )
        st1             {v0.16b-v3.16b}, [x0], #64
ST5(    st1             {v4.16b}, [x0], #16             )
        add             x6, x6, #MAX_STRIDE
        rev             x7, x6
        ins             vctr.d[1], x7
        cbz             w4, .Lctrout
        b               .LctrloopNx
.Lctr1x:
        adds            w4, w4, #MAX_STRIDE
        beq             .Lctrout
.Lctrloop:
        mov             v0.16b, vctr.16b
        encrypt_block   v0, w3, x2, x8, w7

        adds            x6, x6, #1              /* increment BE ctr */
        rev             x7, x6
        ins             vctr.d[1], x7
        bcs             .Lctrcarry              /* overflow? */

.Lctrcarrydone:
        subs            w4, w4, #1
        bmi             .Lctrtailblock          /* blocks <0 means tail block */
        ld1             {v3.16b}, [x1], #16
        eor             v3.16b, v0.16b, v3.16b
        st1             {v3.16b}, [x0], #16
        bne             .Lctrloop

.Lctrout:
        st1             {vctr.16b}, [x5]        /* return next CTR value */
        ldp             x29, x30, [sp], #16
        ret

.Lctrtailblock:
        st1             {v0.16b}, [x0]
        b               .Lctrout

.Lctrcarry:
        umov            x7, vctr.d[0]           /* load upper word of ctr  */
        rev             x7, x7                  /* ... to handle the carry */
        add             x7, x7, #1
        rev             x7, x7
        ins             vctr.d[0], x7
        b               .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)


        /*
         * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
         *                 int bytes, u8 const rk2[], u8 iv[], int first)
         * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
         *                 int bytes, u8 const rk2[], u8 iv[], int first)
         */

        .macro          next_tweak, out, in, tmp
        sshr            \tmp\().2d,  \in\().2d,   #63
        and             \tmp\().16b, \tmp\().16b, xtsmask.16b
        add             \out\().2d,  \in\().2d,   \in\().2d
        ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor             \out\().16b, \out\().16b, \tmp\().16b
        .endm
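
        /*
         * next_tweak multiplies the tweak by x in GF(2^128) with the XTS
         * reduction polynomial x^128 + x^7 + x^2 + x + 1, operating on a
         * little-endian tweak: both 64-bit lanes are shifted left by one
         * (the add), the bit shifted out of the low lane is carried into
         * the high lane, and a carry out of the high lane is folded back
         * in by xoring 0x87 into the lowest byte. In C (illustrative
         * only):
         *
         *      void next_tweak_model(u64 t[2])
         *      {
         *              u64 carry = t[1] >> 63;
         *
         *              t[1] = (t[1] << 1) | (t[0] >> 63);
         *              t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
         *      }
         *
         * xtsmask holds the { 0x1, 0x87 } constant pair used for the two
         * corrections.
         */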

        .macro          xts_load_mask, tmp
        movi            xtsmask.2s, #0x1
        movi            \tmp\().2s, #0x87
        uzp1            xtsmask.4s, xtsmask.4s, \tmp\().4s
        .endm

AES_ENTRY(aes_xts_encrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        ld1             {v4.16b}, [x6]
        xts_load_mask   v8
        cbz             w7, .Lxtsencnotfirst

        enc_prepare     w3, x5, x8
        xts_cts_skip_tw w7, .LxtsencNx
        encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
        enc_switch_key  w3, x2, x8
        b               .LxtsencNx

.Lxtsencnotfirst:
        enc_prepare     w3, x2, x8
.LxtsencloopNx:
        next_tweak      v4, v4, v8
.LxtsencNx:
        subs            w4, w4, #64
        bmi             .Lxtsenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        next_tweak      v5, v4, v8
        eor             v0.16b, v0.16b, v4.16b
        next_tweak      v6, v5, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        next_tweak      v7, v6, v8
        eor             v3.16b, v3.16b, v7.16b
        bl              aes_encrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v7.16b
        cbz             w4, .Lxtsencret
        xts_reload_mask v8
        b               .LxtsencloopNx
.Lxtsenc1x:
        adds            w4, w4, #64
        beq             .Lxtsencout
        subs            w4, w4, #16
        bmi             .LxtsencctsNx
.Lxtsencloop:
        ld1             {v0.16b}, [x1], #16
.Lxtsencctsout:
        eor             v0.16b, v0.16b, v4.16b
        encrypt_block   v0, w3, x2, x8, w7
        eor             v0.16b, v0.16b, v4.16b
        cbz             w4, .Lxtsencout
        subs            w4, w4, #16
        next_tweak      v4, v4, v8
        bmi             .Lxtsenccts
        st1             {v0.16b}, [x0], #16
        b               .Lxtsencloop
.Lxtsencout:
        st1             {v0.16b}, [x0]
.Lxtsencret:
        st1             {v4.16b}, [x6]
        ldp             x29, x30, [sp], #16
        ret

.LxtsencctsNx:
        mov             v0.16b, v3.16b
        sub             x0, x0, #16
.Lxtsenccts:
        adr_l           x8, .Lcts_permute_table

        add             x1, x1, w4, sxtw        /* rewind input pointer */
        add             w4, w4, #16             /* # bytes in final block */
        add             x9, x8, #32
        add             x8, x8, x4
        sub             x9, x9, x4
        add             x4, x0, x4              /* output address of final block */

        ld1             {v1.16b}, [x1]          /* load final block */
        ld1             {v2.16b}, [x8]
        ld1             {v3.16b}, [x9]

        tbl             v2.16b, {v0.16b}, v2.16b
        tbx             v0.16b, {v1.16b}, v3.16b
        st1             {v2.16b}, [x4]                  /* overlapping stores */
        mov             w4, wzr
        b               .Lxtsencctsout
AES_ENDPROC(aes_xts_encrypt)

AES_ENTRY(aes_xts_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        /* subtract 16 bytes if we are doing CTS */
        sub             w8, w4, #0x10
        tst             w4, #0xf
        csel            w4, w4, w8, eq

        ld1             {v4.16b}, [x6]
        xts_load_mask   v8
        xts_cts_skip_tw w7, .Lxtsdecskiptw
        cbz             w7, .Lxtsdecnotfirst

        enc_prepare     w3, x5, x8
        encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
.Lxtsdecskiptw:
        dec_prepare     w3, x2, x8
        b               .LxtsdecNx

.Lxtsdecnotfirst:
        dec_prepare     w3, x2, x8
.LxtsdecloopNx:
        next_tweak      v4, v4, v8
.LxtsdecNx:
        subs            w4, w4, #64
        bmi             .Lxtsdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        next_tweak      v5, v4, v8
        eor             v0.16b, v0.16b, v4.16b
        next_tweak      v6, v5, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        next_tweak      v7, v6, v8
        eor             v3.16b, v3.16b, v7.16b
        bl              aes_decrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v7.16b
        cbz             w4, .Lxtsdecout
        xts_reload_mask v8
        b               .LxtsdecloopNx
.Lxtsdec1x:
        adds            w4, w4, #64
        beq             .Lxtsdecout
        subs            w4, w4, #16
.Lxtsdecloop:
        ld1             {v0.16b}, [x1], #16
        bmi             .Lxtsdeccts
.Lxtsdecctsout:
        eor             v0.16b, v0.16b, v4.16b
        decrypt_block   v0, w3, x2, x8, w7
        eor             v0.16b, v0.16b, v4.16b
        st1             {v0.16b}, [x0], #16
        cbz             w4, .Lxtsdecout
        subs            w4, w4, #16
        next_tweak      v4, v4, v8
        b               .Lxtsdecloop
.Lxtsdecout:
        st1             {v4.16b}, [x6]
        ldp             x29, x30, [sp], #16
        ret

.Lxtsdeccts:
        adr_l           x8, .Lcts_permute_table

        add             x1, x1, w4, sxtw        /* rewind input pointer */
        add             w4, w4, #16             /* # bytes in final block */
        add             x9, x8, #32
        add             x8, x8, x4
        sub             x9, x9, x4
        add             x4, x0, x4              /* output address of final block */

        next_tweak      v5, v4, v8

        ld1             {v1.16b}, [x1]          /* load final block */
        ld1             {v2.16b}, [x8]
        ld1             {v3.16b}, [x9]

        eor             v0.16b, v0.16b, v5.16b
        decrypt_block   v0, w3, x2, x8, w7
        eor             v0.16b, v0.16b, v5.16b

        tbl             v2.16b, {v0.16b}, v2.16b
        tbx             v0.16b, {v1.16b}, v3.16b

        st1             {v2.16b}, [x4]                  /* overlapping stores */
        mov             w4, wzr
        b               .Lxtsdecctsout
AES_ENDPROC(aes_xts_decrypt)

        /*
         * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
         *                int blocks, u8 dg[], int enc_before, int enc_after)
         */
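        /*
         * Sketch in C of the digest update implemented below (illustrative
         * only; aes_encrypt_1blk is a hypothetical stand-in for one
         * application of the encrypt_block macro). enc_before requests one
         * encryption of dg before any data is absorbed, and enc_after
         * requests that the final xor be encrypted as well:
         *
         *      void mac_update_model(u8 const in[], u32 const rk[],
         *                            int rounds, int blocks, u8 dg[16],
         *                            int enc_before, int enc_after)
         *      {
         *              if (enc_before)
         *                      aes_encrypt_1blk(dg, dg, rk, rounds);
         *
         *              while (blocks--) {
         *                      for (int i = 0; i < 16; i++)
         *                              dg[i] ^= in[i];
         *                      if (blocks || enc_after)
         *                              aes_encrypt_1blk(dg, dg, rk, rounds);
         *                      in += 16;
         *              }
         *      }
         */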
AES_ENTRY(aes_mac_update)
        frame_push      6

        mov             x19, x0
        mov             x20, x1
        mov             x21, x2
        mov             x22, x3
        mov             x23, x4
        mov             x24, x6

        ld1             {v0.16b}, [x23]                 /* get dg */
        enc_prepare     w2, x1, x7
        cbz             w5, .Lmacloop4x

        encrypt_block   v0, w2, x1, x7, w8

.Lmacloop4x:
        subs            w22, w22, #4
        bmi             .Lmac1x
        ld1             {v1.16b-v4.16b}, [x19], #64     /* get next 4 pt blocks */
        eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
        encrypt_block   v0, w21, x20, x7, w8
        eor             v0.16b, v0.16b, v2.16b
        encrypt_block   v0, w21, x20, x7, w8
        eor             v0.16b, v0.16b, v3.16b
        encrypt_block   v0, w21, x20, x7, w8
        eor             v0.16b, v0.16b, v4.16b
        cmp             w22, wzr
        csinv           x5, x24, xzr, eq
        cbz             w5, .Lmacout
        encrypt_block   v0, w21, x20, x7, w8
        st1             {v0.16b}, [x23]                 /* return dg */
        cond_yield_neon .Lmacrestart
        b               .Lmacloop4x
.Lmac1x:
        add             w22, w22, #4
.Lmacloop:
        cbz             w22, .Lmacout
        ld1             {v1.16b}, [x19], #16            /* get next pt block */
        eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */

        subs            w22, w22, #1
        csinv           x5, x24, xzr, eq
        cbz             w5, .Lmacout

.Lmacenc:
        encrypt_block   v0, w21, x20, x7, w8
        b               .Lmacloop

.Lmacout:
        st1             {v0.16b}, [x23]                 /* return dg */
        frame_pop
        ret

.Lmacrestart:
        ld1             {v0.16b}, [x23]                 /* get dg */
        enc_prepare     w21, x20, x0
        b               .Lmacloop4x
AES_ENDPROC(aes_mac_update)
