arch/arm/crypto/aes-ce-core.S

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

        .text
        .arch           armv8-a
        .fpu            crypto-neon-fp-armv8
        .align          3

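        /*
         * Building blocks based on the ARMv8 Crypto Extensions: aese.8/aesd.8
         * xor in a round key and apply the (inverse) SubBytes and ShiftRows
         * steps, while aesmc.8/aesimc.8 apply (Inverse) MixColumns. Thus
         * enc_round/dec_round perform one inner AES round, the *_dround
         * macros two, and the *_fround macros the final round, which omits
         * MixColumns and ends with an xor of the last round key. The _4x
         * variants interleave four independent blocks to hide the latency of
         * the AES instructions.
         */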
        .macro          enc_round, state, key
        aese.8          \state, \key
        aesmc.8         \state, \state
        .endm

        .macro          dec_round, state, key
        aesd.8          \state, \key
        aesimc.8        \state, \state
        .endm

        .macro          enc_dround, key1, key2
        enc_round       q0, \key1
        enc_round       q0, \key2
        .endm

        .macro          dec_dround, key1, key2
        dec_round       q0, \key1
        dec_round       q0, \key2
        .endm

        .macro          enc_fround, key1, key2, key3
        enc_round       q0, \key1
        aese.8          q0, \key2
        veor            q0, q0, \key3
        .endm

        .macro          dec_fround, key1, key2, key3
        dec_round       q0, \key1
        aesd.8          q0, \key2
        veor            q0, q0, \key3
        .endm

        .macro          enc_dround_4x, key1, key2
        enc_round       q0, \key1
        enc_round       q1, \key1
        enc_round       q2, \key1
        enc_round       q3, \key1
        enc_round       q0, \key2
        enc_round       q1, \key2
        enc_round       q2, \key2
        enc_round       q3, \key2
        .endm

        .macro          dec_dround_4x, key1, key2
        dec_round       q0, \key1
        dec_round       q1, \key1
        dec_round       q2, \key1
        dec_round       q3, \key1
        dec_round       q0, \key2
        dec_round       q1, \key2
        dec_round       q2, \key2
        dec_round       q3, \key2
        .endm

        .macro          enc_fround_4x, key1, key2, key3
        enc_round       q0, \key1
        enc_round       q1, \key1
        enc_round       q2, \key1
        enc_round       q3, \key1
        aese.8          q0, \key2
        aese.8          q1, \key2
        aese.8          q2, \key2
        aese.8          q3, \key2
        veor            q0, q0, \key3
        veor            q1, q1, \key3
        veor            q2, q2, \key3
        veor            q3, q3, \key3
        .endm

        .macro          dec_fround_4x, key1, key2, key3
        dec_round       q0, \key1
        dec_round       q1, \key1
        dec_round       q2, \key1
        dec_round       q3, \key1
        aesd.8          q0, \key2
        aesd.8          q1, \key2
        aesd.8          q2, \key2
        aesd.8          q3, \key2
        veor            q0, q0, \key3
        veor            q1, q1, \key3
        veor            q2, q2, \key3
        veor            q3, q3, \key3
        .endm

        .macro          do_block, dround, fround
        cmp             r3, #12                 @ which key size?
        vld1.32         {q10-q11}, [ip]!
        \dround         q8, q9
        vld1.32         {q12-q13}, [ip]!
        \dround         q10, q11
        vld1.32         {q10-q11}, [ip]!
        \dround         q12, q13
        vld1.32         {q12-q13}, [ip]!
        \dround         q10, q11
        blo             0f                      @ AES-128: 10 rounds
        vld1.32         {q10-q11}, [ip]!
        \dround         q12, q13
        beq             1f                      @ AES-192: 12 rounds
        vld1.32         {q12-q13}, [ip]
        \dround         q10, q11
0:      \fround         q12, q13, q14
        bx              lr

1:      \fround         q10, q11, q14
        bx              lr
        .endm
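
        /*
         * do_block walks the rest of the key schedule through ip, which the
         * callers below point at the 3rd round key, and picks the round count
         * from r3: the blo above is taken for 10 rounds (AES-128), the beq
         * for 12 rounds (AES-192), and the fall-through path handles 14
         * rounds (AES-256).
         */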

        /*
         * Internal, non-AAPCS compliant functions that implement the core AES
         * transforms. These preserve all registers except q0 - q3, q10 - q13
         * and ip.
         * Arguments:
         *   q0        : first in/output block
         *   q1        : second in/output block (_4x version only)
         *   q2        : third in/output block (_4x version only)
         *   q3        : fourth in/output block (_4x version only)
         *   q8        : first round key
         *   q9        : second round key
         *   q14       : final round key
         *   r2        : address of round key array
         *   r3        : number of rounds
         */
        .align          6
aes_encrypt:
        add             ip, r2, #32             @ 3rd round key
.Laes_encrypt_tweak:
        do_block        enc_dround, enc_fround
ENDPROC(aes_encrypt)

        .align          6
aes_decrypt:
        add             ip, r2, #32             @ 3rd round key
        do_block        dec_dround, dec_fround
ENDPROC(aes_decrypt)

        .align          6
aes_encrypt_4x:
        add             ip, r2, #32             @ 3rd round key
        do_block        enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

        .align          6
aes_decrypt_4x:
        add             ip, r2, #32             @ 3rd round key
        do_block        dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

        .macro          prepare_key, rk, rounds
        add             ip, \rk, \rounds, lsl #4
        vld1.32         {q8-q9}, [\rk]          @ load first 2 round keys
        vld1.32         {q14}, [ip]             @ load last round key
        .endm
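
        /*
         * prepare_key points ip at the last round key (rk + 16 * rounds) and
         * preloads round keys 0 and 1 into q8/q9 and the final round key into
         * q14; the remaining round keys are streamed from memory by do_block.
         */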

        /*
         * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
         *                    int rounds, int blocks)
         * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
         *                    int rounds, int blocks)
         */
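        /*
         * A minimal sketch of how the C glue side is expected to drive these
         * entry points; the declaration mirrors the prototype above, while
         * names such as num_rounds() and ctx->key_enc are glue-side details
         * shown only for illustration. The same pattern applies to the other
         * ce_aes_* routines below.
         *
         *      asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[],
         *                                         u32 const rk[], int rounds,
         *                                         int blocks);
         *
         *      kernel_neon_begin();
         *      ce_aes_ecb_encrypt(dst, src, ctx->key_enc, num_rounds(ctx),
         *                         blocks);
         *      kernel_neon_end();
         */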
ENTRY(ce_aes_ecb_encrypt)
        push            {r4, lr}
        ldr             r4, [sp, #8]
        prepare_key     r2, r3
.Lecbencloop4x:
        subs            r4, r4, #4
        bmi             .Lecbenc1x
        vld1.8          {q0-q1}, [r1]!
        vld1.8          {q2-q3}, [r1]!
        bl              aes_encrypt_4x
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2-q3}, [r0]!
        b               .Lecbencloop4x
.Lecbenc1x:
        adds            r4, r4, #4
        beq             .Lecbencout
.Lecbencloop:
        vld1.8          {q0}, [r1]!
        bl              aes_encrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lecbencloop
.Lecbencout:
        pop             {r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
        push            {r4, lr}
        ldr             r4, [sp, #8]
        prepare_key     r2, r3
.Lecbdecloop4x:
        subs            r4, r4, #4
        bmi             .Lecbdec1x
        vld1.8          {q0-q1}, [r1]!
        vld1.8          {q2-q3}, [r1]!
        bl              aes_decrypt_4x
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2-q3}, [r0]!
        b               .Lecbdecloop4x
.Lecbdec1x:
        adds            r4, r4, #4
        beq             .Lecbdecout
.Lecbdecloop:
        vld1.8          {q0}, [r1]!
        bl              aes_decrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lecbdecloop
.Lecbdecout:
        pop             {r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

        /*
         * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
         *                    int rounds, int blocks, u8 iv[])
         * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
         *                    int rounds, int blocks, u8 iv[])
         */
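        /*
         * CBC encryption is inherently serial: each plaintext block is xor'ed
         * with the previous ciphertext block before it is encrypted, so only
         * the one-block path is used. CBC decryption has no such dependency,
         * which is why ce_aes_cbc_decrypt below can run aes_decrypt_4x on
         * four blocks at a time.
         */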
ENTRY(ce_aes_cbc_encrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]
        vld1.8          {q0}, [r5]
        prepare_key     r2, r3
.Lcbcencloop:
        vld1.8          {q1}, [r1]!             @ get next pt block
        veor            q0, q0, q1              @ ..and xor with iv
        bl              aes_encrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lcbcencloop
        vst1.8          {q0}, [r5]
        pop             {r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]
        vld1.8          {q15}, [r5]             @ keep iv in q15
        prepare_key     r2, r3
.Lcbcdecloop4x:
        subs            r4, r4, #4
        bmi             .Lcbcdec1x
        vld1.8          {q0-q1}, [r1]!
        vld1.8          {q2-q3}, [r1]!
        vmov            q4, q0
        vmov            q5, q1
        vmov            q6, q2
        vmov            q7, q3
        bl              aes_decrypt_4x
        veor            q0, q0, q15
        veor            q1, q1, q4
        veor            q2, q2, q5
        veor            q3, q3, q6
        vmov            q15, q7
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2-q3}, [r0]!
        b               .Lcbcdecloop4x
.Lcbcdec1x:
        adds            r4, r4, #4
        beq             .Lcbcdecout
        vmov            q6, q14                 @ preserve last round key
.Lcbcdecloop:
        vld1.8          {q0}, [r1]!             @ get next ct block
        veor            q14, q15, q6            @ combine prev ct with last key
        vmov            q15, q0
        bl              aes_decrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lcbcdecloop
.Lcbcdecout:
        vst1.8          {q15}, [r5]             @ return iv
        pop             {r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)


        /*
         * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
         *                        int rounds, int bytes, u8 const iv[])
         * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
         *                        int rounds, int bytes, u8 const iv[])
         */

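        /*
         * CBC with ciphertext stealing handles messages whose length is not a
         * multiple of 16 bytes (but is at least 16 bytes): the final two
         * blocks are processed together and emitted in swapped order, with
         * the short block completed using bytes "stolen" from the penultimate
         * ciphertext block. The vtbl/vtbx permutations below build the
         * shuffled blocks from .Lcts_permute_table (see the note next to the
         * table), and the overlapping loads and stores keep all accesses
         * within the bounds of the message.
         */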
ENTRY(ce_aes_cbc_cts_encrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]

        movw            ip, :lower16:.Lcts_permute_table
        movt            ip, :upper16:.Lcts_permute_table
        sub             r4, r4, #16
        add             lr, ip, #32
        add             ip, ip, r4
        sub             lr, lr, r4
        vld1.8          {q5}, [ip]
        vld1.8          {q6}, [lr]

        add             ip, r1, r4
        vld1.8          {q0}, [r1]                      @ overlapping loads
        vld1.8          {q3}, [ip]

        vld1.8          {q1}, [r5]                      @ get iv
        prepare_key     r2, r3

        veor            q0, q0, q1                      @ xor with iv
        bl              aes_encrypt

        vtbl.8          d4, {d0-d1}, d10
        vtbl.8          d5, {d0-d1}, d11
        vtbl.8          d2, {d6-d7}, d12
        vtbl.8          d3, {d6-d7}, d13

        veor            q0, q0, q1
        bl              aes_encrypt

        add             r4, r0, r4
        vst1.8          {q2}, [r4]                      @ overlapping stores
        vst1.8          {q0}, [r0]

        pop             {r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)

ENTRY(ce_aes_cbc_cts_decrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]

        movw            ip, :lower16:.Lcts_permute_table
        movt            ip, :upper16:.Lcts_permute_table
        sub             r4, r4, #16
        add             lr, ip, #32
        add             ip, ip, r4
        sub             lr, lr, r4
        vld1.8          {q5}, [ip]
        vld1.8          {q6}, [lr]

        add             ip, r1, r4
        vld1.8          {q0}, [r1]                      @ overlapping loads
        vld1.8          {q1}, [ip]

        vld1.8          {q3}, [r5]                      @ get iv
        prepare_key     r2, r3

        bl              aes_decrypt

        vtbl.8          d4, {d0-d1}, d10
        vtbl.8          d5, {d0-d1}, d11
        vtbx.8          d0, {d2-d3}, d12
        vtbx.8          d1, {d2-d3}, d13

        veor            q1, q1, q2
        bl              aes_decrypt
        veor            q0, q0, q3                      @ xor with iv

        add             r4, r0, r4
        vst1.8          {q1}, [r4]                      @ overlapping stores
        vst1.8          {q0}, [r0]

        pop             {r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


        /*
         * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
         *                    int rounds, int blocks, u8 ctr[])
         */
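        /*
         * Each keystream block is the encryption of the current counter
         * value. The low 32 bits of the counter are kept byte-swapped in r6
         * so they can be incremented with ordinary adds; if adding 'blocks'
         * could overflow those 32 bits (the cmn check below), the four-block
         * fast path is skipped and the carry into the upper counter words is
         * handled per block in .Lctrcarry. A negative block count requests a
         * single block of raw keystream (.Lctrtailblock), which the caller
         * xors into a partial tail itself.
         */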
ENTRY(ce_aes_ctr_encrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]
        vld1.8          {q7}, [r5]              @ load ctr
        prepare_key     r2, r3
        vmov            r6, s31                 @ keep swabbed ctr in r6
        rev             r6, r6
        cmn             r6, r4                  @ 32 bit overflow?
        bcs             .Lctrloop
.Lctrloop4x:
        subs            r4, r4, #4
        bmi             .Lctr1x
        add             r6, r6, #1
        vmov            q0, q7
        vmov            q1, q7
        rev             ip, r6
        add             r6, r6, #1
        vmov            q2, q7
        vmov            s7, ip
        rev             ip, r6
        add             r6, r6, #1
        vmov            q3, q7
        vmov            s11, ip
        rev             ip, r6
        add             r6, r6, #1
        vmov            s15, ip
        vld1.8          {q4-q5}, [r1]!
        vld1.8          {q6}, [r1]!
        vld1.8          {q15}, [r1]!
        bl              aes_encrypt_4x
        veor            q0, q0, q4
        veor            q1, q1, q5
        veor            q2, q2, q6
        veor            q3, q3, q15
        rev             ip, r6
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2-q3}, [r0]!
        vmov            s31, ip
        b               .Lctrloop4x
.Lctr1x:
        adds            r4, r4, #4
        beq             .Lctrout
.Lctrloop:
        vmov            q0, q7
        bl              aes_encrypt

        adds            r6, r6, #1              @ increment BE ctr
        rev             ip, r6
        vmov            s31, ip
        bcs             .Lctrcarry

.Lctrcarrydone:
        subs            r4, r4, #1
        bmi             .Lctrtailblock          @ blocks < 0 means tail block
        vld1.8          {q3}, [r1]!
        veor            q3, q0, q3
        vst1.8          {q3}, [r0]!
        bne             .Lctrloop

.Lctrout:
        vst1.8          {q7}, [r5]              @ return next CTR value
        pop             {r4-r6, pc}

.Lctrtailblock:
        vst1.8          {q0}, [r0, :64]         @ return the key stream
        b               .Lctrout

.Lctrcarry:
        .irp            sreg, s30, s29, s28
        vmov            ip, \sreg               @ load next word of ctr
        rev             ip, ip                  @ ... to handle the carry
        adds            ip, ip, #1
        rev             ip, ip
        vmov            \sreg, ip
        bcc             .Lctrcarrydone
        .endr
        b               .Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)

        /*
         * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
         *                    int rounds, int bytes, u8 iv[], u32 const rk2[],
         *                    int first)
         * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
         *                    int rounds, int bytes, u8 iv[], u32 const rk2[],
         *                    int first)
         */

        .macro          next_tweak, out, in, const, tmp
        vshr.s64        \tmp, \in, #63
        vand            \tmp, \tmp, \const
        vadd.u64        \out, \in, \in
        vext.8          \tmp, \tmp, \tmp, #8
        veor            \out, \out, \tmp
        .endm
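
        /*
         * next_tweak advances the XTS tweak, i.e. multiplies it by x in
         * GF(2^128) with reduction polynomial x^128 + x^7 + x^2 + x + 1: the
         * 128-bit value is doubled and, if the top bit was set, 0x87 is
         * xor'ed into the low byte. The vshr.s64/vand/vext sequence turns the
         * sign bits of both 64-bit halves into carry masks and rotates them
         * so that each carry lands in the other half. Roughly, in C (a sketch
         * for illustration, not the kernel's own helper):
         *
         *      void next_tweak(u64 out[2], const u64 in[2])
         *      {
         *              u64 carry = (in[1] >> 63) ? 0x87 : 0;
         *
         *              out[1] = (in[1] << 1) | (in[0] >> 63);
         *              out[0] = (in[0] << 1) ^ carry;
         *      }
         */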

ce_aes_xts_init:
        vmov.i32        d30, #0x87              @ compose tweak mask vector
        vmovl.u32       q15, d30
        vshr.u64        d30, d31, #7

        ldrd            r4, r5, [sp, #16]       @ load args
        ldr             r6, [sp, #28]
        vld1.8          {q0}, [r5]              @ load iv
        teq             r6, #1                  @ start of a block?
        bxne            lr

        @ Encrypt the IV in q0 with the second AES key. This should only
        @ be done at the start of a block.
        ldr             r6, [sp, #24]           @ load AES key 2
        prepare_key     r6, r3
        add             ip, r6, #32             @ 3rd round key of key 2
        b               .Laes_encrypt_tweak     @ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
        push            {r4-r6, lr}

        bl              ce_aes_xts_init         @ run shared prologue
        prepare_key     r2, r3
        vmov            q4, q0

        teq             r6, #0                  @ start of a block?
        bne             .Lxtsenc4x

.Lxtsencloop4x:
        next_tweak      q4, q4, q15, q10
.Lxtsenc4x:
        subs            r4, r4, #64
        bmi             .Lxtsenc1x
        vld1.8          {q0-q1}, [r1]!          @ get 4 pt blocks
        vld1.8          {q2-q3}, [r1]!
        next_tweak      q5, q4, q15, q10
        veor            q0, q0, q4
        next_tweak      q6, q5, q15, q10
        veor            q1, q1, q5
        next_tweak      q7, q6, q15, q10
        veor            q2, q2, q6
        veor            q3, q3, q7
        bl              aes_encrypt_4x
        veor            q0, q0, q4
        veor            q1, q1, q5
        veor            q2, q2, q6
        veor            q3, q3, q7
        vst1.8          {q0-q1}, [r0]!          @ write 4 ct blocks
        vst1.8          {q2-q3}, [r0]!
        vmov            q4, q7
        teq             r4, #0
        beq             .Lxtsencret
        b               .Lxtsencloop4x
.Lxtsenc1x:
        adds            r4, r4, #64
        beq             .Lxtsencout
        subs            r4, r4, #16
        bmi             .LxtsencctsNx
.Lxtsencloop:
        vld1.8          {q0}, [r1]!
.Lxtsencctsout:
        veor            q0, q0, q4
        bl              aes_encrypt
        veor            q0, q0, q4
        teq             r4, #0
        beq             .Lxtsencout
        subs            r4, r4, #16
        next_tweak      q4, q4, q15, q6
        bmi             .Lxtsenccts
        vst1.8          {q0}, [r0]!
        b               .Lxtsencloop
.Lxtsencout:
        vst1.8          {q0}, [r0]
.Lxtsencret:
        vst1.8          {q4}, [r5]
        pop             {r4-r6, pc}

.LxtsencctsNx:
        vmov            q0, q3
        sub             r0, r0, #16
.Lxtsenccts:
        movw            ip, :lower16:.Lcts_permute_table
        movt            ip, :upper16:.Lcts_permute_table

        add             r1, r1, r4              @ rewind input pointer
        add             r4, r4, #16             @ # bytes in final block
        add             lr, ip, #32
        add             ip, ip, r4
        sub             lr, lr, r4
        add             r4, r0, r4              @ output address of final block

        vld1.8          {q1}, [r1]              @ load final partial block
        vld1.8          {q2}, [ip]
        vld1.8          {q3}, [lr]

        vtbl.8          d4, {d0-d1}, d4
        vtbl.8          d5, {d0-d1}, d5
        vtbx.8          d0, {d2-d3}, d6
        vtbx.8          d1, {d2-d3}, d7

        vst1.8          {q2}, [r4]              @ overlapping stores
        mov             r4, #0
        b               .Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
        push            {r4-r6, lr}

        bl              ce_aes_xts_init         @ run shared prologue
        prepare_key     r2, r3
        vmov            q4, q0

        /* subtract 16 bytes if we are doing CTS */
        tst             r4, #0xf
        subne           r4, r4, #0x10

        teq             r6, #0                  @ start of a block?
        bne             .Lxtsdec4x

.Lxtsdecloop4x:
        next_tweak      q4, q4, q15, q10
.Lxtsdec4x:
        subs            r4, r4, #64
        bmi             .Lxtsdec1x
        vld1.8          {q0-q1}, [r1]!          @ get 4 ct blocks
        vld1.8          {q2-q3}, [r1]!
        next_tweak      q5, q4, q15, q10
        veor            q0, q0, q4
        next_tweak      q6, q5, q15, q10
        veor            q1, q1, q5
        next_tweak      q7, q6, q15, q10
        veor            q2, q2, q6
        veor            q3, q3, q7
        bl              aes_decrypt_4x
        veor            q0, q0, q4
        veor            q1, q1, q5
        veor            q2, q2, q6
        veor            q3, q3, q7
        vst1.8          {q0-q1}, [r0]!          @ write 4 pt blocks
        vst1.8          {q2-q3}, [r0]!
        vmov            q4, q7
        teq             r4, #0
        beq             .Lxtsdecout
        b               .Lxtsdecloop4x
.Lxtsdec1x:
        adds            r4, r4, #64
        beq             .Lxtsdecout
        subs            r4, r4, #16
.Lxtsdecloop:
        vld1.8          {q0}, [r1]!
        bmi             .Lxtsdeccts
.Lxtsdecctsout:
        veor            q0, q0, q4
        bl              aes_decrypt
        veor            q0, q0, q4
        vst1.8          {q0}, [r0]!
        teq             r4, #0
        beq             .Lxtsdecout
        subs            r4, r4, #16
        next_tweak      q4, q4, q15, q6
        b               .Lxtsdecloop
.Lxtsdecout:
        vst1.8          {q4}, [r5]
        pop             {r4-r6, pc}

.Lxtsdeccts:
        movw            ip, :lower16:.Lcts_permute_table
        movt            ip, :upper16:.Lcts_permute_table

        add             r1, r1, r4              @ rewind input pointer
        add             r4, r4, #16             @ # bytes in final block
        add             lr, ip, #32
        add             ip, ip, r4
        sub             lr, lr, r4
        add             r4, r0, r4              @ output address of final block

        next_tweak      q5, q4, q15, q6

        vld1.8          {q1}, [r1]              @ load final partial block
        vld1.8          {q2}, [ip]
        vld1.8          {q3}, [lr]

        veor            q0, q0, q5
        bl              aes_decrypt
        veor            q0, q0, q5

        vtbl.8          d4, {d0-d1}, d4
        vtbl.8          d5, {d0-d1}, d5
        vtbx.8          d0, {d2-d3}, d6
        vtbx.8          d1, {d2-d3}, d7

        vst1.8          {q2}, [r4]              @ overlapping stores
        mov             r4, #0
        b               .Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

        /*
         * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
         *                             AES sbox substitution on each byte in
         *                             'input'
         */
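        /*
         * With an all-zero state, aese reduces to SubBytes + ShiftRows of the
         * round key operand. The input word is duplicated into all four
         * columns, so ShiftRows only shuffles identical columns and s0 ends
         * up holding the S-box substitution of each byte of r0; the glue code
         * uses this as the SubWord step of the key expansion.
         */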
ENTRY(ce_aes_sub)
        vdup.32         q1, r0
        veor            q0, q0, q0
        aese.8          q0, q1
        vmov            r0, s0
        bx              lr
ENDPROC(ce_aes_sub)

        /*
         * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
         *                                        operation on round key *src
         */
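        /*
         * Applying InvMixColumns to the inner round keys is what turns an
         * encryption key schedule into one usable with the "equivalent
         * inverse cipher" implemented by aesd/aesimc; the glue code calls
         * this for each of the inner round keys when deriving the decryption
         * key schedule.
         */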
ENTRY(ce_aes_invert)
        vld1.32         {q0}, [r1]
        aesimc.8        q0, q0
        vst1.32         {q0}, [r0]
        bx              lr
ENDPROC(ce_aes_invert)

        .section        ".rodata", "a"
        .align          6
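        /*
         * The 0xff entries are out-of-range indices: vtbl turns them into
         * zero bytes and vtbx leaves the destination byte unchanged. Sliding
         * a 16-byte window over this table therefore yields index vectors
         * that select the bytes of a partial block and either clear or
         * preserve the remainder, which is how the CBC-CTS and XTS tail code
         * above assembles the swapped final blocks without per-byte branches.
         */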
.Lcts_permute_table:
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
        .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
