root/arch/arm64/crypto/sha3-ce-core.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
   4  *
   5  * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License version 2 as
   9  * published by the Free Software Foundation.
  10  */
  11 
  12 #include <linux/linkage.h>
  13 #include <asm/assembler.h>
  14 
  15         .irp    reg,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
             /*
              * For every vector register number N, define the assembler symbols
              * .LvN.16b and .LvN.2d with the value N. The instruction macros in
              * this file paste ".L" in front of an operand such as v7.16b to
              * turn it into its register number when building an opcode.
              */
  16         .set    .Lv\reg\().16b, \reg
  17         .set    .Lv\reg\().2d, \reg
  18         .endr
  19 
  20         /*
  21          * ARMv8.2 Crypto Extensions instructions
  22          */
  23         .macro  eor3, rd, rn, rm, ra
             /*
              * EOR3 Vd, Vn, Vm, Va (three-way exclusive OR in the Armv8.2 SHA3
              * extension), emitted as a raw opcode. Fields as packed below:
              * rm -> bits 20:16, ra -> bits 14:10, rn -> bits 9:5, rd -> bits 4:0.
              * Operands must be written as vN.16b or vN.2d so that the matching
              * .LvN.* register-number symbol exists.
              */
  24         .inst   0xce000000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
  25         .endm
  26 
  27         .macro  rax1, rd, rn, rm
             /*
              * RAX1 Vd.2D, Vn.2D, Vm.2D (rotate-left-by-one and exclusive OR in
              * the Armv8.2 SHA3 extension), emitted as a raw opcode. Fields as
              * packed below: rm -> bits 20:16, rn -> bits 9:5, rd -> bits 4:0.
              * Operands must be written as vN.2d or vN.16b so that the matching
              * .LvN.* register-number symbol exists.
              */
  28         .inst   0xce608c00 | (.L\rm << 16) | (.L\rn << 5) | .L\rd
  29         .endm
  30 
  31         .macro  bcax, rd, rn, rm, ra
             /*
              * BCAX Vd, Vn, Vm, Va (bit clear and exclusive OR in the Armv8.2
              * SHA3 extension), emitted as a raw opcode. Fields as packed below:
              * rm -> bits 20:16, ra -> bits 14:10, rn -> bits 9:5, rd -> bits 4:0.
              * Operands must be written as vN.16b or vN.2d so that the matching
              * .LvN.* register-number symbol exists.
              */
  32         .inst   0xce200000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
  33         .endm
  34 
  35         .macro  xar, rd, rn, rm, imm6
             /*
              * XAR Vd.2D, Vn.2D, Vm.2D, #imm6 (exclusive OR and rotate in the
              * Armv8.2 SHA3 extension), emitted as a raw opcode. Fields as packed
              * below: rm -> bits 20:16, imm6 -> bits 15:10, rn -> bits 9:5,
              * rd -> bits 4:0. imm6 is an assemble-time constant expression;
              * the register operands must be written as vN.2d or vN.16b so that
              * the matching .LvN.* register-number symbol exists.
              */
  36         .inst   0xce800000 | (.L\rm << 16) | ((\imm6) << 10) | (.L\rn << 5) | .L\rd
  37         .endm
  38 
  39         /*
  40          * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
              *
              * Absorbs `blocks` input blocks from `data` into the 25 x 64-bit state
              * at `st`, running 24 rounds of the permutation after each block, and
              * writes the state back to `st` before returning.
              *
              * Register use (the arguments are copied out of x0-x3 on entry):
              *   x19     st      - state base address
              *   x20     data    - input pointer, post-incremented as lanes are read
              *   x21     blocks  - w21 is decremented once per block
              *   x22     dg_size - only bits 6, 4 and 2 are tested, to select how
              *                     many 8-byte lanes are absorbed per block
              *   w8      rounds still to run for the current block (24 down to 0);
              *           x8 doubles as a scratch address for state loads/stores
              *   x9      cursor into .Lsha3_rcon, rewound for every block
              *   v0-v24  state lanes 0-24 (low 64 bits); v25-v31 are temporaries
              *
              * NOTE(review): w21 is decremented before it is tested, so this code
              * appears to require blocks >= 1 - confirm that callers guarantee it.
              * NOTE(review): the existing SHA3-xxx comments suggest dg_size is the
              * digest size in bytes (64/48/32/28) - confirm against the caller.
  41          */
  42         .text
  43 ENTRY(sha3_ce_transform)
             // NOTE(review): frame_push/frame_pop are defined outside this file;
             // presumably they save and restore x19-x22, which are overwritten
             // below - confirm against their definition.
  44         frame_push      4
  45 
             // keep the four arguments in x19-x22 for the rest of the function
  46         mov     x19, x0
  47         mov     x20, x1
  48         mov     x21, x2
  49         mov     x22, x3
  50 
  51 0:      /* load state */
             // lanes 0-3 come straight from [x19]; x8 walks lanes 4-24
  52         add     x8, x19, #32
  53         ld1     { v0.1d- v3.1d}, [x19]
  54         ld1     { v4.1d- v7.1d}, [x8], #32
  55         ld1     { v8.1d-v11.1d}, [x8], #32
  56         ld1     {v12.1d-v15.1d}, [x8], #32
  57         ld1     {v16.1d-v19.1d}, [x8], #32
  58         ld1     {v20.1d-v23.1d}, [x8], #32
  59         ld1     {v24.1d}, [x8]
  60 
             // per-block setup: one block fewer to go, 24 rounds to run, and the
             // round-constant cursor back at the start of the table
  61 1:      sub     w21, w21, #1
  62         mov     w8, #24
  63         adr_l   x9, .Lsha3_rcon
  64 
  65         /* load input */
             // every variant absorbs at least these 7 lanes (56 bytes) into v0-v6
  66         ld1     {v25.8b-v28.8b}, [x20], #32
  67         ld1     {v29.8b-v31.8b}, [x20], #24
  68         eor     v0.8b, v0.8b, v25.8b
  69         eor     v1.8b, v1.8b, v26.8b
  70         eor     v2.8b, v2.8b, v27.8b
  71         eor     v3.8b, v3.8b, v28.8b
  72         eor     v4.8b, v4.8b, v29.8b
  73         eor     v5.8b, v5.8b, v30.8b
  74         eor     v6.8b, v6.8b, v31.8b
  75 
             // dg_size bit 6 set: only 2 more lanes follow (9 lanes, 72 bytes)
  76         tbnz    x22, #6, 3f             // SHA3-512
  77 
             // otherwise 6 more lanes into v7-v12 (13 lanes, 104 bytes so far)
  78         ld1     {v25.8b-v28.8b}, [x20], #32
  79         ld1     {v29.8b-v30.8b}, [x20], #16
  80         eor      v7.8b,  v7.8b, v25.8b
  81         eor      v8.8b,  v8.8b, v26.8b
  82         eor      v9.8b,  v9.8b, v27.8b
  83         eor     v10.8b, v10.8b, v28.8b
  84         eor     v11.8b, v11.8b, v29.8b
  85         eor     v12.8b, v12.8b, v30.8b
  86 
  87         tbnz    x22, #4, 2f             // SHA3-384 or SHA3-224
  88 
  89         // SHA3-256
             // bit 4 clear: 4 more lanes into v13-v16 (17 lanes, 136 bytes)
  90         ld1     {v25.8b-v28.8b}, [x20], #32
  91         eor     v13.8b, v13.8b, v25.8b
  92         eor     v14.8b, v14.8b, v26.8b
  93         eor     v15.8b, v15.8b, v27.8b
  94         eor     v16.8b, v16.8b, v28.8b
  95         b       4f
  96 
             // bit 4 set and bit 2 clear: the block ends at 13 lanes (104 bytes)
  97 2:      tbz     x22, #2, 4f             // bit 2 cleared? SHA-384
  98 
  99         // SHA3-224
             // bits 4 and 2 set: 5 more lanes into v13-v17 (18 lanes, 144 bytes)
 100         ld1     {v25.8b-v28.8b}, [x20], #32
 101         ld1     {v29.8b}, [x20], #8
 102         eor     v13.8b, v13.8b, v25.8b
 103         eor     v14.8b, v14.8b, v26.8b
 104         eor     v15.8b, v15.8b, v27.8b
 105         eor     v16.8b, v16.8b, v28.8b
 106         eor     v17.8b, v17.8b, v29.8b
 107         b       4f
 108 
 109         // SHA3-512
 110 3:      ld1     {v25.8b-v26.8b}, [x20], #16
 111         eor      v7.8b,  v7.8b, v25.8b
 112         eor      v8.8b,  v8.8b, v26.8b
 113 
             // ---- round loop: one permutation round per pass through label 4 ----
 114 4:      sub     w8, w8, #1
 115 
             // column parities: v25..v29 = XOR of the five lanes n, n+5, n+10,
             // n+15, n+20 for n = 0..4 (each built with two eor3 steps)
 116         eor3    v29.16b,  v4.16b,  v9.16b, v14.16b
 117         eor3    v26.16b,  v1.16b,  v6.16b, v11.16b
 118         eor3    v28.16b,  v3.16b,  v8.16b, v13.16b
 119         eor3    v25.16b,  v0.16b,  v5.16b, v10.16b
 120         eor3    v27.16b,  v2.16b,  v7.16b, v12.16b
 121         eor3    v29.16b, v29.16b, v19.16b, v24.16b
 122         eor3    v26.16b, v26.16b, v16.16b, v21.16b
 123         eor3    v28.16b, v28.16b, v18.16b, v23.16b
 124         eor3    v25.16b, v25.16b, v15.16b, v20.16b
 125         eor3    v27.16b, v27.16b, v17.16b, v22.16b
 126 
             // combine neighbouring parities; bc[n] is the value XORed into lanes
             // n, n+5, n+10, n+15 and n+20 below. The registers are overwritten
             // in place, so the order of these five lines matters.
 127         rax1    v30.2d, v29.2d, v26.2d  // bc[0]
 128         rax1    v26.2d, v26.2d, v28.2d  // bc[2]
 129         rax1    v28.2d, v28.2d, v25.2d  // bc[4]
 130         rax1    v25.2d, v25.2d, v27.2d  // bc[1]
 131         rax1    v27.2d, v27.2d, v29.2d  // bc[3]
 132 
             // XOR bc[] into every lane and rotate it. xar rotates right, hence
             // the (64 - n) form for a left rotation by n. Each result is
             // written to the register the bcax step below reads it from, so
             // sources are consumed before they are overwritten; v29, v31, v28,
             // v27, v25, v26 and v30 carry the lanes that have no free slot yet.
             // Lane 0 is not rotated and only gets the plain eor.
 133         eor      v0.16b,  v0.16b, v30.16b
 134         xar      v29.2d,   v1.2d,  v25.2d, (64 - 1)
 135         xar       v1.2d,   v6.2d,  v25.2d, (64 - 44)
 136         xar       v6.2d,   v9.2d,  v28.2d, (64 - 20)
 137         xar       v9.2d,  v22.2d,  v26.2d, (64 - 61)
 138         xar      v22.2d,  v14.2d,  v28.2d, (64 - 39)
 139         xar      v14.2d,  v20.2d,  v30.2d, (64 - 18)
 140         xar      v31.2d,   v2.2d,  v26.2d, (64 - 62)
 141         xar       v2.2d,  v12.2d,  v26.2d, (64 - 43)
 142         xar      v12.2d,  v13.2d,  v27.2d, (64 - 25)
 143         xar      v13.2d,  v19.2d,  v28.2d, (64 - 8)
 144         xar      v19.2d,  v23.2d,  v27.2d, (64 - 56)
 145         xar      v23.2d,  v15.2d,  v30.2d, (64 - 41)
 146         xar      v15.2d,   v4.2d,  v28.2d, (64 - 27)
 147         xar      v28.2d,  v24.2d,  v28.2d, (64 - 14)
 148         xar      v24.2d,  v21.2d,  v25.2d, (64 - 2)
 149         xar       v8.2d,   v8.2d,  v27.2d, (64 - 55)
 150         xar       v4.2d,  v16.2d,  v25.2d, (64 - 45)
 151         xar      v16.2d,   v5.2d,  v30.2d, (64 - 36)
 152         xar       v5.2d,   v3.2d,  v27.2d, (64 - 28)
 153         xar      v27.2d,  v18.2d,  v27.2d, (64 - 21)
 154         xar       v3.2d,  v17.2d,  v26.2d, (64 - 15)
 155         xar      v25.2d,  v11.2d,  v25.2d, (64 - 10)
 156         xar      v26.2d,   v7.2d,  v26.2d, (64 - 6)
 157         xar      v30.2d,  v10.2d,  v30.2d, (64 - 3)
 158 
             // non-linear step, one group of five lanes at a time, producing the
             // new lanes 20-24 first. v8 is read here before it is redefined by
             // the lanes 5-9 group further down.
 159         bcax    v20.16b, v31.16b, v22.16b,  v8.16b
 160         bcax    v21.16b,  v8.16b, v23.16b, v22.16b
 161         bcax    v22.16b, v22.16b, v24.16b, v23.16b
 162         bcax    v23.16b, v23.16b, v31.16b, v24.16b
 163         bcax    v24.16b, v24.16b,  v8.16b, v31.16b
 164 
             // v31 is free again: fetch this round's constant (replicated to both
             // 64-bit elements) and advance the cursor by one entry
 165         ld1r    {v31.2d}, [x9], #8
 166 
             // new lanes 15-19
 167         bcax    v17.16b, v25.16b, v19.16b,  v3.16b
 168         bcax    v18.16b,  v3.16b, v15.16b, v19.16b
 169         bcax    v19.16b, v19.16b, v16.16b, v15.16b
 170         bcax    v15.16b, v15.16b, v25.16b, v16.16b
 171         bcax    v16.16b, v16.16b,  v3.16b, v25.16b
 172 
             // new lanes 10-14
 173         bcax    v10.16b, v29.16b, v12.16b, v26.16b
 174         bcax    v11.16b, v26.16b, v13.16b, v12.16b
 175         bcax    v12.16b, v12.16b, v14.16b, v13.16b
 176         bcax    v13.16b, v13.16b, v29.16b, v14.16b
 177         bcax    v14.16b, v14.16b, v26.16b, v29.16b
 178 
             // new lanes 5-9
 179         bcax     v7.16b, v30.16b,  v9.16b,  v4.16b
 180         bcax     v8.16b,  v4.16b,  v5.16b,  v9.16b
 181         bcax     v9.16b,  v9.16b,  v6.16b,  v5.16b
 182         bcax     v5.16b,  v5.16b, v30.16b,  v6.16b
 183         bcax     v6.16b,  v6.16b,  v4.16b, v30.16b
 184 
             // new lanes 0-4 (v3 and v4 are written before v0-v2 because the
             // later lines still need the old v0, v1 and v2)
 185         bcax     v3.16b, v27.16b,  v0.16b, v28.16b
 186         bcax     v4.16b, v28.16b,  v1.16b,  v0.16b
 187         bcax     v0.16b,  v0.16b,  v2.16b,  v1.16b
 188         bcax     v1.16b,  v1.16b, v27.16b,  v2.16b
 189         bcax     v2.16b,  v2.16b, v28.16b, v27.16b
 190 
             // mix the round constant into lane 0
 191         eor      v0.16b,  v0.16b, v31.16b
 192 
             // loop until all 24 rounds are done; then leave through label 5 if
             // this was the last block
 193         cbnz    w8, 4b
 194         cbz     w21, 5f
 195 
             /*
              * More blocks remain. The three *_yield_neon helpers are macros
              * defined outside this file. The code between
              * if_will_cond_yield_neon and do_cond_yield_neon writes the state
              * back to st, and the branch to label 0 after do_cond_yield_neon
              * reloads it before the next block is absorbed.
              * NOTE(review): presumably that path only runs when a yield is
              * actually needed - confirm against the macro definitions.
              */
 196         if_will_cond_yield_neon
 197         add     x8, x19, #32
 198         st1     { v0.1d- v3.1d}, [x19]
 199         st1     { v4.1d- v7.1d}, [x8], #32
 200         st1     { v8.1d-v11.1d}, [x8], #32
 201         st1     {v12.1d-v15.1d}, [x8], #32
 202         st1     {v16.1d-v19.1d}, [x8], #32
 203         st1     {v20.1d-v23.1d}, [x8], #32
 204         st1     {v24.1d}, [x8]
 205         do_cond_yield_neon
 206         b               0b
 207         endif_yield_neon
 208 
             // absorb the next block with the state kept in v0-v24
 209         b       1b
 210 
 211         /* save state */
             // x19 itself is post-incremented here, unlike in the store sequence
             // above, so it no longer points at st afterwards
 212 5:      st1     { v0.1d- v3.1d}, [x19], #32
 213         st1     { v4.1d- v7.1d}, [x19], #32
 214         st1     { v8.1d-v11.1d}, [x19], #32
 215         st1     {v12.1d-v15.1d}, [x19], #32
 216         st1     {v16.1d-v19.1d}, [x19], #32
 217         st1     {v20.1d-v23.1d}, [x19], #32
 218         st1     {v24.1d}, [x19]
 219         frame_pop
 220         ret
 221 ENDPROC(sha3_ce_transform)
 222 
 223         .section        ".rodata", "a"
 224         .align          8
             /*
              * 24 64-bit constants, one per round. sha3_ce_transform reads them
              * in table order through x9 (ld1r with an 8-byte post-increment),
              * so the order of the entries must not change.
              *
              * NOTE(review): gas on AArch64 is understood to treat the .align
              * operand as a power of two, i.e. 256-byte alignment here - confirm
              * whether that much alignment is intended.
              */
 225 .Lsha3_rcon:
 226         .quad   0x0000000000000001, 0x0000000000008082  // rounds  0,  1
 227         .quad   0x800000000000808a, 0x8000000080008000  // rounds  2,  3
 228         .quad   0x000000000000808b, 0x0000000080000001  // rounds  4,  5
 229         .quad   0x8000000080008081, 0x8000000000008009  // rounds  6,  7
 230         .quad   0x000000000000008a, 0x0000000000000088  // rounds  8,  9
 231         .quad   0x0000000080008009, 0x000000008000000a  // rounds 10, 11
 232         .quad   0x000000008000808b, 0x800000000000008b  // rounds 12, 13
 233         .quad   0x8000000000008089, 0x8000000000008003  // rounds 14, 15
 234         .quad   0x8000000000008002, 0x8000000000000080  // rounds 16, 17
 235         .quad   0x000000000000800a, 0x800000008000000a  // rounds 18, 19
 236         .quad   0x8000000080008081, 0x8000000000008080  // rounds 20, 21
 237         .quad   0x0000000080000001, 0x8000000080008008  // rounds 22, 23

/* [<][>][^][v][top][bottom][index][help] */