root/arch/arm64/crypto/sm3-ce-core.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
   4  *
   5  * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
   6  */
   7 
   8 #include <linux/linkage.h>
   9 #include <asm/assembler.h>
  10 
        /*
         * Define .Lv<N>.4s = N for v0-v12 so the .inst-based macros below
         * can splice SIMD register numbers into hand-assembled encodings
         * (for assemblers that lack the ARMv8.2 SM3 mnemonics).  Only
         * v0-v12 ever appear as operands of those macros.
         */
        .irp            b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
        .set            .Lv\b\().4s, \b
        .endr
  14 
        /* SM3PARTW1 Vd.4S, Vn.4S, Vm.4S - message schedule expansion, part 1 */
        .macro          sm3partw1, rd, rn, rm
        .inst           0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
        .endm
  18 
        /* SM3PARTW2 Vd.4S, Vn.4S, Vm.4S - message schedule expansion, part 2 */
        .macro          sm3partw2, rd, rn, rm
        .inst           0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
        .endm
  22 
        /* SM3SS1 Vd.4S, Vn.4S, Vm.4S, Va.4S - compute the SS1 round value */
        .macro          sm3ss1, rd, rn, rm, ra
        .inst           0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
        .endm
  26 
        /*
         * SM3TT1A Vd.4S, Vn.4S, Vm.S[imm2] - TT1 update of the A..D half,
         * 'a' boolean-function variant (used for rounds 0-15); imm2 selects
         * the 32-bit lane of Vm consumed this round.
         */
        .macro          sm3tt1a, rd, rn, rm, imm2
        .inst           0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm
  30 
        /* SM3TT1B Vd.4S, Vn.4S, Vm.S[imm2] - as sm3tt1a, 'b' variant (rounds 16-63) */
        .macro          sm3tt1b, rd, rn, rm, imm2
        .inst           0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm
  34 
        /* SM3TT2A Vd.4S, Vn.4S, Vm.S[imm2] - TT2 update of the E..H half, 'a' variant */
        .macro          sm3tt2a, rd, rn, rm, imm2
        .inst           0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm
  38 
        /* SM3TT2B Vd.4S, Vn.4S, Vm.S[imm2] - as sm3tt2a, 'b' variant (rounds 16-63) */
        .macro          sm3tt2b, rd, rn, rm, imm2
        .inst           0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm
  42 
        /*
         * One SM3 round, operating on one 32-bit lane.
         *   \ab    - 'a' or 'b': selects the sm3tt1/sm3tt2 variant
         *   \s0    - register holding the four message words W for this qround
         *   \t0    - current round constant; \t1 receives it rotated left by
         *            one bit for the next round (constants ping-pong between
         *            v11 and v12 across consecutive rounds)
         *   \i     - lane index (0-3) fed to the TT instructions
         * Uses v8/v9 as the working digest halves, v5 as SS1 scratch, and
         * v10 (set up by qround) as the W' operand of sm3tt1.
         */
        .macro          round, ab, s0, t0, t1, i
        sm3ss1          v5.4s, v8.4s, \t0\().4s, v9.4s
        shl             \t1\().4s, \t0\().4s, #1        /* \t1 = rol32(\t0, 1) ... */
        sri             \t1\().4s, \t0\().4s, #31       /* ... via shl + sri pair */
        sm3tt1\ab       v8.4s, v5.4s, v10.4s, \i
        sm3tt2\ab       v9.4s, v5.4s, \s0\().4s, \i
        .endm
  50 
        /*
         * Four SM3 rounds, consuming the four message words in \s0.
         * When \s4 is supplied, the next four schedule words are computed
         * into it via sm3partw1/sm3partw2, interleaved with the rounds to
         * hide latency; the final three qrounds of a block pass only
         * \s0/\s1 (the .ifnb guards skip the expansion, which is no longer
         * needed).  v6/v7 are schedule-assembly scratch.
         */
        .macro          qround, ab, s0, s1, s2, s3, s4
        .ifnb           \s4
        ext             \s4\().16b, \s1\().16b, \s2\().16b, #12
        ext             v6.16b, \s0\().16b, \s1\().16b, #12
        ext             v7.16b, \s2\().16b, \s3\().16b, #8
        sm3partw1       \s4\().4s, \s0\().4s, \s3\().4s
        .endif

        /* v10 = Wj ^ Wj+4, the W' operand consumed by sm3tt1 */
        eor             v10.16b, \s0\().16b, \s1\().16b

        /* four rounds, lanes 0-3; the constant alternates v11 <-> v12,
         * accumulating a one-bit rotation per round */
        round           \ab, \s0, v11, v12, 0
        round           \ab, \s0, v12, v11, 1
        round           \ab, \s0, v11, v12, 2
        round           \ab, \s0, v12, v11, 3

        .ifnb           \s4
        sm3partw2       \s4\().4s, v7.4s, v6.4s
        .endif
        .endm
  70 
        /*
         * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
         *                       int blocks)
         *
         * AAPCS64: x0 = digest state (eight u32 words), x1 = input,
         * w2 = number of 64-byte blocks.  The loop is bottom-tested, so
         * the caller must pass blocks >= 1.
         *
         * Register roles inside the loop:
         *   v0-v4    message schedule, four words each
         *   v5-v7    scratch (SS1 value / schedule assembly)
         *   v8, v9   working digest halves (A..D / E..H)
         *   v10      W ^ W+4 operand for sm3tt1 (set per qround)
         *   v11,v12  per-round constant T, rotated one bit per round
         *   v13,v14  base T constants for rounds 0-15 / 16-63 (from .Lt)
         *   v15,v16  digest saved at block start for the final XOR
         */
        .text
ENTRY(sm3_ce_transform)
        /* load state; rev64 + ext #8 reverses the four words in each half
         * into the order the SM3 instructions operate on */
        ld1             {v8.4s-v9.4s}, [x0]
        rev64           v8.4s, v8.4s
        rev64           v9.4s, v9.4s
        ext             v8.16b, v8.16b, v8.16b, #8
        ext             v9.16b, v9.16b, v9.16b, #8

        /* s13 = T for rounds 0-15, s14 = (pre-rotated) T for rounds 16-63 */
        adr_l           x8, .Lt
        ldp             s13, s14, [x8]

        /* load one 64-byte input block */
0:      ld1             {v0.16b-v3.16b}, [x1], #64
        sub             w2, w2, #1

        /* keep a copy of the incoming digest for the final XOR */
        mov             v15.16b, v8.16b
        mov             v16.16b, v9.16b

        /* SM3 message words are big-endian; byte-swap on LE kernels */
CPU_LE( rev32           v0.16b, v0.16b          )
CPU_LE( rev32           v1.16b, v1.16b          )
CPU_LE( rev32           v2.16b, v2.16b          )
CPU_LE( rev32           v3.16b, v3.16b          )

        /* position the rounds 0-15 constant for the first 'round' call */
        ext             v11.16b, v13.16b, v13.16b, #4

        /* rounds 0-15: 'a' variant, 4 rounds per qround */
        qround          a, v0, v1, v2, v3, v4
        qround          a, v1, v2, v3, v4, v0
        qround          a, v2, v3, v4, v0, v1
        qround          a, v3, v4, v0, v1, v2

        /* switch to the rounds 16-63 constant */
        ext             v11.16b, v14.16b, v14.16b, #4

        /* rounds 16-63: 'b' variant; last three qrounds need no further
         * message expansion, so \s2..\s4 are omitted */
        qround          b, v4, v0, v1, v2, v3
        qround          b, v0, v1, v2, v3, v4
        qround          b, v1, v2, v3, v4, v0
        qround          b, v2, v3, v4, v0, v1
        qround          b, v3, v4, v0, v1, v2
        qround          b, v4, v0, v1, v2, v3
        qround          b, v0, v1, v2, v3, v4
        qround          b, v1, v2, v3, v4, v0
        qround          b, v2, v3, v4, v0, v1
        qround          b, v3, v4
        qround          b, v4, v0
        qround          b, v0, v1

        /* SM3 feed-forward is XOR with the previous digest (not addition) */
        eor             v8.16b, v8.16b, v15.16b
        eor             v9.16b, v9.16b, v16.16b

        /* handled all input blocks? */
        cbnz            w2, 0b

        /* save state, undoing the word reversal done on entry */
        rev64           v8.4s, v8.4s
        rev64           v9.4s, v9.4s
        ext             v8.16b, v8.16b, v8.16b, #8
        ext             v9.16b, v9.16b, v9.16b, #8
        st1             {v8.4s-v9.4s}, [x0]
        ret
ENDPROC(sm3_ce_transform)
 135 
        .section        ".rodata", "a"
        .align          3
        /*
         * SM3 round constants: T(0..15) = 0x79cc4519 and T(16..63) =
         * 0x7a879d8a, the latter stored rotated by 16 bits (0x9d8a7a87 ==
         * ror32(0x7a879d8a, 16)) — presumably so that the one-bit-per-round
         * rotation in the 'round' macro yields T_j <<< (j mod 32) at the
         * point each constant is first used; verify against the SM3 spec.
         */
.Lt:    .word           0x79cc4519, 0x9d8a7a87

/* [<][>][^][v][top][bottom][index][help] */