/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

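        /*
         * Define assembler symbols .Lv0.4s .. .Lv12.4s that evaluate to
         * the corresponding register number, so the instruction macros
         * below can splice register operands directly into hand-assembled
         * opcode words.
         */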
        .irp            b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
        .set            .Lv\b\().4s, \b
        .endr

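        /*
         * The SM3 instructions are emitted as raw .inst words so that the
         * file assembles even with toolchains that lack support for the
         * ARMv8.2 SM3 extension. Each macro ORs the register numbers
         * (via the .Lv symbols above) and any immediate into the
         * Rd/Rn/Rm/Ra fields of the fixed opcode.
         */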
        .macro          sm3partw1, rd, rn, rm
        .inst           0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
        .endm

        .macro          sm3partw2, rd, rn, rm
        .inst           0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
        .endm

        .macro          sm3ss1, rd, rn, rm, ra
        .inst           0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
        .endm

        .macro          sm3tt1a, rd, rn, rm, imm2
        .inst           0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm

        .macro          sm3tt1b, rd, rn, rm, imm2
        .inst           0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm

        .macro          sm3tt2a, rd, rn, rm, imm2
        .inst           0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm

        .macro          sm3tt2b, rd, rn, rm, imm2
        .inst           0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
        .endm

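        /*
         * One SM3 round, in outline: v8 holds the a..d state words, v9
         * holds e..h. \t0 holds the round constant T pre-rotated for
         * round \i, and shl+sri rotate it left by one into \t1 for the
         * next round, so consecutive rounds alternate t0/t1. Per the SM3
         * spec, sm3ss1 computes SS1 = rol32(rol32(a, 12) + e +
         * rol32(T, j), 7), and sm3tt1/sm3tt2 apply the TT1/TT2 state
         * updates; the 'a' variants use the round 0-15 boolean functions,
         * the 'b' variants the round 16-63 ones. v10 is expected to hold
         * W xor W', \s0 holds W, and \i selects the word within the
         * vector.
         */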
        .macro          round, ab, s0, t0, t1, i
        sm3ss1          v5.4s, v8.4s, \t0\().4s, v9.4s
        shl             \t1\().4s, \t0\().4s, #1
        sri             \t1\().4s, \t0\().4s, #31
        sm3tt1\ab       v8.4s, v5.4s, v10.4s, \i
        sm3tt2\ab       v9.4s, v5.4s, \s0\().4s, \i
        .endm

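        /*
         * Four consecutive SM3 rounds. When \s4 is passed, the macro
         * also expands the next four message schedule words into \s4:
         * the ext shuffles line up the required words from the previous
         * sixteen, and sm3partw1/sm3partw2 apply the P1 permutation and
         * rotates of the SM3 schedule. The eor computes W xor W' for
         * the TT1 update.
         */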
        .macro          qround, ab, s0, s1, s2, s3, s4
        .ifnb           \s4
        ext             \s4\().16b, \s1\().16b, \s2\().16b, #12
        ext             v6.16b, \s0\().16b, \s1\().16b, #12
        ext             v7.16b, \s2\().16b, \s3\().16b, #8
        sm3partw1       \s4\().4s, \s0\().4s, \s3\().4s
        .endif

        eor             v10.16b, \s0\().16b, \s1\().16b

        round           \ab, \s0, v11, v12, 0
        round           \ab, \s0, v12, v11, 1
        round           \ab, \s0, v11, v12, 2
        round           \ab, \s0, v12, v11, 3

        .ifnb           \s4
        sm3partw2       \s4\().4s, v7.4s, v6.4s
        .endif
        .endm

        /*
         * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
         *                       int blocks)
         */
        .text
ENTRY(sm3_ce_transform)
        /* load state */
        ld1             {v8.4s-v9.4s}, [x0]
        rev64           v8.4s, v8.4s
        rev64           v9.4s, v9.4s
        ext             v8.16b, v8.16b, v8.16b, #8
        ext             v9.16b, v9.16b, v9.16b, #8
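        /*
         * rev64 + ext together reverse the order of the words within
         * each state vector: the SM3 instructions expect 'a' and 'e'
         * in the top lane.
         */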

        adr_l           x8, .Lt
        ldp             s13, s14, [x8]
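        /*
         * s13 now holds T0 = 0x79cc4519 (rounds 0-15); s14 holds the
         * rounds 16-63 constant 0x7a879d8a pre-rotated left by 16,
         * since round j uses rol32(T, j) and the first 'b' round is
         * round 16. The ext instructions below move the constant into
         * the top lane before each group of rounds.
         */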

        /* load input */
0:      ld1             {v0.16b-v3.16b}, [x1], #64
        sub             w2, w2, #1

        /* keep a copy of the incoming state */
        mov             v15.16b, v8.16b
        mov             v16.16b, v9.16b

CPU_LE( rev32           v0.16b, v0.16b )
CPU_LE( rev32           v1.16b, v1.16b )
CPU_LE( rev32           v2.16b, v2.16b )
CPU_LE( rev32           v3.16b, v3.16b )

        ext             v11.16b, v13.16b, v13.16b, #4   /* T0 into the top lane */

        qround          a, v0, v1, v2, v3, v4
        qround          a, v1, v2, v3, v4, v0
        qround          a, v2, v3, v4, v0, v1
        qround          a, v3, v4, v0, v1, v2

        ext             v11.16b, v14.16b, v14.16b, #4   /* round 16 constant into the top lane */

        qround          b, v4, v0, v1, v2, v3
        qround          b, v0, v1, v2, v3, v4
        qround          b, v1, v2, v3, v4, v0
        qround          b, v2, v3, v4, v0, v1
        qround          b, v3, v4, v0, v1, v2
        qround          b, v4, v0, v1, v2, v3
        qround          b, v0, v1, v2, v3, v4
        qround          b, v1, v2, v3, v4, v0
        qround          b, v2, v3, v4, v0, v1

        /* all message schedule words are in place; no more expansion */
        qround          b, v3, v4
        qround          b, v4, v0
        qround          b, v0, v1

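        /*
         * Feed the previous state forward: SM3's compression function
         * XORs the input chaining value into the output (Davies-Meyer
         * style), rather than adding it as SHA-2 does.
         */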
        eor             v8.16b, v8.16b, v15.16b
        eor             v9.16b, v9.16b, v16.16b

        /* handled all input blocks? */
        cbnz            w2, 0b

        /* save state */
        rev64           v8.4s, v8.4s
        rev64           v9.4s, v9.4s
        ext             v8.16b, v8.16b, v8.16b, #8
        ext             v9.16b, v9.16b, v9.16b, #8
        st1             {v8.4s-v9.4s}, [x0]
        ret
ENDPROC(sm3_ce_transform)

        .section        ".rodata", "a"
        .align          3
.Lt:    .word           0x79cc4519, 0x9d8a7a87