1
2
3
4
5
6
7
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
10
/*
 * Emit the generic mode entry points from aes-modes.S under a "neon_"
 * symbol prefix, so this plain-NEON AES implementation can coexist with
 * other AES implementations providing the same mode routines.
 */
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
13
// Register aliases consumed by aes-modes.S.
// xtsmask and cbciv share v7: the XTS and CBC code paths presumably are
// never live at the same time, so the XTS tweak mask and the CBC IV can
// occupy the same register (NOTE(review): confirm against aes-modes.S).
xtsmask .req v7
cbciv .req v7
vctr .req v4
17
// Reload the XTS tweak mask into xtsmask (v7), using \tmp as scratch.
// Presumably v7 gets clobbered between uses in this NEON variant (the
// clobber is not visible in this file), so aes-modes.S invokes this to
// restore the mask — implementations that keep the mask live make this
// a no-op.
.macro xts_reload_mask, tmp
xts_load_mask \tmp
.endm
21
22
// Branch to \lbl when bit 1 of \reg is set, i.e. skip the tweak
// computation on the XTS ciphertext-stealing path.
// NOTE(review): the meaning of bit 1 of \reg is defined by the caller
// in aes-modes.S — confirm there.
.macro xts_cts_skip_tw, reg, lbl
tbnz \reg, #1, \lbl
.endm
26
27
// Multiply each byte of \in by x (i.e. {02}) in GF(2^8).
// \const must hold 0x1b in each byte (the AES reduction polynomial).
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7                     // 0xff in bytes whose MSB is set
shl \out, \in, #1                       // shift left by one
and \temp, \temp, \const                // select 0x1b for overflowing bytes
eor \out, \out, \temp                   // fold in the reduction
.endm
34
35
// Multiply each byte of \in by x^2 (i.e. {04}) in GF(2^8).
// \const must hold 0x1b in each byte; the carry-less pmul of the two
// shifted-out top bits by 0x1b performs the polynomial reduction in one
// step.
.macro mul_by_x2, out, in, temp, const
ushr \temp, \in, #6                     // top two bits, moved to the bottom
shl \out, \in, #2                       // shift left by two
pmul \temp, \temp, \const               // reduction term = bits * 0x1b
eor \out, \out, \temp                   // fold in the reduction
.endm
42
43
// Load the constant lookup tables used by the round transforms below:
//   v12     - 0x1b in every byte (GF(2^8) reduction constant)
//   v13     - ShiftRows byte permutation (\shiftrows)
//   v14     - rotate-each-32-bit-word-by-8-bits permutation (.Lror32by8)
//   v16-v31 - the 256-byte (inverse) S-box \sbox, loaded as four
//             64-byte tbl/tbx register groups
// \temp is a scratch general-purpose register used for addressing.
.macro prepare, sbox, shiftrows, temp
movi v12.16b, #0x1b
ldr_l q13, \shiftrows, \temp
ldr_l q14, .Lror32by8, \temp
adr_l \temp, \sbox
ld1 {v16.16b-v19.16b}, [\temp], #64
ld1 {v20.16b-v23.16b}, [\temp], #64
ld1 {v24.16b-v27.16b}, [\temp], #64
ld1 {v28.16b-v31.16b}, [\temp]
.endm
54
55
// Encryption setup hook called by aes-modes.S: load the forward S-box
// and ShiftRows tables. The key/rounds arguments are unused here — the
// tables are key independent (keys are consumed per block).
.macro enc_prepare, ignore0, ignore1, temp
prepare crypto_aes_sbox, .LForward_ShiftRows, \temp
.endm
59
// Key-switch hook called by aes-modes.S. Intentionally empty: nothing
// here caches key material, so changing keys requires no work.
.macro enc_switch_key, ignore0, ignore1, temp

.endm
63
64
// Decryption setup hook called by aes-modes.S: load the inverse S-box
// and the reverse ShiftRows table. Key/rounds arguments unused (see
// enc_prepare).
.macro dec_prepare, ignore0, ignore1, temp
prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
.endm
68
69
// Apply (Inv)SubBytes to the state in \in via four 64-byte table
// lookups: tbl writes 0 for out-of-range indices and tbx leaves the
// destination untouched, so each quarter of the 256-byte S-box covers
// its own 64-entry index window. v15 must hold #0x40 here (set by
// do_block); v9-v11 receive the indices rebased for each successive
// quarter. Clobbers v9-v11.
.macro sub_bytes, in
sub v9.16b, \in\().16b, v15.16b
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
sub v10.16b, v9.16b, v15.16b
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
sub v11.16b, v10.16b, v15.16b
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
.endm
79
80
// Apply (Inv)MixColumns to the state in \in. Clobbers v8-v9.
// For decryption (\enc == 0), a preprocessing step first computes
// w = {04}*in and replaces in with in ^ w ^ rot32#16(w); applying the
// forward MixColumns below to that value yields InvMixColumns.
// Forward MixColumns is computed as (per 32-bit column):
//   v9 = {02}*in
//   v8 = rot32#16(in) ^ v9            (rev32 .8h swaps 16-bit halves)
//   in = rot32#8(in ^ v8) ^ v8        (rotate via tbl with v14)
.macro mix_columns, in, enc
.if \enc == 0

mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
eor \in\().16b, \in\().16b, v8.16b
rev32 v8.8h, v8.8h
eor \in\().16b, \in\().16b, v8.16b
.endif

mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
rev32 v8.8h, \in\().8h
eor v8.16b, v8.16b, v9.16b
eor \in\().16b, \in\().16b, v8.16b
tbl \in\().16b, {\in\().16b}, v14.16b
eor \in\().16b, \in\().16b, v8.16b
.endm
97
// Encrypt (\enc == 1) or decrypt (\enc == 0) the single AES state in
// \in in place.
//   \rounds - number of rounds
//   \rk     - pointer to the round-key schedule
//   \rkp,\i - scratch GP registers (running key pointer, round counter)
// Per iteration: AddRoundKey, (Inv)ShiftRows (tbl with v13),
// (Inv)SubBytes, then (Inv)MixColumns — except on the final round,
// which skips MixColumns per the AES specification.
// v15 serves triple duty each iteration: current round key, then the
// #0x40 index-rebase constant consumed by sub_bytes, then the next
// round key.
.macro do_block, enc, in, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]                     // load first round key
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b   // AddRoundKey
movi v15.16b, #0x40
tbl \in\().16b, {\in\().16b}, v13.16b   // (Inv)ShiftRows
sub_bytes \in
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16               // next round key
beq 2222f                               // last round: no MixColumns
mix_columns \in, \enc
b 1111b
2222: eor \in\().16b, \in\().16b, v15.16b   // final AddRoundKey
.endm
113
// Encrypt one block: thin wrapper around do_block with \enc = 1.
// Interface expected by aes-modes.S.
.macro encrypt_block, in, rounds, rk, rkp, i
do_block 1, \in, \rounds, \rk, \rkp, \i
.endm
117
// Decrypt one block: thin wrapper around do_block with \enc = 0.
// Interface expected by aes-modes.S.
.macro decrypt_block, in, rounds, rk, rkp, i
do_block 0, \in, \rounds, \rk, \rkp, \i
.endm
121
122
123
124
125
126
// (Inv)SubBytes on four states at once. Same four-quarter tbl/tbx
// lookup scheme as sub_bytes, with the index-rebase subs and the table
// lookups of the four states interleaved so independent instructions
// can overlap in the pipeline. v15 must hold #0x40 (set by
// do_block_4x); v8-v11 carry the per-state rebased index vectors and
// are clobbered.
.macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, \in2\().16b, v15.16b
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
sub v11.16b, \in3\().16b, v15.16b
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
.endm
157
// Two-state interleaved variant of mul_by_x: multiply every byte of
// \in0 and \in1 by x ({02}) in GF(2^8), results in \out0/\out1.
// \const must hold 0x1b in each byte; \tmp0/\tmp1 are clobbered.
// Instructions for the two states are interleaved for pipelining.
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
sshr \tmp0\().16b, \in0\().16b, #7
shl \out0\().16b, \in0\().16b, #1
sshr \tmp1\().16b, \in1\().16b, #7
and \tmp0\().16b, \tmp0\().16b, \const\().16b
shl \out1\().16b, \in1\().16b, #1
and \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
168
// Two-state interleaved variant of mul_by_x2: multiply every byte of
// \in0 and \in1 by x^2 ({04}) in GF(2^8), results in \out0/\out1.
// \const must hold 0x1b in each byte; the pmul of the two shifted-out
// top bits performs the reduction. \tmp0/\tmp1 are clobbered.
.macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
ushr \tmp0\().16b, \in0\().16b, #6
shl \out0\().16b, \in0\().16b, #2
ushr \tmp1\().16b, \in1\().16b, #6
pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
shl \out1\().16b, \in1\().16b, #2
pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
179
// (Inv)MixColumns on two states at once; same algorithm as
// mix_columns (including the \enc == 0 preprocessing that converts the
// forward transform into InvMixColumns), with the two states
// interleaved for pipelining. Clobbers v8-v11.
.macro mix_columns_2x, in0, in1, enc
.if \enc == 0

mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
eor \in0\().16b, \in0\().16b, v8.16b
rev32 v8.8h, v8.8h
eor \in1\().16b, \in1\().16b, v9.16b
rev32 v9.8h, v9.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
.endif

mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
rev32 v10.8h, \in0\().8h                // rotate each word by 16 bits
rev32 v11.8h, \in1\().8h
eor v10.16b, v10.16b, v8.16b
eor v11.16b, v11.16b, v9.16b
eor \in0\().16b, \in0\().16b, v10.16b
eor \in1\().16b, \in1\().16b, v11.16b
tbl \in0\().16b, {\in0\().16b}, v14.16b // rotate each word by 8 bits
tbl \in1\().16b, {\in1\().16b}, v14.16b
eor \in0\().16b, \in0\().16b, v10.16b
eor \in1\().16b, \in1\().16b, v11.16b
.endm
204
// Encrypt (\enc == 1) or decrypt (\enc == 0) four AES states
// (\in0-\in3) in place, sharing one pass over the key schedule.
// Same round structure as do_block — AddRoundKey, (Inv)ShiftRows,
// (Inv)SubBytes, (Inv)MixColumns except on the last round — with the
// four states processed in lockstep for pipelining.
// v15 is reused per iteration: round key, then the #0x40 rebase
// constant for sub_bytes_4x, then the next round key.
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]                     // load first round key
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b  // AddRoundKey x4
eor \in1\().16b, \in1\().16b, v15.16b
eor \in2\().16b, \in2\().16b, v15.16b
eor \in3\().16b, \in3\().16b, v15.16b
movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b // (Inv)ShiftRows x4
tbl \in1\().16b, {\in1\().16b}, v13.16b
tbl \in2\().16b, {\in2\().16b}, v13.16b
tbl \in3\().16b, {\in3\().16b}, v13.16b
sub_bytes_4x \in0, \in1, \in2, \in3
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16               // next round key
beq 2222f                               // last round: no MixColumns
mix_columns_2x \in0, \in1, \enc
mix_columns_2x \in2, \in3, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b  // final AddRoundKey x4
eor \in1\().16b, \in1\().16b, v15.16b
eor \in2\().16b, \in2\().16b, v15.16b
eor \in3\().16b, \in3\().16b, v15.16b
.endm
230
// Encrypt four blocks: thin wrapper around do_block_4x with \enc = 1.
// Interface expected by aes-modes.S.
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
234
// Decrypt four blocks: thin wrapper around do_block_4x with \enc = 0.
// Interface expected by aes-modes.S.
.macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
238
239 #include "aes-modes.S"
240
.section ".rodata", "a"
.align 4
// tbl index vector implementing ShiftRows (encryption), loaded into
// v13 by enc_prepare
.LForward_ShiftRows:
.octa 0x0b06010c07020d08030e09040f0a0500

// tbl index vector implementing InvShiftRows (decryption), loaded into
// v13 by dec_prepare
.LReverse_ShiftRows:
.octa 0x0306090c0f0205080b0e0104070a0d00

// tbl index vector rotating each 32-bit word by one byte (each output
// byte selects input byte (i+1) mod 4 within its word); loaded into
// v14 and used by mix_columns / mix_columns_2x
.Lror32by8:
.octa 0x0c0f0e0d080b0a090407060500030201