1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 #include <linux/linkage.h>
18 #include <asm/assembler.h>
19
.text

// Symbolic register aliases used throughout this file:
//   rounds - number of AES rounds (10/12/14 for 128/192/256-bit keys)
//   bskey  - pointer into the bit-sliced round-key schedule
rounds .req x11
bskey .req x12
24
// Forward S-box input basis change: linear (XOR-only) transform that maps
// the 8 bit-sliced state planes b0..b7 into the representation expected by
// the shared GF(2^8) inverter (inv_gf256) below.
.macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
eor \b2, \b2, \b1
eor \b5, \b5, \b6
eor \b3, \b3, \b0
eor \b6, \b6, \b2
eor \b5, \b5, \b0
eor \b6, \b6, \b3
eor \b3, \b3, \b7
eor \b7, \b7, \b5
eor \b3, \b3, \b4
eor \b4, \b4, \b5
eor \b2, \b2, \b7
eor \b3, \b3, \b1
eor \b1, \b1, \b5
.endm
40
// Forward S-box output basis change: maps the inverter's output planes back
// to the bit-sliced AES state representation (linear layer, XORs only).
.macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
eor \b0, \b0, \b6
eor \b1, \b1, \b4
eor \b4, \b4, \b6
eor \b2, \b2, \b0
eor \b6, \b6, \b1
eor \b1, \b1, \b5
eor \b5, \b5, \b3
eor \b3, \b3, \b7
eor \b7, \b7, \b5
eor \b2, \b2, \b5
eor \b4, \b4, \b7
.endm
54
// Inverse S-box input basis change. Note the deliberately shuffled formal
// parameter names (b6, b1, b2, b4, ...): the register permutation between
// the inverse pipeline stages is encoded in the argument order rather than
// with extra moves.
.macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
eor \b1, \b1, \b7
eor \b4, \b4, \b7
eor \b7, \b7, \b5
eor \b1, \b1, \b3
eor \b2, \b2, \b5
eor \b3, \b3, \b7
eor \b6, \b6, \b1
eor \b2, \b2, \b0
eor \b5, \b5, \b3
eor \b4, \b4, \b6
eor \b0, \b0, \b6
eor \b1, \b1, \b4
.endm
69
// Inverse S-box output basis change; like inv_in_bs_ch, the register
// shuffle is expressed through the formal parameter order.
.macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
eor \b1, \b1, \b5
eor \b2, \b2, \b7
eor \b3, \b3, \b1
eor \b4, \b4, \b5
eor \b7, \b7, \b5
eor \b3, \b3, \b4
eor \b5, \b5, \b0
eor \b3, \b3, \b7
eor \b6, \b6, \b2
eor \b2, \b2, \b1
eor \b6, \b6, \b3
eor \b3, \b3, \b0
eor \b5, \b5, \b6
.endm
85
// Bit-sliced GF(2^4) multiply: (x0,x1) *= (y0,y1), two bit planes per
// operand. t0/t1 are scratch; y0/y1 are preserved. Callers pass full
// .16b-suffixed register names.
.macro mul_gf4, x0, x1, y0, y1, t0, t1
eor \t0, \y0, \y1
and \t0, \t0, \x0
eor \x0, \x0, \x1
and \t1, \x1, \y0
and \x0, \x0, \y1
eor \x1, \t1, \t0
eor \x0, \x0, \t1
.endm
95
// Two interleaved GF(2^4) multiplies sharing instruction-level parallelism:
// (x0,x1) *= (y0,y1) and (x2,x3) *= (y2,y3). t0/t1 are scratch. The result
// plane pairing differs slightly from mul_gf4 (note the final XOR order),
// matching how mul_gf16_2 consumes the outputs.
.macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
eor \t0, \y0, \y1
eor \t1, \y2, \y3
and \t0, \t0, \x0
and \t1, \t1, \x2
eor \x0, \x0, \x1
eor \x2, \x2, \x3
and \x1, \x1, \y0
and \x3, \x3, \y2
and \x0, \x0, \y1
and \x2, \x2, \y3
eor \x1, \x1, \x0
eor \x2, \x2, \x3
eor \x0, \x0, \t0
eor \x3, \x3, \t1
.endm
112
// Bit-sliced GF(2^16)-style combined multiply used by the GF(2^8) inverter:
// multiplies the 8-plane state x0..x7 by the 4-plane factor y0..y3 in two
// halves, built from the GF(2^4) primitives above (Karatsuba-like split:
// cross terms in t0/t1, then recombined). y0/y1 are updated in place
// between the two halves; t0..t3 are scratch.
.macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, t0, t1, t2, t3
eor \t0, \x0, \x2
eor \t1, \x1, \x3
mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
eor \y0, \y0, \y2
eor \y1, \y1, \y3
mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
eor \x0, \x0, \t0
eor \x2, \x2, \t0
eor \x1, \x1, \t1
eor \x3, \x3, \t1
eor \t0, \x4, \x6
eor \t1, \x5, \x7
mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
eor \y0, \y0, \y2
eor \y1, \y1, \y3
mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3
eor \x4, \x4, \t0
eor \x6, \x6, \t0
eor \x5, \x5, \t1
eor \x7, \x7, \t1
.endm
136
// Bit-sliced inversion in GF(2^8) over all 128 bit positions at once -- the
// non-linear core shared by sbox and inv_sbox. This is a fixed boolean
// circuit (in the style of the Kaesper/Schwabe bit-sliced AES); the exact
// gate sequence is load-bearing, do not reorder. bsl (bitwise select) and
// not operate on the .16b-suffixed arguments passed by the callers.
// t0..t3 / s0..s3 are scratch planes.
.macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
t0, t1, t2, t3, s0, s1, s2, s3
eor \t3, \x4, \x6
eor \t0, \x5, \x7
eor \t1, \x1, \x3
eor \s1, \x7, \x6
eor \s0, \x0, \x2
eor \s3, \t3, \t0
orr \t2, \t0, \t1
and \s2, \t3, \s0
orr \t3, \t3, \s0
eor \s0, \s0, \t1
and \t0, \t0, \t1
eor \t1, \x3, \x2
and \s3, \s3, \s0
and \s1, \s1, \t1
eor \t1, \x4, \x5
eor \s0, \x1, \x0
eor \t3, \t3, \s1
eor \t2, \t2, \s1
and \s1, \t1, \s0
orr \t1, \t1, \s0
eor \t3, \t3, \s3
eor \t0, \t0, \s1
eor \t2, \t2, \s2
eor \t1, \t1, \s3
eor \t0, \t0, \s2
and \s0, \x7, \x3
eor \t1, \t1, \s2
and \s1, \x6, \x2
and \s2, \x5, \x1
orr \s3, \x4, \x0
eor \t3, \t3, \s0
eor \t1, \t1, \s2
eor \s0, \t0, \s3
eor \t2, \t2, \s1
and \s2, \t3, \t1
eor \s1, \t2, \s2
eor \s3, \s0, \s2
bsl \s1, \t1, \s0
not \t0, \s0
bsl \s0, \s1, \s3
bsl \t0, \s1, \s3
bsl \s3, \t3, \t2
eor \t3, \t3, \t2
and \s2, \s0, \s3
eor \t1, \t1, \t0
eor \s2, \s2, \t3
// final multiply folds the computed inverse factor back into the state
mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
.endm
188
// Forward AES SubBytes on the bit-sliced state b0..b7:
// input basis change -> shared GF(2^8) inversion -> output basis change.
// The permuted register order fed to inv_gf256/out_bs_ch encodes the
// inter-stage plane shuffle. t0..t3/s0..s3 are scratch.
.macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
t0, t1, t2, t3, s0, s1, s2, s3
in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
.endm
200
// Inverse AES SubBytes on the bit-sliced state b0..b7, mirroring sbox:
// inverse input basis change -> shared GF(2^8) inversion -> inverse output
// basis change, with its own inter-stage register permutation.
.macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
t0, t1, t2, t3, s0, s1, s2, s3
inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
.endm
212
// Load the next round's 8 bit-sliced key planes into q16-q23 and advance
// bskey by one round (8 x 16 = 128 bytes). The first ldp post-indexes past
// the whole round; the rest use negative offsets back into it.
.macro enc_next_rk
ldp q16, q17, [bskey], #128
ldp q18, q19, [bskey, #-96]
ldp q20, q21, [bskey, #-64]
ldp q22, q23, [bskey, #-32]
.endm
219
// Decryption walks the key schedule backwards: step bskey back one round
// (128 bytes, pre-indexed writeback) and load its 8 planes into q16-q23.
.macro dec_next_rk
ldp q16, q17, [bskey, #-128]!
ldp q18, q19, [bskey, #32]
ldp q20, q21, [bskey, #64]
ldp q22, q23, [bskey, #96]
.endm
226
// AddRoundKey: XOR the round key planes (v16-v23, loaded by
// enc_next_rk/dec_next_rk) into the bit-sliced state x0..x7.
.macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
eor \x0\().16b, \x0\().16b, v16.16b
eor \x1\().16b, \x1\().16b, v17.16b
eor \x2\().16b, \x2\().16b, v18.16b
eor \x3\().16b, \x3\().16b, v19.16b
eor \x4\().16b, \x4\().16b, v20.16b
eor \x5\().16b, \x5\().16b, v21.16b
eor \x6\().16b, \x6\().16b, v22.16b
eor \x7\().16b, \x7\().16b, v23.16b
.endm
237
// (Inv)ShiftRows: apply the byte permutation in \mask to every bit plane
// via tbl. The caller selects forward/inverse/final-round behaviour by
// loading SR, ISR, SRM0 or ISRM0 into the mask register.
.macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
tbl \x0\().16b, {\x0\().16b}, \mask\().16b
tbl \x1\().16b, {\x1\().16b}, \mask\().16b
tbl \x2\().16b, {\x2\().16b}, \mask\().16b
tbl \x3\().16b, {\x3\().16b}, \mask\().16b
tbl \x4\().16b, {\x4\().16b}, \mask\().16b
tbl \x5\().16b, {\x5\().16b}, \mask\().16b
tbl \x6\().16b, {\x6\().16b}, \mask\().16b
tbl \x7\().16b, {\x7\().16b}, \mask\().16b
.endm
248
// MixColumns on the bit-sliced state: the column rotations become ext-based
// byte rotations (#12 = rotate by 4 bytes, #8 = rotate by 8) combined with
// XORs across planes. The output registers come out permuted relative to
// the inputs; \inv selects the tail variant -- blank when called directly
// for encryption, non-blank when invoked from inv_mix_cols, where the
// register assignment at the end differs. Gate order is load-bearing.
.macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
t0, t1, t2, t3, t4, t5, t6, t7, inv
ext \t0\().16b, \x0\().16b, \x0\().16b, #12
ext \t1\().16b, \x1\().16b, \x1\().16b, #12
eor \x0\().16b, \x0\().16b, \t0\().16b
ext \t2\().16b, \x2\().16b, \x2\().16b, #12
eor \x1\().16b, \x1\().16b, \t1\().16b
ext \t3\().16b, \x3\().16b, \x3\().16b, #12
eor \x2\().16b, \x2\().16b, \t2\().16b
ext \t4\().16b, \x4\().16b, \x4\().16b, #12
eor \x3\().16b, \x3\().16b, \t3\().16b
ext \t5\().16b, \x5\().16b, \x5\().16b, #12
eor \x4\().16b, \x4\().16b, \t4\().16b
ext \t6\().16b, \x6\().16b, \x6\().16b, #12
eor \x5\().16b, \x5\().16b, \t5\().16b
ext \t7\().16b, \x7\().16b, \x7\().16b, #12
eor \x6\().16b, \x6\().16b, \t6\().16b
eor \t1\().16b, \t1\().16b, \x0\().16b
eor \x7\().16b, \x7\().16b, \t7\().16b
ext \x0\().16b, \x0\().16b, \x0\().16b, #8
eor \t2\().16b, \t2\().16b, \x1\().16b
eor \t0\().16b, \t0\().16b, \x7\().16b
eor \t1\().16b, \t1\().16b, \x7\().16b
ext \x1\().16b, \x1\().16b, \x1\().16b, #8
eor \t5\().16b, \t5\().16b, \x4\().16b
eor \x0\().16b, \x0\().16b, \t0\().16b
eor \t6\().16b, \t6\().16b, \x5\().16b
eor \x1\().16b, \x1\().16b, \t1\().16b
ext \t0\().16b, \x4\().16b, \x4\().16b, #8
eor \t4\().16b, \t4\().16b, \x3\().16b
ext \t1\().16b, \x5\().16b, \x5\().16b, #8
eor \t7\().16b, \t7\().16b, \x6\().16b
ext \x4\().16b, \x3\().16b, \x3\().16b, #8
eor \t3\().16b, \t3\().16b, \x2\().16b
ext \x5\().16b, \x7\().16b, \x7\().16b, #8
eor \t4\().16b, \t4\().16b, \x7\().16b
ext \x3\().16b, \x6\().16b, \x6\().16b, #8
eor \t3\().16b, \t3\().16b, \x7\().16b
ext \x6\().16b, \x2\().16b, \x2\().16b, #8
eor \x7\().16b, \t1\().16b, \t5\().16b
.ifb \inv
eor \x2\().16b, \t0\().16b, \t4\().16b
eor \x4\().16b, \x4\().16b, \t3\().16b
eor \x5\().16b, \x5\().16b, \t7\().16b
eor \x3\().16b, \x3\().16b, \t6\().16b
eor \x6\().16b, \x6\().16b, \t2\().16b
.else
eor \t3\().16b, \t3\().16b, \x4\().16b
eor \x5\().16b, \x5\().16b, \t7\().16b
eor \x2\().16b, \x3\().16b, \t6\().16b
eor \x3\().16b, \t0\().16b, \t4\().16b
eor \x4\().16b, \x6\().16b, \t2\().16b
mov \x6\().16b, \t3\().16b
.endif
.endm
304
// InvMixColumns, implemented as a bit-sliced pre-multiplication (the
// rotate-by-8 ext + XOR network below) followed by the forward mix_cols
// with its \inv tail -- a standard decomposition that reuses the forward
// circuit instead of building a separate inverse one.
.macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
t0, t1, t2, t3, t4, t5, t6, t7
ext \t0\().16b, \x0\().16b, \x0\().16b, #8
ext \t6\().16b, \x6\().16b, \x6\().16b, #8
ext \t7\().16b, \x7\().16b, \x7\().16b, #8
eor \t0\().16b, \t0\().16b, \x0\().16b
ext \t1\().16b, \x1\().16b, \x1\().16b, #8
eor \t6\().16b, \t6\().16b, \x6\().16b
ext \t2\().16b, \x2\().16b, \x2\().16b, #8
eor \t7\().16b, \t7\().16b, \x7\().16b
ext \t3\().16b, \x3\().16b, \x3\().16b, #8
eor \t1\().16b, \t1\().16b, \x1\().16b
ext \t4\().16b, \x4\().16b, \x4\().16b, #8
eor \t2\().16b, \t2\().16b, \x2\().16b
ext \t5\().16b, \x5\().16b, \x5\().16b, #8
eor \t3\().16b, \t3\().16b, \x3\().16b
eor \t4\().16b, \t4\().16b, \x4\().16b
eor \t5\().16b, \t5\().16b, \x5\().16b
eor \x0\().16b, \x0\().16b, \t6\().16b
eor \x1\().16b, \x1\().16b, \t6\().16b
eor \x2\().16b, \x2\().16b, \t0\().16b
eor \x4\().16b, \x4\().16b, \t2\().16b
eor \x3\().16b, \x3\().16b, \t1\().16b
eor \x1\().16b, \x1\().16b, \t7\().16b
eor \x2\().16b, \x2\().16b, \t7\().16b
eor \x4\().16b, \x4\().16b, \t6\().16b
eor \x5\().16b, \x5\().16b, \t3\().16b
eor \x3\().16b, \x3\().16b, \t6\().16b
eor \x6\().16b, \x6\().16b, \t4\().16b
eor \x4\().16b, \x4\().16b, \t7\().16b
eor \x5\().16b, \x5\().16b, \t7\().16b
eor \x7\().16b, \x7\().16b, \t5\().16b
mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
.endm
340
// SWAPMOVE on two register pairs at once: for each pair (a, b),
//   t = ((b >> n) ^ a) & mask;  a ^= t;  b ^= t << n
// i.e. exchange the bit groups selected by mask between a and b at
// distance n. Shifts are per 64-bit lane (.2d); the masks used by
// bitslice (0x55/0x33/0x0f) keep the exchanges within lanes.
.macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
ushr \t0\().2d, \b0\().2d, #\n
ushr \t1\().2d, \b1\().2d, #\n
eor \t0\().16b, \t0\().16b, \a0\().16b
eor \t1\().16b, \t1\().16b, \a1\().16b
and \t0\().16b, \t0\().16b, \mask\().16b
and \t1\().16b, \t1\().16b, \mask\().16b
eor \a0\().16b, \a0\().16b, \t0\().16b
shl \t0\().2d, \t0\().2d, #\n
eor \a1\().16b, \a1\().16b, \t1\().16b
shl \t1\().2d, \t1\().2d, #\n
eor \b0\().16b, \b0\().16b, \t0\().16b
eor \b1\().16b, \b1\().16b, \t1\().16b
.endm
355
// 8x8 bit-matrix transpose of eight 128-bit registers via three rounds of
// SWAPMOVE (distances 1, 2, 4 with masks 0x55, 0x33, 0x0f): converts eight
// AES blocks between normal and bit-sliced representation. Note the
// reversed formal parameter order (x7 first): callers pass registers in
// ascending order, so v0 binds to \x7. The same macro is used for both
// directions -- NOTE(review): the callers apply it with permuted register
// lists on the way out, so the net transform is its own inverse only in
// combination with those permutations; confirm against the callers.
// t0..t3 are scratch (t0/t1 hold the masks).
.macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
movi \t0\().16b, #0x55
movi \t1\().16b, #0x33
swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
movi \t0\().16b, #0x0f
swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
.endm
367
368
// Byte-permutation index vectors for tbl (cache-line aligned):
//   M0          - byte interleave used when bit-slicing key/state material
//   SR / ISR    - forward / inverse ShiftRows
//   M0SR, SRM0, M0ISR, ISRM0 - M0 fused with (inverse) ShiftRows so one
//   tbl applies both permutations at entry/exit of the round loops.
// NOTE(review): composition order of the fused tables inferred from how
// aesbs_encrypt8/decrypt8 use them -- confirm against the slice layout.
.align 6
M0: .octa 0x0004080c0105090d02060a0e03070b0f

M0SR: .octa 0x0004080c05090d010a0e02060f03070b
SR: .octa 0x0f0e0d0c0a09080b0504070600030201
SRM0: .octa 0x01060b0c0207080d0304090e00050a0f

M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03
ISR: .octa 0x0f0e0d0c080b0a090504070602010003
ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
379
380
381
382
// aesbs_convert_key(out, rk, rounds)
//   x0 = out: bit-sliced key schedule (128 bytes per inner round)
//   x1 = rk:  standard AES round-key schedule (16 bytes per round)
//   x2 = rounds
// Round key 0 is stored as-is; each subsequent round key is byte-permuted
// with M0 and expanded into 8 bit-plane vectors (one per key bit position),
// where each byte of a plane is all-ones iff the corresponding key bit is
// set. Planes 0, 1, 5 and 6 are stored inverted -- NOTE(review): presumably
// to fold part of the S-box affine transform into the schedule; confirm
// against the basis-change macros. The last round key is XORed with 0x63
// (the S-box affine constant) before storing.
ENTRY(aesbs_convert_key)
ld1 {v7.4s}, [x1], #16 // load round 0 key
ld1 {v17.4s}, [x1], #16 // load round 1 key

movi v8.16b, #0x01 // bit masks for the 8 planes
movi v9.16b, #0x02
movi v10.16b, #0x04
movi v11.16b, #0x08
movi v12.16b, #0x10
movi v13.16b, #0x20
movi v14.16b, #0x40
movi v15.16b, #0x80
ldr q16, M0

sub x2, x2, #1
str q7, [x0], #16 // store round 0 key unmodified

.Lkey_loop:
tbl v7.16b ,{v17.16b}, v16.16b // permute current key with M0
ld1 {v17.4s}, [x1], #16 // prefetch next round key

cmtst v0.16b, v7.16b, v8.16b // plane N: all-ones where bit N set
cmtst v1.16b, v7.16b, v9.16b
cmtst v2.16b, v7.16b, v10.16b
cmtst v3.16b, v7.16b, v11.16b
cmtst v4.16b, v7.16b, v12.16b
cmtst v5.16b, v7.16b, v13.16b
cmtst v6.16b, v7.16b, v14.16b
cmtst v7.16b, v7.16b, v15.16b
not v0.16b, v0.16b // invert planes 0, 1, 5, 6
not v1.16b, v1.16b
not v5.16b, v5.16b
not v6.16b, v6.16b

subs x2, x2, #1
stp q0, q1, [x0], #128 // store the 8 planes of this round
stp q2, q3, [x0, #-96]
stp q4, q5, [x0, #-64]
stp q6, q7, [x0, #-32]
b.ne .Lkey_loop

movi v7.16b, #0x63 // S-box affine constant
eor v17.16b, v17.16b, v7.16b
str q17, [x0] // final round key, adjusted by 0x63
ret
ENDPROC(aesbs_convert_key)
429
// aesbs_encrypt8: encrypt eight 16-byte blocks held in v0-v7.
// In:  rounds (x11) and bskey (x12) set up by the caller; plaintext in v0-v7.
// Out: ciphertext in the permuted register order v0,v1,v4,v6,v3,v7,v2,v5
//      (see the __ecb_crypt instantiation in aesbs_ecb_encrypt).
// Clobbers v8-v15 and v24; advances bskey past the schedule.
.align 4
aesbs_encrypt8:
ldr q9, [bskey], #16 // round 0 key
ldr q8, M0SR // fused bit-slice + ShiftRows input permutation
ldr q24, SR // ShiftRows mask for the round loop

// round 0: AddRoundKey fused with the input permutation
eor v10.16b, v0.16b, v9.16b
eor v11.16b, v1.16b, v9.16b
tbl v0.16b, {v10.16b}, v8.16b
eor v12.16b, v2.16b, v9.16b
tbl v1.16b, {v11.16b}, v8.16b
eor v13.16b, v3.16b, v9.16b
tbl v2.16b, {v12.16b}, v8.16b
eor v14.16b, v4.16b, v9.16b
tbl v3.16b, {v13.16b}, v8.16b
eor v15.16b, v5.16b, v9.16b
tbl v4.16b, {v14.16b}, v8.16b
eor v10.16b, v6.16b, v9.16b
tbl v5.16b, {v15.16b}, v8.16b
eor v11.16b, v7.16b, v9.16b
tbl v6.16b, {v10.16b}, v8.16b
tbl v7.16b, {v11.16b}, v8.16b

bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

sub rounds, rounds, #1
b .Lenc_sbox // first iteration skips ShiftRows (folded into M0SR)

.Lenc_loop:
shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
v13, v14, v15
subs rounds, rounds, #1
b.cc .Lenc_done

enc_next_rk

mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
v13, v14, v15

add_round_key v0, v1, v2, v3, v4, v5, v6, v7

b.ne .Lenc_loop
ldr q24, SRM0 // last round: fuse ShiftRows with the output un-slicing
b .Lenc_loop

.Lenc_done:
ldr q12, [bskey] // final round key

bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

eor v0.16b, v0.16b, v12.16b
eor v1.16b, v1.16b, v12.16b
eor v4.16b, v4.16b, v12.16b
eor v6.16b, v6.16b, v12.16b
eor v3.16b, v3.16b, v12.16b
eor v7.16b, v7.16b, v12.16b
eor v2.16b, v2.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
ENDPROC(aesbs_encrypt8)
492
// aesbs_decrypt8: decrypt eight 16-byte blocks held in v0-v7.
// In:  rounds (x11), bskey (x12, start of schedule) set by the caller;
//      ciphertext in v0-v7.
// Out: plaintext in the permuted register order v0,v1,v6,v4,v2,v7,v3,v5
//      (see the __ecb_crypt instantiation in aesbs_ecb_decrypt).
// Clobbers v8-v15 and v24; walks the key schedule backwards.
.align 4
aesbs_decrypt8:
lsl x9, rounds, #7 // 128 bytes of schedule per round
add bskey, bskey, x9 // point at the end of the schedule

ldr q9, [bskey, #-112]! // last round key (first used when decrypting)
ldr q8, M0ISR // fused bit-slice + InvShiftRows input permutation
ldr q24, ISR // InvShiftRows mask for the round loop

// first round: AddRoundKey fused with the input permutation
eor v10.16b, v0.16b, v9.16b
eor v11.16b, v1.16b, v9.16b
tbl v0.16b, {v10.16b}, v8.16b
eor v12.16b, v2.16b, v9.16b
tbl v1.16b, {v11.16b}, v8.16b
eor v13.16b, v3.16b, v9.16b
tbl v2.16b, {v12.16b}, v8.16b
eor v14.16b, v4.16b, v9.16b
tbl v3.16b, {v13.16b}, v8.16b
eor v15.16b, v5.16b, v9.16b
tbl v4.16b, {v14.16b}, v8.16b
eor v10.16b, v6.16b, v9.16b
tbl v5.16b, {v15.16b}, v8.16b
eor v11.16b, v7.16b, v9.16b
tbl v6.16b, {v10.16b}, v8.16b
tbl v7.16b, {v11.16b}, v8.16b

bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

sub rounds, rounds, #1
b .Ldec_sbox // first iteration skips InvShiftRows (folded into M0ISR)

.Ldec_loop:
shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
v13, v14, v15
subs rounds, rounds, #1
b.cc .Ldec_done

dec_next_rk

// inverse cipher: AddRoundKey before InvMixColumns
add_round_key v0, v1, v6, v4, v2, v7, v3, v5

inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
v13, v14, v15

b.ne .Ldec_loop
ldr q24, ISRM0 // last round: fuse InvShiftRows with output un-slicing
b .Ldec_loop
.Ldec_done:
ldr q12, [bskey, #-16] // round 0 key

bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

eor v0.16b, v0.16b, v12.16b
eor v1.16b, v1.16b, v12.16b
eor v6.16b, v6.16b, v12.16b
eor v4.16b, v4.16b, v12.16b
eor v2.16b, v2.16b, v12.16b
eor v7.16b, v7.16b, v12.16b
eor v3.16b, v3.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
ENDPROC(aesbs_decrypt8)
557
558
559
560
561
562
563
// ECB driver, instantiated for encrypt/decrypt below.
//   \do8     - aesbs_encrypt8 or aesbs_decrypt8
//   \o0-\o7  - the permuted output register order produced by \do8
// Args: x0 = out, x1 = in, x2 = bskey, x3 = rounds, x4 = blocks.
// x5 is a skip mask: x5 = 1 << blocks when fewer than 8 blocks remain
// (mi after the subs), else 0; tbnz on bit k then skips load/store k.
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
frame_push 5

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4

99: mov x5, #1
lsl x5, x5, x23
subs w23, w23, #8 // consume up to 8 blocks per iteration
csel x23, x23, xzr, pl // clamp remaining count at 0
csel x5, x5, xzr, mi // mask only matters for a partial batch

ld1 {v0.16b}, [x20], #16
tbnz x5, #1, 0f
ld1 {v1.16b}, [x20], #16
tbnz x5, #2, 0f
ld1 {v2.16b}, [x20], #16
tbnz x5, #3, 0f
ld1 {v3.16b}, [x20], #16
tbnz x5, #4, 0f
ld1 {v4.16b}, [x20], #16
tbnz x5, #5, 0f
ld1 {v5.16b}, [x20], #16
tbnz x5, #6, 0f
ld1 {v6.16b}, [x20], #16
tbnz x5, #7, 0f
ld1 {v7.16b}, [x20], #16

0: mov bskey, x21 // \do8 consumes these, so reload each batch
mov rounds, x22
bl \do8

st1 {\o0\().16b}, [x19], #16
tbnz x5, #1, 1f
st1 {\o1\().16b}, [x19], #16
tbnz x5, #2, 1f
st1 {\o2\().16b}, [x19], #16
tbnz x5, #3, 1f
st1 {\o3\().16b}, [x19], #16
tbnz x5, #4, 1f
st1 {\o4\().16b}, [x19], #16
tbnz x5, #5, 1f
st1 {\o5\().16b}, [x19], #16
tbnz x5, #6, 1f
st1 {\o6\().16b}, [x19], #16
tbnz x5, #7, 1f
st1 {\o7\().16b}, [x19], #16

cbz x23, 1f
cond_yield_neon // allow preemption between batches
b 99b

1: frame_pop
ret
.endm
622
// aesbs_ecb_encrypt(out, in, bskey, rounds, blocks)
// Output register order matches aesbs_encrypt8's permutation.
.align 4
ENTRY(aesbs_ecb_encrypt)
__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_ecb_encrypt)
627
// aesbs_ecb_decrypt(out, in, bskey, rounds, blocks)
// Output register order matches aesbs_decrypt8's permutation.
.align 4
ENTRY(aesbs_ecb_decrypt)
__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_ecb_decrypt)
632
633
634
635
636
// aesbs_cbc_decrypt(out, in, bskey, rounds, blocks, iv)
//   x0 = out, x1 = in, x2 = bskey, x3 = rounds, x4 = blocks, x5 = iv.
// Decrypts up to 8 blocks per batch; copies of the input ciphertext are
// kept in v25-v31 so the CBC chaining XOR can use them after aesbs_decrypt8
// clobbers v0-v7. x6 is the same partial-batch skip mask as in __ecb_crypt.
.align 4
ENTRY(aesbs_cbc_decrypt)
frame_push 6

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5

99: mov x6, #1
lsl x6, x6, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x6, x6, xzr, mi

ld1 {v0.16b}, [x20], #16
mov v25.16b, v0.16b // stash ciphertext for chaining
tbnz x6, #1, 0f
ld1 {v1.16b}, [x20], #16
mov v26.16b, v1.16b
tbnz x6, #2, 0f
ld1 {v2.16b}, [x20], #16
mov v27.16b, v2.16b
tbnz x6, #3, 0f
ld1 {v3.16b}, [x20], #16
mov v28.16b, v3.16b
tbnz x6, #4, 0f
ld1 {v4.16b}, [x20], #16
mov v29.16b, v4.16b
tbnz x6, #5, 0f
ld1 {v5.16b}, [x20], #16
mov v30.16b, v5.16b
tbnz x6, #6, 0f
ld1 {v6.16b}, [x20], #16
mov v31.16b, v6.16b
tbnz x6, #7, 0f
ld1 {v7.16b}, [x20] // no post-index: block 8 is re-read below as next IV

0: mov bskey, x21
mov rounds, x22
bl aesbs_decrypt8

ld1 {v24.16b}, [x24] // current IV

// chaining: XOR each decrypted block (in aesbs_decrypt8's output
// order v0,v1,v6,v4,v2,v7,v3,v5) with the preceding ciphertext
eor v1.16b, v1.16b, v25.16b
eor v6.16b, v6.16b, v26.16b
eor v4.16b, v4.16b, v27.16b
eor v2.16b, v2.16b, v28.16b
eor v7.16b, v7.16b, v29.16b
eor v0.16b, v0.16b, v24.16b
eor v3.16b, v3.16b, v30.16b
eor v5.16b, v5.16b, v31.16b

// store plaintexts; keep track of the last consumed ciphertext in
// v24, which becomes the IV for the next batch
st1 {v0.16b}, [x19], #16
mov v24.16b, v25.16b
tbnz x6, #1, 1f
st1 {v1.16b}, [x19], #16
mov v24.16b, v26.16b
tbnz x6, #2, 1f
st1 {v6.16b}, [x19], #16
mov v24.16b, v27.16b
tbnz x6, #3, 1f
st1 {v4.16b}, [x19], #16
mov v24.16b, v28.16b
tbnz x6, #4, 1f
st1 {v2.16b}, [x19], #16
mov v24.16b, v29.16b
tbnz x6, #5, 1f
st1 {v7.16b}, [x19], #16
mov v24.16b, v30.16b
tbnz x6, #6, 1f
st1 {v3.16b}, [x19], #16
mov v24.16b, v31.16b
tbnz x6, #7, 1f
ld1 {v24.16b}, [x20], #16 // re-read 8th ciphertext block as next IV
st1 {v5.16b}, [x19], #16
1: st1 {v24.16b}, [x24] // write back updated IV

cbz x23, 2f
cond_yield_neon
b 99b

2: frame_pop
ret
ENDPROC(aesbs_cbc_decrypt)
724
// XTS tweak update: \out = \in * x in GF(2^128).
// sshr replicates the sign bit of each 64-bit half, and with \const picks
// the reduction constant where the MSB carries out, ext swaps the halves so
// the carry lands in the right lane, and the add doubles (shifts left) \in.
.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
and \tmp\().16b, \tmp\().16b, \const\().16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
732
733
734
735
736
737
738
// Internal XTS helper: load up to 8 blocks from [x20], XOR each with its
// tweak, compute the successor tweaks, and tail-call the cipher in x7
// (aesbs_encrypt8 or aesbs_decrypt8) via br.
// In: x20 = in ptr, x21 = bskey, x22 = rounds, x23 = remaining blocks,
//     v25 = current tweak, v30 = XTS reduction constant.
// Tweaks 1-4 stay in v26-v29; tweaks 5-8 are spilled to the caller's frame
// at .Lframe_local_offset because the cipher clobbers v8-v15.
// x6 = partial-batch skip mask (same scheme as __ecb_crypt).
__xts_crypt8:
mov x6, #1
lsl x6, x6, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x6, x6, xzr, mi

ld1 {v0.16b}, [x20], #16
next_tweak v26, v25, v30, v31
eor v0.16b, v0.16b, v25.16b
tbnz x6, #1, 0f

ld1 {v1.16b}, [x20], #16
next_tweak v27, v26, v30, v31
eor v1.16b, v1.16b, v26.16b
tbnz x6, #2, 0f

ld1 {v2.16b}, [x20], #16
next_tweak v28, v27, v30, v31
eor v2.16b, v2.16b, v27.16b
tbnz x6, #3, 0f

ld1 {v3.16b}, [x20], #16
next_tweak v29, v28, v30, v31
eor v3.16b, v3.16b, v28.16b
tbnz x6, #4, 0f

ld1 {v4.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset] // spill tweak 5 for the caller
eor v4.16b, v4.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #5, 0f

ld1 {v5.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 16] // spill tweak 6
eor v5.16b, v5.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #6, 0f

ld1 {v6.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 32] // spill tweak 7
eor v6.16b, v6.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #7, 0f

ld1 {v7.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 48] // spill tweak 8
eor v7.16b, v7.16b, v29.16b
next_tweak v29, v29, v30, v31

0: mov bskey, x21
mov rounds, x22
br x7 // tail-call the block cipher; it returns to our caller
ENDPROC(__xts_crypt8)
793
// XTS driver, instantiated for encrypt/decrypt below.
//   \do8     - aesbs_encrypt8 or aesbs_decrypt8 (reached via __xts_crypt8)
//   \o0-\o7  - the cipher's permuted output register order
// Args: x0 = out, x1 = in, x2 = bskey, x3 = rounds, x4 = blocks, x5 = iv.
// v25 carries the running tweak; v30 holds the GF(2^128) reduction
// constant rebuilt at 0: (also re-entered after cond_yield_neon, which
// loses NEON state). Tweaks 5-8 are reloaded from the stack spill area
// into q16-q19 after the cipher returns.
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
frame_push 6, 64

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5

0: movi v30.2s, #0x1
movi v25.2s, #0x87
uzp1 v30.4s, v30.4s, v25.4s // build the XTS reduction constant in v30
ld1 {v25.16b}, [x24] // (re)load current tweak

99: adr x7, \do8
bl __xts_crypt8

ldp q16, q17, [sp, #.Lframe_local_offset] // tweaks 5-8, spilled
ldp q18, q19, [sp, #.Lframe_local_offset + 32] // by __xts_crypt8

eor \o0\().16b, \o0\().16b, v25.16b // un-tweak outputs 1-4
eor \o1\().16b, \o1\().16b, v26.16b
eor \o2\().16b, \o2\().16b, v27.16b
eor \o3\().16b, \o3\().16b, v28.16b

st1 {\o0\().16b}, [x19], #16
mov v25.16b, v26.16b // track next tweak as blocks retire
tbnz x6, #1, 1f
st1 {\o1\().16b}, [x19], #16
mov v25.16b, v27.16b
tbnz x6, #2, 1f
st1 {\o2\().16b}, [x19], #16
mov v25.16b, v28.16b
tbnz x6, #3, 1f
st1 {\o3\().16b}, [x19], #16
mov v25.16b, v29.16b
tbnz x6, #4, 1f

eor \o4\().16b, \o4\().16b, v16.16b // un-tweak outputs 5-8
eor \o5\().16b, \o5\().16b, v17.16b
eor \o6\().16b, \o6\().16b, v18.16b
eor \o7\().16b, \o7\().16b, v19.16b

st1 {\o4\().16b}, [x19], #16
tbnz x6, #5, 1f
st1 {\o5\().16b}, [x19], #16
tbnz x6, #6, 1f
st1 {\o6\().16b}, [x19], #16
tbnz x6, #7, 1f
st1 {\o7\().16b}, [x19], #16

cbz x23, 1f
st1 {v25.16b}, [x24] // persist tweak before a potential yield

cond_yield_neon 0b // on resume, rebuild constants at 0:
b 99b

1: st1 {v25.16b}, [x24] // write back final tweak
frame_pop
ret
.endm
856
// aesbs_xts_encrypt(out, in, bskey, rounds, blocks, iv)
ENTRY(aesbs_xts_encrypt)
__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_xts_encrypt)
860
// aesbs_xts_decrypt(out, in, bskey, rounds, blocks, iv)
ENTRY(aesbs_xts_decrypt)
__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_xts_decrypt)
864
// Materialize the next CTR block in \v and advance the 128-bit big-endian
// counter kept (byte-reversed) in x7:x8. The 64-bit halves go into the
// vector lanes first, then rev64 restores big-endian byte order.
.macro next_ctr, v
mov \v\().d[1], x8 // low counter half
adds x8, x8, #1 // increment with carry into the high half
mov \v\().d[0], x7
adc x7, x7, xzr
rev64 \v\().16b, \v\().16b // back to big-endian byte order
.endm
872
873
874
875
876
// aesbs_ctr_encrypt(out, in, bskey, rounds, blocks, iv, final)
//   x0 = out, x1 = in, x2 = bskey, x3 = rounds, x4 = blocks,
//   x5 = counter block (updated), x6 = final: buffer for the keystream of
//   one extra (partial) block, or NULL.
// When final != NULL the block count is bumped by one (x26 = 1) so the
// keystream for the trailing partial block is generated; the numbered
// targets 0:-7: at the bottom store that keystream block (in the cipher's
// permuted output order) into [x25] instead of XOR-storing it.
ENTRY(aesbs_ctr_encrypt)
frame_push 8

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6

cmp x25, #0
cset x26, ne // x26 = 1 iff a final partial block is requested
add x23, x23, x26 // account for the extra keystream block

98: ldp x7, x8, [x24] // counter halves for incrementing
ld1 {v0.16b}, [x24] // counter block 1 used as-is
CPU_LE( rev x7, x7 ) // keep halves in host order for adds/adc
CPU_LE( rev x8, x8 )
adds x8, x8, #1
adc x7, x7, xzr

99: mov x9, #1
lsl x9, x9, x23 // skip mask, as in __ecb_crypt
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x9, x9, xzr, le // le (not mi): the final-block slot counts too

tbnz x9, #1, 0f
next_ctr v1
tbnz x9, #2, 0f
next_ctr v2
tbnz x9, #3, 0f
next_ctr v3
tbnz x9, #4, 0f
next_ctr v4
tbnz x9, #5, 0f
next_ctr v5
tbnz x9, #6, 0f
next_ctr v6
tbnz x9, #7, 0f
next_ctr v7

0: mov bskey, x21
mov rounds, x22
bl aesbs_encrypt8

lsr x9, x9, x26 // shift mask so bit k-1 flags block k's final slot
tbnz x9, #0, 0f

// XOR keystream (output order v0,v1,v4,v6,v3,v7,v2,v5) into the input
ld1 {v8.16b}, [x20], #16
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x19], #16
tbnz x9, #1, 1f

ld1 {v9.16b}, [x20], #16
eor v1.16b, v1.16b, v9.16b
st1 {v1.16b}, [x19], #16
tbnz x9, #2, 2f

ld1 {v10.16b}, [x20], #16
eor v4.16b, v4.16b, v10.16b
st1 {v4.16b}, [x19], #16
tbnz x9, #3, 3f

ld1 {v11.16b}, [x20], #16
eor v6.16b, v6.16b, v11.16b
st1 {v6.16b}, [x19], #16
tbnz x9, #4, 4f

ld1 {v12.16b}, [x20], #16
eor v3.16b, v3.16b, v12.16b
st1 {v3.16b}, [x19], #16
tbnz x9, #5, 5f

ld1 {v13.16b}, [x20], #16
eor v7.16b, v7.16b, v13.16b
st1 {v7.16b}, [x19], #16
tbnz x9, #6, 6f

ld1 {v14.16b}, [x20], #16
eor v2.16b, v2.16b, v14.16b
st1 {v2.16b}, [x19], #16
tbnz x9, #7, 7f

ld1 {v15.16b}, [x20], #16
eor v5.16b, v5.16b, v15.16b
st1 {v5.16b}, [x19], #16

8: next_ctr v0 // write back the updated counter block
st1 {v0.16b}, [x24]
cbz x23, .Lctr_done

cond_yield_neon 98b // on resume, re-read counter at 98:
b 99b

.Lctr_done:
frame_pop
ret

// Final-block exits: store the raw keystream block (cipher output
// order) into the caller-supplied buffer, unless final == NULL (which
// can still be reached here when blocks was an exact multiple of 8).
0: cbz x25, 8b
st1 {v0.16b}, [x25]
b 8b
1: cbz x25, 8b
st1 {v1.16b}, [x25]
b 8b
2: cbz x25, 8b
st1 {v4.16b}, [x25]
b 8b
3: cbz x25, 8b
st1 {v6.16b}, [x25]
b 8b
4: cbz x25, 8b
st1 {v3.16b}, [x25]
b 8b
5: cbz x25, 8b
st1 {v7.16b}, [x25]
b 8b
6: cbz x25, 8b
st1 {v2.16b}, [x25]
b 8b
7: cbz x25, 8b
st1 {v5.16b}, [x25]
b 8b
ENDPROC(aesbs_ctr_encrypt)