1
2
3
4
5
6
7
8
9
/*
 * Everything below is emitted into .text.
 *
 * MAX_STRIDE is the number of blocks the interleaved ("Nx") loops in this
 * file consume per iteration. It defaults to 4 unless it was already
 * defined before this point.
 */
10 .text
11 .align 4
12
13 #ifndef MAX_STRIDE
14 #define MAX_STRIDE 4
15 #endif
16
/*
 * ST4(...) expands to its argument only when MAX_STRIDE == 4 and ST5(...)
 * only otherwise, so a single loop body can carry the instructions of both
 * the 4-block and the 5-block variant side by side.
 */
17 #if MAX_STRIDE == 4
18 #define ST4(x...) x
19 #define ST5(x...)
20 #else
21 #define ST4(x...)
22 #define ST5(x...) x
23 #endif
24
/*
 * aes_encrypt_block4x - file-local helper, reached with `bl` from the mode
 * routines in this file.
 *
 * Expands the encrypt_block4x macro (defined outside this view) on the
 * four vectors v0-v3, passing w3, x2, x8 and w7 as its remaining operands,
 * and returns to the caller.
 * NOTE(review): presumably w3 = round count, x2 = round-key pointer and
 * x8/w7 are scratch registers - confirm against the macro definition.
 */
25 aes_encrypt_block4x:
26 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
27 ret
28 ENDPROC(aes_encrypt_block4x)
29
/*
 * aes_decrypt_block4x - file-local helper, reached with `bl` from the mode
 * routines in this file.
 *
 * Expands the decrypt_block4x macro (defined outside this view) on the
 * four vectors v0-v3, passing w3, x2, x8 and w7 as its remaining operands,
 * and returns to the caller.
 * NOTE(review): presumably w3 = round count, x2 = round-key pointer and
 * x8/w7 are scratch registers - confirm against the macro definition.
 */
30 aes_decrypt_block4x:
31 decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
32 ret
33 ENDPROC(aes_decrypt_block4x)
34
/*
 * Five-block variants of the helpers above. They are only assembled when
 * MAX_STRIDE == 5 and operate on v0-v4 instead of v0-v3; the remaining
 * operands (w3, x2, x8, w7) are the same as for the 4x helpers.
 * The encrypt_block5x/decrypt_block5x macros are defined outside this view.
 */
35 #if MAX_STRIDE == 5
36 aes_encrypt_block5x:
37 encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
38 ret
39 ENDPROC(aes_encrypt_block5x)
40
41 aes_decrypt_block5x:
42 decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
43 ret
44 ENDPROC(aes_decrypt_block5x)
45 #endif
46
47
48
49
50
51
52
53
/*
 * aes_ecb_encrypt
 *
 * Register use as visible in this routine:
 *   x0     - output pointer, advanced by post-indexed stores
 *   x1     - input pointer, advanced by post-indexed loads
 *   x2, w3 - handed to enc_prepare/encrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   w4     - number of 16-byte blocks still to process
 *   x5, w6 - passed to the macros as additional operands (presumably
 *            scratch - confirm)
 *
 * The routine calls a helper with `bl`, which overwrites x30, so it saves
 * x29/x30 on entry and restores them before returning.
 */
54 AES_ENTRY(aes_ecb_encrypt)
55 stp x29, x30, [sp, #-16]!
56 mov x29, sp
57
58 enc_prepare w3, x2, x5
59
/* Main loop: MAX_STRIDE blocks per iteration. */
60 .LecbencloopNx:
61 subs w4, w4, #MAX_STRIDE
/* Fewer than MAX_STRIDE blocks left: finish one block at a time. */
62 bmi .Lecbenc1x
63 ld1 {v0.16b-v3.16b}, [x1], #64
64 ST4( bl aes_encrypt_block4x )
65 ST5( ld1 {v4.16b}, [x1], #16 )
66 ST5( bl aes_encrypt_block5x )
67 st1 {v0.16b-v3.16b}, [x0], #64
68 ST5( st1 {v4.16b}, [x0], #16 )
69 b .LecbencloopNx
70 .Lecbenc1x:
/* Undo the speculative subtraction; a zero result means nothing is left. */
71 adds w4, w4, #MAX_STRIDE
72 beq .Lecbencout
/* Tail loop: one block per iteration until w4 reaches zero. */
73 .Lecbencloop:
74 ld1 {v0.16b}, [x1], #16
75 encrypt_block v0, w3, x2, x5, w6
76 st1 {v0.16b}, [x0], #16
77 subs w4, w4, #1
78 bne .Lecbencloop
79 .Lecbencout:
80 ldp x29, x30, [sp], #16
81 ret
82 AES_ENDPROC(aes_ecb_encrypt)
83
84
/*
 * aes_ecb_decrypt
 *
 * Register use as visible in this routine:
 *   x0     - output pointer, advanced by post-indexed stores
 *   x1     - input pointer, advanced by post-indexed loads
 *   x2, w3 - handed to dec_prepare/decrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   w4     - number of 16-byte blocks still to process
 *   x5, w6 - passed to the macros as additional operands (presumably
 *            scratch - confirm)
 *
 * The routine calls a helper with `bl`, which overwrites x30, so it saves
 * x29/x30 on entry and restores them before returning.
 */
85 AES_ENTRY(aes_ecb_decrypt)
86 stp x29, x30, [sp, #-16]!
87 mov x29, sp
88
89 dec_prepare w3, x2, x5
90
/* Main loop: MAX_STRIDE blocks per iteration. */
91 .LecbdecloopNx:
92 subs w4, w4, #MAX_STRIDE
/* Fewer than MAX_STRIDE blocks left: finish one block at a time. */
93 bmi .Lecbdec1x
94 ld1 {v0.16b-v3.16b}, [x1], #64
95 ST4( bl aes_decrypt_block4x )
96 ST5( ld1 {v4.16b}, [x1], #16 )
97 ST5( bl aes_decrypt_block5x )
98 st1 {v0.16b-v3.16b}, [x0], #64
99 ST5( st1 {v4.16b}, [x0], #16 )
100 b .LecbdecloopNx
101 .Lecbdec1x:
/* Undo the speculative subtraction; a zero result means nothing is left. */
102 adds w4, w4, #MAX_STRIDE
103 beq .Lecbdecout
/* Tail loop: one block per iteration until w4 reaches zero. */
104 .Lecbdecloop:
105 ld1 {v0.16b}, [x1], #16
106 decrypt_block v0, w3, x2, x5, w6
107 st1 {v0.16b}, [x0], #16
108 subs w4, w4, #1
109 bne .Lecbdecloop
110 .Lecbdecout:
111 ldp x29, x30, [sp], #16
112 ret
113 AES_ENDPROC(aes_ecb_decrypt)
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
 * aes_essiv_cbc_encrypt / aes_cbc_encrypt
 *
 * Two entry points that share one loop body.
 *
 * Register use as visible here:
 *   x0     - output pointer, advanced by post-indexed stores
 *   x1     - input pointer, advanced by post-indexed loads
 *   x2, w3 - handed to enc_prepare/encrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   w4     - number of 16-byte blocks still to process
 *   x5     - pointer to the 16-byte IV; read on entry, written on exit
 *   x6     - ESSIV entry only: passed to enc_prepare for the IV encryption
 *            (presumably a second key - confirm); otherwise a macro operand
 *   v4     - running chaining value (IV, then the last ciphertext block)
 *
 * Neither entry point issues a `bl`, so no stack frame is set up.
 */
129 AES_ENTRY(aes_essiv_cbc_encrypt)
130 ld1 {v4.16b}, [x5]
131
/*
 * ESSIV: encrypt the IV once, using x6 and a fixed round count of 14,
 * before entering the shared CBC loop.
 * NOTE(review): 14 is the AES-256 round count, so this presumably relies
 * on the key at x6 always being a 256-bit key - confirm with the caller.
 */
132 mov w8, #14
133 enc_prepare w8, x6, x7
134 encrypt_block v4, w8, x6, x7, w9
/* enc_switch_key is defined elsewhere; presumably it loads the w3/x2 key. */
135 enc_switch_key w3, x2, x6
136 b .Lcbcencloop4x
137
138 AES_ENTRY(aes_cbc_encrypt)
139 ld1 {v4.16b}, [x5]
140 enc_prepare w3, x2, x6
141
/*
 * Four blocks per iteration. Each block depends on the previous output
 * block (v0 feeds v1, v1 feeds v2, ...), so the four encrypt_block
 * expansions run back to back rather than through the interleaved helper.
 */
142 .Lcbcencloop4x:
143 subs w4, w4, #4
144 bmi .Lcbcenc1x
145 ld1 {v0.16b-v3.16b}, [x1], #64
/* First block is XORed with the chaining value carried in v4. */
146 eor v0.16b, v0.16b, v4.16b
147 encrypt_block v0, w3, x2, x6, w7
148 eor v1.16b, v1.16b, v0.16b
149 encrypt_block v1, w3, x2, x6, w7
150 eor v2.16b, v2.16b, v1.16b
151 encrypt_block v2, w3, x2, x6, w7
152 eor v3.16b, v3.16b, v2.16b
153 encrypt_block v3, w3, x2, x6, w7
154 st1 {v0.16b-v3.16b}, [x0], #64
/* The last output block becomes the chaining value for the next round. */
155 mov v4.16b, v3.16b
156 b .Lcbcencloop4x
157 .Lcbcenc1x:
/* Undo the speculative subtraction; a zero result means nothing is left. */
158 adds w4, w4, #4
159 beq .Lcbcencout
/* Tail loop: XOR the input into v4 and encrypt v4 in place. */
160 .Lcbcencloop:
161 ld1 {v0.16b}, [x1], #16
162 eor v4.16b, v4.16b, v0.16b
163 encrypt_block v4, w3, x2, x6, w7
164 st1 {v4.16b}, [x0], #16
165 subs w4, w4, #1
166 bne .Lcbcencloop
167 .Lcbcencout:
/* Write the final chaining value back through x5. */
168 st1 {v4.16b}, [x5]
169 ret
170 AES_ENDPROC(aes_cbc_encrypt)
171 AES_ENDPROC(aes_essiv_cbc_encrypt)
172
/*
 * aes_essiv_cbc_decrypt / aes_cbc_decrypt
 *
 * Two entry points that share one loop body; the ESSIV entry joins at
 * .Lessivcbcdecstart.
 *
 * Register use as visible here:
 *   x0     - output pointer, advanced by post-indexed stores
 *   x1     - input pointer, advanced by post-indexed loads (and rewound
 *            after each Nx batch, see below)
 *   x2, w3 - handed to dec_prepare/decrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   w4     - number of 16-byte blocks still to process
 *   x5     - pointer to the 16-byte IV; read on entry, written on exit
 *   x6     - ESSIV entry only: passed to enc_prepare for the IV encryption
 *            (presumably a second key - confirm); otherwise a macro operand
 *   cbciv  - vector register alias defined outside this view; holds the
 *            running chaining value
 *
 * Both entry points push x29/x30 because the Nx loop uses `bl`.
 */
173 AES_ENTRY(aes_essiv_cbc_decrypt)
174 stp x29, x30, [sp, #-16]!
175 mov x29, sp
176
177 ld1 {cbciv.16b}, [x5]
178
/*
 * ESSIV: the IV is *encrypted* (enc_prepare/encrypt_block) with x6 and a
 * fixed round count of 14 before decryption of the data starts.
 * NOTE(review): 14 is the AES-256 round count, so this presumably relies
 * on the key at x6 always being a 256-bit key - confirm with the caller.
 */
179 mov w8, #14
180 enc_prepare w8, x6, x7
181 encrypt_block cbciv, w8, x6, x7, w9
182 b .Lessivcbcdecstart
183
184 AES_ENTRY(aes_cbc_decrypt)
185 stp x29, x30, [sp, #-16]!
186 mov x29, sp
187
188 ld1 {cbciv.16b}, [x5]
189 .Lessivcbcdecstart:
190 dec_prepare w3, x2, x6
191
/*
 * Main loop: MAX_STRIDE blocks per iteration. The helper overwrites v0-v3
 * (and v4 for stride 5) with the decrypted data, so copies of the input
 * blocks needed for the chaining XOR are kept in spare vector registers.
 * There are not enough spare registers for all of them, so the remaining
 * input block(s) are re-read from memory after rewinding x1.
 */
192 .LcbcdecloopNx:
193 subs w4, w4, #MAX_STRIDE
194 bmi .Lcbcdec1x
195 ld1 {v0.16b-v3.16b}, [x1], #64
196 #if MAX_STRIDE == 5
197 ld1 {v4.16b}, [x1], #16
/* Keep input blocks 0-2 in v5-v7. */
198 mov v5.16b, v0.16b
199 mov v6.16b, v1.16b
200 mov v7.16b, v2.16b
201 bl aes_decrypt_block5x
/* Step x1 back over input blocks 3 and 4 so they can be re-read. */
202 sub x1, x1, #32
203 eor v0.16b, v0.16b, cbciv.16b
204 eor v1.16b, v1.16b, v5.16b
/* Re-read input block 3 into v5, input block 4 becomes the new cbciv. */
205 ld1 {v5.16b}, [x1], #16
206 ld1 {cbciv.16b}, [x1], #16
207 eor v2.16b, v2.16b, v6.16b
208 eor v3.16b, v3.16b, v7.16b
209 eor v4.16b, v4.16b, v5.16b
210 #else
/* Keep input blocks 0-2 in v4-v6. */
211 mov v4.16b, v0.16b
212 mov v5.16b, v1.16b
213 mov v6.16b, v2.16b
214 bl aes_decrypt_block4x
/* Step x1 back over input block 3 so it can be re-read as the new cbciv. */
215 sub x1, x1, #16
216 eor v0.16b, v0.16b, cbciv.16b
217 eor v1.16b, v1.16b, v4.16b
218 ld1 {cbciv.16b}, [x1], #16
219 eor v2.16b, v2.16b, v5.16b
220 eor v3.16b, v3.16b, v6.16b
221 #endif
222 st1 {v0.16b-v3.16b}, [x0], #64
223 ST5( st1 {v4.16b}, [x0], #16 )
224 b .LcbcdecloopNx
225 .Lcbcdec1x:
/* Undo the speculative subtraction; a zero result means nothing is left. */
226 adds w4, w4, #MAX_STRIDE
227 beq .Lcbcdecout
/* Tail loop: one block per iteration. */
228 .Lcbcdecloop:
229 ld1 {v1.16b}, [x1], #16
/* Decrypt a copy in v0; the untouched input in v1 is the next cbciv. */
230 mov v0.16b, v1.16b
231 decrypt_block v0, w3, x2, x6, w7
232 eor v0.16b, v0.16b, cbciv.16b
233 mov cbciv.16b, v1.16b
234 st1 {v0.16b}, [x0], #16
235 subs w4, w4, #1
236 bne .Lcbcdecloop
237 .Lcbcdecout:
/* Write the final chaining value back through x5. */
238 st1 {cbciv.16b}, [x5]
239 ldp x29, x30, [sp], #16
240 ret
241 AES_ENDPROC(aes_cbc_decrypt)
242 AES_ENDPROC(aes_essiv_cbc_decrypt)
243
244
245
246
247
248
249
250
251
/*
 * aes_cbc_cts_encrypt
 *
 * Encrypts the final two (one full, one possibly partial) blocks of a CBC
 * message with ciphertext stealing.
 *
 * Register use as visible here:
 *   x0     - output pointer
 *   x1     - input pointer
 *   x2, w3 - handed to enc_prepare/encrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   x4     - byte count on entry; reduced by 16 right away, so below it is
 *            the size of the last block
 *   x5     - pointer to the 16-byte IV (read only in this routine)
 *
 * NOTE(review): the two 16-byte loads and stores only cover the whole
 * buffer if 16 <= byte count <= 32 - presumably guaranteed by the caller;
 * confirm.
 */
252 AES_ENTRY(aes_cbc_cts_encrypt)
253 adr_l x8, .Lcts_permute_table
254 sub x4, x4, #16
/*
 * With n = x4: x8 = table + n and x9 = table + 32 - n select two 16-byte
 * windows of the permute table, loaded into v3 and v4.
 */
255 add x9, x8, #32
256 add x8, x8, x4
257 sub x9, x9, x4
258 ld1 {v3.16b}, [x8]
259 ld1 {v4.16b}, [x9]
260
/*
 * v0 = first 16 input bytes, v1 = last 16 input bytes. The second load
 * starts n bytes after the first, so the two overlap when n < 16.
 */
261 ld1 {v0.16b}, [x1], x4
262 ld1 {v1.16b}, [x1]
263
264 ld1 {v5.16b}, [x5]
265 enc_prepare w3, x2, x6
266
267 eor v0.16b, v0.16b, v5.16b
/* tbl writes zero for every 0xff index, so v1 ends up zero padded. */
268 tbl v1.16b, {v1.16b}, v4.16b
269 encrypt_block v0, w3, x2, x6, w7
270
271 eor v1.16b, v1.16b, v0.16b
272 tbl v0.16b, {v0.16b}, v3.16b
273 encrypt_block v1, w3, x2, x6, w7
274
/*
 * v0 is stored at out + n first and v1 at out afterwards; where the two
 * stores overlap, v1 (written last) wins.
 */
275 add x4, x0, x4
276 st1 {v0.16b}, [x4]
277 st1 {v1.16b}, [x0]
278 ret
279 AES_ENDPROC(aes_cbc_cts_encrypt)
280
/*
 * aes_cbc_cts_decrypt
 *
 * Decrypts the final two (one full, one possibly partial) blocks of a CBC
 * message that was encrypted with ciphertext stealing.
 *
 * Register use as visible here:
 *   x0     - output pointer
 *   x1     - input pointer
 *   x2, w3 - handed to dec_prepare/decrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   x4     - byte count on entry; reduced by 16 right away, so below it is
 *            the size of the last block
 *   x5     - pointer to the 16-byte IV (read only in this routine)
 *
 * NOTE(review): the two 16-byte loads and stores only cover the whole
 * buffer if 16 <= byte count <= 32 - presumably guaranteed by the caller;
 * confirm.
 */
281 AES_ENTRY(aes_cbc_cts_decrypt)
282 adr_l x8, .Lcts_permute_table
283 sub x4, x4, #16
/*
 * With n = x4: x8 = table + n and x9 = table + 32 - n select two 16-byte
 * windows of the permute table, loaded into v3 and v4.
 */
284 add x9, x8, #32
285 add x8, x8, x4
286 sub x9, x9, x4
287 ld1 {v3.16b}, [x8]
288 ld1 {v4.16b}, [x9]
289
/*
 * v0 = first 16 input bytes, v1 = last 16 input bytes. The second load
 * starts n bytes after the first, so the two overlap when n < 16.
 */
290 ld1 {v0.16b}, [x1], x4
291 ld1 {v1.16b}, [x1]
292
293 ld1 {v5.16b}, [x5]
294 dec_prepare w3, x2, x6
295
296 decrypt_block v0, w3, x2, x6, w7
/* v2 = permuted copy of the decrypted block XORed with the last 16 bytes. */
297 tbl v2.16b, {v0.16b}, v3.16b
298 eor v2.16b, v2.16b, v1.16b
299
/*
 * tbx keeps the destination byte wherever the index is 0xff, so this
 * merges bytes of v1 into v0 and leaves the remaining bytes of v0 as is.
 */
300 tbx v0.16b, {v1.16b}, v4.16b
301 decrypt_block v0, w3, x2, x6, w7
302 eor v0.16b, v0.16b, v5.16b
303
/*
 * v2 is stored at out + n first and v0 at out afterwards; where the two
 * stores overlap, v0 (written last) wins.
 */
304 add x4, x0, x4
305 st1 {v2.16b}, [x4]
306 st1 {v0.16b}, [x0]
307 ret
308 AES_ENDPROC(aes_cbc_cts_decrypt)
309
/*
 * Index table for the tbl/tbx based ciphertext-stealing code in this file.
 *
 * Layout: 16 bytes of 0xff, the identity indices 0x0..0xf, 16 bytes of
 * 0xff (48 bytes in total). The users load 16-byte windows at table + n
 * and table + 32 - n, which yields the identity permutation shifted by n
 * positions with 0xff (an out-of-range index for a one-register table) in
 * every vacated position.
 *
 * The table lives in .rodata; .previous switches back to the section that
 * was active before.
 */
310 .section ".rodata", "a"
311 .align 6
312 .Lcts_permute_table:
313 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
314 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
315 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
316 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
317 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
318 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
319 .previous
320
321
322
323
324
325
326
/*
 * aes_ctr_encrypt
 *
 * Register use as visible in this routine:
 *   x0     - output pointer, advanced by post-indexed stores
 *   x1     - input pointer, advanced by post-indexed loads
 *   x2, w3 - handed to enc_prepare/encrypt_block (macros defined outside
 *            this view; presumably round-key pointer and round count -
 *            confirm)
 *   w4     - number of 16-byte blocks still to process
 *   x5     - pointer to the 16-byte counter block; read on entry and
 *            written back with the next counter value on exit
 *   x6     - vctr.d[1] in byte-reversed form, i.e. the low 64 bits of the
 *            big-endian counter as a native integer
 *   vctr   - vector register alias defined outside this view; holds the
 *            current counter block
 *
 * The routine calls a helper with `bl`, which overwrites x30, so it saves
 * x29/x30 on entry and restores them before returning.
 */
327 AES_ENTRY(aes_ctr_encrypt)
328 stp x29, x30, [sp, #-16]!
329 mov x29, sp
330
331 enc_prepare w3, x2, x6
332 ld1 {vctr.16b}, [x5]
333
334 umov x6, vctr.d[1]
335 rev x6, x6
/*
 * The Nx loop below only patches the last 32-bit lane of each counter
 * block. If adding the block count to the low 32 counter bits carries,
 * that shortcut would be wrong, so use the one-block loop for everything.
 */
336 cmn w6, w4
337 bcs .Lctrloop
338 .LctrloopNx:
339 subs w4, w4, #MAX_STRIDE
340 bmi .Lctr1x
/*
 * Build MAX_STRIDE counter blocks: v0 = ctr, v1 = ctr + 1, v2 = ctr + 2,
 * v3 = ctr + 3 (and v4 = ctr + 4 for stride 5). Each incremented value is
 * byte-reversed and inserted into lane s[3] of a copy of vctr.
 */
341 add w7, w6, #1
342 mov v0.16b, vctr.16b
343 add w8, w6, #2
344 mov v1.16b, vctr.16b
345 add w9, w6, #3
346 mov v2.16b, vctr.16b
348 rev w7, w7
349 mov v3.16b, vctr.16b
350 rev w8, w8
351 ST5( mov v4.16b, vctr.16b )
352 mov v1.s[3], w7
353 rev w9, w9
354 ST5( add w10, w6, #4 )
355 mov v2.s[3], w8
356 ST5( rev w10, w10 )
357 mov v3.s[3], w9
358 ST5( mov v4.s[3], w10 )
/* Fetch the first three input blocks before the call ... */
359 ld1 {v5.16b-v7.16b}, [x1], #48
360 ST4( bl aes_encrypt_block4x )
361 ST5( bl aes_encrypt_block5x )
/* ... and the rest afterwards, reusing v5 (and v6) once they are consumed. */
362 eor v0.16b, v5.16b, v0.16b
363 ST4( ld1 {v5.16b}, [x1], #16 )
364 eor v1.16b, v6.16b, v1.16b
365 ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
366 eor v2.16b, v7.16b, v2.16b
367 eor v3.16b, v5.16b, v3.16b
368 ST5( eor v4.16b, v6.16b, v4.16b )
369 st1 {v0.16b-v3.16b}, [x0], #64
370 ST5( st1 {v4.16b}, [x0], #16 )
/* Advance the counter by MAX_STRIDE and refresh vctr.d[1]. */
371 add x6, x6, #MAX_STRIDE
372 rev x7, x6
373 ins vctr.d[1], x7
374 cbz w4, .Lctrout
375 b .LctrloopNx
376 .Lctr1x:
/* Undo the speculative subtraction; a zero result means nothing is left. */
377 adds w4, w4, #MAX_STRIDE
378 beq .Lctrout
/* One-block loop: v0 = encrypted counter block (the keystream). */
379 .Lctrloop:
380 mov v0.16b, vctr.16b
381 encrypt_block v0, w3, x2, x8, w7
382
/*
 * Increment the low 64 counter bits. rev and ins leave the flags alone,
 * so the bcs below still sees the carry produced by adds.
 */
383 adds x6, x6, #1
384 rev x7, x6
385 ins vctr.d[1], x7
386 bcs .Lctrcarry
387
388 .Lctrcarrydone:
/*
 * A count that drops below zero selects the tail path, which stores the
 * raw keystream block instead of XORing it with input.
 * NOTE(review): presumably the caller passes a negative block count for a
 * final partial block and does the XOR itself - confirm with the caller.
 * ld1/eor/st1 do not modify the flags, so bne tests the subs result.
 */
389 subs w4, w4, #1
390 bmi .Lctrtailblock
391 ld1 {v3.16b}, [x1], #16
392 eor v3.16b, v0.16b, v3.16b
393 st1 {v3.16b}, [x0], #16
394 bne .Lctrloop
395
396 .Lctrout:
/* Write the next counter value back through x5. */
397 st1 {vctr.16b}, [x5]
398 ldp x29, x30, [sp], #16
399 ret
400
401 .Lctrtailblock:
402 st1 {v0.16b}, [x0]
403 b .Lctrout
404
/* The low 64 bits wrapped: propagate the carry into vctr.d[0]. */
405 .Lctrcarry:
406 umov x7, vctr.d[0]
407 rev x7, x7
408 add x7, x7, #1
409 rev x7, x7
410 ins vctr.d[0], x7
411 b .Lctrcarrydone
412 AES_ENDPROC(aes_ctr_encrypt)
413
414
415
416
417
418
419
420
421
/*
 * next_tweak out, in, tmp
 *
 * Computes \out = \in shifted left by one bit as a 128-bit value, with
 * 0x87 XORed into the lowest byte when a bit is shifted out at the top.
 * \tmp is clobbered. \out and \in may be the same register (the file uses
 * it that way).
 *
 * Relies on xtsmask holding d[0] = 1 and d[1] = 0x87, which is what the
 * xts_load_mask macro in this file sets up.
 */
422 .macro next_tweak, out, in, tmp
/* Each 64-bit lane of tmp: all ones if the lane's top bit is set, else 0. */
423 sshr \tmp\().2d, \in\().2d, #63
/* tmp.d[0] = 1 if in.d[0] overflows, tmp.d[1] = 0x87 if in.d[1] overflows. */
424 and \tmp\().16b, \tmp\().16b, xtsmask.16b
/* Double both 64-bit lanes independently (bit 63 of each lane is lost). */
425 add \out\().2d, \in\().2d, \in\().2d
/* Swap the two halves of tmp so each correction lands in the other lane. */
426 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
427 eor \out\().16b, \out\().16b, \tmp\().16b
428 .endm
429
/*
 * xts_load_mask tmp
 *
 * Initialises xtsmask (a vector register alias defined outside this view)
 * to the 32-bit lanes { 1, 0, 0x87, 0 }, i.e. d[0] = 1 and d[1] = 0x87.
 * \tmp is clobbered.
 */
430 .macro xts_load_mask, tmp
/* The 64-bit movi forms also clear the upper half of the register. */
431 movi xtsmask.2s, #0x1
432 movi \tmp\().2s, #0x87
/* Even lanes of xtsmask (1, 0) followed by even lanes of tmp (0x87, 0). */
433 uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
434 .endm
435
/*
 * aes_xts_encrypt
 *
 * Register use as visible in this routine:
 *   x0     - output pointer
 *   x1     - input pointer
 *   x2, w3 - handed to enc_prepare/encrypt_block for the data blocks
 *            (macros defined outside this view; presumably round-key
 *            pointer and round count - confirm)
 *   w4     - number of BYTES still to process (stepped by 64 and 16)
 *   x5     - passed to enc_prepare for encrypting the initial tweak
 *            (presumably the second key - confirm)
 *   x6     - pointer to the 16-byte tweak; read on entry, written on exit
 *   w7     - zero means "not the first call": the tweak at x6 is used as
 *            is (after one next_tweak step) instead of being encrypted
 *   v4     - current tweak; v5-v7 hold the following three tweaks in the
 *            4x loop; v8 is the scratch operand of next_tweak
 *
 * xts_cts_skip_tw and xts_reload_mask are macros defined outside this
 * view; their exact behaviour cannot be seen from here.
 *
 * The routine calls a helper with `bl`, which overwrites x30, so it saves
 * x29/x30 on entry and restores them before returning.
 */
436 AES_ENTRY(aes_xts_encrypt)
437 stp x29, x30, [sp, #-16]!
438 mov x29, sp
439
440 ld1 {v4.16b}, [x6]
441 xts_load_mask v8
442 cbz w7, .Lxtsencnotfirst
443
/* First call: encrypt the tweak with the x5 key, then switch to x2. */
444 enc_prepare w3, x5, x8
445 xts_cts_skip_tw w7, .LxtsencNx
446 encrypt_block v4, w3, x5, x8, w7
447 enc_switch_key w3, x2, x8
448 b .LxtsencNx
449
450 .Lxtsencnotfirst:
451 enc_prepare w3, x2, x8
452 .LxtsencloopNx:
453 next_tweak v4, v4, v8
454 .LxtsencNx:
/* Main loop: 64 bytes (four blocks) per iteration. */
455 subs w4, w4, #64
456 bmi .Lxtsenc1x
457 ld1 {v0.16b-v3.16b}, [x1], #64
/* XOR each block with its tweak before and after the block cipher. */
458 next_tweak v5, v4, v8
459 eor v0.16b, v0.16b, v4.16b
460 next_tweak v6, v5, v8
461 eor v1.16b, v1.16b, v5.16b
462 eor v2.16b, v2.16b, v6.16b
463 next_tweak v7, v6, v8
464 eor v3.16b, v3.16b, v7.16b
465 bl aes_encrypt_block4x
466 eor v3.16b, v3.16b, v7.16b
467 eor v0.16b, v0.16b, v4.16b
468 eor v1.16b, v1.16b, v5.16b
469 eor v2.16b, v2.16b, v6.16b
470 st1 {v0.16b-v3.16b}, [x0], #64
/* Carry the last tweak used forward; it is what gets written back. */
471 mov v4.16b, v7.16b
472 cbz w4, .Lxtsencret
473 xts_reload_mask v8
474 b .LxtsencloopNx
475 .Lxtsenc1x:
/*
 * Undo the speculative subtraction.
 * NOTE(review): a zero result here seems reachable only when the byte
 * count was already zero on entry; .Lxtsencout then stores v0, which has
 * not been set by this routine - presumably callers never pass a zero
 * length; confirm.
 */
476 adds w4, w4, #64
477 beq .Lxtsencout
478 subs w4, w4, #16
/* Less than one full block left: steal from the last 4x output block. */
479 bmi .LxtsencctsNx
/* One-block loop. */
480 .Lxtsencloop:
481 ld1 {v0.16b}, [x1], #16
482 .Lxtsencctsout:
483 eor v0.16b, v0.16b, v4.16b
484 encrypt_block v0, w3, x2, x8, w7
485 eor v0.16b, v0.16b, v4.16b
486 cbz w4, .Lxtsencout
487 subs w4, w4, #16
/* next_tweak uses no flag-setting instructions, so bmi tests the subs. */
488 next_tweak v4, v4, v8
489 bmi .Lxtsenccts
490 st1 {v0.16b}, [x0], #16
491 b .Lxtsencloop
492 .Lxtsencout:
493 st1 {v0.16b}, [x0]
494 .Lxtsencret:
495 st1 {v4.16b}, [x6]
496 ldp x29, x30, [sp], #16
497 ret
498
499 .LxtsencctsNx:
/*
 * The previous full block was produced by the 4x loop: its ciphertext is
 * still in v3 and has already been stored, so step x0 back onto it.
 */
500 mov v0.16b, v3.16b
501 sub x0, x0, #16
/*
 * Ciphertext stealing. On entry w4 = (bytes in the final partial block)
 * - 16, i.e. negative, and v0 = ciphertext of the last full block.
 */
502 .Lxtsenccts:
503 adr_l x8, .Lcts_permute_table
504
/* Rewind x1 so that the 16-byte load below ends at the end of the input. */
505 add x1, x1, w4, sxtw
/* w4 = n, the number of bytes in the final partial block. */
506 add w4, w4, #16
507 add x9, x8, #32
508 add x8, x8, x4
509 sub x9, x9, x4
/* x4 = output address of the final partial block, relative to x0. */
510 add x4, x0, x4
511
512 ld1 {v1.16b}, [x1]
513 ld1 {v2.16b}, [x8]
514 ld1 {v3.16b}, [x9]
515
/* v2 = leading n bytes of v0 moved to the top, zero below (tbl). */
516 tbl v2.16b, {v0.16b}, v2.16b
/* v0 = final n input bytes in front, rest of v0 kept (tbx). */
517 tbx v0.16b, {v1.16b}, v3.16b
/* Overlaps the block at x0, which is rewritten at .Lxtsencout. */
518 st1 {v2.16b}, [x4]
/* w4 = 0 makes .Lxtsencctsout fall out through .Lxtsencout. */
519 mov w4, wzr
520 b .Lxtsencctsout
521 AES_ENDPROC(aes_xts_encrypt)
522
/*
 * aes_xts_decrypt
 *
 * Register use as visible in this routine:
 *   x0     - output pointer
 *   x1     - input pointer
 *   x2, w3 - handed to dec_prepare/decrypt_block for the data blocks
 *            (macros defined outside this view; presumably round-key
 *            pointer and round count - confirm)
 *   w4     - number of BYTES still to process (stepped by 64 and 16)
 *   x5     - passed to enc_prepare for encrypting the initial tweak
 *            (presumably the second key - confirm)
 *   x6     - pointer to the 16-byte tweak; read on entry, written on exit
 *   w7     - zero means "not the first call": the tweak at x6 is used as
 *            is (after one next_tweak step) instead of being encrypted
 *   v4     - current tweak; v5-v7 hold the following three tweaks in the
 *            4x loop; v8 is the scratch operand of next_tweak
 *
 * xts_cts_skip_tw and xts_reload_mask are macros defined outside this
 * view; their exact behaviour cannot be seen from here.
 *
 * The routine calls a helper with `bl`, which overwrites x30, so it saves
 * x29/x30 on entry and restores them before returning.
 */
523 AES_ENTRY(aes_xts_decrypt)
524 stp x29, x30, [sp, #-16]!
525 mov x29, sp
526
527
/*
 * If the byte count is not a multiple of 16, take 16 bytes off it so that
 * the last full block is left for the ciphertext-stealing path below.
 */
528 sub w8, w4, #0x10
529 tst w4, #0xf
530 csel w4, w4, w8, eq
531
532 ld1 {v4.16b}, [x6]
533 xts_load_mask v8
534 xts_cts_skip_tw w7, .Lxtsdecskiptw
535 cbz w7, .Lxtsdecnotfirst
536
/* First call: the tweak is always *encrypted*, using the x5 key. */
537 enc_prepare w3, x5, x8
538 encrypt_block v4, w3, x5, x8, w7
539 .Lxtsdecskiptw:
540 dec_prepare w3, x2, x8
541 b .LxtsdecNx
542
543 .Lxtsdecnotfirst:
544 dec_prepare w3, x2, x8
545 .LxtsdecloopNx:
546 next_tweak v4, v4, v8
547 .LxtsdecNx:
/* Main loop: 64 bytes (four blocks) per iteration. */
548 subs w4, w4, #64
549 bmi .Lxtsdec1x
550 ld1 {v0.16b-v3.16b}, [x1], #64
/* XOR each block with its tweak before and after the block cipher. */
551 next_tweak v5, v4, v8
552 eor v0.16b, v0.16b, v4.16b
553 next_tweak v6, v5, v8
554 eor v1.16b, v1.16b, v5.16b
555 eor v2.16b, v2.16b, v6.16b
556 next_tweak v7, v6, v8
557 eor v3.16b, v3.16b, v7.16b
558 bl aes_decrypt_block4x
559 eor v3.16b, v3.16b, v7.16b
560 eor v0.16b, v0.16b, v4.16b
561 eor v1.16b, v1.16b, v5.16b
562 eor v2.16b, v2.16b, v6.16b
563 st1 {v0.16b-v3.16b}, [x0], #64
/* Carry the last tweak used forward; it is what gets written back. */
564 mov v4.16b, v7.16b
565 cbz w4, .Lxtsdecout
566 xts_reload_mask v8
567 b .LxtsdecloopNx
568 .Lxtsdec1x:
/* Undo the speculative subtraction; a zero result means nothing is left. */
569 adds w4, w4, #64
570 beq .Lxtsdecout
571 subs w4, w4, #16
/* One-block loop. */
572 .Lxtsdecloop:
/* ld1 leaves the flags alone, so bmi tests the preceding subs result. */
573 ld1 {v0.16b}, [x1], #16
574 bmi .Lxtsdeccts
575 .Lxtsdecctsout:
576 eor v0.16b, v0.16b, v4.16b
577 decrypt_block v0, w3, x2, x8, w7
578 eor v0.16b, v0.16b, v4.16b
579 st1 {v0.16b}, [x0], #16
580 cbz w4, .Lxtsdecout
581 subs w4, w4, #16
/* next_tweak uses no flag-setting instructions; flags reach the bmi above. */
582 next_tweak v4, v4, v8
583 b .Lxtsdecloop
584 .Lxtsdecout:
585 st1 {v4.16b}, [x6]
586 ldp x29, x30, [sp], #16
587 ret
588
/*
 * Ciphertext stealing. On entry w4 = (bytes in the final partial block)
 * - 16, i.e. negative, and v0 = the last full input block.
 */
589 .Lxtsdeccts:
590 adr_l x8, .Lcts_permute_table
591
/* Rewind x1 so that the 16-byte load below ends at the end of the input. */
592 add x1, x1, w4, sxtw
/* w4 = n, the number of bytes in the final partial block. */
593 add w4, w4, #16
594 add x9, x8, #32
595 add x8, x8, x4
596 sub x9, x9, x4
/* x4 = output address of the final partial block, relative to x0. */
597 add x4, x0, x4
598
/*
 * The last full block is decrypted with the tweak *after* the current one
 * (v5); the merged block is then decrypted with v4 at .Lxtsdecctsout.
 */
599 next_tweak v5, v4, v8
600
601 ld1 {v1.16b}, [x1]
602 ld1 {v2.16b}, [x8]
603 ld1 {v3.16b}, [x9]
604
605 eor v0.16b, v0.16b, v5.16b
606 decrypt_block v0, w3, x2, x8, w7
607 eor v0.16b, v0.16b, v5.16b
608
/* v2 = leading n bytes of v0 moved to the top, zero below (tbl). */
609 tbl v2.16b, {v0.16b}, v2.16b
/* v0 = final n input bytes in front, rest of v0 kept (tbx). */
610 tbx v0.16b, {v1.16b}, v3.16b
611
/* Overlaps the block at x0, which is rewritten at .Lxtsdecctsout. */
612 st1 {v2.16b}, [x4]
/* w4 = 0 makes .Lxtsdecctsout finish through .Lxtsdecout. */
613 mov w4, wzr
614 b .Lxtsdecctsout
615 AES_ENDPROC(aes_xts_decrypt)
616
617
618
619
620
/*
 * aes_mac_update
 *
 * Folds input blocks into a 16-byte running digest by XORing each block
 * into the digest and encrypting the result.
 *
 * Arguments as used by this routine (copied to callee-saved registers so
 * they survive a possible yield):
 *   x0 -> x19 - input pointer, advanced by post-indexed loads
 *   x1 -> x20 - handed to enc_prepare/encrypt_block (presumably the
 *               round-key pointer - confirm)
 *   w2 -> w21 - handed to enc_prepare/encrypt_block (presumably the round
 *               count - confirm)
 *   w3 -> w22 - number of 16-byte blocks still to process
 *   x4 -> x23 - pointer to the 16-byte digest; read on entry, written back
 *   w5        - if non-zero, the digest is encrypted once before any input
 *               is consumed
 *   x6 -> x24 - if its low 32 bits are zero, the encryption after the very
 *               last input block is skipped
 *
 * frame_push, frame_pop and cond_yield_neon are macros defined outside
 * this view.
 */
621 AES_ENTRY(aes_mac_update)
622 frame_push 6
623
624 mov x19, x0
625 mov x20, x1
626 mov x21, x2
627 mov x22, x3
628 mov x23, x4
629 mov x24, x6
630
631 ld1 {v0.16b}, [x23]
632 enc_prepare w2, x1, x7
633 cbz w5, .Lmacloop4x
634
635 encrypt_block v0, w2, x1, x7, w8
636
/* Main loop: four blocks per iteration, chained through v0. */
637 .Lmacloop4x:
638 subs w22, w22, #4
639 bmi .Lmac1x
640 ld1 {v1.16b-v4.16b}, [x19], #64
641 eor v0.16b, v0.16b, v1.16b
642 encrypt_block v0, w21, x20, x7, w8
643 eor v0.16b, v0.16b, v2.16b
644 encrypt_block v0, w21, x20, x7, w8
645 eor v0.16b, v0.16b, v3.16b
646 encrypt_block v0, w21, x20, x7, w8
647 eor v0.16b, v0.16b, v4.16b
/*
 * x5 = x24 if that was the last block, otherwise all ones. So the branch
 * to .Lmacout is taken only when no blocks remain AND w24 is zero; the
 * digest is then stored without the final encryption.
 */
648 cmp w22, wzr
649 csinv x5, x24, xzr, eq
650 cbz w5, .Lmacout
651 encrypt_block v0, w21, x20, x7, w8
/* Save the digest before offering to yield; .Lmacrestart reloads it. */
652 st1 {v0.16b}, [x23]
653 cond_yield_neon .Lmacrestart
654 b .Lmacloop4x
655 .Lmac1x:
/* Undo the speculative subtraction. */
656 add w22, w22, #4
/* Tail loop: one block per iteration. */
657 .Lmacloop:
658 cbz w22, .Lmacout
659 ld1 {v1.16b}, [x19], #16
660 eor v0.16b, v0.16b, v1.16b
661
/* Same last-block test as in the 4x loop above. */
662 subs w22, w22, #1
663 csinv x5, x24, xzr, eq
664 cbz w5, .Lmacout
665
666 .Lmacenc:
667 encrypt_block v0, w21, x20, x7, w8
668 b .Lmacloop
669
670 .Lmacout:
671 st1 {v0.16b}, [x23]
672 frame_pop
673 ret
674
/*
 * Target handed to cond_yield_neon (presumably taken after a yield, when
 * the vector registers may have been lost - confirm): reload the digest
 * and redo the key setup. x0 is free to use as the extra operand here
 * because the input pointer lives in x19.
 */
675 .Lmacrestart:
676 ld1 {v0.16b}, [x23]
677 enc_prepare w21, x20, x0
678 b .Lmacloop4x
679 AES_ENDPROC(aes_mac_update)