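/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 */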
#include <linux/linkage.h>
#include <asm/assembler.h>

	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XL		.req	v5
	XM		.req	v6
	XH		.req	v7
	IN1		.req	v7

	k00_16		.req	v8
	k32_48		.req	v9

	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	XL2		.req	v8
	XM2		.req	v9
	XH2		.req	v10
	XL3		.req	v11
	XM3		.req	v12
	XH3		.req	v13
	TT3		.req	v14
	TT4		.req	v15
	HH		.req	v16
	HH3		.req	v17
	HH4		.req	v18
	HH34		.req	v19

	.text
	.arch		armv8-a+crypto

	.macro		__pmull_p64, rd, rn, rm
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.endm

	.macro		__pmull2_p64, rd, rn, rm
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endm
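	//
	// Fallback 64x64 -> 128 bit carryless multiply for CPUs that only
	// implement the 8x8 -> 16 bit form of PMULL: the multiplicand is
	// split into byte-rotated copies, multiplied against precomputed
	// rotations of the key, and the partial products are recombined
	// with masks, shifts and XORs in __pmull_p8_tail below.
	//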
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1: input rotated by 1 byte
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2: input rotated by 2 bytes
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3: input rotated by 3 bytes

	__pmull_p8_\bd	\rq, \ad
	.endm

	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm

	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm

	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm

	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm

	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// mask the high halves of L and M and fold the masked-off
	// bits back into the low halves
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// likewise for N and K
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	ext		t3.16b, t3.16b, t3.16b, #15		// << 8
	ext		t5.16b, t5.16b, t5.16b, #14		// << 16
	ext		t7.16b, t7.16b, t7.16b, #13		// << 24
	ext		t9.16b, t9.16b, t9.16b, #12		// << 32

	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm
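	//
	// Prepare the p64 code path: load H^2, H^3 and H^4 (stored after H
	// at [x3]) and fold the 64-bit halves of the key powers into the
	// Karatsuba middle factors SHASH2 and HH34.
	//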
	.macro		__pmull_pre_p64
	add		x8, x3, #16
	ld1		{HH.2d-HH4.2d}, [x8]			// load H^2, H^3, H^4

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b		// Karatsuba factors for H, H^2

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b		// Karatsuba factors for H^3, H^4

	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57			// reduction constant
	.endm
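	//
	// Prepare the p8 fallback path: build the byte masks and the
	// permutation vectors, and precompute the rotated copies of SHASH
	// and SHASH2 consumed by __pmull_p8_tail.
	//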
	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute the byte-rotated copies of SHASH and SHASH2
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm
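	//
	// PMULL (64x64 -> 128 bit) based reduction of the 256 bit product
	// modulo the GHASH polynomial, for CPUs that can do it in a single
	// instruction.
	//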
	.macro		__pmull_reduce_p64
	pmull		T2.1q, XL.1d, MASK.1d
	eor		XM.16b, XM.16b, T1.16b

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	.endm
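	//
	// Alternative reduction using shifts and XORs only, for CPUs that
	// lack the 64x64 -> 128 bit form of PMULL.
	//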
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm
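	//
	// GHASH update loop, instantiated for the p64 and p8 code paths.
	// Register usage, as consumed by the code below:
	//   w0: number of blocks
	//   x1: GHASH digest (XL)
	//   x2: source data
	//   x3: hash key H (followed by H^2..H^4 for the p64 path)
	//   x4: optional head block to digest before the main loop
	//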
	.macro		__pmull_ghash, pn
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	mov		x4, xzr
	b		3f

0:	.ifc		\pn, p64
	tbnz		w0, #0, 2f		// skip until #blocks is a
	tbnz		w0, #1, 2f		// round multiple of 4

1:	ld1		{XM3.16b-TT4.16b}, [x2], #64

	sub		w0, w0, #4

	rev64		T1.16b, XM3.16b
	rev64		T2.16b, XH3.16b
	rev64		TT4.16b, TT4.16b
	rev64		TT3.16b, TT3.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
	ext		XL3.16b, TT3.16b, TT3.16b, #8

	eor		TT4.16b, TT4.16b, IN1.16b
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
	pmull		XM2.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	eor		TT3.16b, TT3.16b, XL3.16b
	pmull2		XH3.1q, HH.2d, XL3.2d		// a1 * b1
	pmull		XL3.1q, HH.1d, XL3.1d		// a0 * b0
	pmull2		XM3.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	ext		IN1.16b, T2.16b, T2.16b, #8
	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	eor		T2.16b, T2.16b, IN1.16b
	pmull2		XH3.1q, HH3.2d, IN1.2d		// a1 * b1
	pmull		XL3.1q, HH3.1d, IN1.1d		// a0 * b0
	pmull		XM3.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		TT3.16b, XL.16b, XL.16b, #8
	eor		XL.16b, XL.16b, IN1.16b
	eor		T1.16b, T1.16b, TT3.16b

	pmull2		XH.1q, HH4.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, HH4.1d, XL.1d		// a0 * b0
	pmull2		XM.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w0, 5f

	b		1b
	.endif

2:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_\pn	XL, XL, SHASH			// a0 * b0
	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

4:	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_\pn

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

5:	st1		{XL.2d}, [x1]
	ret
	.endm
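	//
	// GHASH update entry points for the PMULL.P64 and PMULL.P8 code
	// paths; arguments as described for __pmull_ghash above.
	//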
ENTRY(pmull_ghash_update_p64)
	__pmull_ghash	p64
ENDPROC(pmull_ghash_update_p64)

ENTRY(pmull_ghash_update_p8)
	__pmull_ghash	p8
ENDPROC(pmull_ghash_update_p8)

	KS0		.req	v12
	KS1		.req	v13
	INP0		.req	v14
	INP1		.req	v15
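	//
	// Load the expanded AES round keys for \rounds rounds from [\rk]
	// into v17-v31; v17-v20 are skipped for the shorter key sizes.
	//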
	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.endm

	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	.macro		enc_block, state, rounds
	cmp		\rounds, #12
	b.lo		2222f		/* 128 bits */
	b.eq		1111f		/* 192 bits */
	enc_round	\state, v17
	enc_round	\state, v18
1111:	enc_round	\state, v19
	enc_round	\state, v20
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	enc_round	\state, \key
	.endr
	aese		\state\().16b, v30.16b
	eor		\state\().16b, \state\().16b, v31.16b
	.endm
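	//
	// GCM encrypt/decrypt two blocks at a time, interleaving the AES
	// rounds for the counter blocks with the GHASH multiplications.
	// Register usage, as consumed by the code below:
	//   w0:   number of blocks (processed two at a time)
	//   x1:   GHASH digest (XL)
	//   x2:   destination buffer
	//   x3:   source buffer
	//   x4:   hash key powers H and H^2
	//   x5:   counter block (64-bit lower counter at offset 8)
	//   x6:   AES round keys, or zero if they are already loaded
	//   w7:   number of AES rounds
	//   [sp]: buffer holding two precomputed keystream blocks
	//         (encryption only)
	//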
	.macro		pmull_gcm_do_crypt, enc
	ld1		{SHASH.2d}, [x4], #16
	ld1		{HH.2d}, [x4]
	ld1		{XL.2d}, [x1]
	ldr		x8, [x5, #8]			// load lower counter

	movi		MASK.16b, #0xe1
	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
CPU_LE(	rev		x8, x8		)
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	.if		\enc == 1
	ldr		x10, [sp]
	ld1		{KS0.16b-KS1.16b}, [x10]
	.endif

	cbnz		x6, 4f

0:	ld1		{INP0.16b-INP1.16b}, [x3], #32

	rev		x9, x8
	add		x11, x8, #1
	add		x8, x8, #2

	.if		\enc == 1
	eor		INP0.16b, INP0.16b, KS0.16b	// encrypt input
	eor		INP1.16b, INP1.16b, KS1.16b
	.endif

	ld1		{KS0.8b}, [x5]			// load upper counter
	rev		x11, x11
	sub		w0, w0, #2
	mov		KS1.8b, KS0.8b
	ins		KS0.d[1], x9			// set lower counter
	ins		KS1.d[1], x11

	rev64		T1.16b, INP1.16b

	cmp		w7, #12
	b.ge		2f				// AES-192/256?

1:	enc_round	KS0, v21
	ext		IN1.16b, T1.16b, T1.16b, #8

	enc_round	KS1, v21
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1

	enc_round	KS0, v22
	eor		T1.16b, T1.16b, IN1.16b

	enc_round	KS1, v22
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0

	enc_round	KS0, v23
	pmull		XM2.1q, SHASH2.1d, T1.1d	// (a1 + a0)(b1 + b0)

	enc_round	KS1, v23
	rev64		T1.16b, INP0.16b
	ext		T2.16b, XL.16b, XL.16b, #8

	enc_round	KS0, v24
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b

	enc_round	KS1, v24
	eor		XL.16b, XL.16b, IN1.16b

	enc_round	KS0, v25
	eor		T1.16b, T1.16b, XL.16b

	enc_round	KS1, v25
	pmull2		XH.1q, HH.2d, XL.2d		// a1 * b1

	enc_round	KS0, v26
	pmull		XL.1q, HH.1d, XL.1d		// a0 * b0

	enc_round	KS1, v26
	pmull2		XM.1q, SHASH2.2d, T1.2d		// (a1 + a0)(b1 + b0)

	enc_round	KS0, v27
	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b

	enc_round	KS1, v27
	eor		XM.16b, XM.16b, XM2.16b
	ext		T1.16b, XL.16b, XH.16b, #8

	enc_round	KS0, v28
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b

	enc_round	KS1, v28
	eor		XM.16b, XM.16b, T2.16b

	enc_round	KS0, v29
	pmull		T2.1q, XL.1d, MASK.1d

	enc_round	KS1, v29
	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	aese		KS0.16b, v30.16b
	eor		XL.16b, XM.16b, T2.16b

	aese		KS1.16b, v30.16b
	ext		T2.16b, XL.16b, XL.16b, #8

	eor		KS0.16b, KS0.16b, v31.16b
	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b

	eor		KS1.16b, KS1.16b, v31.16b
	eor		XL.16b, XL.16b, T2.16b

	.if		\enc == 0
	eor		INP0.16b, INP0.16b, KS0.16b
	eor		INP1.16b, INP1.16b, KS1.16b
	.endif

	st1		{INP0.16b-INP1.16b}, [x2], #32

	cbnz		w0, 0b

CPU_LE(	rev		x8, x8		)
	st1		{XL.2d}, [x1]
	str		x8, [x5, #8]			// store lower counter

	.if		\enc == 1
	st1		{KS0.16b-KS1.16b}, [x10]
	.endif

	ret

2:	b.eq		3f				// AES-192?
	enc_round	KS0, v17
	enc_round	KS1, v17
	enc_round	KS0, v18
	enc_round	KS1, v18
3:	enc_round	KS0, v19
	enc_round	KS1, v19
	enc_round	KS0, v20
	enc_round	KS1, v20
	b		1b

4:	load_round_keys	w7, x6
	b		0b
	.endm
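	//
	// GCM encryption entry point: arguments as described for
	// pmull_gcm_do_crypt above, including the keystream buffer
	// pointer passed on the stack.
	//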
ENTRY(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
ENDPROC(pmull_gcm_encrypt)
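	//
	// GCM decryption entry point: same arguments, minus the keystream
	// buffer.
	//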
ENTRY(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
ENDPROC(pmull_gcm_decrypt)
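	//
	// Encrypt a single block:
	//   x0: destination, x1: source,
	//   x2: AES round keys (zero if already loaded), w3: number of rounds
	//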
ENTRY(pmull_gcm_encrypt_block)
	cbz		x2, 0f
	load_round_keys	w3, x2
0:	ld1		{v0.16b}, [x1]
	enc_block	v0, w3
	st1		{v0.16b}, [x0]
	ret
ENDPROC(pmull_gcm_encrypt_block)