1
2
3
4
5
6
7
8
9
10 #include <linux/linkage.h>
11
/*
 * Register aliases.
 *
 * KEY, MESSAGE, MESSAGE_LEN and HASH are x0-x3, i.e. the first four
 * integer/pointer argument registers under AAPCS64.
 */
	KEY		.req	x0
	MESSAGE		.req	x1
	MESSAGE_LEN	.req	x2
	HASH		.req	x3

	/* One accumulator register (two 64-bit lanes) per pass. */
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3

	/* Four 16-byte key strides held in registers at any one time. */
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7

	/*
	 * Scratch registers.  These live in v16-v23 rather than v8-v15:
	 * AAPCS64 makes the low 64 bits of v8-v15 callee-saved, and the code
	 * in this file overwrites its scratch registers without saving or
	 * restoring them.  v16-v23 are caller-saved, so no save is needed.
	 */
	T0		.req	v16
	T1		.req	v17
	T2		.req	v18
	T3		.req	v19
	T4		.req	v20
	T5		.req	v21
	T6		.req	v22
	T7		.req	v23
33
/*
 * _nh_stride ka, kb, kc, kd
 *
 * Consume one 16-byte message stride and fold it into all four pass
 * accumulators.
 *
 *   \ka, \kb, \kc  key strides that are already loaded; read only.
 *   \kd            overwritten with the next 16 bytes read from KEY.
 *
 * Pass n adds the message words to the n-th of the four key registers
 * (in argument order), then accumulates the two unsigned 32x32->64
 * products  sum[0] * sum[2]  and  sum[1] * sum[3]  into PASSn_SUMS.
 *
 * Side effects: MESSAGE and KEY each advance by 16; T0-T7 and \kd are
 * overwritten; PASS0_SUMS-PASS3_SUMS are updated.
 */
.macro _nh_stride	ka, kb, kc, kd

	// T3 = next 16 message bytes; MESSAGE += 16
	ld1		{T3.16b}, [MESSAGE], #16

	// \kd = next four 32-bit key words; KEY += 16
	ld1		{\kd\().4s}, [KEY], #16

	// Lane-wise 32-bit adds of message and key words.  The add that
	// targets T3 must stay last because it overwrites the message words
	// that the other three adds still need to read.
	add		T0.4s, T3.4s, \ka\().4s
	add		T1.4s, T3.4s, \kb\().4s
	add		T2.4s, T3.4s, \kc\().4s
	add		T3.4s, T3.4s, \kd\().4s

	// Copy the upper 64 bits (lanes 2 and 3) of each sum into the lower
	// 64 bits of a second register so that umlal, which reads the lower
	// two 32-bit lanes of both sources, pairs lane 0 with lane 2 and
	// lane 1 with lane 3.
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]

	// PASSn_SUMS.2d += unsigned 32x32->64 products of the paired lanes
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm
58
59
60
61
62
63
64
/*
 * nh_neon
 *
 * In:
 *   KEY          key words.  48 bytes are read up front, then 16 more for
 *                every 16-byte message stride processed.
 *   MESSAGE      message bytes, consumed 16 at a time.
 *   MESSAGE_LEN  message length in bytes.  The tail code only stops on an
 *                exact zero after subtracting 16, so callers are assumed
 *                to pass a multiple of 16 -- TODO confirm against callers.
 *   HASH         destination; 32 bytes are stored: four 64-bit sums, one
 *                per pass, pass 0 first.
 *
 * KEY, MESSAGE and MESSAGE_LEN are modified, as are the condition flags
 * and the PASSn_SUMS, Kn and Tn vector registers.  HASH is only read.
 */
ENTRY(nh_neon)

	// Preload the first three key strides and clear the accumulators.
	// The zeroing is interleaved with the loads.
	ld1		{K0.4s,K1.4s}, [KEY], #32
	  movi		PASS0_SUMS.2d, #0
	  movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	  movi		PASS2_SUMS.2d, #0
	  movi		PASS3_SUMS.2d, #0

	// Main loop: four strides (64 bytes) per iteration.  MESSAGE_LEN is
	// kept biased by -64 so the flags from subs drive the loop directly.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.lt		.Ltail
.Lquad_loop:
	// Each stride loads a new key stride into its last argument; after
	// four strides the three most recent key strides are in K0, K1, K2
	// again, ready for the next iteration.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.ge		.Lquad_loop

.Ltail:
	// MESSAGE_LEN is negative here; its low six bits are the number of
	// bytes still to process (0..63).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	b.eq		.Lfinish
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	b.eq		.Lfinish
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	b.eq		.Lfinish
	_nh_stride	K2, K3, K0, K1

.Lfinish:
	// Add the two 64-bit lanes of each accumulator together:
	//   T0 = { sum(PASS0), sum(PASS1) },  T1 = { sum(PASS2), sum(PASS3) }
	// then write the 32 bytes to HASH.
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
ENDPROC(nh_neon)