/*
 * Accelerated GHASH implementation with NEON/ARMv8 vmull.p{8,64}
 * instructions.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	SHASH		.req	q0
	T1		.req	q1
	XL		.req	q2
	XM		.req	q3
	XH		.req	q4
	IN1		.req	q4

	SHASH_L		.req	d0
	SHASH_H		.req	d1
	T1_L		.req	d2
	T1_H		.req	d3
	XL_L		.req	d4
	XL_H		.req	d5
	XM_L		.req	d6
	XM_H		.req	d7
	XH_L		.req	d8

	t0l		.req	d10
	t0h		.req	d11
	t1l		.req	d12
	t1h		.req	d13
	t2l		.req	d14
	t2h		.req	d15
	t3l		.req	d16
	t3h		.req	d17
	t4l		.req	d18
	t4h		.req	d19

	t0q		.req	q5
	t1q		.req	q6
	t2q		.req	q7
	t3q		.req	q8
	t4q		.req	q9
	T2		.req	q9

	s1l		.req	d20
	s1h		.req	d21
	s2l		.req	d22
	s2h		.req	d23
	s3l		.req	d24
	s3h		.req	d25
	s4l		.req	d26
	s4h		.req	d27

	MASK		.req	d28
	SHASH2_p8	.req	d28

	k16		.req	d29
	k32		.req	d30
	k48		.req	d31
	SHASH2_p64	.req	d31

	HH		.req	q10
	HH3		.req	q11
	HH4		.req	q12
	HH34		.req	q13

	HH_L		.req	d20
	HH_H		.req	d21
	HH3_L		.req	d22
	HH3_H		.req	d23
	HH4_L		.req	d24
	HH4_H		.req	d25
	HH34_L		.req	d26
	HH34_H		.req	d27
	SHASH2_H	.req	d29

	XL2		.req	q5
	XM2		.req	q6
	XH2		.req	q7
	T3		.req	q8

	XL2_L		.req	d10
	XL2_H		.req	d11
	XM2_L		.req	d12
	XM2_H		.req	d13
	T3_L		.req	d16
	T3_H		.req	d17

	.text
	.fpu		crypto-neon-fp-armv8

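	/*
	 * __pmull_p64: single-instruction 64x64 -> 128 bit carry-less
	 * multiply. The unused b1..b4 arguments only exist so that the
	 * shared ghash_update macro can expand to either __pmull_p64 or
	 * __pmull_p8 with the same argument list.
	 */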
	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
	vmull.p64	\rd, \rn, \rm
	.endm

	/*
	 * 64x64 -> 128 bit carry-less multiply built from vmull.p8
	 * (8x8 -> 16 bit polynomial multiply): partial products of
	 * byte-rotated copies of the operands (A1..A3, B1..B4) are masked,
	 * shifted into place and folded into the plain product D = A*B.
	 * The b1..b4 arguments allow the byte-rotated copies of 'bd' to be
	 * precomputed by the caller (as done in the p8 key setup below).
	 */
	.macro		__pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
	vext.8		t0l, \ad, \ad, #1	@ A1
	.ifc		\b1, t4l
	vext.8		t4l, \bd, \bd, #1	@ B1
	.endif
	vmull.p8	t0q, t0l, \bd		@ F = A1*B
	vext.8		t1l, \ad, \ad, #2	@ A2
	vmull.p8	t4q, \ad, \b1		@ E = A*B1
	.ifc		\b2, t3l
	vext.8		t3l, \bd, \bd, #2	@ B2
	.endif
	vmull.p8	t1q, t1l, \bd		@ H = A2*B
	vext.8		t2l, \ad, \ad, #3	@ A3
	vmull.p8	t3q, \ad, \b2		@ G = A*B2
	veor		t0q, t0q, t4q		@ L = E + F
	.ifc		\b3, t4l
	vext.8		t4l, \bd, \bd, #3	@ B3
	.endif
	vmull.p8	t2q, t2l, \bd		@ J = A3*B
	veor		t0l, t0l, t0h		@ t0 = (L) (P0 + P1) << 8
	veor		t1q, t1q, t3q		@ M = G + H
	.ifc		\b4, t3l
	vext.8		t3l, \bd, \bd, #4	@ B4
	.endif
	vmull.p8	t4q, \ad, \b3		@ I = A*B3
	veor		t1l, t1l, t1h		@ t1 = (M) (P2 + P3) << 16
	vmull.p8	t3q, \ad, \b4		@ K = A*B4
	vand		t0h, t0h, k48
	vand		t1h, t1h, k32
	veor		t2q, t2q, t4q		@ N = I + J
	veor		t0l, t0l, t0h
	veor		t1l, t1l, t1h
	veor		t2l, t2l, t2h		@ t2 = (N) (P4 + P5) << 24
	vand		t2h, t2h, k16
	veor		t3l, t3l, t3h		@ t3 = (K) (P6 + P7) << 32
	vmov.i64	t3h, #0
	vext.8		t0q, t0q, t0q, #15
	veor		t2l, t2l, t2h
	vext.8		t1q, t1q, t1q, #14
	vmull.p8	\rq, \ad, \bd		@ D = A*B
	vext.8		t2q, t2q, t2q, #13
	vext.8		t3q, t3q, t3q, #12
	veor		t0q, t0q, t1q
	veor		t2q, t2q, t3q
	veor		\rq, \rq, t0q
	veor		\rq, \rq, t2q
	.endm

	//
	// PMULL (64x64 -> 128) based reduction, for CPUs that support the
	// vmull.p64 instruction.
	//
	.macro		__pmull_reduce_p64
	vmull.p64	T1, XL_L, MASK

	veor		XH_L, XH_L, XM_H
	vext.8		T1, T1, T1, #8
	veor		XL_H, XL_H, XM_L
	veor		T1, T1, XL

	vmull.p64	XL, T1_H, MASK
	.endm

	//
	// Alternative reduction using only shifts and XORs, for CPUs that
	// lack the vmull.p64 instruction.
	//
	.macro		__pmull_reduce_p8
	veor		XL_H, XL_H, XM_L
	veor		XH_L, XH_L, XM_H

	vshl.i64	T1, XL, #57
	vshl.i64	T2, XL, #62
	veor		T1, T1, T2
	vshl.i64	T2, XL, #63
	veor		T1, T1, T2
	veor		XL_H, XL_H, T1_L
	veor		XH_L, XH_L, T1_H

	vshr.u64	T1, XL, #1
	veor		XH, XH, XL
	veor		XL, XL, T1
	vshr.u64	T1, T1, #6
	vshr.u64	XL, XL, #1
	.endm

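	//
	// ghash_update: fold full 16-byte blocks from [r2] into the digest
	// at [r1], with the block count in r0 and an optional extra block
	// pointer passed on the stack. The p64 instantiation consumes four
	// blocks per iteration using the precomputed powers of H (HH, HH3,
	// HH4) while the remaining count is a multiple of four, and falls
	// back to one block at a time otherwise.
	//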
	.macro		ghash_update, pn
	vld1.64		{XL}, [r1]

	/* do the head block first, if supplied */
	ldr		ip, [sp]
	teq		ip, #0
	beq		0f
	vld1.64		{T1}, [ip]
	teq		r0, #0
	b		3f

0:	.ifc		\pn, p64
	tst		r0, #3			@ skip until #blocks is a
	bne		2f			@ round multiple of 4

	vld1.8		{XL2-XM2}, [r2]!
1:	vld1.8		{T3-T2}, [r2]!
	vrev64.8	XL2, XL2
	vrev64.8	XM2, XM2

	subs		r0, r0, #4

	vext.8		T1, XL2, XL2, #8
	veor		XL2_H, XL2_H, XL_L
	veor		XL, XL, T1

	vrev64.8	T3, T3
	vrev64.8	T1, T2

	vmull.p64	XH, HH4_H, XL_H			@ a1 * b1
	veor		XL2_H, XL2_H, XL_H
	vmull.p64	XL, HH4_L, XL_L			@ a0 * b0
	vmull.p64	XM, HH34_H, XL2_H		@ (a1 + a0)(b1 + b0)

	vmull.p64	XH2, HH3_H, XM2_L		@ a1 * b1
	veor		XM2_L, XM2_L, XM2_H
	vmull.p64	XL2, HH3_L, XM2_H		@ a0 * b0
	vmull.p64	XM2, HH34_L, XM2_L		@ (a1 + a0)(b1 + b0)

	veor		XH, XH, XH2
	veor		XL, XL, XL2
	veor		XM, XM, XM2

	vmull.p64	XH2, HH_H, T3_L			@ a1 * b1
	veor		T3_L, T3_L, T3_H
	vmull.p64	XL2, HH_L, T3_H			@ a0 * b0
	vmull.p64	XM2, SHASH2_H, T3_L		@ (a1 + a0)(b1 + b0)

	veor		XH, XH, XH2
	veor		XL, XL, XL2
	veor		XM, XM, XM2

	vmull.p64	XH2, SHASH_H, T1_L		@ a1 * b1
	veor		T1_L, T1_L, T1_H
	vmull.p64	XL2, SHASH_L, T1_H		@ a0 * b0
	vmull.p64	XM2, SHASH2_p64, T1_L		@ (a1 + a0)(b1 + b0)

	veor		XH, XH, XH2
	veor		XL, XL, XL2
	veor		XM, XM, XM2

	beq		4f

	vld1.8		{XL2-XM2}, [r2]!

	veor		T1, XL, XH
	veor		XM, XM, T1

	__pmull_reduce_p64

	veor		T1, T1, XH
	veor		XL, XL, T1

	b		1b
	.endif

2:	vld1.64		{T1}, [r2]!
	subs		r0, r0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
#ifndef CONFIG_CPU_BIG_ENDIAN
	vrev64.8	T1, T1
#endif
	vext.8		IN1, T1, T1, #8
	veor		T1_L, T1_L, XL_H
	veor		XL, XL, IN1

	__pmull_\pn	XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h	@ a1 * b1
	veor		T1, T1, XL
	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)

4:	veor		T1, XL, XH
	veor		XM, XM, T1

	__pmull_reduce_\pn

	veor		T1, T1, XH
	veor		XL, XL, T1

	bne		0b

	vst1.64		{XL}, [r1]
	bx		lr
	.endm

	/*
	 * Arguments (shared by both entry points):
	 *   r0   - number of 16-byte blocks to process
	 *   r1   - the 128-bit digest (two 64-bit words)
	 *   r2   - source data
	 *   r3   - key schedule (H, and for the p64 path H^2, H^3, H^4)
	 *   [sp] - optional extra ('head') block to process first
	 */
ENTRY(pmull_ghash_update_p64)
	vld1.64		{SHASH}, [r3]!
	vld1.64		{HH}, [r3]!
	vld1.64		{HH3-HH4}, [r3]

	veor		SHASH2_p64, SHASH_L, SHASH_H
	veor		SHASH2_H, HH_L, HH_H
	veor		HH34_L, HH3_L, HH3_H
	veor		HH34_H, HH4_L, HH4_H

	vmov.i8		MASK, #0xe1
	vshl.u64	MASK, MASK, #57		@ GHASH reduction constant

	ghash_update	p64
ENDPROC(pmull_ghash_update_p64)

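	//
	// The p8 entry point precomputes the byte-rotated copies of the key
	// (s1l..s4h) and the constant masks (k16/k32/k48) consumed by
	// __pmull_p8, so they do not need to be rebuilt for every block.
	//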
ENTRY(pmull_ghash_update_p8)
	vld1.64		{SHASH}, [r3]
	veor		SHASH2_p8, SHASH_L, SHASH_H

	vext.8		s1l, SHASH_L, SHASH_L, #1
	vext.8		s2l, SHASH_L, SHASH_L, #2
	vext.8		s3l, SHASH_L, SHASH_L, #3
	vext.8		s4l, SHASH_L, SHASH_L, #4
	vext.8		s1h, SHASH_H, SHASH_H, #1
	vext.8		s2h, SHASH_H, SHASH_H, #2
	vext.8		s3h, SHASH_H, SHASH_H, #3
	vext.8		s4h, SHASH_H, SHASH_H, #4

	vmov.i64	k16, #0xffff
	vmov.i64	k32, #0xffffffff
	vmov.i64	k48, #0xffffffffffff

	ghash_update	p8
ENDPROC(pmull_ghash_update_p8)