1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 #include <linux/linkage.h>
57
/*
 * Register roles used by sha256_ni_transform.
 *
 * %rdi, %rsi and %rdx are the first three integer-argument registers of
 * the System V AMD64 calling convention.
 */
#define DIGEST_PTR		%rdi	/* hash state: loaded on entry, stored on exit */
#define DATA_PTR		%rsi	/* current 64-byte input block; advanced per block */
#define NUM_BLKS		%rdx	/* block count on entry, then end-of-data pointer */

#define SHA256CONSTANTS		%rax	/* base address of the K256 table */

/* %xmm0 is the implicit third operand of sha256rnds2, so MSG must stay there. */
#define MSG			%xmm0	/* schedule words + round constants */
#define STATE0			%xmm1	/* working state words A, B, E, F */
#define STATE1			%xmm2	/* working state words C, D, G, H */
#define MSGTMP0			%xmm3	/* message schedule, four words per register */
#define MSGTMP1			%xmm4
#define MSGTMP2			%xmm5
#define MSGTMP3			%xmm6
#define MSGTMP4			%xmm7	/* scratch for permutes and palignr results */

#define SHUF_MASK		%xmm8	/* copy of PSHUFFLE_BYTE_FLIP_MASK */

/* Copies of STATE0/STATE1 taken before each block, added back after it. */
#define ABEF_SAVE		%xmm9
#define CDGH_SAVE		%xmm10
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
 * sha256_4rounds - emit four SHA-256 rounds and the message-schedule work
 * interleaved with them.
 *
 *   \grp   group index 0..15; selects the 16-byte K256 row and, for the
 *          first four groups, the 16-byte chunk of the input block
 *   \cur   register holding (or, for \grp < 4, receiving) the four
 *          schedule words consumed by this group
 *   \prev  register holding the previous group's four schedule words
 *   \next  register that is completed into the following group's words
 *
 * Operands are AT&T order (source first, destination last).
 * sha256rnds2 takes its two W+K values from the low 64 bits of %xmm0
 * (MSG); pshufd $0x0E moves the upper pair down for the second call.
 * Swapping the STATE0/STATE1 operands between the two sha256rnds2
 * instructions leaves ABEF in STATE0 and CDGH in STATE1 again once the
 * group is finished.
 */
.macro sha256_4rounds grp, cur, prev, next
.if \grp < 4
	/* Schedule words 0..15 come straight from the byte-swapped input. */
	movdqu		\grp*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, \cur
.else
	movdqa		\cur, MSG
.endif
	paddd		\grp*16(SHA256CONSTANTS), MSG
	sha256rnds2	STATE0, STATE1
.if \grp >= 3 && \grp < 15
	/*
	 * Finish \next: add the four words that straddle \prev and \cur,
	 * then let sha256msg2 fold in the terms derived from \cur.
	 */
	movdqa		\cur, MSGTMP4
	palignr		$4, \prev, MSGTMP4
	paddd		MSGTMP4, \next
	sha256msg2	\cur, \next
.endif
	pshufd		$0x0E, MSG, MSG
	sha256rnds2	STATE1, STATE0
.if \grp >= 1 && \grp < 13
	/* First half of the schedule update; \prev is recycled as a later group. */
	sha256msg1	\cur, \prev
.endif
.endm

/*
 * sha256_ni_transform - fold whole 64-byte blocks into a SHA-256 state
 * using sha256rnds2 / sha256msg1 / sha256msg2.
 *
 * In:     DIGEST_PTR (%rdi)  eight 32-bit state words, updated in place;
 *                            accessed with unaligned loads/stores
 *         DATA_PTR   (%rsi)  input data, NUM_BLKS * 64 bytes, read with
 *                            unaligned loads
 *         NUM_BLKS   (%rdx)  number of 64-byte blocks; when the scaled
 *                            count is zero the routine returns without
 *                            reading or writing the digest
 * Out:    no register result
 * Clobb:  %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 * Stack:  not used
 *
 * NOTE(review): the C prototype is not visible in this part of the file;
 * confirm the argument types against the caller.
 */
.text
.align 32
ENTRY(sha256_ni_transform)

	shl		$6, NUM_BLKS		/* blocks -> bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* NUM_BLKS = one past the last input byte */

	/*
	 * Load the state and permute it into the layout sha256rnds2 works
	 * on.  Lane lists read from the highest dword down:
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1, STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

.Lblock_loop:
	/* Keep the incoming state; it is added back after the 64 rounds. */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* 64 rounds; the schedule registers rotate MSGTMP0 -> 1 -> 2 -> 3. */
	sha256_4rounds	0,  MSGTMP0, MSGTMP3, MSGTMP1	/* rounds  0-3  */
	sha256_4rounds	1,  MSGTMP1, MSGTMP0, MSGTMP2	/* rounds  4-7  */
	sha256_4rounds	2,  MSGTMP2, MSGTMP1, MSGTMP3	/* rounds  8-11 */
	sha256_4rounds	3,  MSGTMP3, MSGTMP2, MSGTMP0	/* rounds 12-15 */
	sha256_4rounds	4,  MSGTMP0, MSGTMP3, MSGTMP1	/* rounds 16-19 */
	sha256_4rounds	5,  MSGTMP1, MSGTMP0, MSGTMP2	/* rounds 20-23 */
	sha256_4rounds	6,  MSGTMP2, MSGTMP1, MSGTMP3	/* rounds 24-27 */
	sha256_4rounds	7,  MSGTMP3, MSGTMP2, MSGTMP0	/* rounds 28-31 */
	sha256_4rounds	8,  MSGTMP0, MSGTMP3, MSGTMP1	/* rounds 32-35 */
	sha256_4rounds	9,  MSGTMP1, MSGTMP0, MSGTMP2	/* rounds 36-39 */
	sha256_4rounds	10, MSGTMP2, MSGTMP1, MSGTMP3	/* rounds 40-43 */
	sha256_4rounds	11, MSGTMP3, MSGTMP2, MSGTMP0	/* rounds 44-47 */
	sha256_4rounds	12, MSGTMP0, MSGTMP3, MSGTMP1	/* rounds 48-51 */
	sha256_4rounds	13, MSGTMP1, MSGTMP0, MSGTMP2	/* rounds 52-55 */
	sha256_4rounds	14, MSGTMP2, MSGTMP1, MSGTMP3	/* rounds 56-59 */
	sha256_4rounds	15, MSGTMP3, MSGTMP2, MSGTMP0	/* rounds 60-63 */

	/* Add the state saved at the top of the loop. */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Next block, until DATA_PTR reaches the end pointer. */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lblock_loop

	/* Undo the entry permutation: ABEF, CDGH -> DCBA, HGFE */
	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	ret
ENDPROC(sha256_ni_transform)
331
/*
 * SHA-256 round constants, 64 32-bit words.
 *
 * Stored four per row so that row i is the 16-byte memory operand added
 * to the schedule words for rounds 4*i .. 4*i+3.  The table lives in its
 * own mergeable read-only section (entity size 256) and is aligned to
 * 64 bytes.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	/* rounds  0-3  */
	.long	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	/* rounds  4-7  */
	.long	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	/* rounds  8-11 */
	.long	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	/* rounds 12-15 */
	.long	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	/* rounds 16-19 */
	.long	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	/* rounds 20-23 */
	.long	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	/* rounds 24-27 */
	.long	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	/* rounds 28-31 */
	.long	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	/* rounds 32-35 */
	.long	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	/* rounds 36-39 */
	.long	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	/* rounds 40-43 */
	.long	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	/* rounds 44-47 */
	.long	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	/* rounds 48-51 */
	.long	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	/* rounds 52-55 */
	.long	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	/* rounds 56-59 */
	.long	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	/* rounds 60-63 */
351
/*
 * pshufb control vector that reverses the byte order inside each of the
 * four 32-bit lanes (destination bytes 0..3 take source bytes 3..0, and
 * so on).  16-byte aligned because it is loaded with movdqa.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa	0x0c0d0e0f08090a0b0405060700010203