1
2@ ====================================================================
3@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4@ project. The module is, however, dual licensed under OpenSSL and
5@ CRYPTOGAMS licenses depending on where you obtain it. For further
6@ details see http://www.openssl.org/~appro/cryptogams/.
7@
8@ Permission to use under GPL terms is granted.
9@ ====================================================================
10
11@ SHA256 block procedure for ARMv4. May 2007.
12
13@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15@ byte [on single-issue Xscale PXA250 core].
16
17@ July 2010.
18@
19@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20@ Cortex A8 core and ~20 cycles per processed byte.
21
22@ February 2011.
23@
24@ Profiler-assisted and platform-specific optimization resulted in 16%
25@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
26
27@ September 2013.
28@
29@ Add NEON implementation. On Cortex A8 it was measured to process one
30@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32@ code (meaning that latter performs sub-optimally, nothing was done
33@ about it).
34
35@ May 2014.
36@
37@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
38
@ Build-time configuration: select the architecture level and the
@ instruction set (ARM vs Thumb-2) this module is assembled for.
#ifndef __KERNEL__
# include "arm_arch.h"
#else
@ In-kernel builds have no arm_arch.h; map onto the kernel's own
@ architecture macro and cap the "maximum" arch at ARMv7.
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32			@ pre-ARMv7: classic ARM (A32) encoding only
#else
.syntax unified
# ifdef __thumb2__
@ Under Thumb-2 the plain adr has sufficient reach for this module,
@ so the adrl pseudo-op can simply alias it.
#  define adrl adr
.thumb
# else
.code   32
# endif
#endif
58
.type	K256,%object
.align	5
@ K256: the 64 SHA-256 round constants K[0..63] (FIPS 180-4) — the first
@ 32 bits of the fractional parts of the cube roots of the first 64 primes.
@ Consumed sequentially via r14 in the scalar rounds below ("*K256++").
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Link-time difference between OPENSSL_armcap_P and the function entry.
@ The code adds this word to the run-time function address (r3) to reach
@ the capability flags position-independently, with no GOT access.
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
#endif
.align	5
85
86.global	sha256_block_data_order
87.type	sha256_block_data_order,%function
88sha256_block_data_order:
89#if __ARM_ARCH__<7
90	sub	r3,pc,#8		@ sha256_block_data_order
91#else
92	adr	r3,sha256_block_data_order
93#endif
94#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
95	ldr	r12,.LOPENSSL_armcap
96	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
97	tst	r12,#ARMV8_SHA256
98	bne	.LARMv8
99	tst	r12,#ARMV7_NEON
100	bne	.LNEON
101#endif
102	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
103	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
104	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
105	sub	r14,r3,#256+32	@ K256
106	sub	sp,sp,#16*4		@ alloca(X[16])
107.Loop:
108# if __ARM_ARCH__>=7
109	ldr	r2,[r1],#4
110# else
111	ldrb	r2,[r1,#3]
112# endif
113	eor	r3,r5,r6		@ magic
114	eor	r12,r12,r12
115#if __ARM_ARCH__>=7
116	@ ldr	r2,[r1],#4			@ 0
117# if 0==15
118	str	r1,[sp,#17*4]			@ make room for r1
119# endif
120	eor	r0,r8,r8,ror#5
121	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
122	eor	r0,r0,r8,ror#19	@ Sigma1(e)
123# ifndef __ARMEB__
124	rev	r2,r2
125# endif
126#else
127	@ ldrb	r2,[r1,#3]			@ 0
128	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
129	ldrb	r12,[r1,#2]
130	ldrb	r0,[r1,#1]
131	orr	r2,r2,r12,lsl#8
132	ldrb	r12,[r1],#4
133	orr	r2,r2,r0,lsl#16
134# if 0==15
135	str	r1,[sp,#17*4]			@ make room for r1
136# endif
137	eor	r0,r8,r8,ror#5
138	orr	r2,r2,r12,lsl#24
139	eor	r0,r0,r8,ror#19	@ Sigma1(e)
140#endif
141	ldr	r12,[r14],#4			@ *K256++
142	add	r11,r11,r2			@ h+=X[i]
143	str	r2,[sp,#0*4]
144	eor	r2,r9,r10
145	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
146	and	r2,r2,r8
147	add	r11,r11,r12			@ h+=K256[i]
148	eor	r2,r2,r10			@ Ch(e,f,g)
149	eor	r0,r4,r4,ror#11
150	add	r11,r11,r2			@ h+=Ch(e,f,g)
151#if 0==31
152	and	r12,r12,#0xff
153	cmp	r12,#0xf2			@ done?
154#endif
155#if 0<15
156# if __ARM_ARCH__>=7
157	ldr	r2,[r1],#4			@ prefetch
158# else
159	ldrb	r2,[r1,#3]
160# endif
161	eor	r12,r4,r5			@ a^b, b^c in next round
162#else
163	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
164	eor	r12,r4,r5			@ a^b, b^c in next round
165	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
166#endif
167	eor	r0,r0,r4,ror#20	@ Sigma0(a)
168	and	r3,r3,r12			@ (b^c)&=(a^b)
169	add	r7,r7,r11			@ d+=h
170	eor	r3,r3,r5			@ Maj(a,b,c)
171	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
172	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
173#if __ARM_ARCH__>=7
174	@ ldr	r2,[r1],#4			@ 1
175# if 1==15
176	str	r1,[sp,#17*4]			@ make room for r1
177# endif
178	eor	r0,r7,r7,ror#5
179	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
180	eor	r0,r0,r7,ror#19	@ Sigma1(e)
181# ifndef __ARMEB__
182	rev	r2,r2
183# endif
184#else
185	@ ldrb	r2,[r1,#3]			@ 1
186	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
187	ldrb	r3,[r1,#2]
188	ldrb	r0,[r1,#1]
189	orr	r2,r2,r3,lsl#8
190	ldrb	r3,[r1],#4
191	orr	r2,r2,r0,lsl#16
192# if 1==15
193	str	r1,[sp,#17*4]			@ make room for r1
194# endif
195	eor	r0,r7,r7,ror#5
196	orr	r2,r2,r3,lsl#24
197	eor	r0,r0,r7,ror#19	@ Sigma1(e)
198#endif
199	ldr	r3,[r14],#4			@ *K256++
200	add	r10,r10,r2			@ h+=X[i]
201	str	r2,[sp,#1*4]
202	eor	r2,r8,r9
203	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
204	and	r2,r2,r7
205	add	r10,r10,r3			@ h+=K256[i]
206	eor	r2,r2,r9			@ Ch(e,f,g)
207	eor	r0,r11,r11,ror#11
208	add	r10,r10,r2			@ h+=Ch(e,f,g)
209#if 1==31
210	and	r3,r3,#0xff
211	cmp	r3,#0xf2			@ done?
212#endif
213#if 1<15
214# if __ARM_ARCH__>=7
215	ldr	r2,[r1],#4			@ prefetch
216# else
217	ldrb	r2,[r1,#3]
218# endif
219	eor	r3,r11,r4			@ a^b, b^c in next round
220#else
221	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
222	eor	r3,r11,r4			@ a^b, b^c in next round
223	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
224#endif
225	eor	r0,r0,r11,ror#20	@ Sigma0(a)
226	and	r12,r12,r3			@ (b^c)&=(a^b)
227	add	r6,r6,r10			@ d+=h
228	eor	r12,r12,r4			@ Maj(a,b,c)
229	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
230	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
231#if __ARM_ARCH__>=7
232	@ ldr	r2,[r1],#4			@ 2
233# if 2==15
234	str	r1,[sp,#17*4]			@ make room for r1
235# endif
236	eor	r0,r6,r6,ror#5
237	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
238	eor	r0,r0,r6,ror#19	@ Sigma1(e)
239# ifndef __ARMEB__
240	rev	r2,r2
241# endif
242#else
243	@ ldrb	r2,[r1,#3]			@ 2
244	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
245	ldrb	r12,[r1,#2]
246	ldrb	r0,[r1,#1]
247	orr	r2,r2,r12,lsl#8
248	ldrb	r12,[r1],#4
249	orr	r2,r2,r0,lsl#16
250# if 2==15
251	str	r1,[sp,#17*4]			@ make room for r1
252# endif
253	eor	r0,r6,r6,ror#5
254	orr	r2,r2,r12,lsl#24
255	eor	r0,r0,r6,ror#19	@ Sigma1(e)
256#endif
257	ldr	r12,[r14],#4			@ *K256++
258	add	r9,r9,r2			@ h+=X[i]
259	str	r2,[sp,#2*4]
260	eor	r2,r7,r8
261	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
262	and	r2,r2,r6
263	add	r9,r9,r12			@ h+=K256[i]
264	eor	r2,r2,r8			@ Ch(e,f,g)
265	eor	r0,r10,r10,ror#11
266	add	r9,r9,r2			@ h+=Ch(e,f,g)
267#if 2==31
268	and	r12,r12,#0xff
269	cmp	r12,#0xf2			@ done?
270#endif
271#if 2<15
272# if __ARM_ARCH__>=7
273	ldr	r2,[r1],#4			@ prefetch
274# else
275	ldrb	r2,[r1,#3]
276# endif
277	eor	r12,r10,r11			@ a^b, b^c in next round
278#else
279	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
280	eor	r12,r10,r11			@ a^b, b^c in next round
281	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
282#endif
283	eor	r0,r0,r10,ror#20	@ Sigma0(a)
284	and	r3,r3,r12			@ (b^c)&=(a^b)
285	add	r5,r5,r9			@ d+=h
286	eor	r3,r3,r11			@ Maj(a,b,c)
287	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
288	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
289#if __ARM_ARCH__>=7
290	@ ldr	r2,[r1],#4			@ 3
291# if 3==15
292	str	r1,[sp,#17*4]			@ make room for r1
293# endif
294	eor	r0,r5,r5,ror#5
295	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
296	eor	r0,r0,r5,ror#19	@ Sigma1(e)
297# ifndef __ARMEB__
298	rev	r2,r2
299# endif
300#else
301	@ ldrb	r2,[r1,#3]			@ 3
302	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
303	ldrb	r3,[r1,#2]
304	ldrb	r0,[r1,#1]
305	orr	r2,r2,r3,lsl#8
306	ldrb	r3,[r1],#4
307	orr	r2,r2,r0,lsl#16
308# if 3==15
309	str	r1,[sp,#17*4]			@ make room for r1
310# endif
311	eor	r0,r5,r5,ror#5
312	orr	r2,r2,r3,lsl#24
313	eor	r0,r0,r5,ror#19	@ Sigma1(e)
314#endif
315	ldr	r3,[r14],#4			@ *K256++
316	add	r8,r8,r2			@ h+=X[i]
317	str	r2,[sp,#3*4]
318	eor	r2,r6,r7
319	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
320	and	r2,r2,r5
321	add	r8,r8,r3			@ h+=K256[i]
322	eor	r2,r2,r7			@ Ch(e,f,g)
323	eor	r0,r9,r9,ror#11
324	add	r8,r8,r2			@ h+=Ch(e,f,g)
325#if 3==31
326	and	r3,r3,#0xff
327	cmp	r3,#0xf2			@ done?
328#endif
329#if 3<15
330# if __ARM_ARCH__>=7
331	ldr	r2,[r1],#4			@ prefetch
332# else
333	ldrb	r2,[r1,#3]
334# endif
335	eor	r3,r9,r10			@ a^b, b^c in next round
336#else
337	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
338	eor	r3,r9,r10			@ a^b, b^c in next round
339	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
340#endif
341	eor	r0,r0,r9,ror#20	@ Sigma0(a)
342	and	r12,r12,r3			@ (b^c)&=(a^b)
343	add	r4,r4,r8			@ d+=h
344	eor	r12,r12,r10			@ Maj(a,b,c)
345	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
346	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
347#if __ARM_ARCH__>=7
348	@ ldr	r2,[r1],#4			@ 4
349# if 4==15
350	str	r1,[sp,#17*4]			@ make room for r1
351# endif
352	eor	r0,r4,r4,ror#5
353	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
354	eor	r0,r0,r4,ror#19	@ Sigma1(e)
355# ifndef __ARMEB__
356	rev	r2,r2
357# endif
358#else
359	@ ldrb	r2,[r1,#3]			@ 4
360	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
361	ldrb	r12,[r1,#2]
362	ldrb	r0,[r1,#1]
363	orr	r2,r2,r12,lsl#8
364	ldrb	r12,[r1],#4
365	orr	r2,r2,r0,lsl#16
366# if 4==15
367	str	r1,[sp,#17*4]			@ make room for r1
368# endif
369	eor	r0,r4,r4,ror#5
370	orr	r2,r2,r12,lsl#24
371	eor	r0,r0,r4,ror#19	@ Sigma1(e)
372#endif
373	ldr	r12,[r14],#4			@ *K256++
374	add	r7,r7,r2			@ h+=X[i]
375	str	r2,[sp,#4*4]
376	eor	r2,r5,r6
377	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
378	and	r2,r2,r4
379	add	r7,r7,r12			@ h+=K256[i]
380	eor	r2,r2,r6			@ Ch(e,f,g)
381	eor	r0,r8,r8,ror#11
382	add	r7,r7,r2			@ h+=Ch(e,f,g)
383#if 4==31
384	and	r12,r12,#0xff
385	cmp	r12,#0xf2			@ done?
386#endif
387#if 4<15
388# if __ARM_ARCH__>=7
389	ldr	r2,[r1],#4			@ prefetch
390# else
391	ldrb	r2,[r1,#3]
392# endif
393	eor	r12,r8,r9			@ a^b, b^c in next round
394#else
395	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
396	eor	r12,r8,r9			@ a^b, b^c in next round
397	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
398#endif
399	eor	r0,r0,r8,ror#20	@ Sigma0(a)
400	and	r3,r3,r12			@ (b^c)&=(a^b)
401	add	r11,r11,r7			@ d+=h
402	eor	r3,r3,r9			@ Maj(a,b,c)
403	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
404	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
405#if __ARM_ARCH__>=7
406	@ ldr	r2,[r1],#4			@ 5
407# if 5==15
408	str	r1,[sp,#17*4]			@ make room for r1
409# endif
410	eor	r0,r11,r11,ror#5
411	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
412	eor	r0,r0,r11,ror#19	@ Sigma1(e)
413# ifndef __ARMEB__
414	rev	r2,r2
415# endif
416#else
417	@ ldrb	r2,[r1,#3]			@ 5
418	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
419	ldrb	r3,[r1,#2]
420	ldrb	r0,[r1,#1]
421	orr	r2,r2,r3,lsl#8
422	ldrb	r3,[r1],#4
423	orr	r2,r2,r0,lsl#16
424# if 5==15
425	str	r1,[sp,#17*4]			@ make room for r1
426# endif
427	eor	r0,r11,r11,ror#5
428	orr	r2,r2,r3,lsl#24
429	eor	r0,r0,r11,ror#19	@ Sigma1(e)
430#endif
431	ldr	r3,[r14],#4			@ *K256++
432	add	r6,r6,r2			@ h+=X[i]
433	str	r2,[sp,#5*4]
434	eor	r2,r4,r5
435	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
436	and	r2,r2,r11
437	add	r6,r6,r3			@ h+=K256[i]
438	eor	r2,r2,r5			@ Ch(e,f,g)
439	eor	r0,r7,r7,ror#11
440	add	r6,r6,r2			@ h+=Ch(e,f,g)
441#if 5==31
442	and	r3,r3,#0xff
443	cmp	r3,#0xf2			@ done?
444#endif
445#if 5<15
446# if __ARM_ARCH__>=7
447	ldr	r2,[r1],#4			@ prefetch
448# else
449	ldrb	r2,[r1,#3]
450# endif
451	eor	r3,r7,r8			@ a^b, b^c in next round
452#else
453	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
454	eor	r3,r7,r8			@ a^b, b^c in next round
455	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
456#endif
457	eor	r0,r0,r7,ror#20	@ Sigma0(a)
458	and	r12,r12,r3			@ (b^c)&=(a^b)
459	add	r10,r10,r6			@ d+=h
460	eor	r12,r12,r8			@ Maj(a,b,c)
461	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
462	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
463#if __ARM_ARCH__>=7
464	@ ldr	r2,[r1],#4			@ 6
465# if 6==15
466	str	r1,[sp,#17*4]			@ make room for r1
467# endif
468	eor	r0,r10,r10,ror#5
469	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
470	eor	r0,r0,r10,ror#19	@ Sigma1(e)
471# ifndef __ARMEB__
472	rev	r2,r2
473# endif
474#else
475	@ ldrb	r2,[r1,#3]			@ 6
476	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
477	ldrb	r12,[r1,#2]
478	ldrb	r0,[r1,#1]
479	orr	r2,r2,r12,lsl#8
480	ldrb	r12,[r1],#4
481	orr	r2,r2,r0,lsl#16
482# if 6==15
483	str	r1,[sp,#17*4]			@ make room for r1
484# endif
485	eor	r0,r10,r10,ror#5
486	orr	r2,r2,r12,lsl#24
487	eor	r0,r0,r10,ror#19	@ Sigma1(e)
488#endif
489	ldr	r12,[r14],#4			@ *K256++
490	add	r5,r5,r2			@ h+=X[i]
491	str	r2,[sp,#6*4]
492	eor	r2,r11,r4
493	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
494	and	r2,r2,r10
495	add	r5,r5,r12			@ h+=K256[i]
496	eor	r2,r2,r4			@ Ch(e,f,g)
497	eor	r0,r6,r6,ror#11
498	add	r5,r5,r2			@ h+=Ch(e,f,g)
499#if 6==31
500	and	r12,r12,#0xff
501	cmp	r12,#0xf2			@ done?
502#endif
503#if 6<15
504# if __ARM_ARCH__>=7
505	ldr	r2,[r1],#4			@ prefetch
506# else
507	ldrb	r2,[r1,#3]
508# endif
509	eor	r12,r6,r7			@ a^b, b^c in next round
510#else
511	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
512	eor	r12,r6,r7			@ a^b, b^c in next round
513	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
514#endif
515	eor	r0,r0,r6,ror#20	@ Sigma0(a)
516	and	r3,r3,r12			@ (b^c)&=(a^b)
517	add	r9,r9,r5			@ d+=h
518	eor	r3,r3,r7			@ Maj(a,b,c)
519	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
520	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
521#if __ARM_ARCH__>=7
522	@ ldr	r2,[r1],#4			@ 7
523# if 7==15
524	str	r1,[sp,#17*4]			@ make room for r1
525# endif
526	eor	r0,r9,r9,ror#5
527	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
528	eor	r0,r0,r9,ror#19	@ Sigma1(e)
529# ifndef __ARMEB__
530	rev	r2,r2
531# endif
532#else
533	@ ldrb	r2,[r1,#3]			@ 7
534	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
535	ldrb	r3,[r1,#2]
536	ldrb	r0,[r1,#1]
537	orr	r2,r2,r3,lsl#8
538	ldrb	r3,[r1],#4
539	orr	r2,r2,r0,lsl#16
540# if 7==15
541	str	r1,[sp,#17*4]			@ make room for r1
542# endif
543	eor	r0,r9,r9,ror#5
544	orr	r2,r2,r3,lsl#24
545	eor	r0,r0,r9,ror#19	@ Sigma1(e)
546#endif
547	ldr	r3,[r14],#4			@ *K256++
548	add	r4,r4,r2			@ h+=X[i]
549	str	r2,[sp,#7*4]
550	eor	r2,r10,r11
551	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
552	and	r2,r2,r9
553	add	r4,r4,r3			@ h+=K256[i]
554	eor	r2,r2,r11			@ Ch(e,f,g)
555	eor	r0,r5,r5,ror#11
556	add	r4,r4,r2			@ h+=Ch(e,f,g)
557#if 7==31
558	and	r3,r3,#0xff
559	cmp	r3,#0xf2			@ done?
560#endif
561#if 7<15
562# if __ARM_ARCH__>=7
563	ldr	r2,[r1],#4			@ prefetch
564# else
565	ldrb	r2,[r1,#3]
566# endif
567	eor	r3,r5,r6			@ a^b, b^c in next round
568#else
569	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
570	eor	r3,r5,r6			@ a^b, b^c in next round
571	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
572#endif
573	eor	r0,r0,r5,ror#20	@ Sigma0(a)
574	and	r12,r12,r3			@ (b^c)&=(a^b)
575	add	r8,r8,r4			@ d+=h
576	eor	r12,r12,r6			@ Maj(a,b,c)
577	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
578	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
579#if __ARM_ARCH__>=7
580	@ ldr	r2,[r1],#4			@ 8
581# if 8==15
582	str	r1,[sp,#17*4]			@ make room for r1
583# endif
584	eor	r0,r8,r8,ror#5
585	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
586	eor	r0,r0,r8,ror#19	@ Sigma1(e)
587# ifndef __ARMEB__
588	rev	r2,r2
589# endif
590#else
591	@ ldrb	r2,[r1,#3]			@ 8
592	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
593	ldrb	r12,[r1,#2]
594	ldrb	r0,[r1,#1]
595	orr	r2,r2,r12,lsl#8
596	ldrb	r12,[r1],#4
597	orr	r2,r2,r0,lsl#16
598# if 8==15
599	str	r1,[sp,#17*4]			@ make room for r1
600# endif
601	eor	r0,r8,r8,ror#5
602	orr	r2,r2,r12,lsl#24
603	eor	r0,r0,r8,ror#19	@ Sigma1(e)
604#endif
605	ldr	r12,[r14],#4			@ *K256++
606	add	r11,r11,r2			@ h+=X[i]
607	str	r2,[sp,#8*4]
608	eor	r2,r9,r10
609	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
610	and	r2,r2,r8
611	add	r11,r11,r12			@ h+=K256[i]
612	eor	r2,r2,r10			@ Ch(e,f,g)
613	eor	r0,r4,r4,ror#11
614	add	r11,r11,r2			@ h+=Ch(e,f,g)
615#if 8==31
616	and	r12,r12,#0xff
617	cmp	r12,#0xf2			@ done?
618#endif
619#if 8<15
620# if __ARM_ARCH__>=7
621	ldr	r2,[r1],#4			@ prefetch
622# else
623	ldrb	r2,[r1,#3]
624# endif
625	eor	r12,r4,r5			@ a^b, b^c in next round
626#else
627	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
628	eor	r12,r4,r5			@ a^b, b^c in next round
629	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
630#endif
631	eor	r0,r0,r4,ror#20	@ Sigma0(a)
632	and	r3,r3,r12			@ (b^c)&=(a^b)
633	add	r7,r7,r11			@ d+=h
634	eor	r3,r3,r5			@ Maj(a,b,c)
635	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
636	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
637#if __ARM_ARCH__>=7
638	@ ldr	r2,[r1],#4			@ 9
639# if 9==15
640	str	r1,[sp,#17*4]			@ make room for r1
641# endif
642	eor	r0,r7,r7,ror#5
643	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
644	eor	r0,r0,r7,ror#19	@ Sigma1(e)
645# ifndef __ARMEB__
646	rev	r2,r2
647# endif
648#else
649	@ ldrb	r2,[r1,#3]			@ 9
650	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
651	ldrb	r3,[r1,#2]
652	ldrb	r0,[r1,#1]
653	orr	r2,r2,r3,lsl#8
654	ldrb	r3,[r1],#4
655	orr	r2,r2,r0,lsl#16
656# if 9==15
657	str	r1,[sp,#17*4]			@ make room for r1
658# endif
659	eor	r0,r7,r7,ror#5
660	orr	r2,r2,r3,lsl#24
661	eor	r0,r0,r7,ror#19	@ Sigma1(e)
662#endif
663	ldr	r3,[r14],#4			@ *K256++
664	add	r10,r10,r2			@ h+=X[i]
665	str	r2,[sp,#9*4]
666	eor	r2,r8,r9
667	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
668	and	r2,r2,r7
669	add	r10,r10,r3			@ h+=K256[i]
670	eor	r2,r2,r9			@ Ch(e,f,g)
671	eor	r0,r11,r11,ror#11
672	add	r10,r10,r2			@ h+=Ch(e,f,g)
673#if 9==31
674	and	r3,r3,#0xff
675	cmp	r3,#0xf2			@ done?
676#endif
677#if 9<15
678# if __ARM_ARCH__>=7
679	ldr	r2,[r1],#4			@ prefetch
680# else
681	ldrb	r2,[r1,#3]
682# endif
683	eor	r3,r11,r4			@ a^b, b^c in next round
684#else
685	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
686	eor	r3,r11,r4			@ a^b, b^c in next round
687	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
688#endif
689	eor	r0,r0,r11,ror#20	@ Sigma0(a)
690	and	r12,r12,r3			@ (b^c)&=(a^b)
691	add	r6,r6,r10			@ d+=h
692	eor	r12,r12,r4			@ Maj(a,b,c)
693	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
694	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
695#if __ARM_ARCH__>=7
696	@ ldr	r2,[r1],#4			@ 10
697# if 10==15
698	str	r1,[sp,#17*4]			@ make room for r1
699# endif
700	eor	r0,r6,r6,ror#5
701	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
702	eor	r0,r0,r6,ror#19	@ Sigma1(e)
703# ifndef __ARMEB__
704	rev	r2,r2
705# endif
706#else
707	@ ldrb	r2,[r1,#3]			@ 10
708	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
709	ldrb	r12,[r1,#2]
710	ldrb	r0,[r1,#1]
711	orr	r2,r2,r12,lsl#8
712	ldrb	r12,[r1],#4
713	orr	r2,r2,r0,lsl#16
714# if 10==15
715	str	r1,[sp,#17*4]			@ make room for r1
716# endif
717	eor	r0,r6,r6,ror#5
718	orr	r2,r2,r12,lsl#24
719	eor	r0,r0,r6,ror#19	@ Sigma1(e)
720#endif
721	ldr	r12,[r14],#4			@ *K256++
722	add	r9,r9,r2			@ h+=X[i]
723	str	r2,[sp,#10*4]
724	eor	r2,r7,r8
725	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
726	and	r2,r2,r6
727	add	r9,r9,r12			@ h+=K256[i]
728	eor	r2,r2,r8			@ Ch(e,f,g)
729	eor	r0,r10,r10,ror#11
730	add	r9,r9,r2			@ h+=Ch(e,f,g)
731#if 10==31
732	and	r12,r12,#0xff
733	cmp	r12,#0xf2			@ done?
734#endif
735#if 10<15
736# if __ARM_ARCH__>=7
737	ldr	r2,[r1],#4			@ prefetch
738# else
739	ldrb	r2,[r1,#3]
740# endif
741	eor	r12,r10,r11			@ a^b, b^c in next round
742#else
743	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
744	eor	r12,r10,r11			@ a^b, b^c in next round
745	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
746#endif
747	eor	r0,r0,r10,ror#20	@ Sigma0(a)
748	and	r3,r3,r12			@ (b^c)&=(a^b)
749	add	r5,r5,r9			@ d+=h
750	eor	r3,r3,r11			@ Maj(a,b,c)
751	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
752	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
753#if __ARM_ARCH__>=7
754	@ ldr	r2,[r1],#4			@ 11
755# if 11==15
756	str	r1,[sp,#17*4]			@ make room for r1
757# endif
758	eor	r0,r5,r5,ror#5
759	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
760	eor	r0,r0,r5,ror#19	@ Sigma1(e)
761# ifndef __ARMEB__
762	rev	r2,r2
763# endif
764#else
765	@ ldrb	r2,[r1,#3]			@ 11
766	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
767	ldrb	r3,[r1,#2]
768	ldrb	r0,[r1,#1]
769	orr	r2,r2,r3,lsl#8
770	ldrb	r3,[r1],#4
771	orr	r2,r2,r0,lsl#16
772# if 11==15
773	str	r1,[sp,#17*4]			@ make room for r1
774# endif
775	eor	r0,r5,r5,ror#5
776	orr	r2,r2,r3,lsl#24
777	eor	r0,r0,r5,ror#19	@ Sigma1(e)
778#endif
779	ldr	r3,[r14],#4			@ *K256++
780	add	r8,r8,r2			@ h+=X[i]
781	str	r2,[sp,#11*4]
782	eor	r2,r6,r7
783	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
784	and	r2,r2,r5
785	add	r8,r8,r3			@ h+=K256[i]
786	eor	r2,r2,r7			@ Ch(e,f,g)
787	eor	r0,r9,r9,ror#11
788	add	r8,r8,r2			@ h+=Ch(e,f,g)
789#if 11==31
790	and	r3,r3,#0xff
791	cmp	r3,#0xf2			@ done?
792#endif
793#if 11<15
794# if __ARM_ARCH__>=7
795	ldr	r2,[r1],#4			@ prefetch
796# else
797	ldrb	r2,[r1,#3]
798# endif
799	eor	r3,r9,r10			@ a^b, b^c in next round
800#else
801	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
802	eor	r3,r9,r10			@ a^b, b^c in next round
803	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
804#endif
805	eor	r0,r0,r9,ror#20	@ Sigma0(a)
806	and	r12,r12,r3			@ (b^c)&=(a^b)
807	add	r4,r4,r8			@ d+=h
808	eor	r12,r12,r10			@ Maj(a,b,c)
809	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
810	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
811#if __ARM_ARCH__>=7
812	@ ldr	r2,[r1],#4			@ 12
813# if 12==15
814	str	r1,[sp,#17*4]			@ make room for r1
815# endif
816	eor	r0,r4,r4,ror#5
817	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
818	eor	r0,r0,r4,ror#19	@ Sigma1(e)
819# ifndef __ARMEB__
820	rev	r2,r2
821# endif
822#else
823	@ ldrb	r2,[r1,#3]			@ 12
824	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
825	ldrb	r12,[r1,#2]
826	ldrb	r0,[r1,#1]
827	orr	r2,r2,r12,lsl#8
828	ldrb	r12,[r1],#4
829	orr	r2,r2,r0,lsl#16
830# if 12==15
831	str	r1,[sp,#17*4]			@ make room for r1
832# endif
833	eor	r0,r4,r4,ror#5
834	orr	r2,r2,r12,lsl#24
835	eor	r0,r0,r4,ror#19	@ Sigma1(e)
836#endif
837	ldr	r12,[r14],#4			@ *K256++
838	add	r7,r7,r2			@ h+=X[i]
839	str	r2,[sp,#12*4]
840	eor	r2,r5,r6
841	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
842	and	r2,r2,r4
843	add	r7,r7,r12			@ h+=K256[i]
844	eor	r2,r2,r6			@ Ch(e,f,g)
845	eor	r0,r8,r8,ror#11
846	add	r7,r7,r2			@ h+=Ch(e,f,g)
847#if 12==31
848	and	r12,r12,#0xff
849	cmp	r12,#0xf2			@ done?
850#endif
851#if 12<15
852# if __ARM_ARCH__>=7
853	ldr	r2,[r1],#4			@ prefetch
854# else
855	ldrb	r2,[r1,#3]
856# endif
857	eor	r12,r8,r9			@ a^b, b^c in next round
858#else
859	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
860	eor	r12,r8,r9			@ a^b, b^c in next round
861	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
862#endif
863	eor	r0,r0,r8,ror#20	@ Sigma0(a)
864	and	r3,r3,r12			@ (b^c)&=(a^b)
865	add	r11,r11,r7			@ d+=h
866	eor	r3,r3,r9			@ Maj(a,b,c)
867	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
868	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
869#if __ARM_ARCH__>=7
870	@ ldr	r2,[r1],#4			@ 13
871# if 13==15
872	str	r1,[sp,#17*4]			@ make room for r1
873# endif
874	eor	r0,r11,r11,ror#5
875	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
876	eor	r0,r0,r11,ror#19	@ Sigma1(e)
877# ifndef __ARMEB__
878	rev	r2,r2
879# endif
880#else
881	@ ldrb	r2,[r1,#3]			@ 13
882	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
883	ldrb	r3,[r1,#2]
884	ldrb	r0,[r1,#1]
885	orr	r2,r2,r3,lsl#8
886	ldrb	r3,[r1],#4
887	orr	r2,r2,r0,lsl#16
888# if 13==15
889	str	r1,[sp,#17*4]			@ make room for r1
890# endif
891	eor	r0,r11,r11,ror#5
892	orr	r2,r2,r3,lsl#24
893	eor	r0,r0,r11,ror#19	@ Sigma1(e)
894#endif
895	ldr	r3,[r14],#4			@ *K256++
896	add	r6,r6,r2			@ h+=X[i]
897	str	r2,[sp,#13*4]
898	eor	r2,r4,r5
899	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
900	and	r2,r2,r11
901	add	r6,r6,r3			@ h+=K256[i]
902	eor	r2,r2,r5			@ Ch(e,f,g)
903	eor	r0,r7,r7,ror#11
904	add	r6,r6,r2			@ h+=Ch(e,f,g)
905#if 13==31
906	and	r3,r3,#0xff
907	cmp	r3,#0xf2			@ done?
908#endif
909#if 13<15
910# if __ARM_ARCH__>=7
911	ldr	r2,[r1],#4			@ prefetch
912# else
913	ldrb	r2,[r1,#3]
914# endif
915	eor	r3,r7,r8			@ a^b, b^c in next round
916#else
917	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
918	eor	r3,r7,r8			@ a^b, b^c in next round
919	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
920#endif
921	eor	r0,r0,r7,ror#20	@ Sigma0(a)
922	and	r12,r12,r3			@ (b^c)&=(a^b)
923	add	r10,r10,r6			@ d+=h
924	eor	r12,r12,r8			@ Maj(a,b,c)
925	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
926	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
927#if __ARM_ARCH__>=7
928	@ ldr	r2,[r1],#4			@ 14
929# if 14==15
930	str	r1,[sp,#17*4]			@ make room for r1
931# endif
932	eor	r0,r10,r10,ror#5
933	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
934	eor	r0,r0,r10,ror#19	@ Sigma1(e)
935# ifndef __ARMEB__
936	rev	r2,r2
937# endif
938#else
939	@ ldrb	r2,[r1,#3]			@ 14
940	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
941	ldrb	r12,[r1,#2]
942	ldrb	r0,[r1,#1]
943	orr	r2,r2,r12,lsl#8
944	ldrb	r12,[r1],#4
945	orr	r2,r2,r0,lsl#16
946# if 14==15
947	str	r1,[sp,#17*4]			@ make room for r1
948# endif
949	eor	r0,r10,r10,ror#5
950	orr	r2,r2,r12,lsl#24
951	eor	r0,r0,r10,ror#19	@ Sigma1(e)
952#endif
953	ldr	r12,[r14],#4			@ *K256++
954	add	r5,r5,r2			@ h+=X[i]
955	str	r2,[sp,#14*4]
956	eor	r2,r11,r4
957	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
958	and	r2,r2,r10
959	add	r5,r5,r12			@ h+=K256[i]
960	eor	r2,r2,r4			@ Ch(e,f,g)
961	eor	r0,r6,r6,ror#11
962	add	r5,r5,r2			@ h+=Ch(e,f,g)
963#if 14==31
964	and	r12,r12,#0xff
965	cmp	r12,#0xf2			@ done?
966#endif
967#if 14<15
968# if __ARM_ARCH__>=7
969	ldr	r2,[r1],#4			@ prefetch
970# else
971	ldrb	r2,[r1,#3]
972# endif
973	eor	r12,r6,r7			@ a^b, b^c in next round
974#else
975	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
976	eor	r12,r6,r7			@ a^b, b^c in next round
977	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
978#endif
979	eor	r0,r0,r6,ror#20	@ Sigma0(a)
980	and	r3,r3,r12			@ (b^c)&=(a^b)
981	add	r9,r9,r5			@ d+=h
982	eor	r3,r3,r7			@ Maj(a,b,c)
983	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
984	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
985#if __ARM_ARCH__>=7
986	@ ldr	r2,[r1],#4			@ 15
987# if 15==15
988	str	r1,[sp,#17*4]			@ make room for r1
989# endif
990	eor	r0,r9,r9,ror#5
991	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
992	eor	r0,r0,r9,ror#19	@ Sigma1(e)
993# ifndef __ARMEB__
994	rev	r2,r2
995# endif
996#else
997	@ ldrb	r2,[r1,#3]			@ 15
998	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
999	ldrb	r3,[r1,#2]
1000	ldrb	r0,[r1,#1]
1001	orr	r2,r2,r3,lsl#8
1002	ldrb	r3,[r1],#4
1003	orr	r2,r2,r0,lsl#16
1004# if 15==15
1005	str	r1,[sp,#17*4]			@ make room for r1
1006# endif
1007	eor	r0,r9,r9,ror#5
1008	orr	r2,r2,r3,lsl#24
1009	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1010#endif
1011	ldr	r3,[r14],#4			@ *K256++
1012	add	r4,r4,r2			@ h+=X[i]
1013	str	r2,[sp,#15*4]
1014	eor	r2,r10,r11
1015	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1016	and	r2,r2,r9
1017	add	r4,r4,r3			@ h+=K256[i]
1018	eor	r2,r2,r11			@ Ch(e,f,g)
1019	eor	r0,r5,r5,ror#11
1020	add	r4,r4,r2			@ h+=Ch(e,f,g)
1021#if 15==31
1022	and	r3,r3,#0xff
1023	cmp	r3,#0xf2			@ done?
1024#endif
1025#if 15<15
1026# if __ARM_ARCH__>=7
1027	ldr	r2,[r1],#4			@ prefetch
1028# else
1029	ldrb	r2,[r1,#3]
1030# endif
1031	eor	r3,r5,r6			@ a^b, b^c in next round
1032#else
1033	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1034	eor	r3,r5,r6			@ a^b, b^c in next round
1035	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1036#endif
1037	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1038	and	r12,r12,r3			@ (b^c)&=(a^b)
1039	add	r8,r8,r4			@ d+=h
1040	eor	r12,r12,r6			@ Maj(a,b,c)
1041	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1042	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1043.Lrounds_16_xx:
1044	@ ldr	r2,[sp,#1*4]		@ 16
1045	@ ldr	r1,[sp,#14*4]
1046	mov	r0,r2,ror#7
1047	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1048	mov	r12,r1,ror#17
1049	eor	r0,r0,r2,ror#18
1050	eor	r12,r12,r1,ror#19
1051	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1052	ldr	r2,[sp,#0*4]
1053	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1054	ldr	r1,[sp,#9*4]
1055
1056	add	r12,r12,r0
1057	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1058	add	r2,r2,r12
1059	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1060	add	r2,r2,r1			@ X[i]
1061	ldr	r12,[r14],#4			@ *K256++
1062	add	r11,r11,r2			@ h+=X[i]
1063	str	r2,[sp,#0*4]
1064	eor	r2,r9,r10
1065	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1066	and	r2,r2,r8
1067	add	r11,r11,r12			@ h+=K256[i]
1068	eor	r2,r2,r10			@ Ch(e,f,g)
1069	eor	r0,r4,r4,ror#11
1070	add	r11,r11,r2			@ h+=Ch(e,f,g)
1071#if 16==31
1072	and	r12,r12,#0xff
1073	cmp	r12,#0xf2			@ done?
1074#endif
1075#if 16<15
1076# if __ARM_ARCH__>=7
1077	ldr	r2,[r1],#4			@ prefetch
1078# else
1079	ldrb	r2,[r1,#3]
1080# endif
1081	eor	r12,r4,r5			@ a^b, b^c in next round
1082#else
1083	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1084	eor	r12,r4,r5			@ a^b, b^c in next round
1085	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1086#endif
1087	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1088	and	r3,r3,r12			@ (b^c)&=(a^b)
1089	add	r7,r7,r11			@ d+=h
1090	eor	r3,r3,r5			@ Maj(a,b,c)
1091	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1092	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1093	@ ldr	r2,[sp,#2*4]		@ 17
1094	@ ldr	r1,[sp,#15*4]
1095	mov	r0,r2,ror#7
1096	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1097	mov	r3,r1,ror#17
1098	eor	r0,r0,r2,ror#18
1099	eor	r3,r3,r1,ror#19
1100	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1101	ldr	r2,[sp,#1*4]
1102	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1103	ldr	r1,[sp,#10*4]
1104
1105	add	r3,r3,r0
1106	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1107	add	r2,r2,r3
1108	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1109	add	r2,r2,r1			@ X[i]
1110	ldr	r3,[r14],#4			@ *K256++
1111	add	r10,r10,r2			@ h+=X[i]
1112	str	r2,[sp,#1*4]
1113	eor	r2,r8,r9
1114	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1115	and	r2,r2,r7
1116	add	r10,r10,r3			@ h+=K256[i]
1117	eor	r2,r2,r9			@ Ch(e,f,g)
1118	eor	r0,r11,r11,ror#11
1119	add	r10,r10,r2			@ h+=Ch(e,f,g)
1120#if 17==31
1121	and	r3,r3,#0xff
1122	cmp	r3,#0xf2			@ done?
1123#endif
1124#if 17<15
1125# if __ARM_ARCH__>=7
1126	ldr	r2,[r1],#4			@ prefetch
1127# else
1128	ldrb	r2,[r1,#3]
1129# endif
1130	eor	r3,r11,r4			@ a^b, b^c in next round
1131#else
1132	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1133	eor	r3,r11,r4			@ a^b, b^c in next round
1134	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1135#endif
1136	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1137	and	r12,r12,r3			@ (b^c)&=(a^b)
1138	add	r6,r6,r10			@ d+=h
1139	eor	r12,r12,r4			@ Maj(a,b,c)
1140	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1141	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1142	@ ldr	r2,[sp,#3*4]		@ 18
1143	@ ldr	r1,[sp,#0*4]
1144	mov	r0,r2,ror#7
1145	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1146	mov	r12,r1,ror#17
1147	eor	r0,r0,r2,ror#18
1148	eor	r12,r12,r1,ror#19
1149	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1150	ldr	r2,[sp,#2*4]
1151	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1152	ldr	r1,[sp,#11*4]
1153
1154	add	r12,r12,r0
1155	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1156	add	r2,r2,r12
1157	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1158	add	r2,r2,r1			@ X[i]
1159	ldr	r12,[r14],#4			@ *K256++
1160	add	r9,r9,r2			@ h+=X[i]
1161	str	r2,[sp,#2*4]
1162	eor	r2,r7,r8
1163	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1164	and	r2,r2,r6
1165	add	r9,r9,r12			@ h+=K256[i]
1166	eor	r2,r2,r8			@ Ch(e,f,g)
1167	eor	r0,r10,r10,ror#11
1168	add	r9,r9,r2			@ h+=Ch(e,f,g)
1169#if 18==31
1170	and	r12,r12,#0xff
1171	cmp	r12,#0xf2			@ done?
1172#endif
1173#if 18<15
1174# if __ARM_ARCH__>=7
1175	ldr	r2,[r1],#4			@ prefetch
1176# else
1177	ldrb	r2,[r1,#3]
1178# endif
1179	eor	r12,r10,r11			@ a^b, b^c in next round
1180#else
1181	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1182	eor	r12,r10,r11			@ a^b, b^c in next round
1183	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1184#endif
1185	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1186	and	r3,r3,r12			@ (b^c)&=(a^b)
1187	add	r5,r5,r9			@ d+=h
1188	eor	r3,r3,r11			@ Maj(a,b,c)
1189	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1190	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1191	@ ldr	r2,[sp,#4*4]		@ 19
1192	@ ldr	r1,[sp,#1*4]
1193	mov	r0,r2,ror#7
1194	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1195	mov	r3,r1,ror#17
1196	eor	r0,r0,r2,ror#18
1197	eor	r3,r3,r1,ror#19
1198	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1199	ldr	r2,[sp,#3*4]
1200	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1201	ldr	r1,[sp,#12*4]
1202
1203	add	r3,r3,r0
1204	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1205	add	r2,r2,r3
1206	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1207	add	r2,r2,r1			@ X[i]
1208	ldr	r3,[r14],#4			@ *K256++
1209	add	r8,r8,r2			@ h+=X[i]
1210	str	r2,[sp,#3*4]
1211	eor	r2,r6,r7
1212	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1213	and	r2,r2,r5
1214	add	r8,r8,r3			@ h+=K256[i]
1215	eor	r2,r2,r7			@ Ch(e,f,g)
1216	eor	r0,r9,r9,ror#11
1217	add	r8,r8,r2			@ h+=Ch(e,f,g)
1218#if 19==31
1219	and	r3,r3,#0xff
1220	cmp	r3,#0xf2			@ done?
1221#endif
1222#if 19<15
1223# if __ARM_ARCH__>=7
1224	ldr	r2,[r1],#4			@ prefetch
1225# else
1226	ldrb	r2,[r1,#3]
1227# endif
1228	eor	r3,r9,r10			@ a^b, b^c in next round
1229#else
1230	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1231	eor	r3,r9,r10			@ a^b, b^c in next round
1232	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1233#endif
1234	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1235	and	r12,r12,r3			@ (b^c)&=(a^b)
1236	add	r4,r4,r8			@ d+=h
1237	eor	r12,r12,r10			@ Maj(a,b,c)
1238	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1239	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1240	@ ldr	r2,[sp,#5*4]		@ 20
1241	@ ldr	r1,[sp,#2*4]
1242	mov	r0,r2,ror#7
1243	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1244	mov	r12,r1,ror#17
1245	eor	r0,r0,r2,ror#18
1246	eor	r12,r12,r1,ror#19
1247	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1248	ldr	r2,[sp,#4*4]
1249	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1250	ldr	r1,[sp,#13*4]
1251
1252	add	r12,r12,r0
1253	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1254	add	r2,r2,r12
1255	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1256	add	r2,r2,r1			@ X[i]
1257	ldr	r12,[r14],#4			@ *K256++
1258	add	r7,r7,r2			@ h+=X[i]
1259	str	r2,[sp,#4*4]
1260	eor	r2,r5,r6
1261	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1262	and	r2,r2,r4
1263	add	r7,r7,r12			@ h+=K256[i]
1264	eor	r2,r2,r6			@ Ch(e,f,g)
1265	eor	r0,r8,r8,ror#11
1266	add	r7,r7,r2			@ h+=Ch(e,f,g)
1267#if 20==31
1268	and	r12,r12,#0xff
1269	cmp	r12,#0xf2			@ done?
1270#endif
1271#if 20<15
1272# if __ARM_ARCH__>=7
1273	ldr	r2,[r1],#4			@ prefetch
1274# else
1275	ldrb	r2,[r1,#3]
1276# endif
1277	eor	r12,r8,r9			@ a^b, b^c in next round
1278#else
1279	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1280	eor	r12,r8,r9			@ a^b, b^c in next round
1281	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1282#endif
1283	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1284	and	r3,r3,r12			@ (b^c)&=(a^b)
1285	add	r11,r11,r7			@ d+=h
1286	eor	r3,r3,r9			@ Maj(a,b,c)
1287	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1288	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1289	@ ldr	r2,[sp,#6*4]		@ 21
1290	@ ldr	r1,[sp,#3*4]
1291	mov	r0,r2,ror#7
1292	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1293	mov	r3,r1,ror#17
1294	eor	r0,r0,r2,ror#18
1295	eor	r3,r3,r1,ror#19
1296	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1297	ldr	r2,[sp,#5*4]
1298	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1299	ldr	r1,[sp,#14*4]
1300
1301	add	r3,r3,r0
1302	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1303	add	r2,r2,r3
1304	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1305	add	r2,r2,r1			@ X[i]
1306	ldr	r3,[r14],#4			@ *K256++
1307	add	r6,r6,r2			@ h+=X[i]
1308	str	r2,[sp,#5*4]
1309	eor	r2,r4,r5
1310	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1311	and	r2,r2,r11
1312	add	r6,r6,r3			@ h+=K256[i]
1313	eor	r2,r2,r5			@ Ch(e,f,g)
1314	eor	r0,r7,r7,ror#11
1315	add	r6,r6,r2			@ h+=Ch(e,f,g)
1316#if 21==31
1317	and	r3,r3,#0xff
1318	cmp	r3,#0xf2			@ done?
1319#endif
1320#if 21<15
1321# if __ARM_ARCH__>=7
1322	ldr	r2,[r1],#4			@ prefetch
1323# else
1324	ldrb	r2,[r1,#3]
1325# endif
1326	eor	r3,r7,r8			@ a^b, b^c in next round
1327#else
1328	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1329	eor	r3,r7,r8			@ a^b, b^c in next round
1330	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1331#endif
1332	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1333	and	r12,r12,r3			@ (b^c)&=(a^b)
1334	add	r10,r10,r6			@ d+=h
1335	eor	r12,r12,r8			@ Maj(a,b,c)
1336	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1337	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1338	@ ldr	r2,[sp,#7*4]		@ 22
1339	@ ldr	r1,[sp,#4*4]
1340	mov	r0,r2,ror#7
1341	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1342	mov	r12,r1,ror#17
1343	eor	r0,r0,r2,ror#18
1344	eor	r12,r12,r1,ror#19
1345	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1346	ldr	r2,[sp,#6*4]
1347	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1348	ldr	r1,[sp,#15*4]
1349
1350	add	r12,r12,r0
1351	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1352	add	r2,r2,r12
1353	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1354	add	r2,r2,r1			@ X[i]
1355	ldr	r12,[r14],#4			@ *K256++
1356	add	r5,r5,r2			@ h+=X[i]
1357	str	r2,[sp,#6*4]
1358	eor	r2,r11,r4
1359	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1360	and	r2,r2,r10
1361	add	r5,r5,r12			@ h+=K256[i]
1362	eor	r2,r2,r4			@ Ch(e,f,g)
1363	eor	r0,r6,r6,ror#11
1364	add	r5,r5,r2			@ h+=Ch(e,f,g)
1365#if 22==31
1366	and	r12,r12,#0xff
1367	cmp	r12,#0xf2			@ done?
1368#endif
1369#if 22<15
1370# if __ARM_ARCH__>=7
1371	ldr	r2,[r1],#4			@ prefetch
1372# else
1373	ldrb	r2,[r1,#3]
1374# endif
1375	eor	r12,r6,r7			@ a^b, b^c in next round
1376#else
1377	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1378	eor	r12,r6,r7			@ a^b, b^c in next round
1379	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1380#endif
1381	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1382	and	r3,r3,r12			@ (b^c)&=(a^b)
1383	add	r9,r9,r5			@ d+=h
1384	eor	r3,r3,r7			@ Maj(a,b,c)
1385	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1386	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1387	@ ldr	r2,[sp,#8*4]		@ 23
1388	@ ldr	r1,[sp,#5*4]
1389	mov	r0,r2,ror#7
1390	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1391	mov	r3,r1,ror#17
1392	eor	r0,r0,r2,ror#18
1393	eor	r3,r3,r1,ror#19
1394	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1395	ldr	r2,[sp,#7*4]
1396	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1397	ldr	r1,[sp,#0*4]
1398
1399	add	r3,r3,r0
1400	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1401	add	r2,r2,r3
1402	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1403	add	r2,r2,r1			@ X[i]
1404	ldr	r3,[r14],#4			@ *K256++
1405	add	r4,r4,r2			@ h+=X[i]
1406	str	r2,[sp,#7*4]
1407	eor	r2,r10,r11
1408	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1409	and	r2,r2,r9
1410	add	r4,r4,r3			@ h+=K256[i]
1411	eor	r2,r2,r11			@ Ch(e,f,g)
1412	eor	r0,r5,r5,ror#11
1413	add	r4,r4,r2			@ h+=Ch(e,f,g)
1414#if 23==31
1415	and	r3,r3,#0xff
1416	cmp	r3,#0xf2			@ done?
1417#endif
1418#if 23<15
1419# if __ARM_ARCH__>=7
1420	ldr	r2,[r1],#4			@ prefetch
1421# else
1422	ldrb	r2,[r1,#3]
1423# endif
1424	eor	r3,r5,r6			@ a^b, b^c in next round
1425#else
1426	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1427	eor	r3,r5,r6			@ a^b, b^c in next round
1428	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1429#endif
1430	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1431	and	r12,r12,r3			@ (b^c)&=(a^b)
1432	add	r8,r8,r4			@ d+=h
1433	eor	r12,r12,r6			@ Maj(a,b,c)
1434	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1435	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1436	@ ldr	r2,[sp,#9*4]		@ 24
1437	@ ldr	r1,[sp,#6*4]
1438	mov	r0,r2,ror#7
1439	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1440	mov	r12,r1,ror#17
1441	eor	r0,r0,r2,ror#18
1442	eor	r12,r12,r1,ror#19
1443	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1444	ldr	r2,[sp,#8*4]
1445	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1446	ldr	r1,[sp,#1*4]
1447
1448	add	r12,r12,r0
1449	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1450	add	r2,r2,r12
1451	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1452	add	r2,r2,r1			@ X[i]
1453	ldr	r12,[r14],#4			@ *K256++
1454	add	r11,r11,r2			@ h+=X[i]
1455	str	r2,[sp,#8*4]
1456	eor	r2,r9,r10
1457	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1458	and	r2,r2,r8
1459	add	r11,r11,r12			@ h+=K256[i]
1460	eor	r2,r2,r10			@ Ch(e,f,g)
1461	eor	r0,r4,r4,ror#11
1462	add	r11,r11,r2			@ h+=Ch(e,f,g)
1463#if 24==31
1464	and	r12,r12,#0xff
1465	cmp	r12,#0xf2			@ done?
1466#endif
1467#if 24<15
1468# if __ARM_ARCH__>=7
1469	ldr	r2,[r1],#4			@ prefetch
1470# else
1471	ldrb	r2,[r1,#3]
1472# endif
1473	eor	r12,r4,r5			@ a^b, b^c in next round
1474#else
1475	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1476	eor	r12,r4,r5			@ a^b, b^c in next round
1477	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1478#endif
1479	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1480	and	r3,r3,r12			@ (b^c)&=(a^b)
1481	add	r7,r7,r11			@ d+=h
1482	eor	r3,r3,r5			@ Maj(a,b,c)
1483	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1484	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1485	@ ldr	r2,[sp,#10*4]		@ 25
1486	@ ldr	r1,[sp,#7*4]
1487	mov	r0,r2,ror#7
1488	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1489	mov	r3,r1,ror#17
1490	eor	r0,r0,r2,ror#18
1491	eor	r3,r3,r1,ror#19
1492	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1493	ldr	r2,[sp,#9*4]
1494	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1495	ldr	r1,[sp,#2*4]
1496
1497	add	r3,r3,r0
1498	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1499	add	r2,r2,r3
1500	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1501	add	r2,r2,r1			@ X[i]
1502	ldr	r3,[r14],#4			@ *K256++
1503	add	r10,r10,r2			@ h+=X[i]
1504	str	r2,[sp,#9*4]
1505	eor	r2,r8,r9
1506	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1507	and	r2,r2,r7
1508	add	r10,r10,r3			@ h+=K256[i]
1509	eor	r2,r2,r9			@ Ch(e,f,g)
1510	eor	r0,r11,r11,ror#11
1511	add	r10,r10,r2			@ h+=Ch(e,f,g)
1512#if 25==31
1513	and	r3,r3,#0xff
1514	cmp	r3,#0xf2			@ done?
1515#endif
1516#if 25<15
1517# if __ARM_ARCH__>=7
1518	ldr	r2,[r1],#4			@ prefetch
1519# else
1520	ldrb	r2,[r1,#3]
1521# endif
1522	eor	r3,r11,r4			@ a^b, b^c in next round
1523#else
1524	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1525	eor	r3,r11,r4			@ a^b, b^c in next round
1526	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1527#endif
1528	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1529	and	r12,r12,r3			@ (b^c)&=(a^b)
1530	add	r6,r6,r10			@ d+=h
1531	eor	r12,r12,r4			@ Maj(a,b,c)
1532	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1533	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1534	@ ldr	r2,[sp,#11*4]		@ 26
1535	@ ldr	r1,[sp,#8*4]
1536	mov	r0,r2,ror#7
1537	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1538	mov	r12,r1,ror#17
1539	eor	r0,r0,r2,ror#18
1540	eor	r12,r12,r1,ror#19
1541	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1542	ldr	r2,[sp,#10*4]
1543	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1544	ldr	r1,[sp,#3*4]
1545
1546	add	r12,r12,r0
1547	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1548	add	r2,r2,r12
1549	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1550	add	r2,r2,r1			@ X[i]
1551	ldr	r12,[r14],#4			@ *K256++
1552	add	r9,r9,r2			@ h+=X[i]
1553	str	r2,[sp,#10*4]
1554	eor	r2,r7,r8
1555	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1556	and	r2,r2,r6
1557	add	r9,r9,r12			@ h+=K256[i]
1558	eor	r2,r2,r8			@ Ch(e,f,g)
1559	eor	r0,r10,r10,ror#11
1560	add	r9,r9,r2			@ h+=Ch(e,f,g)
1561#if 26==31
1562	and	r12,r12,#0xff
1563	cmp	r12,#0xf2			@ done?
1564#endif
1565#if 26<15
1566# if __ARM_ARCH__>=7
1567	ldr	r2,[r1],#4			@ prefetch
1568# else
1569	ldrb	r2,[r1,#3]
1570# endif
1571	eor	r12,r10,r11			@ a^b, b^c in next round
1572#else
1573	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1574	eor	r12,r10,r11			@ a^b, b^c in next round
1575	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1576#endif
1577	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1578	and	r3,r3,r12			@ (b^c)&=(a^b)
1579	add	r5,r5,r9			@ d+=h
1580	eor	r3,r3,r11			@ Maj(a,b,c)
1581	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1582	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1583	@ ldr	r2,[sp,#12*4]		@ 27
1584	@ ldr	r1,[sp,#9*4]
1585	mov	r0,r2,ror#7
1586	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1587	mov	r3,r1,ror#17
1588	eor	r0,r0,r2,ror#18
1589	eor	r3,r3,r1,ror#19
1590	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1591	ldr	r2,[sp,#11*4]
1592	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1593	ldr	r1,[sp,#4*4]
1594
1595	add	r3,r3,r0
1596	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1597	add	r2,r2,r3
1598	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1599	add	r2,r2,r1			@ X[i]
1600	ldr	r3,[r14],#4			@ *K256++
1601	add	r8,r8,r2			@ h+=X[i]
1602	str	r2,[sp,#11*4]
1603	eor	r2,r6,r7
1604	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1605	and	r2,r2,r5
1606	add	r8,r8,r3			@ h+=K256[i]
1607	eor	r2,r2,r7			@ Ch(e,f,g)
1608	eor	r0,r9,r9,ror#11
1609	add	r8,r8,r2			@ h+=Ch(e,f,g)
1610#if 27==31
1611	and	r3,r3,#0xff
1612	cmp	r3,#0xf2			@ done?
1613#endif
1614#if 27<15
1615# if __ARM_ARCH__>=7
1616	ldr	r2,[r1],#4			@ prefetch
1617# else
1618	ldrb	r2,[r1,#3]
1619# endif
1620	eor	r3,r9,r10			@ a^b, b^c in next round
1621#else
1622	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1623	eor	r3,r9,r10			@ a^b, b^c in next round
1624	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1625#endif
1626	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1627	and	r12,r12,r3			@ (b^c)&=(a^b)
1628	add	r4,r4,r8			@ d+=h
1629	eor	r12,r12,r10			@ Maj(a,b,c)
1630	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1631	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1632	@ ldr	r2,[sp,#13*4]		@ 28
1633	@ ldr	r1,[sp,#10*4]
1634	mov	r0,r2,ror#7
1635	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1636	mov	r12,r1,ror#17
1637	eor	r0,r0,r2,ror#18
1638	eor	r12,r12,r1,ror#19
1639	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1640	ldr	r2,[sp,#12*4]
1641	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1642	ldr	r1,[sp,#5*4]
1643
1644	add	r12,r12,r0
1645	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1646	add	r2,r2,r12
1647	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1648	add	r2,r2,r1			@ X[i]
1649	ldr	r12,[r14],#4			@ *K256++
1650	add	r7,r7,r2			@ h+=X[i]
1651	str	r2,[sp,#12*4]
1652	eor	r2,r5,r6
1653	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1654	and	r2,r2,r4
1655	add	r7,r7,r12			@ h+=K256[i]
1656	eor	r2,r2,r6			@ Ch(e,f,g)
1657	eor	r0,r8,r8,ror#11
1658	add	r7,r7,r2			@ h+=Ch(e,f,g)
1659#if 28==31
1660	and	r12,r12,#0xff
1661	cmp	r12,#0xf2			@ done?
1662#endif
1663#if 28<15
1664# if __ARM_ARCH__>=7
1665	ldr	r2,[r1],#4			@ prefetch
1666# else
1667	ldrb	r2,[r1,#3]
1668# endif
1669	eor	r12,r8,r9			@ a^b, b^c in next round
1670#else
1671	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1672	eor	r12,r8,r9			@ a^b, b^c in next round
1673	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1674#endif
1675	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1676	and	r3,r3,r12			@ (b^c)&=(a^b)
1677	add	r11,r11,r7			@ d+=h
1678	eor	r3,r3,r9			@ Maj(a,b,c)
1679	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1680	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1681	@ ldr	r2,[sp,#14*4]		@ 29
1682	@ ldr	r1,[sp,#11*4]
1683	mov	r0,r2,ror#7
1684	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1685	mov	r3,r1,ror#17
1686	eor	r0,r0,r2,ror#18
1687	eor	r3,r3,r1,ror#19
1688	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1689	ldr	r2,[sp,#13*4]
1690	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1691	ldr	r1,[sp,#6*4]
1692
1693	add	r3,r3,r0
1694	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1695	add	r2,r2,r3
1696	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1697	add	r2,r2,r1			@ X[i]
1698	ldr	r3,[r14],#4			@ *K256++
1699	add	r6,r6,r2			@ h+=X[i]
1700	str	r2,[sp,#13*4]
1701	eor	r2,r4,r5
1702	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1703	and	r2,r2,r11
1704	add	r6,r6,r3			@ h+=K256[i]
1705	eor	r2,r2,r5			@ Ch(e,f,g)
1706	eor	r0,r7,r7,ror#11
1707	add	r6,r6,r2			@ h+=Ch(e,f,g)
1708#if 29==31
1709	and	r3,r3,#0xff
1710	cmp	r3,#0xf2			@ done?
1711#endif
1712#if 29<15
1713# if __ARM_ARCH__>=7
1714	ldr	r2,[r1],#4			@ prefetch
1715# else
1716	ldrb	r2,[r1,#3]
1717# endif
1718	eor	r3,r7,r8			@ a^b, b^c in next round
1719#else
1720	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1721	eor	r3,r7,r8			@ a^b, b^c in next round
1722	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1723#endif
1724	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1725	and	r12,r12,r3			@ (b^c)&=(a^b)
1726	add	r10,r10,r6			@ d+=h
1727	eor	r12,r12,r8			@ Maj(a,b,c)
1728	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1729	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1730	@ ldr	r2,[sp,#15*4]		@ 30
1731	@ ldr	r1,[sp,#12*4]
1732	mov	r0,r2,ror#7
1733	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1734	mov	r12,r1,ror#17
1735	eor	r0,r0,r2,ror#18
1736	eor	r12,r12,r1,ror#19
1737	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1738	ldr	r2,[sp,#14*4]
1739	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1740	ldr	r1,[sp,#7*4]
1741
1742	add	r12,r12,r0
1743	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1744	add	r2,r2,r12
1745	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1746	add	r2,r2,r1			@ X[i]
1747	ldr	r12,[r14],#4			@ *K256++
1748	add	r5,r5,r2			@ h+=X[i]
1749	str	r2,[sp,#14*4]
1750	eor	r2,r11,r4
1751	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1752	and	r2,r2,r10
1753	add	r5,r5,r12			@ h+=K256[i]
1754	eor	r2,r2,r4			@ Ch(e,f,g)
1755	eor	r0,r6,r6,ror#11
1756	add	r5,r5,r2			@ h+=Ch(e,f,g)
1757#if 30==31
1758	and	r12,r12,#0xff
1759	cmp	r12,#0xf2			@ done?
1760#endif
1761#if 30<15
1762# if __ARM_ARCH__>=7
1763	ldr	r2,[r1],#4			@ prefetch
1764# else
1765	ldrb	r2,[r1,#3]
1766# endif
1767	eor	r12,r6,r7			@ a^b, b^c in next round
1768#else
1769	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1770	eor	r12,r6,r7			@ a^b, b^c in next round
1771	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1772#endif
1773	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1774	and	r3,r3,r12			@ (b^c)&=(a^b)
1775	add	r9,r9,r5			@ d+=h
1776	eor	r3,r3,r7			@ Maj(a,b,c)
1777	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1778	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1779	@ ldr	r2,[sp,#0*4]		@ 31
1780	@ ldr	r1,[sp,#13*4]
1781	mov	r0,r2,ror#7
1782	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1783	mov	r3,r1,ror#17
1784	eor	r0,r0,r2,ror#18
1785	eor	r3,r3,r1,ror#19
1786	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1787	ldr	r2,[sp,#15*4]
1788	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1789	ldr	r1,[sp,#8*4]
1790
1791	add	r3,r3,r0
1792	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1793	add	r2,r2,r3
1794	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1795	add	r2,r2,r1			@ X[i]
1796	ldr	r3,[r14],#4			@ *K256++
1797	add	r4,r4,r2			@ h+=X[i]
1798	str	r2,[sp,#15*4]
1799	eor	r2,r10,r11
1800	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1801	and	r2,r2,r9
1802	add	r4,r4,r3			@ h+=K256[i]
1803	eor	r2,r2,r11			@ Ch(e,f,g)
1804	eor	r0,r5,r5,ror#11
1805	add	r4,r4,r2			@ h+=Ch(e,f,g)
1806#if 31==31
1807	and	r3,r3,#0xff
1808	cmp	r3,#0xf2			@ done?
1809#endif
1810#if 31<15
1811# if __ARM_ARCH__>=7
1812	ldr	r2,[r1],#4			@ prefetch
1813# else
1814	ldrb	r2,[r1,#3]
1815# endif
1816	eor	r3,r5,r6			@ a^b, b^c in next round
1817#else
1818	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1819	eor	r3,r5,r6			@ a^b, b^c in next round
1820	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1821#endif
1822	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1823	and	r12,r12,r3			@ (b^c)&=(a^b)
1824	add	r8,r8,r4			@ d+=h
1825	eor	r12,r12,r6			@ Maj(a,b,c)
1826	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1827	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1828#if __ARM_ARCH__>=7
1829	ite	eq			@ Thumb2 thing, sanity check in ARM
1830#endif
1831	ldreq	r3,[sp,#16*4]		@ pull ctx
1832	bne	.Lrounds_16_xx
1833
1834	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1835	ldr	r0,[r3,#0]
1836	ldr	r2,[r3,#4]
1837	ldr	r12,[r3,#8]
1838	add	r4,r4,r0
1839	ldr	r0,[r3,#12]
1840	add	r5,r5,r2
1841	ldr	r2,[r3,#16]
1842	add	r6,r6,r12
1843	ldr	r12,[r3,#20]
1844	add	r7,r7,r0
1845	ldr	r0,[r3,#24]
1846	add	r8,r8,r2
1847	ldr	r2,[r3,#28]
1848	add	r9,r9,r12
1849	ldr	r1,[sp,#17*4]		@ pull inp
1850	ldr	r12,[sp,#18*4]		@ pull inp+len
1851	add	r10,r10,r0
1852	add	r11,r11,r2
1853	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1854	cmp	r1,r12
1855	sub	r14,r14,#256	@ rewind Ktbl
1856	bne	.Loop
1857
1858	add	sp,sp,#19*4	@ destroy frame
1859#if __ARM_ARCH__>=5
1860	ldmia	sp!,{r4-r11,pc}
1861#else
1862	ldmia	sp!,{r4-r11,lr}
1863	tst	lr,#1
1864	moveq	pc,lr			@ be binary compatible with V4, yet
1865	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1866#endif
1867.size	sha256_block_data_order,.-sha256_block_data_order
1868#if __ARM_MAX_ARCH__>=7
1869.arch	armv7-a
1870.fpu	neon
1871
1872.global	sha256_block_data_order_neon
1873.type	sha256_block_data_order_neon,%function
1874.align	4
1875sha256_block_data_order_neon:
1876.LNEON:
1877	stmdb	sp!,{r4-r12,lr}
1878
1879	sub	r11,sp,#16*4+16
1880	adrl	r14,K256
1881	bic	r11,r11,#15		@ align for 128-bit stores
1882	mov	r12,sp
1883	mov	sp,r11			@ alloca
1884	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1885
1886	vld1.8		{q0},[r1]!
1887	vld1.8		{q1},[r1]!
1888	vld1.8		{q2},[r1]!
1889	vld1.8		{q3},[r1]!
1890	vld1.32		{q8},[r14,:128]!
1891	vld1.32		{q9},[r14,:128]!
1892	vld1.32		{q10},[r14,:128]!
1893	vld1.32		{q11},[r14,:128]!
1894	vrev32.8	q0,q0		@ yes, even on
1895	str		r0,[sp,#64]
1896	vrev32.8	q1,q1		@ big-endian
1897	str		r1,[sp,#68]
1898	mov		r1,sp
1899	vrev32.8	q2,q2
1900	str		r2,[sp,#72]
1901	vrev32.8	q3,q3
1902	str		r12,[sp,#76]		@ save original sp
1903	vadd.i32	q8,q8,q0
1904	vadd.i32	q9,q9,q1
1905	vst1.32		{q8},[r1,:128]!
1906	vadd.i32	q10,q10,q2
1907	vst1.32		{q9},[r1,:128]!
1908	vadd.i32	q11,q11,q3
1909	vst1.32		{q10},[r1,:128]!
1910	vst1.32		{q11},[r1,:128]!
1911
1912	ldmia		r0,{r4-r11}
1913	sub		r1,r1,#64
1914	ldr		r2,[sp,#0]
1915	eor		r12,r12,r12
1916	eor		r3,r5,r6
1917	b		.L_00_48
1918
1919.align	4
1920.L_00_48:
1921	vext.8	q8,q0,q1,#4
1922	add	r11,r11,r2
1923	eor	r2,r9,r10
1924	eor	r0,r8,r8,ror#5
1925	vext.8	q9,q2,q3,#4
1926	add	r4,r4,r12
1927	and	r2,r2,r8
1928	eor	r12,r0,r8,ror#19
1929	vshr.u32	q10,q8,#7
1930	eor	r0,r4,r4,ror#11
1931	eor	r2,r2,r10
1932	vadd.i32	q0,q0,q9
1933	add	r11,r11,r12,ror#6
1934	eor	r12,r4,r5
1935	vshr.u32	q9,q8,#3
1936	eor	r0,r0,r4,ror#20
1937	add	r11,r11,r2
1938	vsli.32	q10,q8,#25
1939	ldr	r2,[sp,#4]
1940	and	r3,r3,r12
1941	vshr.u32	q11,q8,#18
1942	add	r7,r7,r11
1943	add	r11,r11,r0,ror#2
1944	eor	r3,r3,r5
1945	veor	q9,q9,q10
1946	add	r10,r10,r2
1947	vsli.32	q11,q8,#14
1948	eor	r2,r8,r9
1949	eor	r0,r7,r7,ror#5
1950	vshr.u32	d24,d7,#17
1951	add	r11,r11,r3
1952	and	r2,r2,r7
1953	veor	q9,q9,q11
1954	eor	r3,r0,r7,ror#19
1955	eor	r0,r11,r11,ror#11
1956	vsli.32	d24,d7,#15
1957	eor	r2,r2,r9
1958	add	r10,r10,r3,ror#6
1959	vshr.u32	d25,d7,#10
1960	eor	r3,r11,r4
1961	eor	r0,r0,r11,ror#20
1962	vadd.i32	q0,q0,q9
1963	add	r10,r10,r2
1964	ldr	r2,[sp,#8]
1965	veor	d25,d25,d24
1966	and	r12,r12,r3
1967	add	r6,r6,r10
1968	vshr.u32	d24,d7,#19
1969	add	r10,r10,r0,ror#2
1970	eor	r12,r12,r4
1971	vsli.32	d24,d7,#13
1972	add	r9,r9,r2
1973	eor	r2,r7,r8
1974	veor	d25,d25,d24
1975	eor	r0,r6,r6,ror#5
1976	add	r10,r10,r12
1977	vadd.i32	d0,d0,d25
1978	and	r2,r2,r6
1979	eor	r12,r0,r6,ror#19
1980	vshr.u32	d24,d0,#17
1981	eor	r0,r10,r10,ror#11
1982	eor	r2,r2,r8
1983	vsli.32	d24,d0,#15
1984	add	r9,r9,r12,ror#6
1985	eor	r12,r10,r11
1986	vshr.u32	d25,d0,#10
1987	eor	r0,r0,r10,ror#20
1988	add	r9,r9,r2
1989	veor	d25,d25,d24
1990	ldr	r2,[sp,#12]
1991	and	r3,r3,r12
1992	vshr.u32	d24,d0,#19
1993	add	r5,r5,r9
1994	add	r9,r9,r0,ror#2
1995	eor	r3,r3,r11
1996	vld1.32	{q8},[r14,:128]!
1997	add	r8,r8,r2
1998	vsli.32	d24,d0,#13
1999	eor	r2,r6,r7
2000	eor	r0,r5,r5,ror#5
2001	veor	d25,d25,d24
2002	add	r9,r9,r3
2003	and	r2,r2,r5
2004	vadd.i32	d1,d1,d25
2005	eor	r3,r0,r5,ror#19
2006	eor	r0,r9,r9,ror#11
2007	vadd.i32	q8,q8,q0
2008	eor	r2,r2,r7
2009	add	r8,r8,r3,ror#6
2010	eor	r3,r9,r10
2011	eor	r0,r0,r9,ror#20
2012	add	r8,r8,r2
2013	ldr	r2,[sp,#16]
2014	and	r12,r12,r3
2015	add	r4,r4,r8
2016	vst1.32	{q8},[r1,:128]!
2017	add	r8,r8,r0,ror#2
2018	eor	r12,r12,r10
2019	vext.8	q8,q1,q2,#4
2020	add	r7,r7,r2
2021	eor	r2,r5,r6
2022	eor	r0,r4,r4,ror#5
2023	vext.8	q9,q3,q0,#4
2024	add	r8,r8,r12
2025	and	r2,r2,r4
2026	eor	r12,r0,r4,ror#19
2027	vshr.u32	q10,q8,#7
2028	eor	r0,r8,r8,ror#11
2029	eor	r2,r2,r6
2030	vadd.i32	q1,q1,q9
2031	add	r7,r7,r12,ror#6
2032	eor	r12,r8,r9
2033	vshr.u32	q9,q8,#3
2034	eor	r0,r0,r8,ror#20
2035	add	r7,r7,r2
2036	vsli.32	q10,q8,#25
2037	ldr	r2,[sp,#20]
2038	and	r3,r3,r12
2039	vshr.u32	q11,q8,#18
2040	add	r11,r11,r7
2041	add	r7,r7,r0,ror#2
2042	eor	r3,r3,r9
2043	veor	q9,q9,q10
2044	add	r6,r6,r2
2045	vsli.32	q11,q8,#14
2046	eor	r2,r4,r5
2047	eor	r0,r11,r11,ror#5
2048	vshr.u32	d24,d1,#17
2049	add	r7,r7,r3
2050	and	r2,r2,r11
2051	veor	q9,q9,q11
2052	eor	r3,r0,r11,ror#19
2053	eor	r0,r7,r7,ror#11
2054	vsli.32	d24,d1,#15
2055	eor	r2,r2,r5
2056	add	r6,r6,r3,ror#6
2057	vshr.u32	d25,d1,#10
2058	eor	r3,r7,r8
2059	eor	r0,r0,r7,ror#20
2060	vadd.i32	q1,q1,q9
2061	add	r6,r6,r2
2062	ldr	r2,[sp,#24]
2063	veor	d25,d25,d24
2064	and	r12,r12,r3
2065	add	r10,r10,r6
2066	vshr.u32	d24,d1,#19
2067	add	r6,r6,r0,ror#2
2068	eor	r12,r12,r8
2069	vsli.32	d24,d1,#13
2070	add	r5,r5,r2
2071	eor	r2,r11,r4
2072	veor	d25,d25,d24
2073	eor	r0,r10,r10,ror#5
2074	add	r6,r6,r12
2075	vadd.i32	d2,d2,d25
2076	and	r2,r2,r10
2077	eor	r12,r0,r10,ror#19
2078	vshr.u32	d24,d2,#17
2079	eor	r0,r6,r6,ror#11
2080	eor	r2,r2,r4
2081	vsli.32	d24,d2,#15
2082	add	r5,r5,r12,ror#6
2083	eor	r12,r6,r7
2084	vshr.u32	d25,d2,#10
2085	eor	r0,r0,r6,ror#20
2086	add	r5,r5,r2
2087	veor	d25,d25,d24
2088	ldr	r2,[sp,#28]
2089	and	r3,r3,r12
2090	vshr.u32	d24,d2,#19
2091	add	r9,r9,r5
2092	add	r5,r5,r0,ror#2
2093	eor	r3,r3,r7
2094	vld1.32	{q8},[r14,:128]!
2095	add	r4,r4,r2
2096	vsli.32	d24,d2,#13
2097	eor	r2,r10,r11
2098	eor	r0,r9,r9,ror#5
2099	veor	d25,d25,d24
2100	add	r5,r5,r3
2101	and	r2,r2,r9
2102	vadd.i32	d3,d3,d25
2103	eor	r3,r0,r9,ror#19
2104	eor	r0,r5,r5,ror#11
2105	vadd.i32	q8,q8,q1
2106	eor	r2,r2,r11
2107	add	r4,r4,r3,ror#6
2108	eor	r3,r5,r6
2109	eor	r0,r0,r5,ror#20
2110	add	r4,r4,r2
2111	ldr	r2,[sp,#32]
2112	and	r12,r12,r3
2113	add	r8,r8,r4
2114	vst1.32	{q8},[r1,:128]!
2115	add	r4,r4,r0,ror#2
2116	eor	r12,r12,r6
2117	vext.8	q8,q2,q3,#4
2118	add	r11,r11,r2
2119	eor	r2,r9,r10
2120	eor	r0,r8,r8,ror#5
2121	vext.8	q9,q0,q1,#4
2122	add	r4,r4,r12
2123	and	r2,r2,r8
2124	eor	r12,r0,r8,ror#19
2125	vshr.u32	q10,q8,#7
2126	eor	r0,r4,r4,ror#11
2127	eor	r2,r2,r10
2128	vadd.i32	q2,q2,q9
2129	add	r11,r11,r12,ror#6
2130	eor	r12,r4,r5
2131	vshr.u32	q9,q8,#3
2132	eor	r0,r0,r4,ror#20
2133	add	r11,r11,r2
2134	vsli.32	q10,q8,#25
2135	ldr	r2,[sp,#36]
2136	and	r3,r3,r12
2137	vshr.u32	q11,q8,#18
2138	add	r7,r7,r11
2139	add	r11,r11,r0,ror#2
2140	eor	r3,r3,r5
2141	veor	q9,q9,q10
2142	add	r10,r10,r2
2143	vsli.32	q11,q8,#14
2144	eor	r2,r8,r9
2145	eor	r0,r7,r7,ror#5
2146	vshr.u32	d24,d3,#17
2147	add	r11,r11,r3
2148	and	r2,r2,r7
2149	veor	q9,q9,q11
2150	eor	r3,r0,r7,ror#19
2151	eor	r0,r11,r11,ror#11
2152	vsli.32	d24,d3,#15
2153	eor	r2,r2,r9
2154	add	r10,r10,r3,ror#6
2155	vshr.u32	d25,d3,#10
2156	eor	r3,r11,r4
2157	eor	r0,r0,r11,ror#20
2158	vadd.i32	q2,q2,q9
2159	add	r10,r10,r2
2160	ldr	r2,[sp,#40]
2161	veor	d25,d25,d24
2162	and	r12,r12,r3
2163	add	r6,r6,r10
2164	vshr.u32	d24,d3,#19
2165	add	r10,r10,r0,ror#2
2166	eor	r12,r12,r4
2167	vsli.32	d24,d3,#13
2168	add	r9,r9,r2
2169	eor	r2,r7,r8
2170	veor	d25,d25,d24
2171	eor	r0,r6,r6,ror#5
2172	add	r10,r10,r12
2173	vadd.i32	d4,d4,d25
2174	and	r2,r2,r6
2175	eor	r12,r0,r6,ror#19
2176	vshr.u32	d24,d4,#17
2177	eor	r0,r10,r10,ror#11
2178	eor	r2,r2,r8
2179	vsli.32	d24,d4,#15
2180	add	r9,r9,r12,ror#6
2181	eor	r12,r10,r11
2182	vshr.u32	d25,d4,#10
2183	eor	r0,r0,r10,ror#20
2184	add	r9,r9,r2
2185	veor	d25,d25,d24
2186	ldr	r2,[sp,#44]
2187	and	r3,r3,r12
2188	vshr.u32	d24,d4,#19
2189	add	r5,r5,r9
2190	add	r9,r9,r0,ror#2
2191	eor	r3,r3,r11
2192	vld1.32	{q8},[r14,:128]!
2193	add	r8,r8,r2
2194	vsli.32	d24,d4,#13
2195	eor	r2,r6,r7
2196	eor	r0,r5,r5,ror#5
2197	veor	d25,d25,d24
2198	add	r9,r9,r3
2199	and	r2,r2,r5
2200	vadd.i32	d5,d5,d25
2201	eor	r3,r0,r5,ror#19
2202	eor	r0,r9,r9,ror#11
2203	vadd.i32	q8,q8,q2
2204	eor	r2,r2,r7
2205	add	r8,r8,r3,ror#6
2206	eor	r3,r9,r10
2207	eor	r0,r0,r9,ror#20
2208	add	r8,r8,r2
2209	ldr	r2,[sp,#48]
2210	and	r12,r12,r3
2211	add	r4,r4,r8
2212	vst1.32	{q8},[r1,:128]!
2213	add	r8,r8,r0,ror#2
2214	eor	r12,r12,r10
2215	vext.8	q8,q3,q0,#4
2216	add	r7,r7,r2
2217	eor	r2,r5,r6
2218	eor	r0,r4,r4,ror#5
2219	vext.8	q9,q1,q2,#4
2220	add	r8,r8,r12
2221	and	r2,r2,r4
2222	eor	r12,r0,r4,ror#19
2223	vshr.u32	q10,q8,#7
2224	eor	r0,r8,r8,ror#11
2225	eor	r2,r2,r6
2226	vadd.i32	q3,q3,q9
2227	add	r7,r7,r12,ror#6
2228	eor	r12,r8,r9
2229	vshr.u32	q9,q8,#3
2230	eor	r0,r0,r8,ror#20
2231	add	r7,r7,r2
2232	vsli.32	q10,q8,#25
2233	ldr	r2,[sp,#52]
2234	and	r3,r3,r12
2235	vshr.u32	q11,q8,#18
2236	add	r11,r11,r7
2237	add	r7,r7,r0,ror#2
2238	eor	r3,r3,r9
2239	veor	q9,q9,q10
2240	add	r6,r6,r2
2241	vsli.32	q11,q8,#14
2242	eor	r2,r4,r5
2243	eor	r0,r11,r11,ror#5
2244	vshr.u32	d24,d5,#17
2245	add	r7,r7,r3
2246	and	r2,r2,r11
2247	veor	q9,q9,q11
2248	eor	r3,r0,r11,ror#19
2249	eor	r0,r7,r7,ror#11
2250	vsli.32	d24,d5,#15
2251	eor	r2,r2,r5
2252	add	r6,r6,r3,ror#6
2253	vshr.u32	d25,d5,#10
2254	eor	r3,r7,r8
2255	eor	r0,r0,r7,ror#20
2256	vadd.i32	q3,q3,q9
2257	add	r6,r6,r2
2258	ldr	r2,[sp,#56]
2259	veor	d25,d25,d24
2260	and	r12,r12,r3
2261	add	r10,r10,r6
2262	vshr.u32	d24,d5,#19
2263	add	r6,r6,r0,ror#2
2264	eor	r12,r12,r8
2265	vsli.32	d24,d5,#13
2266	add	r5,r5,r2
2267	eor	r2,r11,r4
2268	veor	d25,d25,d24
2269	eor	r0,r10,r10,ror#5
2270	add	r6,r6,r12
2271	vadd.i32	d6,d6,d25
2272	and	r2,r2,r10
2273	eor	r12,r0,r10,ror#19
2274	vshr.u32	d24,d6,#17
2275	eor	r0,r6,r6,ror#11
2276	eor	r2,r2,r4
2277	vsli.32	d24,d6,#15
2278	add	r5,r5,r12,ror#6
2279	eor	r12,r6,r7
2280	vshr.u32	d25,d6,#10
2281	eor	r0,r0,r6,ror#20
2282	add	r5,r5,r2
2283	veor	d25,d25,d24
2284	ldr	r2,[sp,#60]
2285	and	r3,r3,r12
2286	vshr.u32	d24,d6,#19
2287	add	r9,r9,r5
2288	add	r5,r5,r0,ror#2
2289	eor	r3,r3,r7
2290	vld1.32	{q8},[r14,:128]!
2291	add	r4,r4,r2
2292	vsli.32	d24,d6,#13
2293	eor	r2,r10,r11
2294	eor	r0,r9,r9,ror#5
2295	veor	d25,d25,d24
2296	add	r5,r5,r3
2297	and	r2,r2,r9
2298	vadd.i32	d7,d7,d25
2299	eor	r3,r0,r9,ror#19
2300	eor	r0,r5,r5,ror#11
2301	vadd.i32	q8,q8,q3
2302	eor	r2,r2,r11
2303	add	r4,r4,r3,ror#6
2304	eor	r3,r5,r6
2305	eor	r0,r0,r5,ror#20
2306	add	r4,r4,r2
2307	ldr	r2,[r14]
2308	and	r12,r12,r3
2309	add	r8,r8,r4
2310	vst1.32	{q8},[r1,:128]!
2311	add	r4,r4,r0,ror#2
2312	eor	r12,r12,r6
2313	teq	r2,#0				@ check for K256 terminator
2314	ldr	r2,[sp,#0]
2315	sub	r1,r1,#64
2316	bne	.L_00_48
2317
2318	ldr		r1,[sp,#68]
2319	ldr		r0,[sp,#72]
2320	sub		r14,r14,#256	@ rewind r14
2321	teq		r1,r0
2322	it		eq
2323	subeq		r1,r1,#64		@ avoid SEGV
2324	vld1.8		{q0},[r1]!		@ load next input block
2325	vld1.8		{q1},[r1]!
2326	vld1.8		{q2},[r1]!
2327	vld1.8		{q3},[r1]!
2328	it		ne
2329	strne		r1,[sp,#68]
2330	mov		r1,sp
2331	add	r11,r11,r2
2332	eor	r2,r9,r10
2333	eor	r0,r8,r8,ror#5
2334	add	r4,r4,r12
2335	vld1.32	{q8},[r14,:128]!
2336	and	r2,r2,r8
2337	eor	r12,r0,r8,ror#19
2338	eor	r0,r4,r4,ror#11
2339	eor	r2,r2,r10
2340	vrev32.8	q0,q0
2341	add	r11,r11,r12,ror#6
2342	eor	r12,r4,r5
2343	eor	r0,r0,r4,ror#20
2344	add	r11,r11,r2
2345	vadd.i32	q8,q8,q0
2346	ldr	r2,[sp,#4]
2347	and	r3,r3,r12
2348	add	r7,r7,r11
2349	add	r11,r11,r0,ror#2
2350	eor	r3,r3,r5
2351	add	r10,r10,r2
2352	eor	r2,r8,r9
2353	eor	r0,r7,r7,ror#5
2354	add	r11,r11,r3
2355	and	r2,r2,r7
2356	eor	r3,r0,r7,ror#19
2357	eor	r0,r11,r11,ror#11
2358	eor	r2,r2,r9
2359	add	r10,r10,r3,ror#6
2360	eor	r3,r11,r4
2361	eor	r0,r0,r11,ror#20
2362	add	r10,r10,r2
2363	ldr	r2,[sp,#8]
2364	and	r12,r12,r3
2365	add	r6,r6,r10
2366	add	r10,r10,r0,ror#2
2367	eor	r12,r12,r4
2368	add	r9,r9,r2
2369	eor	r2,r7,r8
2370	eor	r0,r6,r6,ror#5
2371	add	r10,r10,r12
2372	and	r2,r2,r6
2373	eor	r12,r0,r6,ror#19
2374	eor	r0,r10,r10,ror#11
2375	eor	r2,r2,r8
2376	add	r9,r9,r12,ror#6
2377	eor	r12,r10,r11
2378	eor	r0,r0,r10,ror#20
2379	add	r9,r9,r2
2380	ldr	r2,[sp,#12]
2381	and	r3,r3,r12
2382	add	r5,r5,r9
2383	add	r9,r9,r0,ror#2
2384	eor	r3,r3,r11
2385	add	r8,r8,r2
2386	eor	r2,r6,r7
2387	eor	r0,r5,r5,ror#5
2388	add	r9,r9,r3
2389	and	r2,r2,r5
2390	eor	r3,r0,r5,ror#19
2391	eor	r0,r9,r9,ror#11
2392	eor	r2,r2,r7
2393	add	r8,r8,r3,ror#6
2394	eor	r3,r9,r10
2395	eor	r0,r0,r9,ror#20
2396	add	r8,r8,r2
2397	ldr	r2,[sp,#16]
2398	and	r12,r12,r3
2399	add	r4,r4,r8
2400	add	r8,r8,r0,ror#2
2401	eor	r12,r12,r10
2402	vst1.32	{q8},[r1,:128]!
2403	add	r7,r7,r2
2404	eor	r2,r5,r6
2405	eor	r0,r4,r4,ror#5
2406	add	r8,r8,r12
2407	vld1.32	{q8},[r14,:128]!
2408	and	r2,r2,r4
2409	eor	r12,r0,r4,ror#19
2410	eor	r0,r8,r8,ror#11
2411	eor	r2,r2,r6
2412	vrev32.8	q1,q1
2413	add	r7,r7,r12,ror#6
2414	eor	r12,r8,r9
2415	eor	r0,r0,r8,ror#20
2416	add	r7,r7,r2
2417	vadd.i32	q8,q8,q1
2418	ldr	r2,[sp,#20]
2419	and	r3,r3,r12
2420	add	r11,r11,r7
2421	add	r7,r7,r0,ror#2
2422	eor	r3,r3,r9
2423	add	r6,r6,r2
2424	eor	r2,r4,r5
2425	eor	r0,r11,r11,ror#5
2426	add	r7,r7,r3
2427	and	r2,r2,r11
2428	eor	r3,r0,r11,ror#19
2429	eor	r0,r7,r7,ror#11
2430	eor	r2,r2,r5
2431	add	r6,r6,r3,ror#6
2432	eor	r3,r7,r8
2433	eor	r0,r0,r7,ror#20
2434	add	r6,r6,r2
2435	ldr	r2,[sp,#24]
2436	and	r12,r12,r3
2437	add	r10,r10,r6
2438	add	r6,r6,r0,ror#2
2439	eor	r12,r12,r8
2440	add	r5,r5,r2
2441	eor	r2,r11,r4
2442	eor	r0,r10,r10,ror#5
2443	add	r6,r6,r12
2444	and	r2,r2,r10
2445	eor	r12,r0,r10,ror#19
2446	eor	r0,r6,r6,ror#11
2447	eor	r2,r2,r4
2448	add	r5,r5,r12,ror#6
2449	eor	r12,r6,r7
2450	eor	r0,r0,r6,ror#20
2451	add	r5,r5,r2
2452	ldr	r2,[sp,#28]
2453	and	r3,r3,r12
2454	add	r9,r9,r5
2455	add	r5,r5,r0,ror#2
2456	eor	r3,r3,r7
2457	add	r4,r4,r2
2458	eor	r2,r10,r11
2459	eor	r0,r9,r9,ror#5
2460	add	r5,r5,r3
2461	and	r2,r2,r9
2462	eor	r3,r0,r9,ror#19
2463	eor	r0,r5,r5,ror#11
2464	eor	r2,r2,r11
2465	add	r4,r4,r3,ror#6
2466	eor	r3,r5,r6
2467	eor	r0,r0,r5,ror#20
2468	add	r4,r4,r2
2469	ldr	r2,[sp,#32]
2470	and	r12,r12,r3
2471	add	r8,r8,r4
2472	add	r4,r4,r0,ror#2
2473	eor	r12,r12,r6
2474	vst1.32	{q8},[r1,:128]!
2475	add	r11,r11,r2
2476	eor	r2,r9,r10
2477	eor	r0,r8,r8,ror#5
2478	add	r4,r4,r12
2479	vld1.32	{q8},[r14,:128]!
2480	and	r2,r2,r8
2481	eor	r12,r0,r8,ror#19
2482	eor	r0,r4,r4,ror#11
2483	eor	r2,r2,r10
2484	vrev32.8	q2,q2
2485	add	r11,r11,r12,ror#6
2486	eor	r12,r4,r5
2487	eor	r0,r0,r4,ror#20
2488	add	r11,r11,r2
2489	vadd.i32	q8,q8,q2
2490	ldr	r2,[sp,#36]
2491	and	r3,r3,r12
2492	add	r7,r7,r11
2493	add	r11,r11,r0,ror#2
2494	eor	r3,r3,r5
2495	add	r10,r10,r2
2496	eor	r2,r8,r9
2497	eor	r0,r7,r7,ror#5
2498	add	r11,r11,r3
2499	and	r2,r2,r7
2500	eor	r3,r0,r7,ror#19
2501	eor	r0,r11,r11,ror#11
2502	eor	r2,r2,r9
2503	add	r10,r10,r3,ror#6
2504	eor	r3,r11,r4
2505	eor	r0,r0,r11,ror#20
2506	add	r10,r10,r2
2507	ldr	r2,[sp,#40]
2508	and	r12,r12,r3
2509	add	r6,r6,r10
2510	add	r10,r10,r0,ror#2
2511	eor	r12,r12,r4
2512	add	r9,r9,r2
2513	eor	r2,r7,r8
2514	eor	r0,r6,r6,ror#5
2515	add	r10,r10,r12
2516	and	r2,r2,r6
2517	eor	r12,r0,r6,ror#19
2518	eor	r0,r10,r10,ror#11
2519	eor	r2,r2,r8
2520	add	r9,r9,r12,ror#6
2521	eor	r12,r10,r11
2522	eor	r0,r0,r10,ror#20
2523	add	r9,r9,r2
2524	ldr	r2,[sp,#44]
2525	and	r3,r3,r12
2526	add	r5,r5,r9
2527	add	r9,r9,r0,ror#2
2528	eor	r3,r3,r11
2529	add	r8,r8,r2
2530	eor	r2,r6,r7
2531	eor	r0,r5,r5,ror#5
2532	add	r9,r9,r3
2533	and	r2,r2,r5
2534	eor	r3,r0,r5,ror#19
2535	eor	r0,r9,r9,ror#11
2536	eor	r2,r2,r7
2537	add	r8,r8,r3,ror#6
2538	eor	r3,r9,r10
2539	eor	r0,r0,r9,ror#20
2540	add	r8,r8,r2
2541	ldr	r2,[sp,#48]
2542	and	r12,r12,r3
2543	add	r4,r4,r8
2544	add	r8,r8,r0,ror#2
2545	eor	r12,r12,r10
2546	vst1.32	{q8},[r1,:128]!
2547	add	r7,r7,r2
2548	eor	r2,r5,r6
2549	eor	r0,r4,r4,ror#5
2550	add	r8,r8,r12
2551	vld1.32	{q8},[r14,:128]!
2552	and	r2,r2,r4
2553	eor	r12,r0,r4,ror#19
2554	eor	r0,r8,r8,ror#11
2555	eor	r2,r2,r6
2556	vrev32.8	q3,q3
2557	add	r7,r7,r12,ror#6
2558	eor	r12,r8,r9
2559	eor	r0,r0,r8,ror#20
2560	add	r7,r7,r2
2561	vadd.i32	q8,q8,q3
2562	ldr	r2,[sp,#52]
2563	and	r3,r3,r12
2564	add	r11,r11,r7
2565	add	r7,r7,r0,ror#2
2566	eor	r3,r3,r9
2567	add	r6,r6,r2
2568	eor	r2,r4,r5
2569	eor	r0,r11,r11,ror#5
2570	add	r7,r7,r3
2571	and	r2,r2,r11
2572	eor	r3,r0,r11,ror#19
2573	eor	r0,r7,r7,ror#11
2574	eor	r2,r2,r5
2575	add	r6,r6,r3,ror#6
2576	eor	r3,r7,r8
2577	eor	r0,r0,r7,ror#20
2578	add	r6,r6,r2
2579	ldr	r2,[sp,#56]
2580	and	r12,r12,r3
2581	add	r10,r10,r6
2582	add	r6,r6,r0,ror#2
2583	eor	r12,r12,r8
2584	add	r5,r5,r2
2585	eor	r2,r11,r4
2586	eor	r0,r10,r10,ror#5
2587	add	r6,r6,r12
2588	and	r2,r2,r10
2589	eor	r12,r0,r10,ror#19
2590	eor	r0,r6,r6,ror#11
2591	eor	r2,r2,r4
2592	add	r5,r5,r12,ror#6
2593	eor	r12,r6,r7
2594	eor	r0,r0,r6,ror#20
2595	add	r5,r5,r2
2596	ldr	r2,[sp,#60]
2597	and	r3,r3,r12
2598	add	r9,r9,r5
2599	add	r5,r5,r0,ror#2
2600	eor	r3,r3,r7
2601	add	r4,r4,r2
2602	eor	r2,r10,r11
2603	eor	r0,r9,r9,ror#5
2604	add	r5,r5,r3
2605	and	r2,r2,r9
2606	eor	r3,r0,r9,ror#19
2607	eor	r0,r5,r5,ror#11
2608	eor	r2,r2,r11
2609	add	r4,r4,r3,ror#6
2610	eor	r3,r5,r6
2611	eor	r0,r0,r5,ror#20
2612	add	r4,r4,r2
2613	ldr	r2,[sp,#64]
2614	and	r12,r12,r3
2615	add	r8,r8,r4
2616	add	r4,r4,r0,ror#2
2617	eor	r12,r12,r6
2618	vst1.32	{q8},[r1,:128]!
2619	ldr	r0,[r2,#0]
2620	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2621	ldr	r12,[r2,#4]
2622	ldr	r3,[r2,#8]
2623	ldr	r1,[r2,#12]
2624	add	r4,r4,r0			@ accumulate
2625	ldr	r0,[r2,#16]
2626	add	r5,r5,r12
2627	ldr	r12,[r2,#20]
2628	add	r6,r6,r3
2629	ldr	r3,[r2,#24]
2630	add	r7,r7,r1
2631	ldr	r1,[r2,#28]
2632	add	r8,r8,r0
2633	str	r4,[r2],#4
2634	add	r9,r9,r12
2635	str	r5,[r2],#4
2636	add	r10,r10,r3
2637	str	r6,[r2],#4
2638	add	r11,r11,r1
2639	str	r7,[r2],#4
2640	stmia	r2,{r8-r11}
2641
2642	ittte	ne
2643	movne	r1,sp
2644	ldrne	r2,[sp,#0]
2645	eorne	r12,r12,r12
2646	ldreq	sp,[sp,#76]			@ restore original sp
2647	itt	ne
2648	eorne	r3,r5,r6
2649	bne	.L_00_48
2650
2651	ldmia	sp!,{r4-r12,pc}
2652.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2653#endif
2654#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2655
2656# ifdef __thumb2__
2657#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2658# else
2659#  define INST(a,b,c,d)	.byte	a,b,c,d
2660# endif
2661
.type	sha256_block_data_order_armv8,%function
.align	5
@ ----------------------------------------------------------------------
@ void sha256_block_data_order_armv8(u32 ctx[8], const void *inp, size_t num)
@
@ ARMv8 Cryptography Extension code path.  The SHA-256 instructions
@ (sha256h, sha256h2, sha256su0, sha256su1) are emitted as raw byte
@ sequences through the INST macro defined above, so the file still
@ assembles with toolchains that predate the crypto extension; the
@ Thumb-2 variant of INST reorders the bytes to match 16-bit halfword
@ encoding.
@
@ In:   r0 = hash context (eight big-endian 32-bit state words)
@       r1 = input data
@       r2 = number of 64-byte blocks to process
@ Out:  updated state written back through r0
@ Uses: r3 (K256 pointer), q0-q2, q8-q15; no stack, no callee-saved
@       core registers touched, so no prologue is needed.
@ ----------------------------------------------------------------------
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load state: q0 = ABCD, q1 = EFGH
# ifdef __thumb2__
	adr	r3,.LARMv8		@ Thumb-2 adr can reach .LARMv8 ...
	sub	r3,r3,#.LARMv8-K256	@ ... then step back to the K256 table
# else
	adrl	r3,K256			@ r3 = round-constant table K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

.Loop_v8:
	@ Load one 64-byte block into q8-q11 and byte-swap each word
	@ (input is big-endian, NEON lanes are little-endian).
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2	@ set Z if this is the last block (tested at bottom)
	@ Rounds 0..47: twelve groups of four rounds.  Each group adds the
	@ round constants to one schedule vector (q12/q13 alternate as the
	@ W+K operand), runs sha256h/sha256h2 on the state, and extends the
	@ message schedule with sha256su0/sha256su1 for a later group.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48..63: last four groups need no further schedule
	@ expansion, so only sha256h/sha256h2 are issued.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Add the state saved in q14/q15 before this block, then loop
	@ while the `teq r1,r2` above left Z clear (more input remains).
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]	@ store updated state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2803#endif
2804.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2805.align	2
2806#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2807.comm   OPENSSL_armcap_P,4,4
2808#endif
2809