/*
 *  linux/arch/arm/lib/memzero.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5
	.word	0
/*
 * Align the pointer in r0.  r3 contains the number of bytes that we are
 * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
 * don't bother; we use byte stores instead.
 */
UNWIND(	.fnstart			)
1:	subs	r1, r1, #4		@ 1 do we have enough
	blt	5f			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r2, [r0], #1		@ 1
	strleb	r2, [r0], #1		@ 1
	strb	r2, [r0], #1		@ 1
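/*
 * The three conditional byte stores above write exactly 4 - r3 bytes
 * (r3 is 1, 2 or 3 here), advancing r0 to the next word boundary.
 */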
	add	r1, r1, r3		@ 1 (r1 = r1 - (4 - r3))
/*
 * The pointer is now aligned and the length is adjusted.  Try doing the
 * memzero again.
 */

ENTRY(__memzero)
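/*
 * On entry r0 holds the destination pointer and r1 the byte count
 * (the C prototype is void __memzero(void *ptr, __kernel_size_t n)).
 * r2 is loaded with the zero value used by every store below.
 */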
	mov	r2, #0			@ 1
	ands	r3, r0, #3		@ 1 unaligned?
	bne	1b			@ 1
/*
 * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
 */
	cmp	r1, #16			@ 1 we can skip this chunk if we
	blt	4f			@ 1 have < 16 bytes

#if ! CALGN(1)+0
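/*
 * CALGN() (see asm/assembler.h) expands its argument only on CPUs that
 * benefit from cache-line aligning bulk writes (e.g. Feroceon), so this
 * first variant is the one built on most platforms.
 */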

/*
 * We need an extra register for this loop - save the return address and
 * use the LR
 */
	str	lr, [sp, #-4]!		@ 1
UNWIND(	.fnend				)
UNWIND(	.fnstart			)
UNWIND(	.save 	{lr}			)
	mov	ip, r2			@ 1
	mov	lr, r2			@ 1
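/*
 * r2, r3, ip and lr all hold zero now, giving four registers (16 bytes)
 * per store-multiple below.
 */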

3:	subs	r1, r1, #64		@ 1 write 64 bytes out per loop
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	bgt	3b			@ 1
	ldmeqfd	sp!, {pc}		@ 1/2 quick exit
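/*
 * If the count hit exactly zero, the ldmeqfd above pops the saved lr
 * straight into pc and returns; otherwise fewer than 64 bytes remain.
 */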
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r1, #32			@ 1
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	tst	r1, #16			@ 1 16 bytes or more?
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	ldr	lr, [sp], #4		@ 1
UNWIND(	.fnend				)

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r7, lr}
UNWIND(	.fnend		       )
UNWIND(	.fnstart	       )
UNWIND(	.save 	{r4-r7, lr}    )
	mov	r4, r2
	mov	r5, r2
	mov	r6, r2
	mov	r7, r2
	mov	ip, r2
	mov	lr, r2

	cmp	r1, #96
	andgts	ip, r0, #31
	ble	3f
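/*
 * Only align to the cache line when more than 96 bytes remain: andgts
 * runs only if r1 > 96 and leaves ip = r0 & 31; if that is zero (already
 * aligned) or the count is too small, the ble skips straight to the
 * bulk loop.
 */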

	rsb	ip, ip, #32
	sub	r1, r1, ip
	movs	ip, ip, lsl #(32 - 4)
	stmcsia	r0!, {r4, r5, r6, r7}
	stmmiia	r0!, {r4, r5}
	movs	ip, ip, lsl #2
	strcs	r2, [r0], #4
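/*
 * ip = 32 - (r0 & 31) is the number of bytes needed to reach the next
 * 32-byte boundary (a multiple of 4, since r0 is already word aligned).
 * The lsl #(32 - 4) moves bit 4 of ip into C and bit 3 into N, selecting
 * the conditional 16- and 8-byte head stores; the second shift moves
 * bit 2 into C for the final 4-byte store.
 */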

3:	subs	r1, r1, #64
	stmgeia	r0!, {r2-r7, ip, lr}
	stmgeia	r0!, {r2-r7, ip, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r7, pc}

	tst	r1, #32
	stmneia	r0!, {r2-r7, ip, lr}
	tst	r1, #16
	stmneia	r0!, {r4-r7}
	ldmfd	sp!, {r4-r7, lr}
UNWIND(	.fnend		       )

#endif

UNWIND(	.fnstart			)
4:	tst	r1, #8			@ 1 8 bytes or more?
	stmneia	r0!, {r2, r3}		@ 2
	tst	r1, #4			@ 1 4 bytes or more?
	strne	r2, [r0], #4		@ 1
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r1, #2			@ 1 2 bytes or more?
	strneb	r2, [r0], #1		@ 1
	strneb	r2, [r0], #1		@ 1
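/*
 * Both conditional byte stores above share the flags from the tst, so
 * either two bytes are written or none.
 */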
	tst	r1, #1			@ 1 a byte left over
	strneb	r2, [r0], #1		@ 1
	ret	lr			@ 1
UNWIND(	.fnend				)
ENDPROC(__memzero)