/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 *
 * Interface, as used by the code below:
 *   In:    $16 = dest, $17 = src, $18 = n (byte count)
 *          $27 = procedure value (consumed by ldgp), $26 = return address
 *   Out:   $0  = dest
 *   Uses:  $4  = dest cursor, $5 = src cursor,
 *          $1, $2, $3, $6 = scratch,
 *          $18 is consumed as the remaining-byte counter,
 *          $29 is reloaded by ldgp.
 *   No stack is used.
 *
 * Strategy:
 *   - If the two regions do not overlap, branch straight to memcpy
 *     (with $0 already set to dest).
 *   - Otherwise, if dest <= src copy forwards (the "_up" labels); if
 *     dest > src copy backwards starting from the end (the "_dn" labels).
 *   - In either direction, when src and dest share the same alignment
 *     modulo 8: copy single bytes until the dest cursor is 8-byte aligned,
 *     then whole quadwords, then the remaining 0..7 bytes one at a time.
 *     When they are not co-aligned, every byte is copied individually.
 *
 * A single byte is copied by loading the quadwords that contain the source
 * and destination bytes (ldq_u), extracting the source byte (extbl),
 * moving it to the destination byte lane (insbl), clearing that lane in
 * the destination quadword (mskbl), merging (bis) and storing the
 * quadword back (stq_u).
 *
 * NOTE(review): the unop/nop/fnop instructions and the blank-line grouping
 * look like EV5 issue-slot/alignment padding.  Do not reorder or remove
 * instructions without re-checking the scheduling.
 */

	.set noat
	.set noreorder
	.text

	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)
	unop
	nop
	.prologue 1

	/* Overlap test: $4 = dest + n, $5 = src + n.  */
	addq $16,$18,$4
	addq $17,$18,$5
	cmpule $4,$17,$1		/*  dest + n <= src  */
	cmpule $5,$16,$2		/*  dest >= src + n  */

	bis $1,$2,$1			/* non-zero => regions are disjoint */
	mov $16,$0			/* return value = dest */
	xor $16,$17,$2			/* low bits differ => not co-aligned */
	bne $1,memcpy			!samegp

	and $2,7,$2			/* Test for src/dest co-alignment.  */
	and $16,7,$1			/* dest start alignment (used by the up path) */
	cmpule $16,$17,$3
	bne $3,$memmove_up		/* dest <= src: copy forwards */

	/*
	 * Backwards copy (dest > src).  $4/$5 point one past the last
	 * byte of dest/src and are walked downwards.
	 */
	and $4,7,$1			/* alignment of the dest end pointer */
	bne $2,$misaligned_dn
	unop
	beq $1,$skip_aligned_byte_loop_head_dn

	/* Copy bytes downwards until the dest cursor is 8-byte aligned.  */
$aligned_byte_loop_head_dn:
	lda $4,-1($4)
	lda $5,-1($5)
	unop
	ble $18,$egress			/* nothing left to copy */

	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */
	lda $18,-1($18)
	extbl $3,$5,$1			/* $1 = source byte */

	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
	bis $1,$2,$1
	and $4,7,$6			/* dest cursor aligned yet? */

	stq_u $1,0($4)
	bne $6,$aligned_byte_loop_head_dn

	/* Bias the count by -8 so the word loop can test its sign.  */
$skip_aligned_byte_loop_head_dn:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_dn	/* fewer than 8 bytes left */

	/* Copy one aligned quadword per iteration, moving downwards.  */
$aligned_word_loop_dn:
	ldq $1,-8($5)
	nop
	lda $5,-8($5)
	lda $18,-8($18)

	stq $1,-8($4)
	nop
	lda $4,-8($4)
	bge $18,$aligned_word_loop_dn

	/* Undo the -8 bias; 1..7 bytes may remain for the byte tail.  */
$skip_aligned_word_loop_dn:
	lda $18,8($18)
	bgt $18,$byte_loop_tail_dn
	unop
	ret $31,($26),1

	/* Not co-aligned: every byte goes through the byte tail loop.  */
	.align 4
$misaligned_dn:
	nop
	fnop
	unop
	beq $18,$egress

	/* Copy the remaining $18 bytes one at a time, moving downwards.  */
$byte_loop_tail_dn:
	ldq_u $3,-1($5)			/* quadword holding the source byte */
	ldq_u $2,-1($4)			/* quadword holding the dest byte */
	lda $5,-1($5)
	lda $4,-1($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	stq_u $1,0($4)
	bgt $18,$byte_loop_tail_dn
	br $egress

	/*
	 * Forwards copy (dest <= src).  $4/$5 start at dest/src and are
	 * walked upwards.  On entry $2 is the co-alignment test result and
	 * $1 is dest & 7.
	 */
$memmove_up:
	mov $16,$4
	mov $17,$5
	bne $2,$misaligned_up
	beq $1,$skip_aligned_byte_loop_head_up

	/* Copy bytes upwards until the dest cursor is 8-byte aligned.  */
$aligned_byte_loop_head_up:
	unop
	ble $18,$egress			/* nothing left to copy */
	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */

	lda $18,-1($18)
	extbl $3,$5,$1			/* $1 = source byte */
	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */

	bis $1,$2,$1
	lda $5,1($5)
	stq_u $1,0($4)
	lda $4,1($4)

	and $4,7,$6			/* dest cursor aligned yet? */
	bne $6,$aligned_byte_loop_head_up

	/* Bias the count by -8 so the word loop can test its sign.  */
$skip_aligned_byte_loop_head_up:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_up	/* fewer than 8 bytes left */

	/* Copy one aligned quadword per iteration, moving upwards.  */
$aligned_word_loop_up:
	ldq $1,0($5)
	nop
	lda $5,8($5)
	lda $18,-8($18)

	stq $1,0($4)
	nop
	lda $4,8($4)
	bge $18,$aligned_word_loop_up

	/* Undo the -8 bias; 1..7 bytes may remain for the byte tail.  */
$skip_aligned_word_loop_up:
	lda $18,8($18)
	bgt $18,$byte_loop_tail_up
	unop
	ret $31,($26),1

	/* Not co-aligned: every byte goes through the byte tail loop.  */
	.align 4
$misaligned_up:
	nop
	fnop
	unop
	beq $18,$egress

	/* Copy the remaining $18 bytes one at a time, moving upwards.  */
$byte_loop_tail_up:
	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */
	lda $18,-1($18)
	extbl $3,$5,$1

	insbl $1,$4,$1
	mskbl $2,$4,$2
	bis $1,$2,$1
	stq_u $1,0($4)

	lda $5,1($5)
	lda $4,1($4)
	nop
	bgt $18,$byte_loop_tail_up

	/* Common exit; $0 still holds the original dest.  */
$egress:
	ret $31,($26),1
	nop
	nop
	nop

	.end memmove