/*
 * Xtensa user-space copy routine.
 *
 *	size_t __xtensa_copy_user(void *dst, const void *src, size_t len);
 *
 * Returns the number of bytes NOT copied, so a return value of zero
 * means the whole buffer was transferred.  Every load/store that may
 * touch user memory is wrapped in EX(10f), which records an exception
 * table entry so that a fault branches to the fixup code at label 10
 * below, where the residual byte count is computed.
 *
 * General algorithm:
 *	- If dst and src are both word-aligned, copy 16 bytes per loop
 *	  iteration, then finish with conditional 8-, 4-, 2- and 1-byte
 *	  copies depending on the remaining length.
 *	- If dst is aligned but src is not, do the same, but use __ssa8
 *	  and __src_b to shift and combine misaligned source words.
 *	- If dst is unaligned, copy one and/or two bytes first to align
 *	  it, then fall into the cases above.
 *
 * Register usage:
 *	a2 / dst on entry, return value on exit
 *	a3 / src
 *	a4 / remaining length
 *	a5 / working dst pointer
 *	a6 - a10, a12 / temporaries
 *	a11 / original length (needed by the fixup code)
 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

	.text
ENTRY(__xtensa_copy_user)

	abi_entry_default

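	# incoming arguments: a2/ dst, a3/ src, a4/ len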
	mov	a5, a2			# copy dst so that a2 is return value
	mov	a11, a4			# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
	srli	a7, a4, 4		# number of loop iterations with 16B
					# per iteration
	movi	a8, 3			# if source is also aligned,
	bnone	a3, a8, .Laligned	# then use word copy
	__ssa8	a3			# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0			# return success for len==0
	abi_ret_default

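/*
 * Destination is unaligned: align it by copying one and/or two leading
 * bytes, then continue with the aligned-destination code above.
 */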
.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

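	# copy 1 byte to make dst 2-byte aligned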
	EX(10f)	l8ui	a6, a3, 0
	addi	a3, a3, 1
	EX(10f)	s8i	a6, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm

.Ldst2mod4:	# dst 16-bit aligned
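	# copy 2 bytes to make dst 4-byte aligned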
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(10f)	l8ui	a6, a3, 0
	EX(10f)	l8ui	a7, a3, 1
	addi	a3, a3, 2
	EX(10f)	s8i	a6, a5, 0
	EX(10f)	s8i	a7, a5, 1
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned		# dst is now aligned, return to main algorithm

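/*
 * Byte-by-byte copy, used for short copies when the destination is
 * unaligned.
 */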
	.align	4
	.byte	0			# 1 mod 4 alignment for LOOPNEZ
					# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4		# a7 = end address for source
#endif
.Lnextbyte:
	EX(10f)	l8ui	a6, a3, 0
	addi	a3, a3, 1
	EX(10f)	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif
.Lbytecopydone:
	movi	a2, 0			# return success for len bytes copied
	abi_ret_default

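/*
 * Destination is word-aligned, source is word-aligned:
 * copy 16 bytes per iteration, then handle the remainder.
 */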
	.align	4			# 1 mod 4 alignment for LOOPNEZ
	.byte	0			# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3		# a8 = end of last 16B source chunk
#endif
.Loop1:
	EX(10f)	l32i	a6, a3, 0
	EX(10f)	l32i	a7, a3, 4
	EX(10f)	s32i	a6, a5, 0
	EX(10f)	l32i	a6, a3, 8
	EX(10f)	s32i	a7, a5, 4
	EX(10f)	l32i	a7, a3, 12
	EX(10f)	s32i	a6, a5, 8
	addi	a3, a3, 16
	EX(10f)	s32i	a7, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif
.Loop1done:
	bbci.l	a4, 3, .L2

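	# copy 8 bytes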
	EX(10f)	l32i	a6, a3, 0
	EX(10f)	l32i	a7, a3, 4
	addi	a3, a3, 8
	EX(10f)	s32i	a6, a5, 0
	EX(10f)	s32i	a7, a5, 4
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3

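	# copy 4 bytes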
	EX(10f)	l32i	a6, a3, 0
	addi	a3, a3, 4
	EX(10f)	s32i	a6, a5, 0
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4

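	# copy 2 bytes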
	EX(10f)	l16ui	a6, a3, 0
	addi	a3, a3, 2
	EX(10f)	s16i	a6, a5, 0
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5

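	# copy 1 byte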
	EX(10f)	l8ui	a6, a3, 0
	EX(10f)	s8i	a6, a5, 0
.L5:
	movi	a2, 0			# return success for len bytes copied
	abi_ret_default

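/*
 * Destination is word-aligned, source is unaligned:
 * copy 16 bytes per iteration, using __src_b (SRC funnel shift, with the
 * shift amount set by __ssa8 above) to combine two misaligned source
 * words into one aligned destination word.
 */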
	.align	4
	.byte	0			# 1 mod 4 alignment for LOOPNEZ
					# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	and	a10, a3, a8		# save unalignment offset for below
	sub	a3, a3, a10		# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(10f)	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3		# a12 = end of last 16B source chunk
#endif
.Loop2:
	EX(10f)	l32i	a7, a3, 4
	EX(10f)	l32i	a8, a3, 8
	__src_b	a6, a6, a7
	EX(10f)	s32i	a6, a5, 0
	EX(10f)	l32i	a9, a3, 12
	__src_b	a7, a7, a8
	EX(10f)	s32i	a7, a5, 4
	EX(10f)	l32i	a6, a3, 16
	__src_b	a8, a8, a9
	EX(10f)	s32i	a8, a5, 8
	addi	a3, a3, 16
	__src_b	a9, a9, a6
	EX(10f)	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif
.Loop2done:
	bbci.l	a4, 3, .L12

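	# copy 8 bytes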
	EX(10f)	l32i	a7, a3, 4
	EX(10f)	l32i	a8, a3, 8
	__src_b	a6, a6, a7
	EX(10f)	s32i	a6, a5, 0
	addi	a3, a3, 8
	__src_b	a7, a7, a8
	EX(10f)	s32i	a7, a5, 4
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13

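	# copy 4 bytes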
	EX(10f)	l32i	a7, a3, 4
	addi	a3, a3, 4
	__src_b	a6, a6, a7
	EX(10f)	s32i	a6, a5, 0
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10		# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14

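	# copy 2 bytes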
	EX(10f)	l8ui	a6, a3, 0
	EX(10f)	l8ui	a7, a3, 1
	addi	a3, a3, 2
	EX(10f)	s8i	a6, a5, 0
	EX(10f)	s8i	a7, a5, 1
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15

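	# copy 1 byte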
	EX(10f)	l8ui	a6, a3, 0
	EX(10f)	s8i	a6, a5, 0
.L15:
	movi	a2, 0			# return success for len bytes copied
	abi_ret_default

ENDPROC(__xtensa_copy_user)
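
/*
 * Usage note (illustration only, not part of this file): callers are
 * expected to wrap this routine in the arch uaccess helpers, roughly:
 *
 *	static inline unsigned long
 *	raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 *	{
 *		return __xtensa_copy_user(to, (__force const void *)from, n);
 *	}
 *
 * so a non-zero result propagates the number of bytes left uncopied.
 */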

	.section .fixup, "ax"
	.align	4

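/*
 * Fixup: a2 still holds the original dst, a5 the dst pointer at the time
 * of the fault, and a11 the original length.
 *	bytes copied			= a5 - a2
 *	return value (bytes not copied)	= a11 - (a5 - a2)
 */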
10:
	sub	a2, a5, a2		# a2 <-- bytes copied
	sub	a2, a11, a2		# a2 <-- bytes not copied == return value
	abi_ret_default