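/*
 * Xtensa assembly implementations of the standard library functions
 * memcpy() and memmove() (provided as weak aliases of __memcpy and
 * __memmove), plus bcopy().
 */
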
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
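
/*
 * void *memcpy(void *dst, const void *src, size_t len);
 *
 * The general-case algorithm is as follows:
 *   If the destination is unaligned, align it by conditionally
 *     copying 1 and 2 bytes.
 *   If the source is aligned, copy 16 bytes per iteration with a loop,
 *     then finish up with 8, 4, 2 and 1 byte copies conditional on
 *     the length.
 *   If the source is unaligned, do the same, but use the __src_b
 *     funnel-shift helper to combine adjacent source words.
 *
 * Register use:
 *	a2/ dst on entry, return value
 *	a3/ src
 *	a4/ length
 *	a5/ working copy of dst
 *	a6 - a11/ temporaries
 */
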
	.text

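/*
 * Byte-by-byte copy.
 */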
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif
.Lnextbyte:
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lnextbyte	# continue loop if $a3:src != $a7:src_end
#endif
.Lbytecopydone:
	abi_ret_default
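
	/*
	 * Destination is unaligned.
	 */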
	.align	4
.Ldst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	addi	a4, a4, -1
	s8i	a6, a5, 0
	addi	a5, a5, 1
	_bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	_bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	# copy 2 bytes
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a3, a3, 2
	addi	a4, a4, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a5, a5, 2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

ENTRY(__memcpy)
WEAK(memcpy)

	abi_entry_default

	mov	a5, a2		# copy dst so that a2 is return value
.Lcommon:
	_bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	_bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lsrcunaligned	# then use shifting copy
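
	/*
	 * copy 16 bytes per iteration for word-aligned dst and word-aligned src
	 */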
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif
.Loop1:
	l32i	a6, a3, 0
	l32i	a7, a3, 4
	s32i	a6, a5, 0
	l32i	a6, a3, 8
	s32i	a7, a5, 4
	l32i	a7, a3, 12
	s32i	a6, a5, 8
	addi	a3, a3, 16
	s32i	a7, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .Loop1	# continue loop if a3:src != a8:src_end
#endif
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	l32i	a6, a3, 0
	l32i	a7, a3, 4
	addi	a3, a3, 8
	s32i	a6, a5, 0
	s32i	a7, a5, 4
	addi	a5, a5, 8
.L2:
	bbsi.l	a4, 2, .L3
	bbsi.l	a4, 1, .L4
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L3:
	# copy 4 bytes
	l32i	a6, a3, 0
	addi	a3, a3, 4
	s32i	a6, a5, 0
	addi	a5, a5, 4
	bbsi.l	a4, 1, .L4
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L4:
	# copy 2 bytes
	l16ui	a6, a3, 0
	addi	a3, a3, 2
	s16i	a6, a5, 0
	addi	a5, a5, 2
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L5:
	# copy 1 byte
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	abi_ret_default
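
	/*
	 * Destination is aligned; source is unaligned.
	 */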
	.align	4
.Lsrcunaligned:
	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies

	__ssa8	a3		# set shift amount from byte offset
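
/*
 * SIM_CHECKS_ALIGNMENT: when set (or when unaligned loads would fault),
 * round the source pointer down to a word boundary before the shifted
 * copy and remember the offset in a11 so it can be restored afterwards.
 */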
#define SIM_CHECKS_ALIGNMENT	1
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
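	/*
	 * copy 16 bytes per iteration for word-aligned dst and unaligned src
	 */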
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else
	beqz	a7, .Loop2done
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#endif
.Loop2:
	l32i	a7, a3, 4
	l32i	a8, a3, 8
	__src_b	a6, a6, a7
	s32i	a6, a5, 0
	l32i	a9, a3, 12
	__src_b	a7, a7, a8
	s32i	a7, a5, 4
	l32i	a6, a3, 16
	__src_b	a8, a8, a9
	s32i	a8, a5, 8
	addi	a3, a3, 16
	__src_b	a9, a9, a6
	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .Loop2	# continue loop if a3:src != a10:src_end
#endif
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	l32i	a7, a3, 4
	l32i	a8, a3, 8
	__src_b	a6, a6, a7
	s32i	a6, a5, 0
	addi	a3, a3, 8
	__src_b	a7, a7, a8
	s32i	a7, a5, 4
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	l32i	a7, a3, 4
	addi	a3, a3, 4
	__src_b	a6, a6, a7
	s32i	a6, a5, 0
	addi	a5, a5, 4
	mov	a6, a7
.L13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .L14
	bbsi.l	a4, 0, .L15
.Ldone:	abi_ret_default
.L14:
	# copy 2 bytes
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a3, a3, 2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a5, a5, 2
	bbsi.l	a4, 0, .L15
	abi_ret_default
.L15:
	# copy 1 byte
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	abi_ret_default

ENDPROC(__memcpy)
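
/*
 * void bcopy(const void *src, void *dest, size_t n);
 *
 * bcopy() has its source and destination arguments swapped relative to
 * memmove(), so swap a2 and a3 and fall into the common memmove path.
 */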
ENTRY(bcopy)

	abi_entry_default

	# a2=src, a3=dst, a4=len
	mov	a5, a3
	mov	a3, a2
	mov	a2, a5
	j	.Lmovecommon	# go to common code for memmove+bcopy

ENDPROC(bcopy)

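/*
 * void *memmove(void *dst, const void *src, size_t len);
 *
 * This function supports overlapping source and destination buffers:
 * if dst - src >= len (unsigned), the copy can safely be done forwards
 * and the __memcpy path (.Lcommon) is reused; otherwise the copy is
 * done backwards, starting from the end of both buffers.
 *
 * Register use matches __memcpy:
 *	a2/ dst on entry, return value
 *	a3/ src
 *	a4/ length
 *	a5/ working copy of dst
 *	a6 - a11/ temporaries
 */

/*
 * Byte-by-byte backward copy.
 */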
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbackbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbackbytecopydone
#else
	beqz	a4, .Lbackbytecopydone
	sub	a7, a3, a4	# a7 = start address for source
#endif
.Lbacknextbyte:
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lbacknextbyte	# continue loop if
					# $a3:src != $a7:src_start
#endif
.Lbackbytecopydone:
	abi_ret_default
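
	/*
	 * Destination is unaligned.
	 */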
	.align	4
.Lbackdst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbackbytecopy	# do short copies byte by byte

	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
	addi	a4, a4, -1
	_bbci.l	a5, 1, .Lbackdstaligned	# if dst is now aligned, then
					# return to main algorithm
.Lbackdst2mod4:	# dst 16-bit aligned
	_bltui	a4, 6, .Lbackbytecopy	# do short copies byte by byte
	# copy 2 bytes
	addi	a3, a3, -2
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a5, a5, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a4, a4, -2
	j	.Lbackdstaligned	# dst is now aligned,
					# return to main algorithm

ENTRY(__memmove)
WEAK(memmove)

	abi_entry_default

	mov	a5, a2		# copy dst so that a2 is return value
.Lmovecommon:
	sub	a6, a5, a3
	bgeu	a6, a4, .Lcommon	# forward copy is safe if dst - src >= len

	add	a5, a5, a4	# a5 = end of dst
	add	a3, a3, a4	# a3 = end of src

	_bbsi.l	a5, 0, .Lbackdst1mod2	# if dst is 1 mod 2
	_bbsi.l	a5, 1, .Lbackdst2mod4	# if dst is 2 mod 4
.Lbackdstaligned:	# return here from .Lbackdst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lbacksrcunaligned	# then use shifting copy
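
	/*
	 * copy 16 bytes per iteration for word-aligned dst and word-aligned
	 * src, moving backwards
	 */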
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop1done
#else
	beqz	a7, .backLoop1done
	slli	a8, a7, 4
	sub	a8, a3, a8	# a8 = start of first 16B source chunk
#endif
.backLoop1:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a6, a3, 8
	addi	a5, a5, -16
	s32i	a7, a5, 12
	l32i	a7, a3, 4
	s32i	a6, a5, 8
	l32i	a6, a3, 0
	s32i	a7, a5, 4
	s32i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .backLoop1	# continue loop if a3:src != a8:src_start
#endif
.backLoop1done:
	bbci.l	a4, 3, .Lback2
	# copy 8 bytes
	addi	a3, a3, -8
	l32i	a6, a3, 0
	l32i	a7, a3, 4
	addi	a5, a5, -8
	s32i	a6, a5, 0
	s32i	a7, a5, 4
.Lback2:
	bbsi.l	a4, 2, .Lback3
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback3:
	# copy 4 bytes
	addi	a3, a3, -4
	l32i	a6, a3, 0
	addi	a5, a5, -4
	s32i	a6, a5, 0
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback4:
	# copy 2 bytes
	addi	a3, a3, -2
	l16ui	a6, a3, 0
	addi	a5, a5, -2
	s16i	a6, a5, 0
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback5:
	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
	abi_ret_default
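
	/*
	 * Destination is aligned; source is unaligned.
	 */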
	.align	4
.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies

	__ssa8	a3		# set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT	1

#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
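	/*
	 * copy 16 bytes per iteration for word-aligned dst and unaligned src,
	 * moving backwards
	 */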
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop2done
#else
	beqz	a7, .backLoop2done
	slli	a10, a7, 4
	sub	a10, a3, a10	# a10 = start of first 16B source chunk
#endif
.backLoop2:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a8, a3, 8
	addi	a5, a5, -16
	__src_b	a6, a7, a6
	s32i	a6, a5, 12
	l32i	a9, a3, 4
	__src_b	a7, a8, a7
	s32i	a7, a5, 8
	l32i	a6, a3, 0
	__src_b	a8, a9, a8
	s32i	a8, a5, 4
	__src_b	a9, a6, a9
	s32i	a9, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .backLoop2	# continue loop if a3:src != a10:src_start
#endif
.backLoop2done:
	bbci.l	a4, 3, .Lback12
	# copy 8 bytes
	addi	a3, a3, -8
	l32i	a7, a3, 4
	l32i	a8, a3, 0
	addi	a5, a5, -8
	__src_b	a6, a7, a6
	s32i	a6, a5, 4
	__src_b	a7, a8, a7
	s32i	a7, a5, 0
	mov	a6, a8
.Lback12:
	bbci.l	a4, 2, .Lback13
	# copy 4 bytes
	addi	a3, a3, -4
	l32i	a7, a3, 0
	addi	a5, a5, -4
	__src_b	a6, a7, a6
	s32i	a6, a5, 0
	mov	a6, a7
.Lback13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .Lback14
	bbsi.l	a4, 0, .Lback15
.Lbackdone:
	abi_ret_default
.Lback14:
	# copy 2 bytes
	addi	a3, a3, -2
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a5, a5, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	bbsi.l	a4, 0, .Lback15
	abi_ret_default
.Lback15:
	# copy 1 byte
	addi	a3, a3, -1
	addi	a5, a5, -1
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	abi_ret_default

ENDPROC(__memmove)