1
2
3
4 !
5 ! Fast SH memset
6 !
7 ! by Toshiyasu Morita (tm@netcom.com)
8 !
9 ! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
10 ! Copyright 2002 SuperH Ltd.
11 !
12
! SHHI and SHLO name the two 64-bit register shifts (shlld = shift left,
! shlrd = logical shift right); which one each macro expands to depends on
! the target byte order. The short-fill path of memset applies SHHI to an
! all-ones value to build a byte mask; SHLO is not referenced in the part
! of the file visible here.
! NOTE(review): no include that defines __BYTE_ORDER or __LITTLE_ENDIAN is
! visible in this chunk. If neither macro is defined the preprocessor
! compares 0 with 0 and always selects the first branch - confirm that the
! build supplies these definitions.
13 #if __BYTE_ORDER == __LITTLE_ENDIAN
14 #define SHHI shlld
15 #define SHLO shlrd
16 #else
17 #define SHHI shlrd
18 #define SHLO shlld
19 #endif
20
! Code is placed in the .text..SHmedia32 section and memset is exported
! as a global function symbol.
21 .section .text..SHmedia32,"ax"
22 .globl memset
23 .type memset, @function
24
! Entry alignment; presumably a power-of-two request (2**5 = 32 bytes) -
! TODO confirm against the assembler in use.
25 .align 5
26
!
! memset - fill a byte range with one byte value.
!
! Registers as used by the code below:
!   In:   r2  = destination address (used as the store base)
!         r3  = fill value; its low byte is replicated to all 8 bytes
!         r4  = number of bytes to fill
!         r18 = address to continue at when done (loaded into tr2)
!   Out:  r2 is never written, so it still holds the destination address.
!   Work: r5, r7, r8, r9, r20, r22, r23, r24, r25, tr0, tr1, tr2
!         (r3 and r4 are also overwritten)
!
! Strategy:
!   - If the fill stays inside one aligned quadword (8 bytes), merge the
!     fill bytes with the existing memory contents and issue one store.
!   - Otherwise store the partial head with stlo.q, the partial tail with
!     sthi.q, and cover the aligned quadwords in between either with a
!     few unrolled stores (short case) or the 32-byte loop (long case).
!     Stores are allowed to overlap each other.
! The /u and /l branch suffixes are branch-likelihood hints (unlikely and
! likely); they do not change which way a branch goes.
!
27 memset:
28 pta/l multiquad, tr0 ! tr0 = multiquad
29 andi r2, 7, r22 ! r22 = misalignment of dest within its quadword
30 ptabs r18, tr2 ! tr2 = exit target taken from r18
31 mshflo.b r3,r3,r3 ! double up the low bytes: low 16 bits = fill,fill
32 add r4, r22, r23 ! r23 = length + misalignment
33 mperm.w r3, r63, r3 ! control r63 is zero: copy word 0 to all 4 words
34
35 movi 8, r9 ! r9 = 8, reused for the loop-entry test below
36 bgtu/u r23, r9, tr0 ! fill reaches past the first quadword -> multiquad
37
! Single-quadword case: length + misalignment <= 8.
38 beqi/u r4, 0, tr2 ! length 0: leave before any memory access
39 ldlo.q r2, 0, r7 ! r7 = current bytes from dest to end of quadword
40 shlli r4, 2, r4 ! r4 = length * 4 (half of the bit count)
41 movi -1, r8 ! r8 = all ones
! Shift by length*4 twice, i.e. by length*8 bits in total. Doing it in two
! steps keeps each shift count at 32 or less, so length == 8 really clears
! the whole mask (presumably a single shift by 64 would not - confirm).
42 SHHI r8, r4, r8
43 SHHI r8, r4, r8 ! r8 = 0 in the bytes to fill, 1 in bytes to keep
44 mcmv r7, r8, r3 ! r3 = old memory where mask is 1, fill elsewhere
45 stlo.q r2, 0, r3 ! write merged bytes from dest to end of quadword
46 blink tr2, r63 ! leave; link value discarded into r63
47
! Multi-quadword case.
48 multiquad:
49 pta/l lastquad, tr0 ! tr0 = lastquad
50 stlo.q r2, 0, r3 ! head: fill from dest to end of its quadword
51 shlri r23, 3, r24 ! r24 = (length + misalignment) / 8
52 add r2, r4, r5 ! r5 = end address (dest + length)
53 beqi/u r24, 1, tr0 ! only head and tail to write -> lastquad
54 pta/l loop, tr1 ! tr1 = loop
55 sub r2, r22, r25 ! r25 = dest rounded down to a quadword boundary
56 andi r5, -8, r20 ! r20 = end address rounded down to a boundary
57 addi r20, -7*8, r8 ! r8 = r20 - 56, bound used by the loop test
58
! The loop is entered on the quadword count (r24 >= 8) rather than on r8.
! NOTE(review): presumably because r20 - 56 can wrap for low addresses -
! confirm.
59 bge/u r24, r9, tr1 ! r24 >= 8 -> loop
! Here r24 is 2..7: fill inwards from both ends, one pair per step.
60 st.q r25, 8, r3 ! first full quadword after the head
61 st.q r20, -8, r3 ! last full quadword before the tail
62 shlri r24, 1, r24 ! r24 = r24 / 2, now 1, 2 or 3
63 beqi/u r24, 1, tr0 ! count was 2 or 3: middle done -> lastquad
64 st.q r25, 16, r3
65 st.q r20, -16, r3
66 beqi/u r24, 2, tr0 ! count was 4 or 5: middle done -> lastquad
67 st.q r25, 24, r3 ! count was 6 or 7
68 st.q r20, -24, r3
69 lastquad:
70 sthi.q r5, -1, r3 ! tail: fill from quadword start up to end - 1
71 blink tr2,r63 ! leave; link value discarded into r63
72
! Long case: four aligned quadwords (32 bytes) per iteration.
73 loop:
! The alloco instruction below is disabled (commented out) in this
! source; the reason is not recorded here.
74 !!! alloco r25, 32
75
76
77 st.q r25, 8, r3
78 st.q r25, 16, r3
79 st.q r25, 24, r3
80 st.q r25, 32, r3
81 addi r25, 32, r25 ! r25 = address of the last quadword just written
82 bgeu/l r8, r25, tr1 ! repeat while r25 <= r20 - 56 (unsigned)
83
! At most five full quadwords remain below r20; write all five
! unconditionally, possibly overlapping quadwords the loop already wrote.
84 st.q r20, -40, r3
85 st.q r20, -32, r3
86 st.q r20, -24, r3
87 st.q r20, -16, r3
88 st.q r20, -8, r3
89 sthi.q r5, -1, r3 ! tail: fill from quadword start up to end - 1
90 blink tr2,r63 ! leave; link value discarded into r63
91
92 .size memset,.-memset