1
2
3
4
5
6 #include <linux/linkage.h>
7
;;; Endian-dependent one-instruction helpers for the unaligned-source paths
;;; of memcpy further down in this file.
;;;   SHIFT_1 / SHIFT_2   : the two opposite shifts applied to every word loaded
;;;                         in the unaligned loops. The SHIFT_1 result is OR-ed
;;;                         with the carried bytes and stored; the SHIFT_2
;;;                         result becomes the new carry.
;;;   MERGE_1 / MERGE_2   : position the first halfword / first byte before they
;;;                         are OR-ed together into the initial carry
;;;                         (source off by 1).
;;;   EXTRACT_1 / EXTRACT_2 : pull the last halfword / last byte out of the
;;;                         carry once the loop is done (source off by 1).
;;; Note that the trailing shift-direction markers on some of the define lines
;;; are part of the macro text and end up as assembler comments after expansion.
8 #ifdef __LITTLE_ENDIAN__
9 # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
10 # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
11 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
;; MERGE_2 expands to nothing here: the first byte is left in bits 7..0.
12 # define MERGE_2(RX,RY,IMM)
;; EXTRACT_1 ignores IMM and masks the low 16 bits instead.
13 # define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
14 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
15 #else
16 # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
17 # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
18 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
19 # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
20 # define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
;; EXTRACT_2 ignores IMM and always shifts right by 8.
21 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
22 #endif
23
;;; Load/store flavour used by the aligned copy loop of memcpy.
;;;   LOADX / STOREX : load / store with address write-back (.ab), stepping the
;;;                    pointer register RX by the access size.
;;;   ZOLSHFT        : right shift that turns a byte count into the trip count
;;;                    of that loop, which issues four LOADX and four STOREX
;;;                    per iteration.
;;;   ZOLAND         : mask that yields the bytes left over after that loop.
24 #ifdef CONFIG_ARC_HAS_LL64
;; 8 byte step per access: 4 x 8 = 32 bytes per iteration (shift 5, mask 0x1F)
25 # define LOADX(DST,RX) ldd.ab DST, [RX, 8]
26 # define STOREX(SRC,RX) std.ab SRC, [RX, 8]
27 # define ZOLSHFT 5
28 # define ZOLAND 0x1F
29 #else
;; 4 byte step per access: 4 x 4 = 16 bytes per iteration (shift 4, mask 0xF)
30 # define LOADX(DST,RX) ld.ab DST, [RX, 4]
31 # define STOREX(SRC,RX) st.ab SRC, [RX, 4]
32 # define ZOLSHFT 4
33 # define ZOLAND 0xF
34 #endif
35
;;; memcpy: copy r2 bytes from the address in r1 to the address in r0.
;;;
;;; Register roles, as used below:
;;;   r0        destination; never written here, so it is intact on return
;;;   r1        source pointer, advanced by every post-incrementing load
;;;   r2        bytes still to be copied
;;;   r3        working copy of the destination, advanced by every store
;;;   r4 - r10  data / scratch (the 64-bit load/store forms presumably also
;;;             touch the odd partner of each named register -- TODO confirm)
;;;   lp_count  trip count of the zero-overhead loops started by lpnz
;;;
;;; Reading notes:
;;;   - The instruction written right after a branch or jump with the .d
;;;     suffix sits in its delay slot and executes whether or not the
;;;     branch is taken.
;;;   - lpnz repeats the instructions up to its target label lp_count times,
;;;     or skips them entirely when the Z flag is set.
;;;
;;; Flow: 0 bytes, return; up to 8 bytes, plain bytewise loop; otherwise
;;; align the destination bytewise, then pick one of four cases on (src & 3).
36 ENTRY_CFI(memcpy)
37 mov.f 0, r2 ; only the flags matter: Z is set when the size is 0
38 ;;; if size is zero, return immediately
39 jz.d [blink]
40 mov r3, r0 ; delay slot: walk dst through r3 so r0 (the return value) is not clobbered
41
42 ;;; if size <= 8, skip all alignment work and copy bytewise
43 cmp r2, 8
44 bls.d @.Lsmallchunk
45 mov.f lp_count, r2 ; delay slot: trip count and flags for the loop at .Lsmallchunk
46
;;; Size is above 8: copy 4 - (dst & 3) single bytes so that r3 becomes
;;; 32bit aligned. The loop is skipped when dst is already aligned (Z set).
47 and.f r4, r0, 0x03
48 rsub lp_count, r4, 4 ; lp_count = 4 - r4
49 lpnz @.Laligndestination
50 ;; LOOP BEGIN
51 ldb.ab r5, [r1,1]
52 sub r2, r2, 1
53 stb.ab r5, [r3,1]
54 .Laligndestination:
55
56 ;;; Check the alignment of the source (r4 = src & 3)
57 and.f r4, r1, 0x03
58 bnz.d @.Lsourceunaligned
59
60 ;;; CASE 0: Both source and destination are 32bit aligned
61 ;;; Convert len to loop iterations, unfold x4
;;; The lsr.f below is also the delay slot of the bnz.d above. That is
;;; harmless: every unaligned case recomputes the flags and lp_count
;;; before relying on them.
62 lsr.f lp_count, r2, ZOLSHFT
63 lpnz @.Lcopy32_64bytes
64 ;; LOOP START: four loads followed by four stores per iteration
65 LOADX (r6, r1)
66 LOADX (r8, r1)
67 LOADX (r10, r1)
68 LOADX (r4, r1)
69 STOREX (r6, r3)
70 STOREX (r8, r3)
71 STOREX (r10, r3)
72 STOREX (r4, r3)
73 .Lcopy32_64bytes:
74
75 and.f lp_count, r2, ZOLAND ;Bytes left after the unrolled loop (up to 31 with LL64, up to 15 without)
76 .Lsmallchunk:
;;; Bytewise copy of lp_count bytes; also the entry point for sizes <= 8
77 lpnz @.Lcopyremainingbytes
78 ;; LOOP START
79 ldb.ab r5, [r1,1]
80 stb.ab r5, [r3,1]
81 .Lcopyremainingbytes:
82
83 j [blink]
84 ;;; END CASE 0
85
;;; Source is not 32bit aligned, r4 = src & 3 (1, 2 or 3).
;;; Each case first reads just enough bytes into r5 to make r1 aligned,
;;; then loops on aligned words, combining the bytes carried in r5 with
;;; part of each new word to build the aligned words that are stored.
86 .Lsourceunaligned:
87 cmp r4, 2
88 beq.d @.LunalignedOffby2
89 sub r2, r2, 1 ; delay slot: executed for all three cases
90
91 bhi.d @.LunalignedOffby3
92 ldb.ab r5, [r1, 1] ; delay slot: executed for cases 1 and 3; this is the byte counted by the sub above
93
94 ;;; CASE 1: The source is unaligned, off by 1
95 ;; One byte has already been read into r5 (16bit alignment reached);
96 ;; read 2 more bytes to reach 32bit alignment
97 ldh.ab r6, [r1, 2]
98 sub r2, r2, 2
99 ;; Convert to words, unfold x2: lp_count = r2 / 8
100 lsr.f lp_count, r2, 3
;; Combine the byte and the halfword into the initial 3 byte carry in r5
101 MERGE_1 (r6, r6, 8)
102 MERGE_2 (r5, r5, 24)
103 or r5, r5, r6
104
105 ;; Both src and dst are aligned
106 lpnz @.Lcopy8bytes_1
107 ;; LOOP START
;; r7 / r9 = carry combined with one byte of the new word (stored);
;; r5 = the other 3 bytes of the new word, carried forward
108 ld.ab r6, [r1, 4]
109 ld.ab r8, [r1,4]
110
111 SHIFT_1 (r7, r6, 24)
112 or r7, r7, r5
113 SHIFT_2 (r5, r6, 8)
114
115 SHIFT_1 (r9, r8, 24)
116 or r9, r9, r5
117 SHIFT_2 (r5, r8, 8)
118
119 st.ab r7, [r3, 4]
120 st.ab r9, [r3, 4]
121 .Lcopy8bytes_1:
122
123 ;; r5 still carries 3 bytes that were read but not stored: write back 16 bits
124 EXTRACT_1 (r6, r5, 16)
125 sth.ab r6, [r3, 2]
126 ;; Write back the remaining 8 bits
127 EXTRACT_2 (r5, r5, 16)
128 stb.ab r5, [r3, 1]
129
130 and.f lp_count, r2, 0x07 ;Tail: the at most 7 bytes not covered by the word loop
131 lpnz @.Lcopybytewise_1
132 ;; LOOP START
133 ldb.ab r6, [r1,1]
134 stb.ab r6, [r3,1]
135 .Lcopybytewise_1:
136 j [blink]
137
138 .LunalignedOffby2:
139 ;;; CASE 2: The source is unaligned, off by 2
;; Read one halfword to reach 32bit alignment; together with the sub in
;; the delay slot at .Lsourceunaligned this accounts for its 2 bytes.
140 ldh.ab r5, [r1, 2]
141 sub r2, r2, 1
142
143 ;; Both src and dst are aligned
144 ;; Convert to words, unfold x2: lp_count = r2 / 8
145 lsr.f lp_count, r2, 3
;; Big endian, and only when the loop will run (Z clear): move the carried
;; halfword to the upper half, where SHIFT_2 leaves the carry in the loop.
146 #ifdef __BIG_ENDIAN__
147 asl.nz r5, r5, 16
148 #endif
149 lpnz @.Lcopy8bytes_2
150 ;; LOOP START
;; r7 / r9 = carry combined with half of the new word (stored);
;; r5 = the other half of the new word, carried forward
151 ld.ab r6, [r1, 4]
152 ld.ab r8, [r1,4]
153
154 SHIFT_1 (r7, r6, 16)
155 or r7, r7, r5
156 SHIFT_2 (r5, r6, 16)
157
158 SHIFT_1 (r9, r8, 16)
159 or r9, r9, r5
160 SHIFT_2 (r5, r8, 16)
161
162 st.ab r7, [r3, 4]
163 st.ab r9, [r3, 4]
164 .Lcopy8bytes_2:
165
;; Big endian: bring the carried halfword back down before storing it.
;; Same flag test as before the loop; no instruction in the loop body
;; has the .f suffix, so the flags from the lsr.f are still in place.
166 #ifdef __BIG_ENDIAN__
167 lsr.nz r5, r5, 16
168 #endif
169 sth.ab r5, [r3, 2]
170
171 and.f lp_count, r2, 0x07 ;Tail: the at most 7 bytes not covered by the word loop
172 lpnz @.Lcopybytewise_2
173 ;; LOOP START
174 ldb.ab r6, [r1,1]
175 stb.ab r6, [r3,1]
176 .Lcopybytewise_2:
177 j [blink]
178
179 .LunalignedOffby3:
180 ;;; CASE 3: The source is unaligned, off by 3
181 ;;; The single byte needed to reach 32bit alignment is already in r5 and
;;; already subtracted from r2 (delay slots at .Lsourceunaligned)
182
183 ;; Both src and dst are aligned
184 ;; Convert to words, unfold x2: lp_count = r2 / 8
185 lsr.f lp_count, r2, 3
;; Big endian, and only when the loop will run: move the carried byte to
;; the top byte (.ne here and .nz after the loop both test Z clear).
186 #ifdef __BIG_ENDIAN__
187 asl.ne r5, r5, 24
188 #endif
189 lpnz @.Lcopy8bytes_3
190 ;; LOOP START
;; r7 / r9 = carry combined with 3 bytes of the new word (stored);
;; r5 = the remaining byte of the new word, carried forward
191 ld.ab r6, [r1, 4]
192 ld.ab r8, [r1,4]
193
194 SHIFT_1 (r7, r6, 8)
195 or r7, r7, r5
196 SHIFT_2 (r5, r6, 24)
197
198 SHIFT_1 (r9, r8, 8)
199 or r9, r9, r5
200 SHIFT_2 (r5, r8, 24)
201
202 st.ab r7, [r3, 4]
203 st.ab r9, [r3, 4]
204 .Lcopy8bytes_3:
205
;; Big endian: bring the carried byte back down to bits 7..0 before the
;; byte store (flags are still those of the lsr.f before the loop).
206 #ifdef __BIG_ENDIAN__
207 lsr.nz r5, r5, 24
208 #endif
209 stb.ab r5, [r3, 1]
210
211 and.f lp_count, r2, 0x07 ;Tail: the at most 7 bytes not covered by the word loop
212 lpnz @.Lcopybytewise_3
213 ;; LOOP START
214 ldb.ab r6, [r1,1]
215 stb.ab r6, [r3,1]
216 .Lcopybytewise_3:
217 j [blink]
218
219 END_CFI(memcpy)