1 ; SPDX-License-Identifier: GPL-2.0-only
2 ;
3 ; linux/arch/c6x/lib/csum_64plus.s
4 ;
5 ; Port on Texas Instruments TMS320C6x architecture
6 ;
7 ; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
8 ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
9 ;
10 #include <linux/linkage.h>
11
12 ;
13 ;unsigned int csum_partial_copy(const char *src, char * dst,
14 ; int len, int sum)
15 ;
16 ; A4: src
17 ; B4: dst
18 ; A6: len
19 ; B6: sum
20 ; return csum in A4
21 ;
22
23 .text
24 ENTRY(csum_partial_copy) ; copy len bytes from src to dst while summing the data as 16-bit halves
25 MVC .S2 ILC,B30 ; save ILC; it is restored just before returning
26 ; A9 and B9 accumulate the sum; A31 holds the caller-supplied sum until after the fold
27 MV .D1X B6,A31 ; given csum
28 ZERO .D1 A9 ; csum (a side)
29 || ZERO .D2 B9 ; csum (b side)
30 || SHRU .S2X A6,2,B5 ; B5 = len >> 2 (number of 32-bit words)
31
32 ;; Check alignment and size
33 AND .S1 3,A4,A1 ; A1 = src & 3
34 || AND .S2 3,B4,B0 ; B0 = dst & 3
35 OR .L2X B0,A1,B0 ; B0 != 0 when src or dst is not 4-byte aligned
36 || MVC .S2 B5,ILC ; ILC = word count for the loops below
37 || MVK .D2 1,B2 ; B2 = 1, multiplier used by MPYU in the aligned loop
38 || MV .D1X B5,A1 ; A1 = word count (0 means no full word to copy)
39 [!A1] B .S1 L8 ; no full word: go to the tail handling
40 [B0] BNOP .S1 L6,5 ; not aligned: use the LDNW/STNW loop at L6
41 ; NOTE(review): if both branches above fire, the packet at L8 appears to run once before control reaches L6, which then branches back to L8 - confirm
42 SPLOOP 1 ; software-pipelined loop, iteration interval 1
43
44 ;; Main loop for aligned words
45 LDW .D1T1 *A4++,A7 ; A7 = next source word
46 NOP 4 ; wait for the load to complete
47 MV .S2X A7,B7 ; copy the word to the B side for the store
48 || EXTU .S1 A7,0,16,A16 ; A16 = A7 >> 16 (upper half-word)
49 STW .D2T2 B7,*B4++ ; store the word to dst
50 || MPYU .M2 B7,B2,B8 ; B8 = lower half-word of B7 (unsigned multiply by 1)
51 || ADD .L1 A16,A9,A9 ; a-side sum += upper half-word
52 NOP
53 SPKERNEL 8,0
54 || ADD .L2 B8,B9,B9 ; b-side sum += lower half-word
55
56 ZERO .D1 A1 ; A1 = 0 so that the check at L6 branches to L8
57 || ADD .L1X A9,B9,A9 ; add csum from a and b sides
58 ; L6 is the non-aligned entry; it is also reached by fall-through after the aligned loop, with A1 = 0
59 L6:
60 [!A1] BNOP .S1 L8,5 ; A1 == 0: nothing to do here, go to the tail handling
61
62 ;; Main loop for non-aligned words
63 SPLOOP 2 ; software-pipelined loop, iteration interval 2
64 || MVK .L1 1,A2 ; A2 = 1, multiplier used by MPYU below
65
66 LDNW .D1T1 *A4++,A7 ; A7 = next source word (non-aligned load)
67 NOP 3
68
69 NOP
70 MV .S2X A7,B7 ; copy the word to the B side for the store
71 || EXTU .S1 A7,0,16,A16 ; A16 = A7 >> 16 (upper half-word)
72 || MPYU .M1 A7,A2,A8 ; A8 = lower half-word of A7 (unsigned multiply by 1)
73
74 ADD .L1 A16,A9,A9 ; sum += upper half-word
75 SPKERNEL 6,0
76 || STNW .D2T2 B7,*B4++ ; store the word to dst (non-aligned store)
77 || ADD .L1 A8,A9,A9 ; sum += lower half-word
78 ; Tail: the remaining len & 3 bytes are handled below
79 L8: AND .S2X 2,A6,B5 ; B5 = len & 2
80 CMPGT .L2 B5,0,B0 ; B0 = 1 when a trailing half-word is present
81 [!B0] BNOP .S1 L82,4 ; no trailing half-word: skip to the last-byte check
82
83 ;; Manage half-word
84 ZERO .L1 A7
85 || ZERO .D1 A8
86
87 #ifdef CONFIG_CPU_BIG_ENDIAN
88
89 LDBU .D1T1 *A4++,A7 ; A7 = first byte
90 LDBU .D1T1 *A4++,A8 ; A8 = second byte
91 NOP 3
92 SHL .S1 A7,8,A0 ; first byte is the high byte of the half-word
93 ADD .S1 A8,A9,A9 ; sum += second byte
94 STB .D2T1 A7,*B4++ ; copy first byte to dst
95 || ADD .S1 A0,A9,A9 ; sum += first byte << 8
96 STB .D2T1 A8,*B4++ ; copy second byte to dst
97
98 #else
99
100 LDBU .D1T1 *A4++,A7 ; A7 = first byte
101 LDBU .D1T1 *A4++,A8 ; A8 = second byte
102 NOP 3
103 ADD .S1 A7,A9,A9 ; sum += first byte (low byte of the half-word)
104 SHL .S1 A8,8,A0 ; second byte is the high byte of the half-word
105
106 STB .D2T1 A7,*B4++ ; copy first byte to dst
107 || ADD .S1 A0,A9,A9 ; sum += second byte << 8
108 STB .D2T1 A8,*B4++ ; copy second byte to dst
109
110 #endif
111
112 ;; Handle the last byte, if any
113 L82: AND .S2X 1,A6,B0 ; B0 = len & 1
114 [!B0] BNOP .S1 L9,5 ; even length: go straight to the fold
115
116 || ZERO .L1 A7
117
118 L83: LDBU .D1T1 *A4++,A7 ; A7 = last byte
119 NOP 4 ; wait for the load to complete
120
121 MV .L2X A7,B7 ; copy the byte to the B side for the store
122
123 #ifdef CONFIG_CPU_BIG_ENDIAN
124
125 STB .D2T2 B7,*B4++ ; copy the last byte to dst
126 || SHL .S1 A7,8,A7 ; the last byte counts as the high byte of a half-word
127 ADD .S1 A7,A9,A9 ; sum += last byte << 8
128
129 #else
130
131 STB .D2T2 B7,*B4++ ; copy the last byte to dst
132 || ADD .S1 A7,A9,A9 ; sum += last byte (low byte of a half-word)
133
134 #endif
135
136 ;; Fold the csum
137 L9: SHRU .S2X A9,16,B0 ; B0 = sum >> 16
138 [!B0] BNOP .S1 L10,5 ; sum already fits in 16 bits: no fold needed
139
140 L91: SHRU .S2X A9,16,B4 ; B4 = upper 16 bits of the sum
141 || EXTU .S1 A9,16,16,A3 ; A3 = lower 16 bits of the sum
142 ADD .D1X A3,B4,A9 ; sum = (sum & 0xffff) + (sum >> 16)
143
144 SHRU .S1 A9,16,A0 ; A0 = bits remaining above bit 15
145 [A0] BNOP .S1 L91,5 ; fold again while the sum does not fit in 16 bits
146 ; NOTE(review): the add below applies no end-around carry if it overflows 32 bits, unlike csum_partial in this file - confirm this is acceptable
147 L10: ADD .D1 A31,A9,A9 ; add the caller-supplied sum to the folded value
148 MV .D1 A9,A4 ; return value in A4
149 ; The MVC below executes in the last delay slot of the return branch
150 BNOP .S2 B3,4 ; return to the caller
151 MVC .S2 B30,ILC ; restore the saved ILC
152 ENDPROC(csum_partial_copy)
153
154 ;
155 ;unsigned short
156 ;ip_fast_csum(unsigned char *iph, unsigned int ihl)
157 ;{
158 ; unsigned int checksum = 0;
159 ; unsigned short *tosum = (unsigned short *) iph;
160 ; int len;
161 ;
162 ; len = ihl*4;
163 ;
164 ; if (len <= 0)
165 ; return 0;
166 ;
167 ; while(len) {
168 ; len -= 2;
169 ; checksum += *tosum++;
170 ; }
171 ; if (len & 1)
172 ; checksum += *(unsigned char*) tosum;
173 ;
174 ; while(checksum >> 16)
175 ; checksum = (checksum & 0xffff) + (checksum >> 16);
176 ;
177 ; return ~checksum;
178 ;}
179 ;
180 ; A4: iph
181 ; B4: ihl
182 ; return checksum in A4
183 ;
184 .text
185
186 ENTRY(ip_fast_csum) ; sum ihl*4 bytes at iph as 16-bit half-words, fold, return the 16-bit complement
187 ZERO .D1 A5 ; A5 = running sum
188 || MVC .S2 ILC,B30 ; save ILC; it is restored just before returning
189 SHL .S2 B4,2,B0 ; B0 = ihl * 4 (length in bytes)
190 CMPGT .L2 B0,0,B1 ; B1 = 1 when the length is > 0
191 [!B1] BNOP .S1 L15,4 ; length <= 0: return 0
192 [!B1] ZERO .D1 A3 ; runs in the branch delay slot; A3 = 0 becomes the return value
193 ; NOTE(review): B0 > 0 whenever the next branch is reached, so it looks never-taken - confirm
194 [!B0] B .S1 L12
195 SHRU .S2 B0,1,B0 ; B0 = number of half-words
196 MVC .S2 B0,ILC ; ILC = loop count
197 NOP 3
198
199 SPLOOP 1 ; software-pipelined loop, iteration interval 1
200 LDHU .D1T1 *A4++,A3 ; A3 = next unsigned half-word
201 NOP 3
202 NOP
203 SPKERNEL 5,0
204 || ADD .L1 A3,A5,A5 ; sum += half-word
205
206 L12: SHRU .S1 A5,16,A0 ; A0 = sum >> 16
207 [!A0] BNOP .S1 L14,5 ; sum already fits in 16 bits: no fold needed
208
209 L13: SHRU .S2X A5,16,B4 ; B4 = upper 16 bits of the sum
210 EXTU .S1 A5,16,16,A3 ; A3 = lower 16 bits of the sum
211 ADD .D1X A3,B4,A5 ; sum = (sum & 0xffff) + (sum >> 16)
212 SHRU .S1 A5,16,A0 ; A0 = bits remaining above bit 15
213 [A0] BNOP .S1 L13,5 ; fold again while the sum does not fit in 16 bits
214
215 L14: NOT .D1 A5,A3 ; A3 = ~sum
216 EXTU .S1 A3,16,16,A3 ; keep only the lower 16 bits
217
218 L15: BNOP .S2 B3,3 ; return to the caller; the next two instructions fill the delay slots
219 MVC .S2 B30,ILC ; restore the saved ILC
220 MV .D1 A3,A4 ; return value in A4
221 ENDPROC(ip_fast_csum)
222
223 ;
224 ;unsigned short
225 ;do_csum(unsigned char *buff, unsigned int len)
226 ;{
227 ; int odd, count;
228 ; unsigned int result = 0;
229 ;
230 ; if (len <= 0)
231 ; goto out;
232 ; odd = 1 & (unsigned long) buff;
233 ; if (odd) {
234 ;#ifdef __LITTLE_ENDIAN
235 ; result += (*buff << 8);
236 ;#else
237 ; result = *buff;
238 ;#endif
239 ; len--;
240 ; buff++;
241 ; }
242 ; count = len >> 1;
243 ; if (count) {
244 ; if (2 & (unsigned long) buff) {
245 ; result += *(unsigned short *) buff;
246 ; count--;
247 ; len -= 2;
248 ; buff += 2;
249 ; }
250 ; count >>= 1;
251 ; if (count) {
252 ; unsigned int carry = 0;
253 ; do {
254 ; unsigned int w = *(unsigned int *) buff;
255 ; count--;
256 ; buff += 4;
257 ; result += carry;
258 ; result += w;
259 ; carry = (w > result);
260 ; } while (count);
261 ; result += carry;
262 ; result = (result & 0xffff) + (result >> 16);
263 ; }
264 ; if (len & 2) {
265 ; result += *(unsigned short *) buff;
266 ; buff += 2;
267 ; }
268 ; }
269 ; if (len & 1)
270 ;#ifdef __LITTLE_ENDIAN
271 ; result += *buff;
272 ;#else
273 ; result += (*buff << 8);
274 ;#endif
275 ; result = (result & 0xffff) + (result >> 16);
276 ;
277 ; result = (result & 0xffff) + (result >> 16);
278 ; if (odd)
279 ; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
280 ;out:
281 ; return result;
282 ;}
283 ;
284 ; A4: buff
285 ; B4: len
286 ; return checksum in A4
287 ;
288
289 ENTRY(do_csum) ; sum len bytes at buff as 16-bit half-words and return the folded 16-bit result
290 CMPGT .L2 B4,0,B0 ; B0 = 1 when len > 0
291 [!B0] BNOP .S1 L26,3 ; len <= 0: return 0 (A1 and A5 are set up in the delay slots)
292 EXTU .S1 A4,31,31,A0 ; A0 = buff & 1 (odd start address)
293
294 MV .L1 A0,A3 ; A3 = odd flag, tested again before returning
295 || MV .S1X B3,A5 ; A5 = return address (B3 is reused as the length below)
296 || MV .L2 B4,B3 ; B3 = remaining length
297 || ZERO .D1 A1 ; A1 = result = 0
298
299 #ifdef CONFIG_CPU_BIG_ENDIAN
300 [A0] SUB .L2 B3,1,B3 ; odd start: len--
301 || [A0] LDBU .D1T1 *A4++,A1 ; odd start: result = *buff++
302 #else
303 [!A0] BNOP .S1 L21,5 ; even start: skip the leading byte
304 || [A0] LDBU .D1T1 *A4++,A0 ; odd start: A0 = *buff++
305 SUB .L2 B3,1,B3 ; len--
306 || SHL .S1 A0,8,A1 ; result = byte << 8
307 L21:
308 #endif
309 SHR .S2 B3,1,B0 ; B0 = count = len >> 1 (half-words)
310 [!B0] BNOP .S1 L24,3 ; no half-word left: only a possible last byte remains
311 MVK .L1 2,A0
312 AND .L1 A4,A0,A0 ; A0 = buff & 2
313
314 [!A0] BNOP .S1 L22,5 ; buff & 2 is clear: no leading half-word
315 || [A0] LDHU .D1T1 *A4++,A0 ; A0 = leading half-word
316 SUB .L2 B0,1,B0 ; count--
317 || SUB .S2 B3,2,B3 ; len -= 2
318 || ADD .L1 A0,A1,A1 ; result += leading half-word
319 L22:
320 SHR .S2 B0,1,B0 ; count >>= 1 (number of 32-bit words)
321 || ZERO .L1 A0 ; A0 = carry = 0
322 ; NOTE(review): ILC is written below but not saved and restored, unlike csum_partial_copy and ip_fast_csum in this file - confirm this is intended
323 [!B0] BNOP .S1 L23,5 ; no full word: skip the word loop
324 || [B0] MVC .S2 B0,ILC ; ILC = word count
325
326 SPLOOP 3 ; software-pipelined loop, iteration interval 3
327 SPMASK L1
328 || MV .L1 A1,A2 ; A2 = running result, seeded from A1 (presumably SPMASK keeps this MV out of later iterations - confirm)
329 || LDW .D1T1 *A4++,A1 ; A1 = w = next word
330
331 NOP 4 ; wait for the load to complete
332 ADD .L1 A0,A1,A0 ; A0 = carry + w
333 ADD .L1 A2,A0,A2 ; result += carry + w
334
335 SPKERNEL 1,2
336 || CMPGTU .L1 A1,A2,A0 ; carry = (w > result)
337
338 ADD .L1 A0,A2,A6 ; result += carry
339 EXTU .S1 A6,16,16,A7 ; A7 = lower 16 bits
340 SHRU .S2X A6,16,B0 ; B0 = upper 16 bits
341 NOP 1
342 ADD .L1X A7,B0,A1 ; result = (result & 0xffff) + (result >> 16)
343 L23:
344 MVK .L2 2,B0
345 AND .L2 B3,B0,B0 ; B0 = len & 2
346 [B0] LDHU .D1T1 *A4++,A0 ; A0 = trailing half-word
347 NOP 4 ; wait for the load to complete
348 [B0] ADD .L1 A0,A1,A1 ; result += trailing half-word
349 L24:
350 EXTU .S2 B3,31,31,B0 ; B0 = len & 1
351 #ifdef CONFIG_CPU_BIG_ENDIAN
352 [!B0] BNOP .S1 L25,4 ; even length: skip the last byte
353 || [B0] LDBU .D1T1 *A4,A0 ; A0 = last byte
354 SHL .S1 A0,8,A0 ; the last byte counts as the high byte of a half-word
355 ADD .L1 A0,A1,A1 ; result += last byte << 8
356 L25:
357 #else
358 [B0] LDBU .D1T1 *A4,A0 ; A0 = last byte
359 NOP 4 ; wait for the load to complete
360 [B0] ADD .L1 A0,A1,A1 ; result += last byte
361 #endif
362 EXTU .S1 A1,16,16,A0 ; A0 = lower 16 bits of the result
363 SHRU .S2X A1,16,B0 ; B0 = upper 16 bits of the result
364 NOP 1
365 ADD .L1X A0,B0,A0 ; first fold: (result & 0xffff) + (result >> 16)
366 SHRU .S1 A0,16,A1 ; A1 = carry out of the first fold
367 ADD .L1 A0,A1,A0 ; add the carry back in
368 EXTU .S1 A0,16,16,A1 ; A1 = 16-bit result
369 EXTU .S1 A1,16,24,A2 ; A2 = (result >> 8) & 0xff
370
371 EXTU .S1 A1,24,16,A0 ; A0 = (result & 0xff) << 8
372 || MV .L2X A3,B0 ; B0 = odd flag saved at entry
373
374 [B0] OR .L1 A0,A2,A1 ; odd start address: swap the two bytes of the result
375 L26:
376 NOP 1
377 BNOP .S2X A5,4 ; return through the address saved in A5
378 MV .L1 A1,A4 ; return value in A4 (runs in the last delay slot)
379 ENDPROC(do_csum)
380
381 ;__wsum csum_partial(const void *buff, int len, __wsum wsum)
382 ;{
383 ; unsigned int sum = (__force unsigned int)wsum;
384 ; unsigned int result = do_csum(buff, len);
385 ;
386 ;
387 ; result += sum;
388 ; if (sum > result)
389 ; result += 1;
390 ; return (__force __wsum)result;
391 ;}
392 ;
393 ENTRY(csum_partial) ; return do_csum(buff, len) + wsum, adding 1 when the 32-bit add wraps
394 MV .L1X B3,A9 ; A9 = return address (do_csum as written above does not touch A8 or A9)
395 || CALLP .S2 do_csum,B3 ; call do_csum; its result comes back in A4
396 || MV .S1 A6,A8 ; A8 = sum, taken from A6
397 BNOP .S2X A9,2 ; return to the caller; the next three instructions fill the delay slots
398 ADD .L1 A8,A4,A1 ; result = do_csum result + sum
399 CMPGTU .L1 A8,A1,A0 ; A0 = 1 when sum > result, i.e. the add wrapped
400 ADD .L1 A1,A0,A4 ; return result + carry in A4
401 ENDPROC(csum_partial)
402
403 ;unsigned short
404 ;ip_compute_csum(unsigned char *buff, unsigned int len)
405 ;
406 ; A4: buff
407 ; B4: len
408 ; return checksum in A4
409
410 ENTRY(ip_compute_csum) ; return the 16-bit complement of do_csum(buff, len)
411 MV .L1X B3,A9 ; A9 = return address (do_csum as written above does not touch A9)
412 || CALLP .S2 do_csum,B3 ; call do_csum; its result comes back in A4
413 BNOP .S2X A9,3 ; return to the caller; the next two instructions fill the delay slots
414 NOT .S1 A4,A4 ; A4 = ~result
415 CLR .S1 A4,16,31,A4 ; clear bits 16 to 31 so only the 16-bit checksum is returned
416 ENDPROC(ip_compute_csum)