1;
2;  linux/arch/c6x/lib/csum_64plus.s
3;
4;  Port on Texas Instruments TMS320C6x architecture
5;
6;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
7;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
8;
9;  This program is free software; you can redistribute it and/or modify
10;  it under the terms of the GNU General Public License version 2 as
11;  published by the Free Software Foundation.
12;
13#include <linux/linkage.h>
14
15;
16;unsigned int csum_partial_copy(const char *src, char * dst,
17;				int len, int sum)
18;
19; A4:	src
20; B4:	dst
21; A6:	len
22; B6:	sum
23; return csum in A4
24;
25
26	.text
27ENTRY(csum_partial_copy)
	;; Copy len bytes from src (A4) to dst (B4) while accumulating a
	;; 16-bit based sum of the copied data in A9. The sum is folded to
	;; 16 bits, the incoming sum (B6) is added, and the result is
	;; returned in A4. ILC is saved in B30 on entry and restored on exit.
28	MVC	.S2	ILC,B30		; save ILC, restored before returning
29
30	MV	.D1X	B6,A31		; given csum
31	ZERO	.D1	A9		; csum (a side)
32||	ZERO	.D2	B9		; csum (b side)
33||	SHRU	.S2X	A6,2,B5		; len / 4
34
35	;; Check alignment and size
36	AND	.S1	3,A4,A1		; A1 = src & 3
37||	AND	.S2	3,B4,B0		; B0 = dst & 3
38	OR	.L2X	B0,A1,B0	; non aligned condition
39||	MVC	.S2	B5,ILC		; loop count = number of 32-bit words
40||	MVK	.D2	1,B2		; B2 = 1, multiplier for the MPYU below
41||	MV	.D1X	B5,A1		; words condition
42  [!A1]	B	.S1	L8		; no complete word: go to the tail handling
43   [B0] BNOP	.S1	L6,5		; src or dst not word aligned: use the LDNW/STNW loop
44
45	SPLOOP		1
46
47	;; Main loop for aligned words
	;; Each word is split into two half-words that are summed
	;; separately: upper halves on the a side (A9), lower halves on
	;; the b side (B9).
48	LDW	.D1T1	*A4++,A7	; A7 = next source word
49	NOP	4
50	MV	.S2X	A7,B7		; B7 = copy of the word, used for the store
51||	EXTU	.S1	A7,0,16,A16	; A16 = A7 >> 16 (upper half-word)
52	STW	.D2T2	B7,*B4++	; write the word to dst
53||	MPYU	.M2	B7,B2,B8	; B8 = lower half-word of B7 (multiplied by B2 = 1)
54||	ADD	.L1	A16,A9,A9	; a side += upper half-word
55	NOP
56	SPKERNEL	8,0
57||	ADD	.L2	B8,B9,B9	; b side += lower half-word
58
59	ZERO	.D1	A1		; A1 = 0 so that the test at L6 skips the non-aligned loop
60||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
61
62L6:
63  [!A1]	BNOP	.S1	L8,5		; aligned loop already ran: skip to the tail handling
64
65	;; Main loop for non-aligned words
	;; Same split as above, but both half-words are accumulated in A9.
66	SPLOOP		2
67 ||	MVK	.L1	1,A2		; A2 = 1, multiplier for the MPYU below
68
69	LDNW	.D1T1	*A4++,A7	; non-aligned load of the next source word
70	NOP		3
71
72	NOP
73	MV	.S2X	A7,B7		; B7 = copy of the word, used for the store
74 ||	EXTU	.S1	A7,0,16,A16	; A16 = A7 >> 16 (upper half-word)
75 ||	MPYU	.M1	A7,A2,A8	; A8 = lower half-word of A7
76
77	ADD	.L1	A16,A9,A9	; csum += upper half-word
78	SPKERNEL	6,0
79 ||	STNW	.D2T2	B7,*B4++	; non-aligned store of the word to dst
80 ||	ADD	.L1	A8,A9,A9	; csum += lower half-word
81
82L8:	AND	.S2X	2,A6,B5		; B5 = len & 2
83	CMPGT	.L2	B5,0,B0		; B0 = 1 if a half-word remains
84  [!B0]	BNOP	.S1	L82,4		; no half-word: go to the last byte handling
85
86	;; Manage half-word
	;; The half-word is copied one byte at a time; the byte that
	;; is shifted left by 8 before being added depends on endianness.
87	ZERO	.L1	A7
88||	ZERO	.D1	A8
89
90#ifdef CONFIG_CPU_BIG_ENDIAN
91
92	LDBU	.D1T1	*A4++,A7	; A7 = first byte
93	LDBU	.D1T1	*A4++,A8	; A8 = second byte
94	NOP		3
95	SHL	.S1	A7,8,A0		; first byte is the high byte
96	ADD	.S1	A8,A9,A9	; csum += second byte
97	STB	.D2T1	A7,*B4++	; copy first byte
98||	ADD	.S1	A0,A9,A9	; csum += first byte << 8
99	STB	.D2T1	A8,*B4++	; copy second byte
100
101#else
102
103	LDBU	.D1T1	*A4++,A7	; A7 = first byte
104	LDBU	.D1T1	*A4++,A8	; A8 = second byte
105	NOP		3
106	ADD	.S1	A7,A9,A9	; csum += first byte
107	SHL	.S1	A8,8,A0		; second byte is the high byte
108
109	STB	.D2T1	A7,*B4++	; copy first byte
110||	ADD	.S1	A0,A9,A9	; csum += second byte << 8
111	STB	.D2T1	A8,*B4++	; copy second byte
112
113#endif
114
115	;; Handle the last byte, if any
116L82:	AND	.S2X	1,A6,B0		; B0 = len & 1
117  [!B0]	BNOP	.S1	L9,5		; even length: nothing left to copy
118
119||	ZERO	.L1	A7
120
121L83:	LDBU	.D1T1	*A4++,A7	; A7 = last byte
122	NOP		4
123
124	MV	.L2X	A7,B7		; B7 = copy of the byte, used for the store
125
126#ifdef CONFIG_CPU_BIG_ENDIAN
127
128	STB	.D2T2	B7,*B4++	; copy the byte
129||	SHL	.S1	A7,8,A7		; the byte counts as the high byte of a half-word
130	ADD	.S1	A7,A9,A9	; csum += byte << 8
131
132#else
133
134	STB	.D2T2	B7,*B4++	; copy the byte
135||	ADD	.S1	A7,A9,A9	; csum += byte
136
137#endif
138
139	;; Fold the csum
	;; Repeat csum = (csum & 0xffff) + (csum >> 16) until the upper
	;; 16 bits are zero.
140L9:	SHRU	.S2X	A9,16,B0	; B0 = csum >> 16
141  [!B0]	BNOP	.S1	L10,5		; already fits in 16 bits
142
143L91:	SHRU	.S2X	A9,16,B4	; B4 = csum >> 16 (dst pointer is no longer needed)
144||	EXTU	.S1	A9,16,16,A3	; A3 = csum & 0xffff
145	ADD	.D1X	A3,B4,A9	; csum = low part + high part
146
147	SHRU	.S1	A9,16,A0	; any carry left in the upper half?
148   [A0]	BNOP	.S1	L91,5		; yes: fold again
149
150L10:	ADD	.D1	A31,A9,A9	; add the csum given by the caller
151	MV	.D1	A9,A4		; return value
152
153	BNOP	.S2	B3,4		; return to the caller
154	MVC	.S2	B30,ILC		; restore the ILC saved on entry
155ENDPROC(csum_partial_copy)
156
157;
158;unsigned short
159;ip_fast_csum(unsigned char *iph, unsigned int ihl)
160;{
161;	unsigned int checksum = 0;
162;	unsigned short *tosum = (unsigned short *) iph;
163;	int len;
164;
165;	len = ihl*4;
166;
167;	if (len <= 0)
168;		return 0;
169;
170;	while(len) {
171;		len -= 2;
172;		checksum += *tosum++;
173;	}
174;	if (len & 1)
175;		checksum += *(unsigned char*) tosum;
176;
177;	while(checksum >> 16)
178;		checksum = (checksum & 0xffff) + (checksum >> 16);
179;
180;	return ~checksum;
181;}
182;
183; A4:	iph
184; B4:	ihl
185; return checksum in A4
186;
187	.text
188
189ENTRY(ip_fast_csum)
	;; Sum ihl * 2 half-words starting at iph (A4), fold the sum to
	;; 16 bits and return its one's complement in A4. Returns 0 when
	;; ihl * 4 is not positive. ILC is saved in B30 and restored on exit.
190	ZERO	.D1	A5		; A5 = checksum accumulator
191 ||	MVC	.S2	ILC,B30		; save ILC, restored before returning
192	SHL	.S2	B4,2,B0		; B0 = len = ihl * 4
193	CMPGT	.L2	B0,0,B1		; B1 = (len > 0)
194  [!B1] BNOP	.S1	L15,4		; len <= 0: go straight to the exit
195  [!B1]	ZERO	.D1	A3		; ... with a result of 0
196
197  [!B0]	B	.S1	L12		; skip the loop when len is 0
198	SHRU	.S2	B0,1,B0		; B0 = number of half-words
199	MVC	.S2	B0,ILC		; loop count for the SPLOOP below
200	NOP	3
201
202	SPLOOP	1
203	LDHU	.D1T1	*A4++,A3	; A3 = next half-word (zero extended)
204	NOP	3
205	NOP
206	SPKERNEL	5,0
207 ||	ADD	.L1	A3,A5,A5	; checksum += half-word
208
	;; Fold: checksum = (checksum & 0xffff) + (checksum >> 16) until
	;; the upper 16 bits are zero.
209L12:	SHRU	.S1	A5,16,A0	; A0 = checksum >> 16
210  [!A0]	BNOP	.S1	L14,5		; already fits in 16 bits
211
212L13:	SHRU	.S2X	A5,16,B4	; B4 = checksum >> 16
213	EXTU	.S1	A5,16,16,A3	; A3 = checksum & 0xffff
214	ADD	.D1X	A3,B4,A5	; checksum = low part + high part
215	SHRU	.S1	A5,16,A0	; any carry left in the upper half?
216  [A0]	BNOP	.S1	L13,5		; yes: fold again
217
218L14:	NOT	.D1	A5,A3		; A3 = ~checksum
219	EXTU	.S1	A3,16,16,A3	; keep only the low 16 bits
220
221L15:	BNOP	.S2	B3,3		; return to the caller
222	MVC	.S2	B30,ILC		; restore the ILC saved on entry
223	MV	.D1	A3,A4		; return value
224ENDPROC(ip_fast_csum)
225
226;
227;unsigned short
228;do_csum(unsigned char *buff, unsigned int len)
229;{
230;	int odd, count;
231;	unsigned int result = 0;
232;
233;	if (len <= 0)
234;		goto out;
235;	odd = 1 & (unsigned long) buff;
236;	if (odd) {
237;#ifdef __LITTLE_ENDIAN
238;		result += (*buff << 8);
239;#else
240;		result = *buff;
241;#endif
242;		len--;
243;		buff++;
244;	}
245;	count = len >> 1;		/* nr of 16-bit words.. */
246;	if (count) {
247;		if (2 & (unsigned long) buff) {
248;			result += *(unsigned short *) buff;
249;			count--;
250;			len -= 2;
251;			buff += 2;
252;		}
253;		count >>= 1;		/* nr of 32-bit words.. */
254;		if (count) {
255;			unsigned int carry = 0;
256;			do {
257;				unsigned int w = *(unsigned int *) buff;
258;				count--;
259;				buff += 4;
260;				result += carry;
261;				result += w;
262;				carry = (w > result);
263;			} while (count);
264;			result += carry;
265;			result = (result & 0xffff) + (result >> 16);
266;		}
267;		if (len & 2) {
268;			result += *(unsigned short *) buff;
269;			buff += 2;
270;		}
271;	}
272;	if (len & 1)
273;#ifdef __LITTLE_ENDIAN
274;		result += *buff;
275;#else
276;		result += (*buff << 8);
277;#endif
278;	result = (result & 0xffff) + (result >> 16);
279;	/* add up carry.. */
280;	result = (result & 0xffff) + (result >> 16);
281;	if (odd)
282;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
283;out:
284;	return result;
285;}
286;
287; A4:	buff
288; B4:	len
289; return checksum in A4
290;
291
292ENTRY(do_csum)
	;; Assembly version of the C routine shown in the comment above.
	;; Register usage:
	;;   A4 = buff (advanced as data is consumed)
	;;   B3 = len (working copy; the return address is moved to A5)
	;;   A1 = result, A3 = odd flag, B0 = count / scratch, A0 = scratch
	;; The 16-bit result is returned in A4.
293	   CMPGT   .L2	   B4,0,B0	; B0 = (len > 0)
294   [!B0]   BNOP    .S1	   L26,3	; len <= 0: exit (A1 is cleared below before the branch lands)
295	   EXTU    .S1	   A4,31,31,A0	; A0 = odd = buff & 1
296
297	   MV	   .L1	   A0,A3	; A3 = odd, kept for the final byte swap
298||	   MV	   .S1X    B3,A5	; A5 = return address
299||	   MV	   .L2	   B4,B3	; B3 = len
300||	   ZERO    .D1	   A1		; result = 0
301
	;; Leading odd byte: result = *buff (big endian) or *buff << 8
	;; (little endian), then len--
302#ifdef CONFIG_CPU_BIG_ENDIAN
303   [A0]    SUB	   .L2	   B3,1,B3	; len--
304|| [A0]    LDBU    .D1T1   *A4++,A1	; result = *buff++
305#else
306   [!A0]   BNOP    .S1	   L21,5	; buff is even: nothing to do
307|| [A0]    LDBU    .D1T1   *A4++,A0	; A0 = *buff++
308	   SUB	   .L2	   B3,1,B3	; len--
309||	   SHL	   .S1	   A0,8,A1	; result = byte << 8
310L21:
311#endif
312	   SHR	   .S2	   B3,1,B0	; count = len >> 1 (number of 16-bit words)
313   [!B0]   BNOP    .S1	   L24,3	; count == 0: only the last byte may remain
314	   MVK	   .L1	   2,A0
315	   AND	   .L1	   A4,A0,A0	; A0 = buff & 2
316
317   [!A0]   BNOP    .S1	   L22,5	; buff is already 32-bit aligned
318|| [A0]    LDHU    .D1T1   *A4++,A0	; A0 = leading half-word
319	   SUB	   .L2	   B0,1,B0	; count--
320||	   SUB	   .S2	   B3,2,B3	; len -= 2
321||	   ADD	   .L1	   A0,A1,A1	; result += half-word
322L22:
323	   SHR	   .S2	   B0,1,B0	; count >>= 1 (number of 32-bit words)
324||	   ZERO    .L1	   A0		; carry = 0
325
326   [!B0]   BNOP    .S1	   L23,5	; no 32-bit word to sum
327|| [B0]    MVC	   .S2	   B0,ILC	; loop count for the SPLOOP below
328
	;; Word loop: A1 = w, A2 = result, A0 = carry.
	;; NOTE(review): the SPMASK on L1 presumably makes the MV below
	;; run only once, to seed A2 with the result so far; confirm
	;; against the C64x+ SPLOOP documentation.
329	   SPLOOP  3
330	   SPMASK  L1
331||	   MV	   .L1	   A1,A2	; A2 = result accumulated so far
332||	   LDW	   .D1T1   *A4++,A1	; w = next 32-bit word
333
334	   NOP	   4
335	   ADD	   .L1	   A0,A1,A0	; A0 = carry + w
336	   ADD	   .L1	   A2,A0,A2	; result += carry + w
337
338	   SPKERNEL 1,2
339||	   CMPGTU  .L1	   A1,A2,A0	; carry = (w > result), unsigned
340
341	   ADD	   .L1	   A0,A2,A6	; result += carry
342	   EXTU    .S1	   A6,16,16,A7	; A7 = result & 0xffff
343	   SHRU    .S2X    A6,16,B0	; B0 = result >> 16
344	   NOP		   1
345	   ADD	   .L1X    A7,B0,A1	; result = low part + high part
346L23:
347	   MVK	   .L2	   2,B0
348	   AND	   .L2	   B3,B0,B0	; B0 = len & 2
349   [B0]    LDHU    .D1T1   *A4++,A0	; trailing half-word, if any
350	   NOP	   4
351   [B0]    ADD	   .L1	   A0,A1,A1	; result += half-word
352L24:
353	   EXTU    .S2	   B3,31,31,B0	; B0 = len & 1
	;; Trailing byte: added as is (little endian) or shifted left
	;; by 8 (big endian)
354#ifdef CONFIG_CPU_BIG_ENDIAN
355   [!B0]   BNOP    .S1	   L25,4	; no trailing byte
356|| [B0]    LDBU    .D1T1   *A4,A0	; A0 = trailing byte
357	   SHL	   .S1	   A0,8,A0
358	   ADD	   .L1	   A0,A1,A1	; result += byte << 8
359L25:
360#else
361   [B0]    LDBU    .D1T1   *A4,A0	; A0 = trailing byte
362	   NOP	   4
363   [B0]    ADD	   .L1	   A0,A1,A1	; result += byte
364#endif
	;; Fold the result to 16 bits, adding the carry back in
365	   EXTU    .S1	   A1,16,16,A0	; A0 = result & 0xffff
366	   SHRU    .S2X    A1,16,B0	; B0 = result >> 16
367	   NOP	   1
368	   ADD	   .L1X    A0,B0,A0	; first fold
369	   SHRU    .S1	   A0,16,A1	; A1 = carry out of the first fold
370	   ADD	   .L1	   A0,A1,A0	; add up carry
371	   EXTU    .S1	   A0,16,16,A1	; result = low 16 bits
372	   EXTU    .S1	   A1,16,24,A2	; A2 = (result >> 8) & 0xff
373
374	   EXTU    .S1	   A1,24,16,A0	; A0 = (result & 0xff) << 8
375||	   MV	   .L2X    A3,B0	; B0 = odd
376
377   [B0]    OR	   .L1	   A0,A2,A1	; buff was odd: swap the two result bytes
378L26:
379	   NOP	   1
380	   BNOP    .S2X    A5,4		; return through the address saved in A5
381	   MV	   .L1	   A1,A4	; return value
382ENDPROC(do_csum)
383
384;__wsum csum_partial(const void *buff, int len, __wsum wsum)
385;{
386;	unsigned int sum = (__force unsigned int)wsum;
387;	unsigned int result = do_csum(buff, len);
388;
389;	/* add in old sum, and carry.. */
390;	result += sum;
391;	if (sum > result)
392;		result += 1;
393;	return (__force __wsum)result;
394;}
395;
396ENTRY(csum_partial)
	;; Call do_csum on (buff, len), which are already in A4/B4, then
	;; add the third argument (wsum, in A6) with an end-around carry.
	;; The return address and wsum are parked in A9 and A8 across the
	;; call; this relies on do_csum leaving A8 and A9 untouched.
397	   MV	   .L1X    B3,A9	; A9 = return address
398||	   CALLP   .S2	   do_csum,B3	; A4 = do_csum(buff, len)
399||	   MV	   .S1	   A6,A8	; A8 = sum
400	   BNOP    .S2X    A9,2		; return; the additions below follow the branch
401	   ADD	   .L1	   A8,A4,A1	; result = do_csum result + sum
402	   CMPGTU  .L1	   A8,A1,A0	; A0 = 1 if the addition wrapped (sum > result)
403	   ADD	   .L1	   A1,A0,A4	; return result + carry
404ENDPROC(csum_partial)
405
406;unsigned short
407;ip_compute_csum(unsigned char *buff, unsigned int len)
408;
409; A4:	buff
410; B4:	len
411; return checksum in A4
412
413ENTRY(ip_compute_csum)
	;; Call do_csum on (buff, len), which are already in A4/B4, and
	;; return the one's complement of its result, limited to 16 bits.
	;; The return address is parked in A9 across the call; this relies
	;; on do_csum leaving A9 untouched.
414	   MV	   .L1X    B3,A9	; A9 = return address
415||	   CALLP   .S2	   do_csum,B3	; A4 = do_csum(buff, len)
416	   BNOP    .S2X    A9,3		; return; the two instructions below follow the branch
417	   NOT	   .S1	   A4,A4	; A4 = ~result
418	   CLR     .S1	   A4,16,31,A4	; clear bits 16..31, keep the low 16 bits
419ENDPROC(ip_compute_csum)
420