;
;  linux/arch/c6x/lib/csum_64plus.s
;
;  Port on Texas Instruments TMS320C6x architecture
;
;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
;
;  This program is free software; you can redistribute it and/or modify
;  it under the terms of the GNU General Public License version 2 as
;  published by the Free Software Foundation.
;
#include <linux/linkage.h>

;
;unsigned int csum_partial_copy(const char *src, char * dst,
;				int len, int sum)
;
; Copy len bytes from src to dst while accumulating the sum of the
; 16-bit halves of the copied data, then add the given sum.
;
; A4:	src
; B4:	dst
; A6:	len
; B6:	sum
; return csum in A4
;
; Outline of the code below:
;   - len / 4 whole words go through one of two SPLOOP loops (LDW/STW
;     when both pointers have their two low bits clear, LDNW/STNW
;     otherwise);
;   - a remaining halfword (len & 2) and a remaining byte (len & 1)
;     are copied with byte loads and stores;
;   - the accumulator is folded to 16 bits and the given sum (kept in
;     A31) is added last.
; ILC is saved in B30 on entry and restored before returning.
; The instruction order is scheduled around load and branch delay
; slots (see the NOP counts); do not reorder it casually.
;

	.text
ENTRY(csum_partial_copy)
	MVC	.S2	ILC,B30		; save ILC, restored on exit

	MV	.D1X	B6,A31		; given csum
	ZERO	.D1	A9		; csum (a side)
||	ZERO	.D2	B9		; csum (b side)
||	SHRU	.S2X	A6,2,B5		; len / 4

	;; Check alignment and size
	AND	.S1	3,A4,A1		; src & 3
||	AND	.S2	3,B4,B0		; dst & 3
	OR	.L2X	B0,A1,B0	; non aligned condition
||	MVC	.S2	B5,ILC		; loop count = number of words
||	MVK	.D2	1,B2		; multiplier used by MPYU below
||	MV	.D1X	B5,A1		; words condition
  [!A1]	B	.S1	L8		; no whole word: go to the tail
   [B0]	BNOP	.S1	L6,5		; not aligned: use the LDNW/STNW loop

	SPLOOP	1

	;; Main loop for aligned words
	LDW	.D1T1	*A4++,A7
	NOP	4
	MV	.S2X	A7,B7		; copy of the word for the store
||	EXTU	.S1	A7,0,16,A16	; upper halfword (word >> 16)
	STW	.D2T2	B7,*B4++
||	MPYU	.M2	B7,B2,B8	; lower halfword (16-bit multiply by 1)
||	ADD	.L1	A16,A9,A9	; a side sums the upper halfwords
	NOP
	SPKERNEL	8,0
||	ADD	.L2	B8,B9,B9	; b side sums the lower halfwords

	ZERO	.D1	A1		; words done: makes L6 branch to L8
||	ADD	.L1X	A9,B9,A9	; add csum from a and b sides

L6:
  [!A1]	BNOP	.S1	L8,5		; A1 == 0: skip the non-aligned loop

	;; Main loop for non-aligned words
	SPLOOP	 2
 ||	MVK	.L1	1,A2		; multiplier used by MPYU below

	LDNW	.D1T1	*A4++,A7
	NOP		3

	NOP
	MV	.S2X	A7,B7		; copy of the word for the store
 ||	EXTU	.S1	A7,0,16,A16	; upper halfword (word >> 16)
 ||	MPYU	.M1	A7,A2,A8	; lower halfword (16-bit multiply by 1)

	ADD	.L1	A16,A9,A9
	SPKERNEL	6,0
 ||	STNW	.D2T2	B7,*B4++
 ||	ADD	.L1	A8,A9,A9

L8:	AND	.S2X	2,A6,B5		; len & 2
	CMPGT	.L2	B5,0,B0
  [!B0]	BNOP	.S1	L82,4		; no halfword left

	;; Manage half-word
	ZERO	.L1	A7
||	ZERO	.D1	A8

#ifdef CONFIG_CPU_BIG_ENDIAN

	LDBU	.D1T1	*A4++,A7
	LDBU	.D1T1	*A4++,A8
	NOP		3
	SHL	.S1	A7,8,A0		; first byte is the high byte
	ADD	.S1	A8,A9,A9
	STB	.D2T1	A7,*B4++
||	ADD	.S1	A0,A9,A9
	STB	.D2T1	A8,*B4++

#else

	LDBU	.D1T1	*A4++,A7
	LDBU	.D1T1	*A4++,A8
	NOP		3
	ADD	.S1	A7,A9,A9
	SHL	.S1	A8,8,A0		; second byte is the high byte

	STB	.D2T1	A7,*B4++
||	ADD	.S1	A0,A9,A9
	STB	.D2T1	A8,*B4++

#endif

	;; Handle the last byte, if any
L82:	AND	.S2X	1,A6,B0		; len & 1
  [!B0]	BNOP	.S1	L9,5		; even length: go fold
||	ZERO	.L1	A7

L83:	LDBU	.D1T1	*A4++,A7
	NOP		4

	MV	.L2X	A7,B7		; copy of the byte for the store

#ifdef CONFIG_CPU_BIG_ENDIAN

	STB	.D2T2	B7,*B4++
||	SHL	.S1	A7,8,A7		; byte counts as the high byte
	ADD	.S1	A7,A9,A9

#else

	STB	.D2T2	B7,*B4++
||	ADD	.S1	A7,A9,A9	; byte counts as the low byte

#endif

	;; Fold the csum
L9:	SHRU	.S2X	A9,16,B0
  [!B0]	BNOP	.S1	L10,5		; nothing above bit 15: no fold needed

L91:	SHRU	.S2X	A9,16,B4	; csum >> 16 (B4 is free: copy is done)
||	EXTU	.S1	A9,16,16,A3	; csum & 0xffff
	ADD	.D1X	A3,B4,A9

	SHRU	.S1	A9,16,A0
  [A0]	BNOP	.S1	L91,5		; repeat while bits above 15 remain

L10:	ADD	.D1	A31,A9,A9	; add the given csum (plain 32-bit add)
	MV	.D1	A9,A4		; return value

	BNOP	.S2	B3,4		; return through B3
	MVC	.S2	B30,ILC		; restore ILC (runs after the 4 NOP cycles)
ENDPROC(csum_partial_copy)

;
;unsigned short
;ip_fast_csum(unsigned char *iph, unsigned int ihl)
;{
;	unsigned int checksum = 0;
;	unsigned short *tosum = (unsigned short *) iph;
;	int len;
;
;	len = ihl*4;
;
;	if (len <= 0)
;		return 0;
;
;	while(len) {
;		len -= 2;
;		checksum += *tosum++;
;	}
;	if (len & 1)
;		checksum += *(unsigned char*) tosum;
;
;	while(checksum >> 16)
;		checksum = (checksum & 0xffff) + (checksum >> 16);
;
;	return ~checksum;
;}
;
; A4:	iph
; B4:	ihl
; return checksum in A4
;
; Notes on the assembly below:
;   - there is no counterpart for the (len & 1) step of the C sketch;
;     only len / 2 halfword loads are performed;
;   - the value returned is the low 16 bits of ~checksum;
;   - ILC is saved in B30 on entry and restored before returning.
;
	.text

ENTRY(ip_fast_csum)
	ZERO	.D1	A5		; checksum = 0
 ||	MVC	.S2	ILC,B30		; save ILC, restored on exit
	SHL	.S2	B4,2,B0		; len = ihl * 4
	CMPGT	.L2	B0,0,B1
  [!B1]	BNOP	.S1	L15,4		; len <= 0: return ...
  [!B1]	ZERO	.D1	A3		; ... the value 0

  [!B0]	B	.S1	L12		; len == 0: skip the loop
	SHRU	.S2	B0,1,B0		; number of halfwords
	MVC	.S2	B0,ILC		; loop count
	NOP	3

	SPLOOP	1
	LDHU	.D1T1	*A4++,A3
	NOP	3
	NOP
	SPKERNEL	5,0
 ||	ADD	.L1	A3,A5,A5	; checksum += *tosum++

L12:	SHRU	.S1	A5,16,A0
  [!A0]	BNOP	.S1	L14,5		; nothing above bit 15: no fold needed

L13:	SHRU	.S2X	A5,16,B4	; checksum >> 16
	EXTU	.S1	A5,16,16,A3	; checksum & 0xffff
	ADD	.D1X	A3,B4,A5
	SHRU	.S1	A5,16,A0
  [A0]	BNOP	.S1	L13,5		; repeat while bits above 15 remain

L14:	NOT	.D1	A5,A3		; ~checksum
	EXTU	.S1	A3,16,16,A3	; keep the low 16 bits

L15:	BNOP	.S2	B3,3		; return through B3
	MVC	.S2	B30,ILC		; restore ILC
	MV	.D1	A3,A4		; return value
ENDPROC(ip_fast_csum)

;
;unsigned short
;do_csum(unsigned char *buff, unsigned int len)
;{
;	int odd, count;
;	unsigned int result = 0;
;
;	if (len <= 0)
;		goto out;
;	odd = 1 & (unsigned long) buff;
;	if (odd) {
;#ifdef __LITTLE_ENDIAN
;		result += (*buff << 8);
;#else
;		result = *buff;
;#endif
;		len--;
;		buff++;
;	}
;	count = len >> 1;		/* nr of 16-bit words.. */
;	if (count) {
;		if (2 & (unsigned long) buff) {
;			result += *(unsigned short *) buff;
;			count--;
;			len -= 2;
;			buff += 2;
;		}
;		count >>= 1;		/* nr of 32-bit words.. */
;		if (count) {
;			unsigned int carry = 0;
;			do {
;				unsigned int w = *(unsigned int *) buff;
;				count--;
;				buff += 4;
;				result += carry;
;				result += w;
;				carry = (w > result);
;			} while (count);
;			result += carry;
;			result = (result & 0xffff) + (result >> 16);
;		}
;		if (len & 2) {
;			result += *(unsigned short *) buff;
;			buff += 2;
;		}
;	}
;	if (len & 1)
;#ifdef __LITTLE_ENDIAN
;		result += *buff;
;#else
;		result += (*buff << 8);
;#endif
;	result = (result & 0xffff) + (result >> 16);
;	/* add up carry.. */
;	result = (result & 0xffff) + (result >> 16);
;	if (odd)
;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
;out:
;	return result;
;}
;
; A4:	buff
; B4:	len
; return checksum in A4
;
; Register use in the assembly below:
;   A1 = result, A3 = odd, B3 = len (after the return address has been
;   moved from B3 to A5), B0 = count and scratch, A0 = scratch / carry.
; Only A0-A7, B0, B3 and ILC are written here; csum_partial and
; ip_compute_csum keep live values in A8 and A9 across their call.
; NOTE(review): ILC is written by the word loop but, unlike in the
; two routines above, it is not saved and restored here.
;

ENTRY(do_csum)
	; The EXTU and the MV packet after the BNOP still execute when the
	; branch is taken: L26 uses A5 and A1, which are only set there.
	   CMPGT   .L2	   B4,0,B0
   [!B0]   BNOP    .S1	   L26,3	; len <= 0: return 0
	   EXTU    .S1	   A4,31,31,A0	; odd = buff & 1

	   MV	   .L1	   A0,A3	; keep odd for the final byte swap
||	   MV	   .S1X    B3,A5	; return address (B3 is reused below)
||	   MV	   .L2	   B4,B3	; B3 = len
||	   ZERO    .D1	   A1		; result = 0

#ifdef CONFIG_CPU_BIG_ENDIAN
   [A0]    SUB	   .L2	   B3,1,B3	; len--
|| [A0]    LDBU    .D1T1   *A4++,A1	; result = *buff++
#else
   [!A0]   BNOP    .S1	   L21,5	; even address: nothing to do
|| [A0]    LDBU    .D1T1   *A4++,A0	; *buff++
	   SUB	   .L2	   B3,1,B3	; len--
||	   SHL	   .S1	   A0,8,A1	; result = *buff << 8
L21:
#endif
	   SHR	   .S2	   B3,1,B0	; count = len >> 1
   [!B0]   BNOP    .S1	   L24,3	; no halfword at all
	   MVK	   .L1	   2,A0
	   AND	   .L1	   A4,A0,A0	; buff & 2

   [!A0]   BNOP    .S1	   L22,5	; already on a 4-byte boundary
|| [A0]    LDHU    .D1T1   *A4++,A0	; leading halfword
	   SUB	   .L2	   B0,1,B0	; count--
||	   SUB	   .S2	   B3,2,B3	; len -= 2
||	   ADD	   .L1	   A0,A1,A1	; result += leading halfword
L22:
	   SHR	   .S2	   B0,1,B0	; count >>= 1 (nr of 32-bit words)
||	   ZERO    .L1	   A0		; carry = 0

   [!B0]   BNOP    .S1	   L23,5	; no whole word
|| [B0]    MVC	   .S2	   B0,ILC	; loop count

	; Word loop: A2 = result, A1 = w, A0 = carry.
	; NOTE(review): SPMASK L1 presumably keeps the MV below out of
	; the loop buffer so that it runs once - confirm against the ISA.
	   SPLOOP  3
	   SPMASK  L1
||	   MV	   .L1	   A1,A2	; running result starts from A1
||	   LDW	   .D1T1   *A4++,A1	; w = *buff, buff += 4

	   NOP	   4
	   ADD	   .L1	   A0,A1,A0	; carry + w
	   ADD	   .L1	   A2,A0,A2	; result += carry + w

	   SPKERNEL 1,2
||	   CMPGTU  .L1	   A1,A2,A0	; carry = (w > result)

	   ADD	   .L1	   A0,A2,A6	; result += carry
	   EXTU    .S1	   A6,16,16,A7	; result & 0xffff
	   SHRU    .S2X    A6,16,B0	; result >> 16
	   NOP		   1
	   ADD	   .L1X    A7,B0,A1	; folded result back in A1
L23:
	   MVK	   .L2	   2,B0
	   AND	   .L2	   B3,B0,B0	; len & 2
   [B0]    LDHU    .D1T1   *A4++,A0	; trailing halfword
	   NOP	   4
   [B0]    ADD	   .L1	   A0,A1,A1	; result += trailing halfword
L24:
	   EXTU    .S2	   B3,31,31,B0	; len & 1
#ifdef CONFIG_CPU_BIG_ENDIAN
   [!B0]   BNOP    .S1	   L25,4	; no trailing byte
|| [B0]    LDBU    .D1T1   *A4,A0
	   SHL	   .S1	   A0,8,A0	; *buff << 8
	   ADD	   .L1	   A0,A1,A1	; result += *buff << 8
L25:
#else
   [B0]    LDBU    .D1T1   *A4,A0
	   NOP	   4
   [B0]    ADD	   .L1	   A0,A1,A1	; result += *buff
#endif
	   EXTU    .S1	   A1,16,16,A0	; result & 0xffff
	   SHRU    .S2X    A1,16,B0	; result >> 16
	   NOP	   1
	   ADD	   .L1X    A0,B0,A0	; first fold
	   SHRU    .S1	   A0,16,A1
	   ADD	   .L1	   A0,A1,A0	; add up carry
	   EXTU    .S1	   A0,16,16,A1	; keep the low 16 bits
	   EXTU    .S1	   A1,16,24,A2	; (result >> 8) & 0xff

	   EXTU    .S1	   A1,24,16,A0	; (result & 0xff) << 8
||	   MV	   .L2X    A3,B0	; odd

   [B0]    OR	   .L1	   A0,A2,A1	; odd start: swap the two bytes
L26:
	   NOP	   1
	   BNOP    .S2X    A5,4		; return to the address saved in A5
	   MV	   .L1	   A1,A4	; return value
ENDPROC(do_csum)

;__wsum csum_partial(const void *buff, int len, __wsum wsum)
;{
;	unsigned int sum = (__force unsigned int)wsum;
;	unsigned int result = do_csum(buff, len);
;
;	/* add in old sum, and carry.. */
;	result += sum;
;	if (sum > result)
;		result += 1;
;	return (__force __wsum)result;
;}
;
; A4:	buff
; B4:	len
; A6:	wsum
; return checksum in A4
;
; A4 and B4 are passed to do_csum untouched.  The return address and
; wsum are parked in A9 and A8, which do_csum does not write.
;
ENTRY(csum_partial)
	   MV	   .L1X    B3,A9	; keep our own return address
||	   CALLP   .S2	   do_csum,B3
||	   MV	   .S1	   A6,A8	; sum = wsum
	   BNOP    .S2X    A9,2		; return; the three ADD/CMPGTU run first
	   ADD	   .L1	   A8,A4,A1	; result += sum
	   CMPGTU  .L1	   A8,A1,A0	; carry = (sum > result)
	   ADD	   .L1	   A1,A0,A4	; result += carry
ENDPROC(csum_partial)

;unsigned short
;ip_compute_csum(unsigned char *buff, unsigned int len)
;
; A4:	buff
; B4:	len
; return checksum in A4
;
; Calls do_csum with the same arguments and returns the low 16 bits
; of the complemented result.
;

ENTRY(ip_compute_csum)
	   MV	   .L1X    B3,A9	; keep our own return address
||	   CALLP   .S2	   do_csum,B3
	   BNOP    .S2X    A9,3		; return; NOT and CLR run first
	   NOT	   .S1	   A4,A4	; ~do_csum(buff, len)
	   CLR	   .S1	   A4,16,31,A4	; clear bits 16..31
ENDPROC(ip_compute_csum)