/*
 * A fast checksum routine using movem
 * Copyright (c) 1998-2007 Axis Communications AB
 *
 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
 *
 * Adds the bytes at buff to the running 32-bit sum and returns the new
 * sum.  Three phases:
 *   1. while at least 40 bytes remain: load ten longwords with movem and
 *      accumulate them with an add-with-carry chain;
 *   2. remaining 16-bit words: added one at a time;
 *   3. one trailing odd byte, if any.
 * No final fold to 16 bits is performed here; the value handed back is
 * whatever has accumulated in r12.
 *
 * In:    r10 = buff, r11 = len, r12 = sum
 * Out:   r10 = updated sum (copied from r12 in the delay slot of ret)
 * Saved: r0-r8 are pushed and popped around the movem loop
 * Clobb: r9, r11, r12, r13, acr, flags
 */

	.globl	csum_partial
	.type	csum_partial,@function
csum_partial:

	;; r10 - src
	;; r11 - length
	;; r12 - checksum

	;; Optimized for large packets
	;; Bias the length by one 40-byte chunk; a negative result means there
	;; is less than one full chunk, so go straight to the word loop.
	subq	10*4, $r11
	blt	_word_loop
	move.d	$r11, $acr	; delay slot: acr = len - 40, the movem loop counter

	;; Make room for and save r0-r8 (9 registers); the movem loop below
	;; overwrites r0-r9.  r9 is not saved.
	subq	9*4,$sp
	clearf	c		; the addc chain must start with carry = 0
	movem	$r8,[$sp]

	;; do a movem checksum

_mloop:	movem	[$r10+],$r9	; read 10 longwords
	;; Loop count without touching the c flag.
	addoq	-10*4, $acr, $acr
	;; perform dword checksumming on the 10 longwords
	;; Each addc also adds the carry left by the previous one, including
	;; the carry left over from the previous loop iteration.

	addc	$r0,$r12
	addc	$r1,$r12
	addc	$r2,$r12
	addc	$r3,$r12
	addc	$r4,$r12
	addc	$r5,$r12
	addc	$r6,$r12
	addc	$r7,$r12
	addc	$r8,$r12
	addc	$r9,$r12

	;; test $acr without trashing carry.
	;; acr >= 0 means at least another 40 bytes remain.
	move.d	$acr, $acr
	bpl	_mloop
	;; r11 <= acr  is not really needed in the mloop, just using the dslot
	;; to prepare for what is needed after mloop.
	move.d	$acr, $r11

	;; fold the last carry into r12
	addc	0, $r12
	movem	[$sp+],$r8	; restore regs

_word_loop:
	;; Reached both from the short-length branch above and by falling out
	;; of the movem loop; in either case r11 is (bytes remaining - 40).
	addq	10*4,$r11	; compensate for last loop underflowing length

	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
	lsrq	16,$r9

	;; Split the 32-bit sum into halves so they can be added together as
	;; two 16-bit quantities before the 16-bit word additions start.
	move.d	$r12,$r13
	lsrq	16,$r13		; r13 = checksum >> 16
	and.d	$r9,$r12	; checksum = checksum & 0xffff

	;; NOTE: no branch in this routine targets _no_fold.
_no_fold:
	subq	2,$r11
	blt	_no_words	; fewer than 2 bytes left: skip the word loop
	add.d	$r13,$r12	; checksum += r13 (delay slot: runs on both paths)

	;; checksum the rest of the words
	;; r11 is kept biased by -2 here; the addu.w sits in the delay slot of
	;; bge, so it also runs on the final pass when the branch falls through.
_wloop:	subq	2,$r11
	bge	_wloop
	addu.w	[$r10+],$r12

_no_words:
	addq	2,$r11		; undo the bias: r11 = bytes still unprocessed
	;; see if we have one odd byte more
	bne	_do_byte
	nop			; delay slot intentionally empty
	ret
	move.d	$r12,$r10	; delay slot of ret: hand the sum back in r10

_do_byte:
	;; copy and checksum the last byte
	addu.b	[$r10],$r12	; add the byte at [r10], zero-extended
	ret
	move.d	$r12,$r10	; delay slot of ret: hand the sum back in r10

	.size	csum_partial, .-csum_partial