/*
 * A fast checksum routine using movem
 * Copyright (c) 1998-2001 Axis Communications AB
 *
 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
 *
 * Adds the 16-bit one's-complement sum of the len bytes at buff to the
 * incoming partial checksum and returns the new 32-bit partial checksum.
 *
 * In:       r10 = buff, r11 = len, r12 = sum
 * Out:      r10 = updated partial checksum
 * Scratch:  r9, r11, r12, r13 and the flags are overwritten;
 *           r0 - r8 are saved on the stack and restored when the
 *           movem loop is used.
 *
 * Note: several instructions below sit directly after a branch or ret
 * and rely on being executed in its delay slot; do not reorder them.
 */

	.globl	csum_partial
csum_partial:

	;; r10 - src
	;; r11 - length
	;; r12 - checksum

	;; Check for the breakeven length between the movem version and the
	;; plain word loop.  Every pass of the movem loop consumes a full
	;; 40 bytes, so it must _NOT_ be entered for short buffers or it
	;; would checksum data beyond the actual length.

	cmpu.w	80,$r11
	blo	_word_loop
	nop

	;; need to save the registers we use below in the movem loop
	;; this overhead is why we have a check above for breakeven length
	;; only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI

	subq	9*4,$sp
	movem	$r8,[$sp]

	;; do a movem checksum

	subq	10*4,$r11	; pre-decrement length for the first pass

_mloop:	movem	[$r10+],$r9	; read 10 longwords into r0 - r9

	;; perform dword checksumming on the 10 longwords; each ax makes
	;; the following add include the carry from the previous one

	add.d	$r0,$r12
	ax
	add.d	$r1,$r12
	ax
	add.d	$r2,$r12
	ax
	add.d	$r3,$r12
	ax
	add.d	$r4,$r12
	ax
	add.d	$r5,$r12
	ax
	add.d	$r6,$r12
	ax
	add.d	$r7,$r12
	ax
	add.d	$r8,$r12
	ax
	add.d	$r9,$r12

	;; fold the carry into the checksum, to avoid having to loop the carry
	;; back into the top

	ax
	addq	0,$r12
	ax			; fold once more: adding the carry above can itself
	addq	0,$r12		; carry out (0xffffffff + 1), and the subq below
				; would otherwise throw that carry away

	subq	10*4,$r11
	bge	_mloop
	nop

	addq	10*4,$r11	; compensate for last loop underflowing length

	movem	[$sp+],$r8	; restore regs

_word_loop:
	;; only fold if there is anything to fold.

	cmpq	0,$r12
	beq	_no_fold

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
	;; r9 and r13 can be used as temporaries.
	;; (the moveq sits in the delay slot of the beq above; that is
	;; harmless because r9 is only a scratch register)

	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
	lsrq	16,$r9

	move.d	$r12,$r13
	lsrq	16,$r13		; r13 = checksum >> 16
	and.d	$r9,$r12	; checksum = checksum & 0xffff
	add.d	$r13,$r12	; checksum += r13  (may now be 17 bits wide)
	move.d	$r12,$r13	; fold again so the result really fits in 16 bits
	lsrq	16,$r13
	and.d	$r9,$r12
	add.d	$r13,$r12

_no_fold:
	cmpq	2,$r11
	blt	_no_words
	nop

	;; checksum the rest of the words

	subq	2,$r11		; pre-decrement length for the first pass

_wloop:	subq	2,$r11
	bge	_wloop
	addu.w	[$r10+],$r12	; delay slot: runs on every pass, including the last

	addq	2,$r11		; compensate for last loop underflowing length

_no_words:
	;; see if we have one odd byte more
	cmpq	1,$r11
	beq	_do_byte
	nop
	ret
	move.d	$r12, $r10	; delay slot: return the checksum in r10

_do_byte:
	;; copy and checksum the last byte
	addu.b	[$r10],$r12
	ret
	move.d	$r12, $r10	; delay slot: return the checksum in r10