/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign	L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, int len)
#
###############################################################################
/*
 * Add up the len bytes starting at buff in a 32-bit accumulator, feeding
 * every carry out of bit 31 back into bit 0, then fold the accumulator into
 * 16 bits.
 *
 * In:    d0 = buff, d1 = len
 * Out:   d0 = folded 16-bit sum (0 when len <= 0)
 * Saved: d2, d3 (pushed on entry, popped by the final ret)
 *
 * Register roles inside the routine:
 *   a0          read pointer, advanced as data is consumed
 *   a1          copy of the original buff; only its bit 0 is examined, at
 *               the very end
 *   d1          32-bit accumulator
 *   d2          number of bytes still to be summed
 *   d3          the constant 32 while in the main loop, later a scratch
 *               register for the final byte
 *   d0,e0,e1,e3 data loaded from the buffer; e0, e1 and e3 are overwritten
 *               without being saved
 *
 * Each "add ... addc ... addc +0" group is a single carry chain: the loads
 * placed between the additions are relied upon to leave the carry flag
 * alone, and the closing "addc +0,d1" feeds the last carry back into the
 * accumulator.
 */
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	movm	[d2,d3],(sp)
	mov	d1,d2				# count
	mov	d0,a0				# buff
	mov	a0,a1				# keep the start address for the final odd test
	clr	d1				# accumulator

	cmp	+0,d2
	ble	do_csum_done			# check for zero length or negative

	# 4-byte align the buffer pointer
	btst	+3,a0
	beq	do_csum_now_4b_aligned

	btst	+1,a0
	beq	do_csum_addr_not_odd
	movbu	(a0),d0				# odd start: take one leading byte
	inc	a0
	asl	+8,d0				# ... and place it in bits 15:8
	add	d0,d1
	add	-1,d2

do_csum_addr_not_odd:
	cmp	+2,d2
	bcs	do_csum_fewer_than_4		# 0 or 1 bytes left
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0			# pointer is 2 mod 4: take one halfword
	add	d0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder		# 4-byte aligned remainder

	# d2 is held 32 below the real byte count while looping, so that the
	# borrow out of the "sub" below signals that fewer than 32 bytes remain
	add	-32,d2
	mov	+32,d3

do_csum_loop:
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1				# starts the carry chain
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	addc	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1				# feed the last carry back in

	sub	d3,d2
	bcc	do_csum_loop			# no borrow: another 32 bytes are available

	add	d3,d2				# undo the bias: d2 = bytes left (0-31)
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# sum the remaining whole words: enter the ladder below at the rung
	# that leaves exactly d2 / 4 word additions still to run
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	and	+3,d2				# whole words are done; 0-3 bytes left
	beq	do_csum_done
	xor_cmp	d0,d0,+2,d2			# d0 = 0 and compare d2 with 2 in one go
	bcs	do_csum_fewer_than_2		# exactly one byte left
	movhu	(a0+),d0
	and	+1,d2
	beq	do_csum_add_last_bit		# exactly two bytes left
do_csum_fewer_than_2:
	movbu	(a0),d3				# final byte goes into bits 7:0
	add	d3,d0
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits
	mov	+0xffff0000,d0
	and	d1,d0				# d0 = upper half of the sum, in bits 31:16
	asl	+16,d1				# d1 = lower half of the sum, in bits 31:16
	add	d1,d0				# upper + lower; carry set on 16-bit overflow
	addc	+0xffff,d0			# a carry ripples through bits 15:0 into bit 16
	lsr	+16,d0

	# swap the two bytes of the result if the buffer started on an odd address
	and	+1,a1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0				# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8

	.size	do_csum, .-do_csum