1/* Optimised simple memory checksum
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11#include <asm/cache.h>
12
13	.section .text
14	.balign	L1_CACHE_BYTES
15
16###############################################################################
17#
18# unsigned int do_csum(const unsigned char *buff, int len)
19#
# Accumulate a 32-bit sum with end-around carry over the len bytes at buff,
# then fold that sum down to 16 bits.  Nothing is summed when len <= 0, in
# which case the folded result is 0.
#
# In:   d0 = buff, d1 = len
# Out:  d0 = folded 16-bit sum
# Uses: d1 (accumulator), d2 (bytes remaining), d3 (loop stride / scratch),
#       a0 (read cursor), a1 (copy of the original buff),
#       e0, e1, e3 (load scratch, not saved here)
#       d2 and d3 are saved on entry and named again in the final ret.
#
20###############################################################################
21	.globl	do_csum
22	.type	do_csum,@function
23do_csum:
24	movm	[d2,d3],(sp)			# save d2/d3; both are reused below
25	mov	d1,d2				# count
26	mov	d0,a0				# buff
27	mov	a0,a1				# keep the original pointer for the odd-address test at the end
28	clr	d1				# accumulator
29
30	cmp	+0,d2
31	ble	do_csum_done			# check for zero length or negative
32
33	# 4-byte align the buffer pointer
34	btst	+3,a0				# are either of the low two address bits set?
35	beq	do_csum_now_4b_aligned
36
37	btst	+1,a0				# odd start address?
38	beq	do_csum_addr_not_odd
	# consume the single leading byte: it is added shifted up by 8 bits, and
	# the folded result is byte-swapped just before returning to compensate
39	movbu	(a0),d0
40	inc	a0
41	asl	+8,d0
42	add	d0,d1
43	add	-1,d2
44
45do_csum_addr_not_odd:
	# a0 is now 2-byte aligned; the count may have dropped to zero above
46	cmp	+2,d2
47	bcs	do_csum_fewer_than_4		# fewer than 2 bytes left: tail handling
48	btst	+2,a0				# bit 1 clear means a0 is already 4-byte aligned
49	beq	do_csum_now_4b_aligned
50	movhu	(a0+),d0			# consume one halfword to reach 4-byte alignment
51	add	d0,d1
52	add	-2,d2
53	cmp	+4,d2
54	bcs	do_csum_fewer_than_4		# not even one whole word left
55
56do_csum_now_4b_aligned:
57	# we want to checksum as much as we can in chunks of 32 bytes
58	cmp	+31,d2
59	bls	do_csum_remainder		# 4-byte aligned remainder
60
	# bias the count by -32 so that the sub/bcc pair at the bottom of the loop
	# can test "at least another 32 bytes left" using the borrow alone
61	add	-32,d2
62	mov	+32,d3				# loop stride
63
64do_csum_loop:
	# sum eight words per iteration; only the first add starts a fresh carry
	# chain, each later addc also adds the carry out of the one before it
65	mov	(a0+),d0
66	mov	(a0+),e0
67	mov	(a0+),e1
68	mov	(a0+),e3
69	add	d0,d1
70	addc	e0,d1
71	addc	e1,d1
72	addc	e3,d1
	# NOTE(review): the chain continues across the next four loads, so this
	# relies on the loads leaving the carry flag untouched
73	mov	(a0+),d0
74	mov	(a0+),e0
75	mov	(a0+),e1
76	mov	(a0+),e3
77	addc	d0,d1
78	addc	e0,d1
79	addc	e1,d1
80	addc	e3,d1
81	addc	+0,d1				# fold the final carry back into the sum
82
83	sub	d3,d2
84	bcc	do_csum_loop			# no borrow: at least 32 more bytes to go
85
86	add	d3,d2				# undo the -32 bias; d2 = bytes still to do (0-31)
87	beq	do_csum_done
88
89do_csum_remainder:
90	# cut 16-31 bytes down to 0-15
91	cmp	+16,d2
92	bcs	do_csum_fewer_than_16
93	mov	(a0+),d0
94	mov	(a0+),e0
95	mov	(a0+),e1
96	mov	(a0+),e3
97	add	d0,d1
98	addc	e0,d1
99	addc	e1,d1
100	addc	e3,d1
101	addc	+0,d1				# fold the final carry back into the sum
102	add	-16,d2
103	beq	do_csum_done
104
105do_csum_fewer_than_16:
106	# sum the remaining whole words (0-3 of them); the three cases below
	# fall through into each other so that exactly d2/4 words are added
107	cmp	+4,d2
108	bcs	do_csum_fewer_than_4
109	cmp	+8,d2
110	bcs	do_csum_one_word
111	cmp	+12,d2
112	bcs	do_csum_two_words
113	mov	(a0+),d0
114	add	d0,d1
115	addc	+0,d1
116do_csum_two_words:
117	mov	(a0+),d0
118	add	d0,d1
119	addc	+0,d1
120do_csum_one_word:
121	mov	(a0+),d0
122	add	d0,d1
123	addc	+0,d1
124
125do_csum_fewer_than_4:
	# 0-3 trailing bytes are left; d2 is not decremented per word above, so
	# only the low two bits of the count are meaningful here
126	and	+3,d2
127	beq	do_csum_done
128	xor_cmp	d0,d0,+2,d2			# presumably d0 ^= d0 (d0 = 0) combined with comparing the count against 2
129	bcs	do_csum_fewer_than_2		# exactly one byte left
130	movhu	(a0+),d0			# two or three left: take a halfword first
131	and	+1,d2
132	beq	do_csum_add_last_bit		# no odd byte after the halfword
133do_csum_fewer_than_2:
134	movbu	(a0),d3				# the final single byte
135	add	d3,d0
136do_csum_add_last_bit:
137	add	d0,d1
138	addc	+0,d1				# fold the carry back into the sum
139
140do_csum_done:
141	# compress the checksum down to 16 bits
142	mov	+0xffff0000,d0
143	and	d1,d0				# d0 = upper half of the sum, left in bits 31:16
144	asl	+16,d1				# d1 = lower half of the sum, moved up to bits 31:16
145	add	d1,d0				# add the two halves; carry flag = overflow out of bit 31
146	addc	+0xffff,d0			# low 16 bits are zero here, so 0xffff + carry ripples the carry into bit 16
147	lsr	+16,d0				# bring the 16-bit result down to bits 15:0
148
149	# swap the two bytes of the 16-bit result if the buffer started at an odd
	# address (a1 still holds the pointer as it was passed in)
150	and	+1,a1
151	beq	do_csum_not_oddly_aligned
152	swaph	d0,d0				# exchange bits 15:8 with 7:0
153
154do_csum_not_oddly_aligned:
155	ret	[d2,d3],8			# names the same d2/d3 pair and the 8 bytes saved on entry
156
157	.size	do_csum, .-do_csum
158