1/*
2 * A fast checksum routine using movem
3 * Copyright (c) 1998-2007 Axis Communications AB
4 *
5 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
6 */
7
8	.globl	csum_partial
9	.type   csum_partial,@function
10csum_partial:
11
	;; Adds the bytes at src into the running 32-bit checksum and returns
	;; the updated sum in r10.  Buffers of 40 bytes or more are consumed
	;; ten longwords at a time with movem/addc; what is left is summed
	;; as 16-bit words, then as a single trailing byte if length is odd.
	;; The returned value is the raw accumulator; no final reduction to
	;; 16 bits is done in this routine.
	;;
	;; Inputs:
12	;; r10 - src
13	;; r11 - length
14	;; r12 - checksum
	;;
	;; Output:   r10 - updated checksum (copied from r12 before return)
	;; Modified and not restored: r9, r11, r12, r13, acr, flags.
	;; r0-r8 are saved on the stack and restored, but only on the
	;; large-buffer path, which is the only path that overwrites them.
	;;
	;; Every branch and ret below is followed by one instruction that
	;; sits in its delay slot; those instructions do real work, so the
	;; statement order must not be changed.
15
16	;; Optimized for large packets
	;; Pre-subtract one 40-byte block from the length.  A negative result
	;; means there is less than one full block, so skip the movem loop.
17	subq	10*4, $r11
18	blt	_word_loop
	;; delay slot: acr becomes the block-loop counter (length - 40).
	;; It is written on the short path too, where it is simply unused.
19	move.d	$r11, $acr
20
	;; Reserve 9 longwords of stack and save r0-r8 there; clear the carry
	;; flag so that the first addc of the loop starts without a carry-in.
21	subq	9*4,$sp
22	clearf	c
23	movem	$r8,[$sp]
24
25	;; do a movem checksum
26
27_mloop:	movem	[$r10+],$r9	; read 10 longwords
28	;; Loop count without touching the c flag.
29	addoq	-10*4, $acr, $acr
30	;; perform dword checksumming on the 10 longwords
	;; Each addc also adds the carry left by the previous one, so the
	;; carry is chained through all ten additions and, via the
	;; carry-preserving counter update and test, into the next iteration.
31
32	addc	$r0,$r12
33	addc	$r1,$r12
34	addc	$r2,$r12
35	addc	$r3,$r12
36	addc	$r4,$r12
37	addc	$r5,$r12
38	addc	$r6,$r12
39	addc	$r7,$r12
40	addc	$r8,$r12
41	addc	$r9,$r12
42
43	;; test $acr without trashing carry.
	;; acr >= 0 here means at least one more full 40-byte block remains.
44	move.d	$acr, $acr
45	bpl	_mloop
46	;; r11 <= acr  is not really needed in the mloop, just using the dslot
47	;; to prepare for what is needed after mloop.
48	move.d	$acr, $r11
49
50	;; fold the last carry into r12
51	addc	0, $r12
52	movem	[$sp+],$r8	; restore regs
53
	;; Reached either directly from the entry test or after the block
	;; loop.  In both cases r11 holds (bytes remaining - 40).
54_word_loop:
55	addq	10*4,$r11	; compensate for last loop underflowing length
56
57	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
58	lsrq	16,$r9
59
	;; Split the 32-bit sum into its two 16-bit halves; they are added
	;; back together in the delay slot of the blt below.
60	move.d	$r12,$r13
61	lsrq	16,$r13		; r13 = checksum >> 16
62	and.d	$r9,$r12	; checksum = checksum & 0xffff
63
	;; NOTE(review): nothing in this file branches to _no_fold.
64_no_fold:
	;; First "is there a whole 16-bit word left?" test.  The add.d is in
	;; the delay slot, so high and low halves are combined whether or not
	;; the branch to _no_words is taken.
65	subq	2,$r11
66	blt	_no_words
67	add.d	$r13,$r12	; checksum += r13
68
69	;; checksum the rest of the words
	;; r11 has already been decremented once for the first word, so each
	;; pass decrements for the *next* word while the delay-slot addu.w
	;; adds the current one (zero-extended, post-incrementing r10).  The
	;; delay slot also runs on the final, not-taken pass, which is what
	;; consumes the last word.
70_wloop:	subq	2,$r11
71	bge	_wloop
72	addu.w	[$r10+],$r12
73
74_no_words:
	;; Undo the last subtraction: r11 is now 0 (even length) or 1 (one
	;; byte left over).
75	addq	2,$r11
76	;; see if we have one odd byte more
77	bne	_do_byte
78	nop
	;; Even length: return, moving the checksum into r10 in the delay slot.
79	ret
80	move.d	$r12,$r10
81
82_do_byte:
83	;; checksum the last byte (zero-extended; nothing is copied here)
84	addu.b	[$r10],$r12
	;; Return, moving the checksum into r10 in the delay slot.
85	ret
86	move.d	$r12,$r10
87
88	.size   csum_partial, .-csum_partial
89