/*
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *		             int len, unsigned int sum)
 */

11	.globl	csum_partial_copy_nocheck
12csum_partial_copy_nocheck:
13
14	;; r10 - src
15	;; r11 - dst
16	;; r12 - length
17	;; r13 - checksum
18
19	;; check for breakeven length between movem and normal word looping versions
20	;; we also do _NOT_ want to compute a checksum over more than the
21	;; actual length when length < 40
22
23	cmpu.w	80, $r12
24	blo	_word_loop
25	nop
26
27	;; need to save the registers we use below in the movem loop
28	;; this overhead is why we have a check above for breakeven length
29	;; only r0 - r8 have to be saved, the other ones are clobber-able
30	;; according to the ABI
31
32	subq	9*4, $sp
33	movem	$r8, [$sp]
34
35	;; do a movem copy and checksum
36
37	subq	10*4, $r12	; update length for the first loop
38
39_mloop:	movem	[$r10+],$r9	; read 10 longwords
401:	;; A failing userspace access will have this as PC.
41	movem	$r9,[$r11+]	; write 10 longwords
42
43	;; perform dword checksumming on the 10 longwords
44
45	add.d	$r0,$r13
46	ax
47	add.d	$r1,$r13
48	ax
49	add.d	$r2,$r13
50	ax
51	add.d	$r3,$r13
52	ax
53	add.d	$r4,$r13
54	ax
55	add.d	$r5,$r13
56	ax
57	add.d	$r6,$r13
58	ax
59	add.d	$r7,$r13
60	ax
61	add.d	$r8,$r13
62	ax
63	add.d	$r9,$r13
64
65	;; fold the carry into the checksum, to avoid having to loop the carry
66	;; back into the top
67
68	ax
69	addq	0,$r13
70
71	subq	10*4,$r12
72	bge	_mloop
73	nop
74
75	addq	10*4,$r12	; compensate for last loop underflowing length
76
77	movem	[$sp+],$r8	; restore regs
78
79_word_loop:
80	;; only fold if there is anything to fold.
81
82	cmpq	0,$r13
83	beq	_no_fold
84
85	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
86	;; r9 can be used as temporary.
87
88	move.d	$r13,$r9
89	lsrq	16,$r9		; r0 = checksum >> 16
90	and.d	0xffff,$r13	; checksum = checksum & 0xffff
91	add.d	$r9,$r13	; checksum += r0
92
93_no_fold:
94	cmpq	2,$r12
95	blt	_no_words
96	nop
97
98	;; copy and checksum the rest of the words
99
100	subq	2,$r12
101
102_wloop:	move.w	[$r10+],$r9
1032:	;; A failing userspace access will have this as PC.
104	addu.w	$r9,$r13
105	subq	2,$r12
106	bge	_wloop
107	move.w	$r9,[$r11+]
108
109	addq	2,$r12
110
111_no_words:
112	;; see if we have one odd byte more
113	cmpq	1,$r12
114	beq	_do_byte
115	nop
116	ret
117	move.d	$r13, $r10
118
119_do_byte:
120	;; copy and checksum the last byte
121	move.b	[$r10],$r9
1223:	;; A failing userspace access will have this as PC.
123	addu.b	$r9,$r13
124	move.b	$r9,[$r11]
125	ret
126	move.d	$r13, $r10
127