1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IP/TCP/UDP checksumming routines
7 *
8 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
9 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
10 *		Tom May, <ftom@netcom.com>
11 *              Pentium Pro/II routines:
12 *              Alexander Kjeldaas <astor@guardian.no>
13 *              Finn Arne Gangstad <finnag@guardian.no>
14 *		Lots of code moved from tcp.c and ip.c; see those files
15 *		for more names.
16 *
17 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
18 *			     handling.
19 *		Andi Kleen,  add zeroing on error
20 *                   converted to pure assembler
21 *
22 *		This program is free software; you can redistribute it and/or
23 *		modify it under the terms of the GNU General Public License
24 *		as published by the Free Software Foundation; either version
25 *		2 of the License, or (at your option) any later version.
26 */
27
28#include <asm/errno.h>
29#include <asm/asm.h>
30
31/*
32 * computes a partial checksum, e.g. for TCP/UDP fragments
33 */
34
35/*
36unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
37 */
38
39.text
40.align 4
41.globl csum_partial
42
43#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
44
45	  /*
46	   * Experiments with Ethernet and SLIP connections show that buff
47	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
48	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
49	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
50	   * alignment for the unrolled loop.
51	   */
/*
 * csum_partial(buff, len, sum) - generic i386 version.
 *
 * Adds the len bytes at buff into the 32-bit accumulator sum with
 * end-around carry and hands the new accumulator back in %eax.
 * The three arguments are read from the stack.  %esi and %ebx are
 * saved and restored; %ecx and %edx are used as scratch.
 *
 * Register roles:
 *   %eax  running sum
 *   %esi  read cursor into buff
 *   %ecx  byte count, then chunk counter, then tail scratch
 *   %edx  copy of the byte count, then leftover-dword counter
 *   %ebx  load scratch
 */
csum_partial:
	pushl	%esi
	pushl	%ebx
	movl	12(%esp), %esi		# esi = buff
	movl	16(%esp), %ecx		# ecx = len
	movl	20(%esp), %eax		# eax = sum

	testl	$2, %esi		# bit 1 of the address set?
	jz	.Lcp_aligned		# no: go straight to the dword loops
	subl	$2, %ecx		# yes: a leading word is consumed first
	jae	.Lcp_lead_word		# there were at least two bytes
	addl	$2, %ecx		# only 0 or 1 byte in total: undo the
	jmp	.Lcp_tail		# subtraction and let the tail code finish

.Lcp_lead_word:
	movw	(%esi), %bx
	addw	%bx, %ax		# 16-bit add into the low half ...
	adcl	$0, %eax		# ... with its carry wrapped back in
	addl	$2, %esi

.Lcp_aligned:
	movl	%ecx, %edx		# keep the byte count for the later stages
	shrl	$5, %ecx		# ecx = number of whole 32-byte chunks
	jz	.Lcp_dwords
	testl	%esi, %esi		# clears CF ahead of the adcl chain

.Lcp_chunk:
	/* eight dwords per pass, carry chained from one add to the next */
	.irp	disp, 0, 4, 8, 12, 16, 20, 24, 28
	movl	\disp(%esi), %ebx
	adcl	%ebx, %eax
	.endr
	leal	32(%esi), %esi		# lea and dec leave CF alone, so the carry
	decl	%ecx			# flows into the next pass
	jne	.Lcp_chunk
	adcl	$0, %eax		# fold in the final carry

.Lcp_dwords:
	movl	%edx, %ecx		# ecx = byte count again (tail needs it)
	andl	$0x1c, %edx		# bytes held in the 0..7 leftover dwords
	je	.Lcp_tail
	shrl	$2, %edx		# dword count; the bit shifted out is 0, so CF = 0
.Lcp_dword:
	adcl	(%esi), %eax
	leal	4(%esi), %esi
	decl	%edx
	jne	.Lcp_dword
	adcl	$0, %eax		# fold in the final carry

.Lcp_tail:
	andl	$3, %ecx		# 0..3 trailing bytes
	jz	.Lcp_done
	cmpl	$2, %ecx
	jb	.Lcp_last_byte		# exactly one byte left
	movw	(%esi), %cx		# two or three: take a word (upper half of ecx is 0)
	leal	2(%esi), %esi		# lea keeps the cmpl flags alive
	je	.Lcp_add_tail		# exactly two bytes left
	shll	$16, %ecx		# three: the word moves to the upper half ...
.Lcp_last_byte:
	movb	(%esi), %cl		# ... and the last byte goes into the low byte
.Lcp_add_tail:
	addl	%ecx, %eax
	adcl	$0, %eax
.Lcp_done:
	popl	%ebx
	popl	%esi
	ret
117
118#else
119
120/* Version for PentiumII/PPro */
121
/*
 * csum_partial(buff, len, sum) - PentiumII/PPro version.
 *
 * Adds the len bytes at buff into the 32-bit accumulator sum with
 * end-around carry and returns the new accumulator in %eax.
 * The three arguments are read from the stack.  %esi and %ebx are
 * saved and restored; %ecx and %edx are used as scratch.
 *
 * The bulk of the work is one unrolled run of 32 dword adds (128
 * bytes).  The first pass enters that run part-way through, via a
 * computed jump, so that it consumes the (len & 0x7c) bytes that do
 * not fill a whole 128-byte block; every later pass runs it in full.
 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $2, %esi		# bit 1 of the address set?
	jnz 30f			# yes: consume a leading word first
10:
	movl %ecx, %edx		# edx = byte count, kept for the 1-3 byte tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = bytes in the partial (< 128 byte) block
	shrl $7, %ecx		# ecx = number of whole 128-byte blocks
	addl %ebx,%esi		# the unrolled adds address backwards from esi
	shrl $2, %ebx		# ebx = dwords in the partial block
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# entry = 45f - 3 * dwords (3 bytes per add)
	testl %esi, %esi	# clears CF for the first adcl
	jmp *%ebx

	/* Handle 2-byte-aligned regions */
20:	addw (%esi), %ax
	lea 2(%esi), %esi	# lea keeps the carry of the addw alive
	adcl $0, %eax
	jmp 10b

30:	subl $2, %ecx
	ja 20b			# more than 2 bytes: take a word, then go aligned
	je 32f			# exactly 2 bytes
	addl $2, %ecx		# len was 0 or 1: restore it
	jz 80f			# len == 0: nothing to sum, do not read the buffer
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# step to the next block; CF is preserved
	adcl $0, %eax		# fold in the carry of the last add
	dec %ecx
	jge 40b			# whole 128-byte blocks still to do
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	/*
	 * Handle the last 1-3 bytes without jumping.  esi now points
	 * 128 bytes past the first unsummed byte, hence the -128 below.
	 */
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx		# ebx = mask keeping the low 1, 2 or 3 bytes
	/*
	 * This reads a whole dword, i.e. up to 3 bytes past the end of
	 * the buffer.
	 * NOTE(review): only bit 1 of buff is tested on entry, so the
	 * alignment assumed here holds only for an even buff - confirm
	 * that callers never pass an odd address.
	 */
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	popl %ebx
	popl %esi
	ret
215
216#endif
217