1
2
3
4
5
6
7
8
9
10
11 #include <linux/sys.h>
12 #include <asm/processor.h>
13 #include <asm/cache.h>
14 #include <asm/errno.h>
15 #include <asm/ppc_asm.h>
16 #include <asm/export.h>
17
18 .text
19
20
21
22
23
24
25
/*
 * __csum_partial: 32-bit end-around-carry sum of a block of memory.
 *
 * In:  r3 = address of the data
 *      r4 = length in bytes
 *      r5 = initial 32-bit sum
 * Out: r3 = r5 plus all data of the block, with every carry folded back in
 * Scratch: r0, r6, r7, r8, CTR, CR0 and the XER carry bit.
 *
 * The running sum is kept in r5 and built with a chain of adde instructions.
 * The carry bit must therefore survive from one adde to the next, which is
 * why only instructions that leave the carry bit untouched sit between them.
 *
 * NOTE(review): the C prototype is not visible in this file - confirm the
 * argument types against the header that declares this symbol.
 */
26 _GLOBAL(__csum_partial)
27 subi r3,r3,4 /* bias the pointer by -4, all accesses below use offset 4 and up */
28 srawi. r6,r4,2 /* r6 = len / 4, a non-negative len also leaves the carry bit clear */
29 beq 3f /* fewer than 4 bytes: only the tail code runs */
30 andi. r0,r3,2 /* is bit 1 of the address set? */
31 beq+ 1f
32 lhz r0,4(r3) /* yes: take one halfword first */
33 subi r4,r4,2
34 addi r3,r3,2
35 srwi. r6,r4,2 /* recompute the number of whole words left */
36 adde r5,r5,r0
37 beq 3f
38 1: andi. r6,r6,3 /* words that do not fill a group of four */
39 beq 21f
40 mtctr r6
41 2: lwzu r0,4(r3) /* load the next word and advance r3 */
42 adde r5,r5,r0
43 bdnz 2b
44 21: srwi. r6,r4,4 /* r6 = number of 16-byte groups */
45 beq 3f
/*
 * Software-pipelined 4-word loop: the loads run ahead of the adds, and the
 * word loaded into r8 by each lwzu is added at the top of the next
 * iteration, or at label 23 after the last one.
 */
46 lwz r0,4(r3)
47 mtctr r6
48 lwz r6,8(r3)
49 adde r5,r5,r0
50 lwz r7,12(r3)
51 adde r5,r5,r6
52 lwzu r8,16(r3)
53 adde r5,r5,r7
54 bdz 23f /* a single group: skip the loop body */
55 22: lwz r0,4(r3)
56 adde r5,r5,r8
57 lwz r6,8(r3)
58 adde r5,r5,r0
59 lwz r7,12(r3)
60 adde r5,r5,r6
61 lwzu r8,16(r3)
62 adde r5,r5,r7
63 bdnz 22b
64 23: adde r5,r5,r8 /* add the word still pending in r8 */
65 3: andi. r0,r4,2 /* tail: is there a halfword left? */
66 beq+ 4f
67 lhz r0,4(r3)
68 addi r3,r3,2
69 adde r5,r5,r0
70 4: andi. r0,r4,1 /* tail: is there a byte left? */
71 beq+ 5f
72 lbz r0,4(r3)
73 slwi r0,r0,8 /* the last byte goes in the upper byte of a halfword */
74 adde r5,r5,r0
75 5: addze r3,r5 /* fold in the final carry, result is returned in r3 */
76 blr
77 EXPORT_SYMBOL(__csum_partial)
78
79
80
81
82
83
84
85
86
87
/*
 * CSUM_COPY_16_BYTES_WITHEX(n): copy 16 bytes and add them to the checksum.
 *
 * Loads four words through r4 into r7, r8, r9 and r10, stores them through
 * r6 and adds each word into r12 with adde. The lwzu and stwu forms advance
 * r4 and r6 by 16. Each load and store carries a numeric label 8n0 to 8n7
 * so that CSUM_COPY_16_BYTES_EXCODE(n) can attach an exception table entry
 * to it.
 */
88 #define CSUM_COPY_16_BYTES_WITHEX(n) \
89 8 ## n ## 0: \
90 lwz r7,4(r4); \
91 8 ## n ## 1: \
92 lwz r8,8(r4); \
93 8 ## n ## 2: \
94 lwz r9,12(r4); \
95 8 ## n ## 3: \
96 lwzu r10,16(r4); \
97 8 ## n ## 4: \
98 stw r7,4(r6); \
99 adde r12,r12,r7; \
100 8 ## n ## 5: \
101 stw r8,8(r6); \
102 adde r12,r12,r8; \
103 8 ## n ## 6: \
104 stw r9,12(r6); \
105 adde r12,r12,r9; \
106 8 ## n ## 7: \
107 stwu r10,16(r6); \
108 adde r12,r12,r10
109
/*
 * CSUM_COPY_16_BYTES_EXCODE(n): exception table entries for the labels
 * emitted by CSUM_COPY_16_BYTES_WITHEX(n).
 *
 * Labels 8n0 to 8n3 (the four loads) are paired with src_error,
 * labels 8n4 to 8n7 (the four stores) are paired with dst_error.
 */
110 #define CSUM_COPY_16_BYTES_EXCODE(n) \
111 EX_TABLE(8 ## n ## 0b, src_error); \
112 EX_TABLE(8 ## n ## 1b, src_error); \
113 EX_TABLE(8 ## n ## 2b, src_error); \
114 EX_TABLE(8 ## n ## 3b, src_error); \
115 EX_TABLE(8 ## n ## 4b, dst_error); \
116 EX_TABLE(8 ## n ## 5b, dst_error); \
117 EX_TABLE(8 ## n ## 6b, dst_error); \
118 EX_TABLE(8 ## n ## 7b, dst_error);
119
120 .text
/* stabs debug entries of type N_SO, both pointing at local label 0 below */
121 .stabs "arch/powerpc/lib/",N_SO,0,0,0f
122 .stabs "checksum_32.S",N_SO,0,0,0f
123 0:
124
/*
 * Cache line geometry used by csum_partial_copy_generic, derived from
 * L1_CACHE_BYTES and L1_CACHE_SHIFT: size in bytes, log2 of the size,
 * and the mask that extracts the offset inside a cache line.
 */
125 CACHELINE_BYTES = L1_CACHE_BYTES
126 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
127 CACHELINE_MASK = (L1_CACHE_BYTES-1)
128
/*
 * csum_partial_copy_generic: copy a block of memory and compute its 32-bit
 * end-around-carry sum in the same pass.
 *
 * In:  r3 = source address
 *      r4 = destination address
 *      r5 = length in bytes
 *      r6 = initial 32-bit sum
 *      r7 = pointer saved at 12(r1), used only by src_error
 *      r8 = pointer saved at 8(r1), used only by dst_error
 * Out: r3 = checksum
 *
 * Register roles after the setup code:
 *      r4  = source address - 4        r6  = destination address - 4
 *      r5  = bytes still to copy       r12 = running sum
 *      r11 = constant 4 for dcbz       cr7.eq = result needs no byte rotation
 * r0, r3, r7 to r10 and CTR are scratch.
 *
 * r12 is built with a chain of adde instructions, so the XER carry bit must
 * survive between them. Loads and stores that may fault carry numeric labels
 * which the exception table after this function maps to src_error or
 * dst_error.
 *
 * Phases: bytes then words until the destination is cache line aligned,
 * whole cache lines with source prefetch and dcbz on the destination,
 * then remaining words, one halfword and one byte.
 */
129 _GLOBAL(csum_partial_copy_generic)
130 stwu r1,-16(r1) /* open a 16-byte stack frame */
131 stw r7,12(r1) /* kept for src_error */
132 stw r8,8(r1) /* kept for dst_error */
133
134 addic r12,r6,0 /* r12 = initial sum, adding 0 also clears the carry bit */
135 addi r6,r4,-4 /* r6 = destination - 4 */
136 neg r0,r4
137 addi r4,r3,-4 /* r4 = source - 4 */
138 andi. r0,r0,CACHELINE_MASK /* r0 = bytes until the destination hits a cache line boundary */
139 crset 4*cr7+eq /* default: the result is returned without rotation */
140 beq 58f /* destination is already cache line aligned */
141
142 cmplw 0,r5,r0 /* is the length shorter than that alignment run? */
143 blt 63f /* yes: use the plain word, halfword, byte copy */
144 rlwinm r7,r6,3,0x8 /* r7 = 8 when the destination address is odd, else 0 */
145 rlwnm r12,r12,r7,0,31 /* odd destination: rotate the initial sum by one byte */
146 cmplwi cr7,r7,0 /* cr7.eq = destination address is even */
147 andi. r8,r0,3 /* bytes needed to word align the destination */
148 mtctr r8
149 beq+ 61f
150 li r3,0 /* r3 collects the leading bytes */
151 70: lbz r9,4(r4) /* byte loop */
152 addi r4,r4,1
153 slwi r3,r3,8
154 rlwimi r3,r9,0,24,31 /* r3 = (r3 << 8) | byte */
155 71: stb r9,4(r6)
156 addi r6,r6,1
157 bdnz 70b
158 adde r12,r12,r3 /* add the collected bytes as one value */
159 61: subf r5,r0,r5 /* r5 = length left after the whole alignment run */
160 srwi. r0,r0,2 /* words until the cache line boundary */
161 mtctr r0
162 beq 58f
163 72: lwzu r9,4(r4) /* word loop up to the cache line boundary */
164 adde r12,r12,r9
165 73: stwu r9,4(r6)
166 bdnz 72b
167
168 58: srwi. r0,r5,LG_CACHELINE_BYTES /* r0 = number of complete cache lines */
169 clrlwi r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes left over after them */
170 li r11,4 /* dcbz offset, cancels the -4 bias held in r6 */
171 beq 63f /* no complete cache line to copy */
172
173
/*
 * Choose how many source cache lines to prefetch ahead (r7):
 * 0 when there is a single line, otherwise 1, or MAX_COPY_PREFETCH when
 * more than MAX_COPY_PREFETCH lines remain. r3 is the prefetch offset
 * relative to r4.
 */
174 li r3,4
175 cmpwi r0,1
176 li r7,0
177 ble 114f
178 li r7,1
179 #if MAX_COPY_PREFETCH > 1
180
181
182
183 cmpwi r0,MAX_COPY_PREFETCH
184 ble 112f
185 li r7,MAX_COPY_PREFETCH
186 112: mtctr r7
187 111: dcbt r3,r4 /* touch r7 source lines in advance */
188 addi r3,r3,CACHELINE_BYTES
189 bdnz 111b
190 #else
191 dcbt r3,r4
192 addi r3,r3,CACHELINE_BYTES
193 #endif
194
/*
 * First pass copies r0 - r7 lines while prefetching r7 lines ahead.
 * r0 then holds the r7 lines still to do, and the code after the loop
 * branches back here once with r7 = 0 and r3 = 4 to copy them.
 */
195 114: subf r8,r7,r0
196 mr r0,r7
197 mtctr r8
198
199 53: dcbt r3,r4 /* prefetch hint for the source line at offset r3 */
200 54: dcbz r11,r6 /* zero the destination cache line before it is overwritten */
201
/* one cache line per iteration, 16 bytes per macro invocation */
202 CSUM_COPY_16_BYTES_WITHEX(0)
203 #if L1_CACHE_BYTES >= 32
204 CSUM_COPY_16_BYTES_WITHEX(1)
205 #if L1_CACHE_BYTES >= 64
206 CSUM_COPY_16_BYTES_WITHEX(2)
207 CSUM_COPY_16_BYTES_WITHEX(3)
208 #if L1_CACHE_BYTES >= 128
209 CSUM_COPY_16_BYTES_WITHEX(4)
210 CSUM_COPY_16_BYTES_WITHEX(5)
211 CSUM_COPY_16_BYTES_WITHEX(6)
212 CSUM_COPY_16_BYTES_WITHEX(7)
213 #endif
214 #endif
215 #endif
216 bdnz 53b
217 cmpwi r0,0 /* any lines left from the prefetch lead? */
218 li r3,4
219 li r7,0
220 bne 114b /* yes: second pass without a prefetch lead */
221
222 63: srwi. r0,r5,2 /* remaining whole words */
223 mtctr r0
224 beq 64f
225 30: lwzu r0,4(r4)
226 adde r12,r12,r0
227 31: stwu r0,4(r6)
228 bdnz 30b
229
230 64: andi. r0,r5,2 /* is there a halfword left? */
231 beq+ 65f
232 40: lhz r0,4(r4)
233 addi r4,r4,2
234 41: sth r0,4(r6)
235 adde r12,r12,r0
236 addi r6,r6,2
237 65: andi. r0,r5,1 /* is there a byte left? */
238 beq+ 66f
239 50: lbz r0,4(r4)
240 51: stb r0,4(r6)
241 slwi r0,r0,8 /* the last byte goes in the upper byte of a halfword */
242 adde r12,r12,r0
243 66: addze r3,r12 /* fold in the final carry, result in r3 */
244 addi r1,r1,16 /* pop the stack frame */
245 beqlr+ cr7 /* return now unless the odd destination path was taken */
246 rlwinm r3,r3,8,0,31 /* odd destination: rotate the result by one byte */
247 blr
248
249
/*
 * src_error: target of the exception table entries attached to the loads
 * in csum_partial_copy_generic.
 *
 * Reloads the value that was saved from r7 at 12(r1), pops the 16-byte
 * frame and returns. When that value is not zero it is used as an address
 * and -EFAULT is stored there first. r3 is not written on this path.
 */
250 src_error:
251 lwz r7,12(r1)
252 addi r1,r1,16
253 cmpwi cr0,r7,0
254 beqlr /* zero pointer: nothing to report */
255 li r0,-EFAULT
256 stw r0,0(r7)
257 blr
258
/*
 * dst_error: target of the exception table entries attached to the stores
 * and to the dcbz in csum_partial_copy_generic.
 *
 * Reloads the value that was saved from r8 at 8(r1), pops the 16-byte
 * frame and returns. When that value is not zero it is used as an address
 * and -EFAULT is stored there first. r3 is not written on this path.
 */
259 dst_error:
260 lwz r8,8(r1)
261 addi r1,r1,16
262 cmpwi cr0,r8,0
263 beqlr /* zero pointer: nothing to report */
264 li r0,-EFAULT
265 stw r0,0(r8)
266 blr
267
/*
 * Exception table for csum_partial_copy_generic. Each entry pairs the
 * numeric label of one load, store or dcbz with the handler to run when
 * that instruction faults: loads go to src_error, stores and the dcbz go
 * to dst_error.
 */
/* alignment phase: byte loop (70, 71) and word loop (72, 73), then the dcbz (54) */
268 EX_TABLE(70b, src_error);
269 EX_TABLE(71b, dst_error);
270 EX_TABLE(72b, src_error);
271 EX_TABLE(73b, dst_error);
272 EX_TABLE(54b, dst_error);
273
274
275
276
277
/* cache line loop: one EXCODE per WITHEX used above, under the same conditions */
278 CSUM_COPY_16_BYTES_EXCODE(0)
279 #if L1_CACHE_BYTES >= 32
280 CSUM_COPY_16_BYTES_EXCODE(1)
281 #if L1_CACHE_BYTES >= 64
282 CSUM_COPY_16_BYTES_EXCODE(2)
283 CSUM_COPY_16_BYTES_EXCODE(3)
284 #if L1_CACHE_BYTES >= 128
285 CSUM_COPY_16_BYTES_EXCODE(4)
286 CSUM_COPY_16_BYTES_EXCODE(5)
287 CSUM_COPY_16_BYTES_EXCODE(6)
288 CSUM_COPY_16_BYTES_EXCODE(7)
289 #endif
290 #endif
291 #endif
292
/* tail: word loop (30, 31), halfword (40, 41) and byte (50, 51) */
293 EX_TABLE(30b, src_error);
294 EX_TABLE(31b, dst_error);
295 EX_TABLE(40b, src_error);
296 EX_TABLE(41b, dst_error);
297 EX_TABLE(50b, src_error);
298 EX_TABLE(51b, dst_error);
299
300 EXPORT_SYMBOL(csum_partial_copy_generic)
301
302
303
304
305
306
307
/*
 * csum_ipv6_magic: complemented 16-bit end-around-carry sum over two
 * 16-byte blocks and three scalar values.
 *
 * In:  r3 = address of the first 16-byte block (read as four words)
 *      r4 = address of the second 16-byte block (read as four words)
 *      r5, r6 = two values that are added together before joining the sum
 *      r7 = initial 32-bit sum
 * Out: r3 = result in the low 16 bits, upper 16 bits cleared
 * Scratch: r0, r5, r8 to r11 and the XER carry bit.
 *
 * The loads are interleaved with the adde chain and r8 to r11 are reused
 * for the second block, so the instruction order is significant.
 *
 * NOTE(review): going by the symbol name, r3 and r4 are presumably the IPv6
 * source and destination addresses and r5, r6 the length and next-header
 * value - confirm against the C prototype, which is not visible here.
 */
308 _GLOBAL(csum_ipv6_magic)
309 lwz r8, 0(r3)
310 lwz r9, 4(r3)
311 addc r0, r7, r8 /* first add of the chain, sets the carry bit afresh */
312 lwz r10, 8(r3)
313 adde r0, r0, r9
314 lwz r11, 12(r3)
315 adde r0, r0, r10
316 lwz r8, 0(r4) /* r8 to r11 are reused for the second block */
317 adde r0, r0, r11
318 lwz r9, 4(r4)
319 adde r0, r0, r8
320 lwz r10, 8(r4)
321 adde r0, r0, r9
322 lwz r11, 12(r4)
323 adde r0, r0, r10
324 add r5, r5, r6 /* plain add: leaves the carry bit of the chain alone */
325 adde r0, r0, r11
326 adde r0, r0, r5
327 addze r0, r0 /* fold in the last carry */
328 rotlwi r3, r0, 16 /* r3 = sum with its halfwords swapped */
329 add r3, r0, r3 /* upper halfword = high + low halfword with end-around carry */
330 not r3, r3 /* complement */
331 rlwinm r3, r3, 16, 16, 31 /* move the upper halfword down and clear the rest */
332 blr
333 EXPORT_SYMBOL(csum_ipv6_magic)