1
2
3
4
5
6
7
/*
 * Prototypes for the plain (non-prefetching) XOR routines implemented in
 * the asm() block further down.  In that assembly the first argument is
 * shifted right by 6 to form a count of 64-byte chunks, and the remaining
 * arguments are buffer pointers; the XOR of all buffers is stored back
 * through the first pointer.  The _N suffix is the number of buffers.
 */
8 extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
9 extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
10 unsigned long *);
11 extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
12 unsigned long *, unsigned long *);
13 extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
14 unsigned long *, unsigned long *, unsigned long *);
15
/*
 * Prototypes for the prefetching XOR routines implemented in the asm()
 * block further down.  Argument layout matches the xor_alpha_N routines:
 * a size first, then N buffer pointers, with the result stored through
 * the first pointer.  These variants additionally touch memory ahead of
 * the chunk being processed (see the "ldq $31, ..." loads in the assembly).
 */
16 extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
17 unsigned long *);
18 extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
19 unsigned long *, unsigned long *);
20 extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
21 unsigned long *, unsigned long *,
22 unsigned long *);
23 extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
24 unsigned long *, unsigned long *,
25 unsigned long *, unsigned long *);
26
/*
 * Hand-scheduled Alpha assembly for the eight XOR routines declared above.
 *
 * Common structure, as visible in each routine:
 *   - $16 is shifted right by 6 on entry, turning it into a count of
 *     64-byte chunks; $17 and up hold the buffer pointers.
 *   - Each pass of the numbered local label loads eight quadwords
 *     (offsets 0..56) from every buffer, XORs them together, and stores
 *     the result back through $17 only.
 *   - All pointers then advance by 64, $16 is decremented, and "bgt"
 *     repeats the loop while the count is still positive.  The test sits
 *     at the bottom, so the body always runs at least once.
 *
 * The xor_alpha_prefetch_N variants additionally issue "ldq $31, ..."
 * loads at offsets 0, 64, 128 and 192 before the loop and at offset 256
 * inside it.  NOTE(review): $31 is presumably the always-zero register, so
 * these loads act purely as prefetches -- confirm against the Alpha
 * architecture manual.
 *
 * The "# N cycles from ... load" remarks inside the string are the
 * original scheduling notes; the instruction order looks deliberate and
 * should not be rearranged casually.
 */
27 asm(" \n\
28 .text \n\
29 .align 3 \n\
30 .ent xor_alpha_2 \n\
31 xor_alpha_2: \n\
32 .prologue 0 \n\
33 srl $16, 6, $16 \n\
34 .align 4 \n\
35 2: \n\
36 ldq $0,0($17) \n\
37 ldq $1,0($18) \n\
38 ldq $2,8($17) \n\
39 ldq $3,8($18) \n\
40 \n\
41 ldq $4,16($17) \n\
42 ldq $5,16($18) \n\
43 ldq $6,24($17) \n\
44 ldq $7,24($18) \n\
45 \n\
46 ldq $19,32($17) \n\
47 ldq $20,32($18) \n\
48 ldq $21,40($17) \n\
49 ldq $22,40($18) \n\
50 \n\
51 ldq $23,48($17) \n\
52 ldq $24,48($18) \n\
53 ldq $25,56($17) \n\
54 xor $0,$1,$0 # 7 cycles from $1 load \n\
55 \n\
56 ldq $27,56($18) \n\
57 xor $2,$3,$2 \n\
58 stq $0,0($17) \n\
59 xor $4,$5,$4 \n\
60 \n\
61 stq $2,8($17) \n\
62 xor $6,$7,$6 \n\
63 stq $4,16($17) \n\
64 xor $19,$20,$19 \n\
65 \n\
66 stq $6,24($17) \n\
67 xor $21,$22,$21 \n\
68 stq $19,32($17) \n\
69 xor $23,$24,$23 \n\
70 \n\
71 stq $21,40($17) \n\
72 xor $25,$27,$25 \n\
73 stq $23,48($17) \n\
74 subq $16,1,$16 \n\
75 \n\
76 stq $25,56($17) \n\
77 addq $17,64,$17 \n\
78 addq $18,64,$18 \n\
79 bgt $16,2b \n\
80 \n\
81 ret \n\
82 .end xor_alpha_2 \n\
83 \n\
84 .align 3 \n\
85 .ent xor_alpha_3 \n\
86 xor_alpha_3: \n\
87 .prologue 0 \n\
88 srl $16, 6, $16 \n\
89 .align 4 \n\
90 3: \n\
91 ldq $0,0($17) \n\
92 ldq $1,0($18) \n\
93 ldq $2,0($19) \n\
94 ldq $3,8($17) \n\
95 \n\
96 ldq $4,8($18) \n\
97 ldq $6,16($17) \n\
98 ldq $7,16($18) \n\
99 ldq $21,24($17) \n\
100 \n\
101 ldq $22,24($18) \n\
102 ldq $24,32($17) \n\
103 ldq $25,32($18) \n\
104 ldq $5,8($19) \n\
105 \n\
106 ldq $20,16($19) \n\
107 ldq $23,24($19) \n\
108 ldq $27,32($19) \n\
109 nop \n\
110 \n\
111 xor $0,$1,$1 # 8 cycles from $0 load \n\
112 xor $3,$4,$4 # 6 cycles from $4 load \n\
113 xor $6,$7,$7 # 6 cycles from $7 load \n\
114 xor $21,$22,$22 # 5 cycles from $22 load \n\
115 \n\
116 xor $1,$2,$2 # 9 cycles from $2 load \n\
117 xor $24,$25,$25 # 5 cycles from $25 load \n\
118 stq $2,0($17) \n\
119 xor $4,$5,$5 # 6 cycles from $5 load \n\
120 \n\
121 stq $5,8($17) \n\
122 xor $7,$20,$20 # 7 cycles from $20 load \n\
123 stq $20,16($17) \n\
124 xor $22,$23,$23 # 7 cycles from $23 load \n\
125 \n\
126 stq $23,24($17) \n\
127 xor $25,$27,$27 # 7 cycles from $27 load \n\
128 stq $27,32($17) \n\
129 nop \n\
130 \n\
131 ldq $0,40($17) \n\
132 ldq $1,40($18) \n\
133 ldq $3,48($17) \n\
134 ldq $4,48($18) \n\
135 \n\
136 ldq $6,56($17) \n\
137 ldq $7,56($18) \n\
138 ldq $2,40($19) \n\
139 ldq $5,48($19) \n\
140 \n\
141 ldq $20,56($19) \n\
142 xor $0,$1,$1 # 4 cycles from $1 load \n\
143 xor $3,$4,$4 # 5 cycles from $4 load \n\
144 xor $6,$7,$7 # 5 cycles from $7 load \n\
145 \n\
146 xor $1,$2,$2 # 4 cycles from $2 load \n\
147 xor $4,$5,$5 # 5 cycles from $5 load \n\
148 stq $2,40($17) \n\
149 xor $7,$20,$20 # 4 cycles from $20 load \n\
150 \n\
151 stq $5,48($17) \n\
152 subq $16,1,$16 \n\
153 stq $20,56($17) \n\
154 addq $19,64,$19 \n\
155 \n\
156 addq $18,64,$18 \n\
157 addq $17,64,$17 \n\
158 bgt $16,3b \n\
159 ret \n\
160 .end xor_alpha_3 \n\
161 \n\
162 .align 3 \n\
163 .ent xor_alpha_4 \n\
164 xor_alpha_4: \n\
165 .prologue 0 \n\
166 srl $16, 6, $16 \n\
167 .align 4 \n\
168 4: \n\
169 ldq $0,0($17) \n\
170 ldq $1,0($18) \n\
171 ldq $2,0($19) \n\
172 ldq $3,0($20) \n\
173 \n\
174 ldq $4,8($17) \n\
175 ldq $5,8($18) \n\
176 ldq $6,8($19) \n\
177 ldq $7,8($20) \n\
178 \n\
179 ldq $21,16($17) \n\
180 ldq $22,16($18) \n\
181 ldq $23,16($19) \n\
182 ldq $24,16($20) \n\
183 \n\
184 ldq $25,24($17) \n\
185 xor $0,$1,$1 # 6 cycles from $1 load \n\
186 ldq $27,24($18) \n\
187 xor $2,$3,$3 # 6 cycles from $3 load \n\
188 \n\
189 ldq $0,24($19) \n\
190 xor $1,$3,$3 \n\
191 ldq $1,24($20) \n\
192 xor $4,$5,$5 # 7 cycles from $5 load \n\
193 \n\
194 stq $3,0($17) \n\
195 xor $6,$7,$7 \n\
196 xor $21,$22,$22 # 7 cycles from $22 load \n\
197 xor $5,$7,$7 \n\
198 \n\
199 stq $7,8($17) \n\
200 xor $23,$24,$24 # 7 cycles from $24 load \n\
201 ldq $2,32($17) \n\
202 xor $22,$24,$24 \n\
203 \n\
204 ldq $3,32($18) \n\
205 ldq $4,32($19) \n\
206 ldq $5,32($20) \n\
207 xor $25,$27,$27 # 8 cycles from $27 load \n\
208 \n\
209 ldq $6,40($17) \n\
210 ldq $7,40($18) \n\
211 ldq $21,40($19) \n\
212 ldq $22,40($20) \n\
213 \n\
214 stq $24,16($17) \n\
215 xor $0,$1,$1 # 9 cycles from $1 load \n\
216 xor $2,$3,$3 # 5 cycles from $3 load \n\
217 xor $27,$1,$1 \n\
218 \n\
219 stq $1,24($17) \n\
220 xor $4,$5,$5 # 5 cycles from $5 load \n\
221 ldq $23,48($17) \n\
222 ldq $24,48($18) \n\
223 \n\
224 ldq $25,48($19) \n\
225 xor $3,$5,$5 \n\
226 ldq $27,48($20) \n\
227 ldq $0,56($17) \n\
228 \n\
229 ldq $1,56($18) \n\
230 ldq $2,56($19) \n\
231 xor $6,$7,$7 # 8 cycles from $6 load \n\
232 ldq $3,56($20) \n\
233 \n\
234 stq $5,32($17) \n\
235 xor $21,$22,$22 # 8 cycles from $22 load \n\
236 xor $7,$22,$22 \n\
237 xor $23,$24,$24 # 5 cycles from $24 load \n\
238 \n\
239 stq $22,40($17) \n\
240 xor $25,$27,$27 # 5 cycles from $27 load \n\
241 xor $24,$27,$27 \n\
242 xor $0,$1,$1 # 5 cycles from $1 load \n\
243 \n\
244 stq $27,48($17) \n\
245 xor $2,$3,$3 # 4 cycles from $3 load \n\
246 xor $1,$3,$3 \n\
247 subq $16,1,$16 \n\
248 \n\
249 stq $3,56($17) \n\
250 addq $20,64,$20 \n\
251 addq $19,64,$19 \n\
252 addq $18,64,$18 \n\
253 \n\
254 addq $17,64,$17 \n\
255 bgt $16,4b \n\
256 ret \n\
257 .end xor_alpha_4 \n\
258 \n\
259 .align 3 \n\
260 .ent xor_alpha_5 \n\
261 xor_alpha_5: \n\
262 .prologue 0 \n\
263 srl $16, 6, $16 \n\
264 .align 4 \n\
265 5: \n\
266 ldq $0,0($17) \n\
267 ldq $1,0($18) \n\
268 ldq $2,0($19) \n\
269 ldq $3,0($20) \n\
270 \n\
271 ldq $4,0($21) \n\
272 ldq $5,8($17) \n\
273 ldq $6,8($18) \n\
274 ldq $7,8($19) \n\
275 \n\
276 ldq $22,8($20) \n\
277 ldq $23,8($21) \n\
278 ldq $24,16($17) \n\
279 ldq $25,16($18) \n\
280 \n\
281 ldq $27,16($19) \n\
282 xor $0,$1,$1 # 6 cycles from $1 load \n\
283 ldq $28,16($20) \n\
284 xor $2,$3,$3 # 6 cycles from $3 load \n\
285 \n\
286 ldq $0,16($21) \n\
287 xor $1,$3,$3 \n\
288 ldq $1,24($17) \n\
289 xor $3,$4,$4 # 7 cycles from $4 load \n\
290 \n\
291 stq $4,0($17) \n\
292 xor $5,$6,$6 # 7 cycles from $6 load \n\
293 xor $7,$22,$22 # 7 cycles from $22 load \n\
294 xor $6,$23,$23 # 7 cycles from $23 load \n\
295 \n\
296 ldq $2,24($18) \n\
297 xor $22,$23,$23 \n\
298 ldq $3,24($19) \n\
299 xor $24,$25,$25 # 8 cycles from $25 load \n\
300 \n\
301 stq $23,8($17) \n\
302 xor $25,$27,$27 # 8 cycles from $27 load \n\
303 ldq $4,24($20) \n\
304 xor $28,$0,$0 # 7 cycles from $0 load \n\
305 \n\
306 ldq $5,24($21) \n\
307 xor $27,$0,$0 \n\
308 ldq $6,32($17) \n\
309 ldq $7,32($18) \n\
310 \n\
311 stq $0,16($17) \n\
312 xor $1,$2,$2 # 6 cycles from $2 load \n\
313 ldq $22,32($19) \n\
314 xor $3,$4,$4 # 4 cycles from $4 load \n\
315 \n\
316 ldq $23,32($20) \n\
317 xor $2,$4,$4 \n\
318 ldq $24,32($21) \n\
319 ldq $25,40($17) \n\
320 \n\
321 ldq $27,40($18) \n\
322 ldq $28,40($19) \n\
323 ldq $0,40($20) \n\
324 xor $4,$5,$5 # 7 cycles from $5 load \n\
325 \n\
326 stq $5,24($17) \n\
327 xor $6,$7,$7 # 7 cycles from $7 load \n\
328 ldq $1,40($21) \n\
329 ldq $2,48($17) \n\
330 \n\
331 ldq $3,48($18) \n\
332 xor $7,$22,$22 # 7 cycles from $22 load \n\
333 ldq $4,48($19) \n\
334 xor $23,$24,$24 # 6 cycles from $24 load \n\
335 \n\
336 ldq $5,48($20) \n\
337 xor $22,$24,$24 \n\
338 ldq $6,48($21) \n\
339 xor $25,$27,$27 # 7 cycles from $27 load \n\
340 \n\
341 stq $24,32($17) \n\
342 xor $27,$28,$28 # 8 cycles from $28 load \n\
343 ldq $7,56($17) \n\
344 xor $0,$1,$1 # 6 cycles from $1 load \n\
345 \n\
346 ldq $22,56($18) \n\
347 ldq $23,56($19) \n\
348 ldq $24,56($20) \n\
349 ldq $25,56($21) \n\
350 \n\
351 xor $28,$1,$1 \n\
352 xor $2,$3,$3 # 9 cycles from $3 load \n\
353 xor $3,$4,$4 # 9 cycles from $4 load \n\
354 xor $5,$6,$6 # 8 cycles from $6 load \n\
355 \n\
356 stq $1,40($17) \n\
357 xor $4,$6,$6 \n\
358 xor $7,$22,$22 # 7 cycles from $22 load \n\
359 xor $23,$24,$24 # 6 cycles from $24 load \n\
360 \n\
361 stq $6,48($17) \n\
362 xor $22,$24,$24 \n\
363 subq $16,1,$16 \n\
364 xor $24,$25,$25 # 8 cycles from $25 load \n\
365 \n\
366 stq $25,56($17) \n\
367 addq $21,64,$21 \n\
368 addq $20,64,$20 \n\
369 addq $19,64,$19 \n\
370 \n\
371 addq $18,64,$18 \n\
372 addq $17,64,$17 \n\
373 bgt $16,5b \n\
374 ret \n\
375 .end xor_alpha_5 \n\
376 \n\
377 .align 3 \n\
378 .ent xor_alpha_prefetch_2 \n\
379 xor_alpha_prefetch_2: \n\
380 .prologue 0 \n\
381 srl $16, 6, $16 \n\
382 \n\
383 ldq $31, 0($17) \n\
384 ldq $31, 0($18) \n\
385 \n\
386 ldq $31, 64($17) \n\
387 ldq $31, 64($18) \n\
388 \n\
389 ldq $31, 128($17) \n\
390 ldq $31, 128($18) \n\
391 \n\
392 ldq $31, 192($17) \n\
393 ldq $31, 192($18) \n\
394 .align 4 \n\
395 2: \n\
396 ldq $0,0($17) \n\
397 ldq $1,0($18) \n\
398 ldq $2,8($17) \n\
399 ldq $3,8($18) \n\
400 \n\
401 ldq $4,16($17) \n\
402 ldq $5,16($18) \n\
403 ldq $6,24($17) \n\
404 ldq $7,24($18) \n\
405 \n\
406 ldq $19,32($17) \n\
407 ldq $20,32($18) \n\
408 ldq $21,40($17) \n\
409 ldq $22,40($18) \n\
410 \n\
411 ldq $23,48($17) \n\
412 ldq $24,48($18) \n\
413 ldq $25,56($17) \n\
414 ldq $27,56($18) \n\
415 \n\
416 ldq $31,256($17) \n\
417 xor $0,$1,$0 # 8 cycles from $1 load \n\
418 ldq $31,256($18) \n\
419 xor $2,$3,$2 \n\
420 \n\
421 stq $0,0($17) \n\
422 xor $4,$5,$4 \n\
423 stq $2,8($17) \n\
424 xor $6,$7,$6 \n\
425 \n\
426 stq $4,16($17) \n\
427 xor $19,$20,$19 \n\
428 stq $6,24($17) \n\
429 xor $21,$22,$21 \n\
430 \n\
431 stq $19,32($17) \n\
432 xor $23,$24,$23 \n\
433 stq $21,40($17) \n\
434 xor $25,$27,$25 \n\
435 \n\
436 stq $23,48($17) \n\
437 subq $16,1,$16 \n\
438 stq $25,56($17) \n\
439 addq $17,64,$17 \n\
440 \n\
441 addq $18,64,$18 \n\
442 bgt $16,2b \n\
443 ret \n\
444 .end xor_alpha_prefetch_2 \n\
445 \n\
446 .align 3 \n\
447 .ent xor_alpha_prefetch_3 \n\
448 xor_alpha_prefetch_3: \n\
449 .prologue 0 \n\
450 srl $16, 6, $16 \n\
451 \n\
452 ldq $31, 0($17) \n\
453 ldq $31, 0($18) \n\
454 ldq $31, 0($19) \n\
455 \n\
456 ldq $31, 64($17) \n\
457 ldq $31, 64($18) \n\
458 ldq $31, 64($19) \n\
459 \n\
460 ldq $31, 128($17) \n\
461 ldq $31, 128($18) \n\
462 ldq $31, 128($19) \n\
463 \n\
464 ldq $31, 192($17) \n\
465 ldq $31, 192($18) \n\
466 ldq $31, 192($19) \n\
467 .align 4 \n\
468 3: \n\
469 ldq $0,0($17) \n\
470 ldq $1,0($18) \n\
471 ldq $2,0($19) \n\
472 ldq $3,8($17) \n\
473 \n\
474 ldq $4,8($18) \n\
475 ldq $6,16($17) \n\
476 ldq $7,16($18) \n\
477 ldq $21,24($17) \n\
478 \n\
479 ldq $22,24($18) \n\
480 ldq $24,32($17) \n\
481 ldq $25,32($18) \n\
482 ldq $5,8($19) \n\
483 \n\
484 ldq $20,16($19) \n\
485 ldq $23,24($19) \n\
486 ldq $27,32($19) \n\
487 nop \n\
488 \n\
489 xor $0,$1,$1 # 8 cycles from $0 load \n\
490 xor $3,$4,$4 # 7 cycles from $4 load \n\
491 xor $6,$7,$7 # 6 cycles from $7 load \n\
492 xor $21,$22,$22 # 5 cycles from $22 load \n\
493 \n\
494 xor $1,$2,$2 # 9 cycles from $2 load \n\
495 xor $24,$25,$25 # 5 cycles from $25 load \n\
496 stq $2,0($17) \n\
497 xor $4,$5,$5 # 6 cycles from $5 load \n\
498 \n\
499 stq $5,8($17) \n\
500 xor $7,$20,$20 # 7 cycles from $20 load \n\
501 stq $20,16($17) \n\
502 xor $22,$23,$23 # 7 cycles from $23 load \n\
503 \n\
504 stq $23,24($17) \n\
505 xor $25,$27,$27 # 7 cycles from $27 load \n\
506 stq $27,32($17) \n\
507 nop \n\
508 \n\
509 ldq $0,40($17) \n\
510 ldq $1,40($18) \n\
511 ldq $3,48($17) \n\
512 ldq $4,48($18) \n\
513 \n\
514 ldq $6,56($17) \n\
515 ldq $7,56($18) \n\
516 ldq $2,40($19) \n\
517 ldq $5,48($19) \n\
518 \n\
519 ldq $20,56($19) \n\
520 ldq $31,256($17) \n\
521 ldq $31,256($18) \n\
522 ldq $31,256($19) \n\
523 \n\
524 xor $0,$1,$1 # 6 cycles from $1 load \n\
525 xor $3,$4,$4 # 5 cycles from $4 load \n\
526 xor $6,$7,$7 # 5 cycles from $7 load \n\
527 xor $1,$2,$2 # 4 cycles from $2 load \n\
528 \n\
529 xor $4,$5,$5 # 5 cycles from $5 load \n\
530 xor $7,$20,$20 # 4 cycles from $20 load \n\
531 stq $2,40($17) \n\
532 subq $16,1,$16 \n\
533 \n\
534 stq $5,48($17) \n\
535 addq $19,64,$19 \n\
536 stq $20,56($17) \n\
537 addq $18,64,$18 \n\
538 \n\
539 addq $17,64,$17 \n\
540 bgt $16,3b \n\
541 ret \n\
542 .end xor_alpha_prefetch_3 \n\
543 \n\
544 .align 3 \n\
545 .ent xor_alpha_prefetch_4 \n\
546 xor_alpha_prefetch_4: \n\
547 .prologue 0 \n\
548 srl $16, 6, $16 \n\
549 \n\
550 ldq $31, 0($17) \n\
551 ldq $31, 0($18) \n\
552 ldq $31, 0($19) \n\
553 ldq $31, 0($20) \n\
554 \n\
555 ldq $31, 64($17) \n\
556 ldq $31, 64($18) \n\
557 ldq $31, 64($19) \n\
558 ldq $31, 64($20) \n\
559 \n\
560 ldq $31, 128($17) \n\
561 ldq $31, 128($18) \n\
562 ldq $31, 128($19) \n\
563 ldq $31, 128($20) \n\
564 \n\
565 ldq $31, 192($17) \n\
566 ldq $31, 192($18) \n\
567 ldq $31, 192($19) \n\
568 ldq $31, 192($20) \n\
569 .align 4 \n\
570 4: \n\
571 ldq $0,0($17) \n\
572 ldq $1,0($18) \n\
573 ldq $2,0($19) \n\
574 ldq $3,0($20) \n\
575 \n\
576 ldq $4,8($17) \n\
577 ldq $5,8($18) \n\
578 ldq $6,8($19) \n\
579 ldq $7,8($20) \n\
580 \n\
581 ldq $21,16($17) \n\
582 ldq $22,16($18) \n\
583 ldq $23,16($19) \n\
584 ldq $24,16($20) \n\
585 \n\
586 ldq $25,24($17) \n\
587 xor $0,$1,$1 # 6 cycles from $1 load \n\
588 ldq $27,24($18) \n\
589 xor $2,$3,$3 # 6 cycles from $3 load \n\
590 \n\
591 ldq $0,24($19) \n\
592 xor $1,$3,$3 \n\
593 ldq $1,24($20) \n\
594 xor $4,$5,$5 # 7 cycles from $5 load \n\
595 \n\
596 stq $3,0($17) \n\
597 xor $6,$7,$7 \n\
598 xor $21,$22,$22 # 7 cycles from $22 load \n\
599 xor $5,$7,$7 \n\
600 \n\
601 stq $7,8($17) \n\
602 xor $23,$24,$24 # 7 cycles from $24 load \n\
603 ldq $2,32($17) \n\
604 xor $22,$24,$24 \n\
605 \n\
606 ldq $3,32($18) \n\
607 ldq $4,32($19) \n\
608 ldq $5,32($20) \n\
609 xor $25,$27,$27 # 8 cycles from $27 load \n\
610 \n\
611 ldq $6,40($17) \n\
612 ldq $7,40($18) \n\
613 ldq $21,40($19) \n\
614 ldq $22,40($20) \n\
615 \n\
616 stq $24,16($17) \n\
617 xor $0,$1,$1 # 9 cycles from $1 load \n\
618 xor $2,$3,$3 # 5 cycles from $3 load \n\
619 xor $27,$1,$1 \n\
620 \n\
621 stq $1,24($17) \n\
622 xor $4,$5,$5 # 5 cycles from $5 load \n\
623 ldq $23,48($17) \n\
624 xor $3,$5,$5 \n\
625 \n\
626 ldq $24,48($18) \n\
627 ldq $25,48($19) \n\
628 ldq $27,48($20) \n\
629 ldq $0,56($17) \n\
630 \n\
631 ldq $1,56($18) \n\
632 ldq $2,56($19) \n\
633 ldq $3,56($20) \n\
634 xor $6,$7,$7 # 8 cycles from $6 load \n\
635 \n\
636 ldq $31,256($17) \n\
637 xor $21,$22,$22 # 8 cycles from $22 load \n\
638 ldq $31,256($18) \n\
639 xor $7,$22,$22 \n\
640 \n\
641 ldq $31,256($19) \n\
642 xor $23,$24,$24 # 6 cycles from $24 load \n\
643 ldq $31,256($20) \n\
644 xor $25,$27,$27 # 6 cycles from $27 load \n\
645 \n\
646 stq $5,32($17) \n\
647 xor $24,$27,$27 \n\
648 xor $0,$1,$1 # 7 cycles from $1 load \n\
649 xor $2,$3,$3 # 6 cycles from $3 load \n\
650 \n\
651 stq $22,40($17) \n\
652 xor $1,$3,$3 \n\
653 stq $27,48($17) \n\
654 subq $16,1,$16 \n\
655 \n\
656 stq $3,56($17) \n\
657 addq $20,64,$20 \n\
658 addq $19,64,$19 \n\
659 addq $18,64,$18 \n\
660 \n\
661 addq $17,64,$17 \n\
662 bgt $16,4b \n\
663 ret \n\
664 .end xor_alpha_prefetch_4 \n\
665 \n\
666 .align 3 \n\
667 .ent xor_alpha_prefetch_5 \n\
668 xor_alpha_prefetch_5: \n\
669 .prologue 0 \n\
670 srl $16, 6, $16 \n\
671 \n\
672 ldq $31, 0($17) \n\
673 ldq $31, 0($18) \n\
674 ldq $31, 0($19) \n\
675 ldq $31, 0($20) \n\
676 ldq $31, 0($21) \n\
677 \n\
678 ldq $31, 64($17) \n\
679 ldq $31, 64($18) \n\
680 ldq $31, 64($19) \n\
681 ldq $31, 64($20) \n\
682 ldq $31, 64($21) \n\
683 \n\
684 ldq $31, 128($17) \n\
685 ldq $31, 128($18) \n\
686 ldq $31, 128($19) \n\
687 ldq $31, 128($20) \n\
688 ldq $31, 128($21) \n\
689 \n\
690 ldq $31, 192($17) \n\
691 ldq $31, 192($18) \n\
692 ldq $31, 192($19) \n\
693 ldq $31, 192($20) \n\
694 ldq $31, 192($21) \n\
695 .align 4 \n\
696 5: \n\
697 ldq $0,0($17) \n\
698 ldq $1,0($18) \n\
699 ldq $2,0($19) \n\
700 ldq $3,0($20) \n\
701 \n\
702 ldq $4,0($21) \n\
703 ldq $5,8($17) \n\
704 ldq $6,8($18) \n\
705 ldq $7,8($19) \n\
706 \n\
707 ldq $22,8($20) \n\
708 ldq $23,8($21) \n\
709 ldq $24,16($17) \n\
710 ldq $25,16($18) \n\
711 \n\
712 ldq $27,16($19) \n\
713 xor $0,$1,$1 # 6 cycles from $1 load \n\
714 ldq $28,16($20) \n\
715 xor $2,$3,$3 # 6 cycles from $3 load \n\
716 \n\
717 ldq $0,16($21) \n\
718 xor $1,$3,$3 \n\
719 ldq $1,24($17) \n\
720 xor $3,$4,$4 # 7 cycles from $4 load \n\
721 \n\
722 stq $4,0($17) \n\
723 xor $5,$6,$6 # 7 cycles from $6 load \n\
724 xor $7,$22,$22 # 7 cycles from $22 load \n\
725 xor $6,$23,$23 # 7 cycles from $23 load \n\
726 \n\
727 ldq $2,24($18) \n\
728 xor $22,$23,$23 \n\
729 ldq $3,24($19) \n\
730 xor $24,$25,$25 # 8 cycles from $25 load \n\
731 \n\
732 stq $23,8($17) \n\
733 xor $25,$27,$27 # 8 cycles from $27 load \n\
734 ldq $4,24($20) \n\
735 xor $28,$0,$0 # 7 cycles from $0 load \n\
736 \n\
737 ldq $5,24($21) \n\
738 xor $27,$0,$0 \n\
739 ldq $6,32($17) \n\
740 ldq $7,32($18) \n\
741 \n\
742 stq $0,16($17) \n\
743 xor $1,$2,$2 # 6 cycles from $2 load \n\
744 ldq $22,32($19) \n\
745 xor $3,$4,$4 # 4 cycles from $4 load \n\
746 \n\
747 ldq $23,32($20) \n\
748 xor $2,$4,$4 \n\
749 ldq $24,32($21) \n\
750 ldq $25,40($17) \n\
751 \n\
752 ldq $27,40($18) \n\
753 ldq $28,40($19) \n\
754 ldq $0,40($20) \n\
755 xor $4,$5,$5 # 7 cycles from $5 load \n\
756 \n\
757 stq $5,24($17) \n\
758 xor $6,$7,$7 # 7 cycles from $7 load \n\
759 ldq $1,40($21) \n\
760 ldq $2,48($17) \n\
761 \n\
762 ldq $3,48($18) \n\
763 xor $7,$22,$22 # 7 cycles from $22 load \n\
764 ldq $4,48($19) \n\
765 xor $23,$24,$24 # 6 cycles from $24 load \n\
766 \n\
767 ldq $5,48($20) \n\
768 xor $22,$24,$24 \n\
769 ldq $6,48($21) \n\
770 xor $25,$27,$27 # 7 cycles from $27 load \n\
771 \n\
772 stq $24,32($17) \n\
773 xor $27,$28,$28 # 8 cycles from $28 load \n\
774 ldq $7,56($17) \n\
775 xor $0,$1,$1 # 6 cycles from $1 load \n\
776 \n\
777 ldq $22,56($18) \n\
778 ldq $23,56($19) \n\
779 ldq $24,56($20) \n\
780 ldq $25,56($21) \n\
781 \n\
782 ldq $31,256($17) \n\
783 xor $28,$1,$1 \n\
784 ldq $31,256($18) \n\
785 xor $2,$3,$3 # 9 cycles from $3 load \n\
786 \n\
787 ldq $31,256($19) \n\
788 xor $3,$4,$4 # 9 cycles from $4 load \n\
789 ldq $31,256($20) \n\
790 xor $5,$6,$6 # 8 cycles from $6 load \n\
791 \n\
792 stq $1,40($17) \n\
793 xor $4,$6,$6 \n\
794 xor $7,$22,$22 # 7 cycles from $22 load \n\
795 xor $23,$24,$24 # 6 cycles from $24 load \n\
796 \n\
797 stq $6,48($17) \n\
798 xor $22,$24,$24 \n\
799 ldq $31,256($21) \n\
800 xor $24,$25,$25 # 8 cycles from $25 load \n\
801 \n\
802 stq $25,56($17) \n\
803 subq $16,1,$16 \n\
804 addq $21,64,$21 \n\
805 addq $20,64,$20 \n\
806 \n\
807 addq $19,64,$19 \n\
808 addq $18,64,$18 \n\
809 addq $17,64,$17 \n\
810 bgt $16,5b \n\
811 \n\
812 ret \n\
813 .end xor_alpha_prefetch_5 \n\
814 ");
815
/*
 * Template named "alpha": binds the do_2..do_5 slots to the plain
 * xor_alpha_N assembly routines (no prefetch loads).
 * NOTE(review): struct xor_block_template is declared outside this file;
 * any members not listed here are left zero-initialized.
 */
816 static struct xor_block_template xor_block_alpha = {
817 .name = "alpha",
818 .do_2 = xor_alpha_2,
819 .do_3 = xor_alpha_3,
820 .do_4 = xor_alpha_4,
821 .do_5 = xor_alpha_5,
822 };
823
/*
 * Template named "alpha prefetch": binds the do_2..do_5 slots to the
 * xor_alpha_prefetch_N assembly routines.  This is the template that
 * XOR_SELECT_TEMPLATE forces when implver() reports IMPLVER_EV6.
 */
824 static struct xor_block_template xor_block_alpha_prefetch = {
825 .name = "alpha prefetch",
826 .do_2 = xor_alpha_prefetch_2,
827 .do_3 = xor_alpha_prefetch_3,
828 .do_4 = xor_alpha_prefetch_4,
829 .do_5 = xor_alpha_prefetch_5,
830 };
831
832
833 #include <asm-generic/xor.h>
834
/*
 * Replace any earlier XOR_TRY_TEMPLATES definition with one that passes
 * four templates to xor_speed(), in this order: xor_block_8regs,
 * xor_block_32regs, then the two Alpha-specific templates defined above.
 * NOTE(review): xor_block_8regs and xor_block_32regs are presumably the
 * generic C implementations from <asm-generic/xor.h> -- confirm there.
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
835 #undef XOR_TRY_TEMPLATES
836 #define XOR_TRY_TEMPLATES \
837 do { \
838 xor_speed(&xor_block_8regs); \
839 xor_speed(&xor_block_32regs); \
840 xor_speed(&xor_block_alpha); \
841 xor_speed(&xor_block_alpha_prefetch); \
842 } while (0)
843
844
845
/*
 * Final template choice: when implver() returns IMPLVER_EV6 the prefetch
 * template is used unconditionally, overriding FASTEST; on any other
 * implementation FASTEST is passed through unchanged.
 * NOTE(review): FASTEST is presumably the winner of the xor_speed() runs
 * and must have the same pointer type as &xor_block_alpha_prefetch --
 * confirm against the caller.
 */
846 #define XOR_SELECT_TEMPLATE(FASTEST) \
847 (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)