This source file includes the following definitions:
- xor_pII_mmx_2
- xor_pII_mmx_3
- xor_pII_mmx_4
- xor_pII_mmx_5
- xor_p5_mmx_2
- xor_p5_mmx_3
- xor_p5_mmx_4
- xor_p5_mmx_5

#ifndef _ASM_X86_XOR_32_H
#define _ASM_X86_XOR_32_H

/*
 * Optimized RAID-5 checksumming functions for MMX.
 */

/*
 * High-speed RAID5 checksumming functions utilizing MMX instructions.
 * Copyright (C) 1998 Ingo Molnar.
 */

#define LD(x, y)	" movq 8*("#x")(%1), %%mm"#y" ;\n"
#define ST(x, y)	" movq %%mm"#y", 8*("#x")(%1) ;\n"
#define XO1(x, y)	" pxor 8*("#x")(%2), %%mm"#y" ;\n"
#define XO2(x, y)	" pxor 8*("#x")(%3), %%mm"#y" ;\n"
#define XO3(x, y)	" pxor 8*("#x")(%4), %%mm"#y" ;\n"
#define XO4(x, y)	" pxor 8*("#x")(%5), %%mm"#y" ;\n"

#include <asm/fpu/api.h>

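/*
 * LD/ST load/store MMX register y from/to the x-th quadword of the
 * destination buffer (%1); XO1..XO4 XOR in the x-th quadword of the
 * source buffers %2..%5.  The stringizing operator pastes the macro
 * arguments into the asm text, so e.g. LD(2, 0) expands to
 * " movq 8*(2)(%1), %%mm0 ;\n", i.e. load bytes 16..23 of p1 into
 * %mm0.  All routines below bracket their asm with
 * kernel_fpu_begin()/kernel_fpu_end() because they clobber the
 * MMX/FPU register state.
 */
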
static void
xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned long lines = bytes >> 7;

	kernel_fpu_begin();

	asm volatile(
#undef BLOCK
#define BLOCK(i) \
	LD(i, 0)	\
	LD(i + 1, 1)	\
	LD(i + 2, 2)	\
	LD(i + 3, 3)	\
	XO1(i, 0)	\
	ST(i, 0)	\
	XO1(i + 1, 1)	\
	ST(i + 1, 1)	\
	XO1(i + 2, 2)	\
	ST(i + 2, 2)	\
	XO1(i + 3, 3)	\
	ST(i + 3, 3)

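	/*
	 * BLOCK(i) loads four quadwords of p1 into %mm0-%mm3, XORs in
	 * the matching quadwords of p2, and stores the results back.
	 * Four BLOCKs per loop iteration touch 16 quadwords = 128
	 * bytes, which is why lines = bytes >> 7 above.
	 */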
46 " .align 32 ;\n"
47 " 1: ;\n"
48
49 BLOCK(0)
50 BLOCK(4)
51 BLOCK(8)
52 BLOCK(12)
53
54 " addl $128, %1 ;\n"
55 " addl $128, %2 ;\n"
56 " decl %0 ;\n"
57 " jnz 1b ;\n"
58 : "+r" (lines),
59 "+r" (p1), "+r" (p2)
60 :
61 : "memory");
62
63 kernel_fpu_end();
64 }
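
/*
 * The three-, four- and five-source variants chain XO2/XO3/XO4 onto
 * the same skeleton, so that e.g. xor_pII_mmx_3() computes, per
 * element (a plain-C sketch, assuming bytes is a multiple of 128):
 *
 *	for (i = 0; i < bytes / sizeof(unsigned long); i++)
 *		p1[i] ^= p2[i] ^ p3[i];
 */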

static void
xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	unsigned long lines = bytes >> 7;

	kernel_fpu_begin();

	asm volatile(
#undef BLOCK
#define BLOCK(i) \
	LD(i, 0)	\
	LD(i + 1, 1)	\
	LD(i + 2, 2)	\
	LD(i + 3, 3)	\
	XO1(i, 0)	\
	XO1(i + 1, 1)	\
	XO1(i + 2, 2)	\
	XO1(i + 3, 3)	\
	XO2(i, 0)	\
	ST(i, 0)	\
	XO2(i + 1, 1)	\
	ST(i + 1, 1)	\
	XO2(i + 2, 2)	\
	ST(i + 2, 2)	\
	XO2(i + 3, 3)	\
	ST(i + 3, 3)

	" .align 32 ;\n"
	" 1: ;\n"

	BLOCK(0)
	BLOCK(4)
	BLOCK(8)
	BLOCK(12)

	" addl $128, %1 ;\n"
	" addl $128, %2 ;\n"
	" addl $128, %3 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2), "+r" (p3)
	:
	: "memory");

	kernel_fpu_end();
}

static void
xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 7;

	kernel_fpu_begin();

	asm volatile(
#undef BLOCK
#define BLOCK(i) \
	LD(i, 0)	\
	LD(i + 1, 1)	\
	LD(i + 2, 2)	\
	LD(i + 3, 3)	\
	XO1(i, 0)	\
	XO1(i + 1, 1)	\
	XO1(i + 2, 2)	\
	XO1(i + 3, 3)	\
	XO2(i, 0)	\
	XO2(i + 1, 1)	\
	XO2(i + 2, 2)	\
	XO2(i + 3, 3)	\
	XO3(i, 0)	\
	ST(i, 0)	\
	XO3(i + 1, 1)	\
	ST(i + 1, 1)	\
	XO3(i + 2, 2)	\
	ST(i + 2, 2)	\
	XO3(i + 3, 3)	\
	ST(i + 3, 3)

	" .align 32 ;\n"
	" 1: ;\n"

	BLOCK(0)
	BLOCK(4)
	BLOCK(8)
	BLOCK(12)

	" addl $128, %1 ;\n"
	" addl $128, %2 ;\n"
	" addl $128, %3 ;\n"
	" addl $128, %4 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
	:
	: "memory");

	kernel_fpu_end();
}

static void
xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	unsigned long lines = bytes >> 7;

	kernel_fpu_begin();

	/* Make sure GCC forgets anything it knows about p4 or p5,
	   such that it won't pass to the asm volatile below a
	   register that is shared with any other variable.  That's
	   because we modify p4 and p5 there, but we can't mark them
	   as read/write, otherwise we'd overflow the 10-asm-operands
	   limit of GCC < 3.1.  */
	asm("" : "+r" (p4), "+r" (p5));

	asm volatile(
#undef BLOCK
#define BLOCK(i) \
	LD(i, 0)	\
	LD(i + 1, 1)	\
	LD(i + 2, 2)	\
	LD(i + 3, 3)	\
	XO1(i, 0)	\
	XO1(i + 1, 1)	\
	XO1(i + 2, 2)	\
	XO1(i + 3, 3)	\
	XO2(i, 0)	\
	XO2(i + 1, 1)	\
	XO2(i + 2, 2)	\
	XO2(i + 3, 3)	\
	XO3(i, 0)	\
	XO3(i + 1, 1)	\
	XO3(i + 2, 2)	\
	XO3(i + 3, 3)	\
	XO4(i, 0)	\
	ST(i, 0)	\
	XO4(i + 1, 1)	\
	ST(i + 1, 1)	\
	XO4(i + 2, 2)	\
	ST(i + 2, 2)	\
	XO4(i + 3, 3)	\
	ST(i + 3, 3)

	" .align 32 ;\n"
	" 1: ;\n"

	BLOCK(0)
	BLOCK(4)
	BLOCK(8)
	BLOCK(12)

	" addl $128, %1 ;\n"
	" addl $128, %2 ;\n"
	" addl $128, %3 ;\n"
	" addl $128, %4 ;\n"
	" addl $128, %5 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2), "+r" (p3)
	: "r" (p4), "r" (p5)
	: "memory");

	/* p4 and p5 were modified, and now the variables are dead.
	   Clobber them just to be sure nobody does something stupid
	   like assuming they have some legal value.  */
	asm("" : "=r" (p4), "=r" (p5));

	kernel_fpu_end();
}

#undef LD
#undef XO1
#undef XO2
#undef XO3
#undef XO4
#undef ST
#undef BLOCK

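/*
 * The p5_mmx variants compute the same XOR-into-p1 results, but with
 * the loads, pxors and stores hand-interleaved so that adjacent
 * instructions can pair in the original Pentium's dual pipelines.
 * They process 64 bytes per loop iteration, hence lines = bytes >> 6.
 */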
static void
xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned long lines = bytes >> 6;

	kernel_fpu_begin();

	asm volatile(
	" .align 32 ;\n"
	" 1: ;\n"
	" movq (%1), %%mm0 ;\n"
	" movq 8(%1), %%mm1 ;\n"
	" pxor (%2), %%mm0 ;\n"
	" movq 16(%1), %%mm2 ;\n"
	" movq %%mm0, (%1) ;\n"
	" pxor 8(%2), %%mm1 ;\n"
	" movq 24(%1), %%mm3 ;\n"
	" movq %%mm1, 8(%1) ;\n"
	" pxor 16(%2), %%mm2 ;\n"
	" movq 32(%1), %%mm4 ;\n"
	" movq %%mm2, 16(%1) ;\n"
	" pxor 24(%2), %%mm3 ;\n"
	" movq 40(%1), %%mm5 ;\n"
	" movq %%mm3, 24(%1) ;\n"
	" pxor 32(%2), %%mm4 ;\n"
	" movq 48(%1), %%mm6 ;\n"
	" movq %%mm4, 32(%1) ;\n"
	" pxor 40(%2), %%mm5 ;\n"
	" movq 56(%1), %%mm7 ;\n"
	" movq %%mm5, 40(%1) ;\n"
	" pxor 48(%2), %%mm6 ;\n"
	" pxor 56(%2), %%mm7 ;\n"
	" movq %%mm6, 48(%1) ;\n"
	" movq %%mm7, 56(%1) ;\n"

	" addl $64, %1 ;\n"
	" addl $64, %2 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2)
	:
	: "memory");

	kernel_fpu_end();
}

static void
xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	unsigned long lines = bytes >> 6;

	kernel_fpu_begin();

	asm volatile(
	" .align 32,0x90 ;\n"
	" 1: ;\n"
	" movq (%1), %%mm0 ;\n"
	" movq 8(%1), %%mm1 ;\n"
	" pxor (%2), %%mm0 ;\n"
	" movq 16(%1), %%mm2 ;\n"
	" pxor 8(%2), %%mm1 ;\n"
	" pxor (%3), %%mm0 ;\n"
	" pxor 16(%2), %%mm2 ;\n"
	" movq %%mm0, (%1) ;\n"
	" pxor 8(%3), %%mm1 ;\n"
	" pxor 16(%3), %%mm2 ;\n"
	" movq 24(%1), %%mm3 ;\n"
	" movq %%mm1, 8(%1) ;\n"
	" movq 32(%1), %%mm4 ;\n"
	" movq 40(%1), %%mm5 ;\n"
	" pxor 24(%2), %%mm3 ;\n"
	" movq %%mm2, 16(%1) ;\n"
	" pxor 32(%2), %%mm4 ;\n"
	" pxor 24(%3), %%mm3 ;\n"
	" pxor 40(%2), %%mm5 ;\n"
	" movq %%mm3, 24(%1) ;\n"
	" pxor 32(%3), %%mm4 ;\n"
	" pxor 40(%3), %%mm5 ;\n"
	" movq 48(%1), %%mm6 ;\n"
	" movq %%mm4, 32(%1) ;\n"
	" movq 56(%1), %%mm7 ;\n"
	" pxor 48(%2), %%mm6 ;\n"
	" movq %%mm5, 40(%1) ;\n"
	" pxor 56(%2), %%mm7 ;\n"
	" pxor 48(%3), %%mm6 ;\n"
	" pxor 56(%3), %%mm7 ;\n"
	" movq %%mm6, 48(%1) ;\n"
	" movq %%mm7, 56(%1) ;\n"

	" addl $64, %1 ;\n"
	" addl $64, %2 ;\n"
	" addl $64, %3 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2), "+r" (p3)
	:
	: "memory");

	kernel_fpu_end();
}

static void
xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 6;

	kernel_fpu_begin();

	asm volatile(
	" .align 32,0x90 ;\n"
	" 1: ;\n"
	" movq (%1), %%mm0 ;\n"
	" movq 8(%1), %%mm1 ;\n"
	" pxor (%2), %%mm0 ;\n"
	" movq 16(%1), %%mm2 ;\n"
	" pxor 8(%2), %%mm1 ;\n"
	" pxor (%3), %%mm0 ;\n"
	" pxor 16(%2), %%mm2 ;\n"
	" pxor 8(%3), %%mm1 ;\n"
	" pxor (%4), %%mm0 ;\n"
	" movq 24(%1), %%mm3 ;\n"
	" pxor 16(%3), %%mm2 ;\n"
	" pxor 8(%4), %%mm1 ;\n"
	" movq %%mm0, (%1) ;\n"
	" movq 32(%1), %%mm4 ;\n"
	" pxor 24(%2), %%mm3 ;\n"
	" pxor 16(%4), %%mm2 ;\n"
	" movq %%mm1, 8(%1) ;\n"
	" movq 40(%1), %%mm5 ;\n"
	" pxor 32(%2), %%mm4 ;\n"
	" pxor 24(%3), %%mm3 ;\n"
	" movq %%mm2, 16(%1) ;\n"
	" pxor 40(%2), %%mm5 ;\n"
	" pxor 32(%3), %%mm4 ;\n"
	" pxor 24(%4), %%mm3 ;\n"
	" movq %%mm3, 24(%1) ;\n"
	" movq 56(%1), %%mm7 ;\n"
	" movq 48(%1), %%mm6 ;\n"
	" pxor 40(%3), %%mm5 ;\n"
	" pxor 32(%4), %%mm4 ;\n"
	" pxor 48(%2), %%mm6 ;\n"
	" movq %%mm4, 32(%1) ;\n"
	" pxor 56(%2), %%mm7 ;\n"
	" pxor 40(%4), %%mm5 ;\n"
	" pxor 48(%3), %%mm6 ;\n"
	" pxor 56(%3), %%mm7 ;\n"
	" movq %%mm5, 40(%1) ;\n"
	" pxor 48(%4), %%mm6 ;\n"
	" pxor 56(%4), %%mm7 ;\n"
	" movq %%mm6, 48(%1) ;\n"
	" movq %%mm7, 56(%1) ;\n"

	" addl $64, %1 ;\n"
	" addl $64, %2 ;\n"
	" addl $64, %3 ;\n"
	" addl $64, %4 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
	:
	: "memory");

	kernel_fpu_end();
}

static void
xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	unsigned long lines = bytes >> 6;

	kernel_fpu_begin();

	/* Make sure GCC forgets anything it knows about p4 or p5,
	   such that it won't pass to the asm volatile below a
	   register that is shared with any other variable.  That's
	   because we modify p4 and p5 there, but we can't mark them
	   as read/write, otherwise we'd overflow the 10-asm-operands
	   limit of GCC < 3.1.  */
	asm("" : "+r" (p4), "+r" (p5));

	asm volatile(
	" .align 32,0x90 ;\n"
	" 1: ;\n"
	" movq (%1), %%mm0 ;\n"
	" movq 8(%1), %%mm1 ;\n"
	" pxor (%2), %%mm0 ;\n"
	" pxor 8(%2), %%mm1 ;\n"
	" movq 16(%1), %%mm2 ;\n"
	" pxor (%3), %%mm0 ;\n"
	" pxor 8(%3), %%mm1 ;\n"
	" pxor 16(%2), %%mm2 ;\n"
	" pxor (%4), %%mm0 ;\n"
	" pxor 8(%4), %%mm1 ;\n"
	" pxor 16(%3), %%mm2 ;\n"
	" movq 24(%1), %%mm3 ;\n"
	" pxor (%5), %%mm0 ;\n"
	" pxor 8(%5), %%mm1 ;\n"
	" movq %%mm0, (%1) ;\n"
	" pxor 16(%4), %%mm2 ;\n"
	" pxor 24(%2), %%mm3 ;\n"
	" movq %%mm1, 8(%1) ;\n"
	" pxor 16(%5), %%mm2 ;\n"
	" pxor 24(%3), %%mm3 ;\n"
	" movq 32(%1), %%mm4 ;\n"
	" movq %%mm2, 16(%1) ;\n"
	" pxor 24(%4), %%mm3 ;\n"
	" pxor 32(%2), %%mm4 ;\n"
	" movq 40(%1), %%mm5 ;\n"
	" pxor 24(%5), %%mm3 ;\n"
	" pxor 32(%3), %%mm4 ;\n"
	" pxor 40(%2), %%mm5 ;\n"
	" movq %%mm3, 24(%1) ;\n"
	" pxor 32(%4), %%mm4 ;\n"
	" pxor 40(%3), %%mm5 ;\n"
	" movq 48(%1), %%mm6 ;\n"
	" movq 56(%1), %%mm7 ;\n"
	" pxor 32(%5), %%mm4 ;\n"
	" pxor 40(%4), %%mm5 ;\n"
	" pxor 48(%2), %%mm6 ;\n"
	" pxor 56(%2), %%mm7 ;\n"
	" movq %%mm4, 32(%1) ;\n"
	" pxor 48(%3), %%mm6 ;\n"
	" pxor 56(%3), %%mm7 ;\n"
	" pxor 40(%5), %%mm5 ;\n"
	" pxor 48(%4), %%mm6 ;\n"
	" pxor 56(%4), %%mm7 ;\n"
	" movq %%mm5, 40(%1) ;\n"
	" pxor 48(%5), %%mm6 ;\n"
	" pxor 56(%5), %%mm7 ;\n"
	" movq %%mm6, 48(%1) ;\n"
	" movq %%mm7, 56(%1) ;\n"

	" addl $64, %1 ;\n"
	" addl $64, %2 ;\n"
	" addl $64, %3 ;\n"
	" addl $64, %4 ;\n"
	" addl $64, %5 ;\n"
	" decl %0 ;\n"
	" jnz 1b ;\n"
	: "+r" (lines),
	  "+r" (p1), "+r" (p2), "+r" (p3)
	: "r" (p4), "r" (p5)
	: "memory");

	/* p4 and p5 were modified, and now the variables are dead.
	   Clobber them just to be sure nobody does something stupid
	   like assuming they have some legal value.  */
	asm("" : "=r" (p4), "=r" (p5));

	kernel_fpu_end();
}

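/*
 * Each xor_block_template bundles one implementation's 2- to 5-source
 * routines under a name; the RAID code times the registered templates
 * against each other and keeps the fastest one.
 */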
static struct xor_block_template xor_block_pII_mmx = {
	.name = "pII_mmx",
	.do_2 = xor_pII_mmx_2,
	.do_3 = xor_pII_mmx_3,
	.do_4 = xor_pII_mmx_4,
	.do_5 = xor_pII_mmx_5,
};

static struct xor_block_template xor_block_p5_mmx = {
	.name = "p5_mmx",
	.do_2 = xor_p5_mmx_2,
	.do_3 = xor_p5_mmx_3,
	.do_4 = xor_p5_mmx_4,
	.do_5 = xor_p5_mmx_5,
};

static struct xor_block_template xor_block_pIII_sse = {
	.name = "pIII_sse",
	.do_2 = xor_sse_2,
	.do_3 = xor_sse_3,
	.do_4 = xor_sse_4,
	.do_5 = xor_sse_5,
};

/* Also try the AVX routines */
#include <asm/xor_avx.h>

/* Also try the generic routines.  */
#include <asm-generic/xor.h>

/* We force the use of the SSE xor block because it can write around L2.
   We may also be able to load into the L1 only depending on how the cpu
   deals with a load to a line that is being prefetched.  */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
do {							\
	AVX_XOR_SPEED;					\
	if (boot_cpu_has(X86_FEATURE_XMM)) {		\
		xor_speed(&xor_block_pIII_sse);		\
		xor_speed(&xor_block_sse_pf64);		\
	} else if (boot_cpu_has(X86_FEATURE_MMX)) {	\
		xor_speed(&xor_block_pII_mmx);		\
		xor_speed(&xor_block_p5_mmx);		\
	} else {					\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	}						\
} while (0)
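
/*
 * XOR_TRY_TEMPLATES is expanded by the xor calibration code (see
 * calibrate_xor_blocks() in crypto/xor.c): each xor_speed() call
 * benchmarks one template, and the fastest becomes the active
 * checksumming implementation.  SSE is tried first when available,
 * then the MMX routines above, then the generic C templates.
 */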

#endif /* _ASM_X86_XOR_32_H */