// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE   0
#endif

/*
 * The unaligned-copy path below permutes pairs of source vectors; on
 * little-endian we use lvsr and swap the vperm inputs so the same
 * algorithm works in both endians.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)          lvsl    VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)  vperm   VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)          lvsr    VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)  vperm   VRT,VRB,VRA,VRC
#endif

/*
 * Each errN macro plants a numbered label on the access that follows
 * it and registers the matching .Ldo_errN fixup for that access in
 * the exception table.
 */
        .macro err1
100:
        EX_TABLE(100b,.Ldo_err1)
        .endm

        .macro err2
200:
        EX_TABLE(200b,.Ldo_err2)
        .endm

#ifdef CONFIG_ALTIVEC
        .macro err3
300:
        EX_TABLE(300b,.Ldo_err3)
        .endm

        .macro err4
400:
        EX_TABLE(400b,.Ldo_err4)
        .endm

/* VMX fault: restore the saved non-volatiles, leave VMX, then exit */
.Ldo_err4:
        ld      r16,STK_REG(R16)(r1)
        ld      r15,STK_REG(R15)(r1)
        ld      r14,STK_REG(R14)(r1)
.Ldo_err3:
        bl      exit_vmx_usercopy
        ld      r0,STACKFRAMESIZE+16(r1)
        mtlr    r0
        b       .Lexit
#endif

.Ldo_err2:
        ld      r22,STK_REG(R22)(r1)
        ld      r21,STK_REG(R21)(r1)
        ld      r20,STK_REG(R20)(r1)
        ld      r19,STK_REG(R19)(r1)
        ld      r18,STK_REG(R18)(r1)
        ld      r17,STK_REG(R17)(r1)
        ld      r16,STK_REG(R16)(r1)
        ld      r15,STK_REG(R15)(r1)
        ld      r14,STK_REG(R14)(r1)
.Lexit:
        addi    r1,r1,STACKFRAMESIZE
/* Reload the original arguments and redo the copy with the base routine */
.Ldo_err1:
        ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
        ld      r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
        ld      r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
        b       __copy_tofrom_user_base

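/*
 * __copy_tofrom_user_power7(r3 = to, r4 = from, r5 = size)
 *
 * The fast path returns 0 in r3. On a user-access fault the handlers
 * above unwind and fall back to __copy_tofrom_user_base, which works
 * out how many bytes were left uncopied.
 */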
_GLOBAL(__copy_tofrom_user_power7)
        cmpldi  r5,16
        cmpldi  cr1,r5,3328

        std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
        std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
        std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

        blt     .Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
        bgt     cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
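        /* Get the source 8B aligned */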
        neg     r6,r4
        mtocrf  0x01,r6
        clrldi  r6,r6,(64-3)

        bf      cr7*4+3,1f
err1;   lbz     r0,0(r4)
        addi    r4,r4,1
err1;   stb     r0,0(r3)
        addi    r3,r3,1

1:      bf      cr7*4+2,2f
err1;   lhz     r0,0(r4)
        addi    r4,r4,2
err1;   sth     r0,0(r3)
        addi    r3,r3,2

2:      bf      cr7*4+1,3f
err1;   lwz     r0,0(r4)
        addi    r4,r4,4
err1;   stw     r0,0(r3)
        addi    r3,r3,4

3:      sub     r5,r5,r6
        cmpldi  r5,128
        blt     5f

        mflr    r0
        stdu    r1,-STACKFRAMESIZE(r1)
        std     r14,STK_REG(R14)(r1)
        std     r15,STK_REG(R15)(r1)
        std     r16,STK_REG(R16)(r1)
        std     r17,STK_REG(R17)(r1)
        std     r18,STK_REG(R18)(r1)
        std     r19,STK_REG(R19)(r1)
        std     r20,STK_REG(R20)(r1)
        std     r21,STK_REG(R21)(r1)
        std     r22,STK_REG(R22)(r1)
        std     r0,STACKFRAMESIZE+16(r1)

        srdi    r6,r5,7
        mtctr   r6

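        /* Now do cacheline (128B) sized loads and stores. */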
        .align  5
4:
err2;   ld      r0,0(r4)
err2;   ld      r6,8(r4)
err2;   ld      r7,16(r4)
err2;   ld      r8,24(r4)
err2;   ld      r9,32(r4)
err2;   ld      r10,40(r4)
err2;   ld      r11,48(r4)
err2;   ld      r12,56(r4)
err2;   ld      r14,64(r4)
err2;   ld      r15,72(r4)
err2;   ld      r16,80(r4)
err2;   ld      r17,88(r4)
err2;   ld      r18,96(r4)
err2;   ld      r19,104(r4)
err2;   ld      r20,112(r4)
err2;   ld      r21,120(r4)
        addi    r4,r4,128
err2;   std     r0,0(r3)
err2;   std     r6,8(r3)
err2;   std     r7,16(r3)
err2;   std     r8,24(r3)
err2;   std     r9,32(r3)
err2;   std     r10,40(r3)
err2;   std     r11,48(r3)
err2;   std     r12,56(r3)
err2;   std     r14,64(r3)
err2;   std     r15,72(r3)
err2;   std     r16,80(r3)
err2;   std     r17,88(r3)
err2;   std     r18,96(r3)
err2;   std     r19,104(r3)
err2;   std     r20,112(r3)
err2;   std     r21,120(r3)
        addi    r3,r3,128
        bdnz    4b

        clrldi  r5,r5,(64-7)

        ld      r14,STK_REG(R14)(r1)
        ld      r15,STK_REG(R15)(r1)
        ld      r16,STK_REG(R16)(r1)
        ld      r17,STK_REG(R17)(r1)
        ld      r18,STK_REG(R18)(r1)
        ld      r19,STK_REG(R19)(r1)
        ld      r20,STK_REG(R20)(r1)
        ld      r21,STK_REG(R21)(r1)
        ld      r22,STK_REG(R22)(r1)
        addi    r1,r1,STACKFRAMESIZE

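        /* Up to 127B to go */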
5:      srdi    r6,r5,4
        mtocrf  0x01,r6

6:      bf      cr7*4+1,7f
err1;   ld      r0,0(r4)
err1;   ld      r6,8(r4)
err1;   ld      r7,16(r4)
err1;   ld      r8,24(r4)
err1;   ld      r9,32(r4)
err1;   ld      r10,40(r4)
err1;   ld      r11,48(r4)
err1;   ld      r12,56(r4)
        addi    r4,r4,64
err1;   std     r0,0(r3)
err1;   std     r6,8(r3)
err1;   std     r7,16(r3)
err1;   std     r8,24(r3)
err1;   std     r9,32(r3)
err1;   std     r10,40(r3)
err1;   std     r11,48(r3)
err1;   std     r12,56(r3)
        addi    r3,r3,64

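        /* Up to 63B to go */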
7:      bf      cr7*4+2,8f
err1;   ld      r0,0(r4)
err1;   ld      r6,8(r4)
err1;   ld      r7,16(r4)
err1;   ld      r8,24(r4)
        addi    r4,r4,32
err1;   std     r0,0(r3)
err1;   std     r6,8(r3)
err1;   std     r7,16(r3)
err1;   std     r8,24(r3)
        addi    r3,r3,32

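        /* Up to 31B to go */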
8:      bf      cr7*4+3,9f
err1;   ld      r0,0(r4)
err1;   ld      r6,8(r4)
        addi    r4,r4,16
err1;   std     r0,0(r3)
err1;   std     r6,8(r3)
        addi    r3,r3,16

9:      clrldi  r5,r5,(64-4)

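        /* Up to 15B to go */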
.Lshort_copy:
        mtocrf  0x01,r5
        bf      cr7*4+0,12f
err1;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
err1;   lwz     r6,4(r4)
        addi    r4,r4,8
err1;   stw     r0,0(r3)
err1;   stw     r6,4(r3)
        addi    r3,r3,8

12:     bf      cr7*4+1,13f
err1;   lwz     r0,0(r4)
        addi    r4,r4,4
err1;   stw     r0,0(r3)
        addi    r3,r3,4

13:     bf      cr7*4+2,14f
err1;   lhz     r0,0(r4)
        addi    r4,r4,2
err1;   sth     r0,0(r3)
        addi    r3,r3,2

14:     bf      cr7*4+3,15f
err1;   lbz     r0,0(r4)
err1;   stb     r0,0(r3)

15:     li      r3,0
        blr

.Lunwind_stack_nonvmx_copy:
        addi    r1,r1,STACKFRAMESIZE
        b       .Lnonvmx_copy

.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
        mflr    r0
        std     r0,16(r1)
        stdu    r1,-STACKFRAMESIZE(r1)
        bl      enter_vmx_usercopy
        cmpwi   cr1,r3,0
        ld      r0,STACKFRAMESIZE+16(r1)
        ld      r3,STK_REG(R31)(r1)
        ld      r4,STK_REG(R30)(r1)
        ld      r5,STK_REG(R29)(r1)
        mtlr    r0

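        /*
         * We prefetch both the source and destination using enhanced touch
         * instructions. We use a stream ID of 0 for the load side and
         * 1 for the store side.
         */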
        clrrdi  r6,r4,7
        clrrdi  r9,r3,7
        ori     r9,r9,1         /* stream=1 */

        srdi    r7,r5,7         /* length in cachelines, capped at 0x3FF */
        cmpldi  r7,0x3FF
        ble     1f
        li      r7,0x3FF
1:      lis     r0,0x0E00       /* depth=7 */
        sldi    r7,r7,7
        or      r7,r7,r0
        ori     r10,r7,1        /* stream=1 */

        lis     r8,0x8000       /* GO=1 */
        clrldi  r8,r8,32

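        /* setup read stream 0 */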
        dcbt    0,r6,0b01000    /* addr from */
        dcbt    0,r7,0b01010    /* length and depth from */
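        /* setup write stream 1 */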
        dcbtst  0,r9,0b01000    /* addr to */
        dcbtst  0,r10,0b01010   /* length and depth to */
        eieio
        dcbt    0,r8,0b01010    /* all streams GO */

        beq     cr1,.Lunwind_stack_nonvmx_copy

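        /*
         * If source and destination are not relatively aligned we do a
         * non-permute copy.
         */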
        xor     r6,r4,r3
        rldicl. r6,r6,0,(64-4)
        bne     .Lvmx_unaligned_copy

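        /* Get the destination 16B aligned */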
        neg     r6,r3
        mtocrf  0x01,r6
        clrldi  r6,r6,(64-4)

        bf      cr7*4+3,1f
err3;   lbz     r0,0(r4)
        addi    r4,r4,1
err3;   stb     r0,0(r3)
        addi    r3,r3,1

1:      bf      cr7*4+2,2f
err3;   lhz     r0,0(r4)
        addi    r4,r4,2
err3;   sth     r0,0(r3)
        addi    r3,r3,2

2:      bf      cr7*4+1,3f
err3;   lwz     r0,0(r4)
        addi    r4,r4,4
err3;   stw     r0,0(r3)
        addi    r3,r3,4

3:      bf      cr7*4+0,4f
err3;   ld      r0,0(r4)
        addi    r4,r4,8
err3;   std     r0,0(r3)
        addi    r3,r3,8

4:      sub     r5,r5,r6

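        /* Get the destination 128B aligned */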
        neg     r6,r3
        srdi    r7,r6,4
        mtocrf  0x01,r7
        clrldi  r6,r6,(64-7)

        li      r9,16
        li      r10,32
        li      r11,48

        bf      cr7*4+3,5f
err3;   lvx     v1,0,r4
        addi    r4,r4,16
err3;   stvx    v1,0,r3
        addi    r3,r3,16

5:      bf      cr7*4+2,6f
err3;   lvx     v1,0,r4
err3;   lvx     v0,r4,r9
        addi    r4,r4,32
err3;   stvx    v1,0,r3
err3;   stvx    v0,r3,r9
        addi    r3,r3,32

6:      bf      cr7*4+1,7f
err3;   lvx     v3,0,r4
err3;   lvx     v2,r4,r9
err3;   lvx     v1,r4,r10
err3;   lvx     v0,r4,r11
        addi    r4,r4,64
err3;   stvx    v3,0,r3
err3;   stvx    v2,r3,r9
err3;   stvx    v1,r3,r10
err3;   stvx    v0,r3,r11
        addi    r3,r3,64

7:      sub     r5,r5,r6
        srdi    r6,r5,7

        std     r14,STK_REG(R14)(r1)
        std     r15,STK_REG(R15)(r1)
        std     r16,STK_REG(R16)(r1)

        li      r12,64
        li      r14,80
        li      r15,96
        li      r16,112

        mtctr   r6

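        /*
         * Now do cacheline sized loads and stores. By this stage the
         * cacheline stores are also cacheline aligned.
         */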
        .align  5
8:
err4;   lvx     v7,0,r4
err4;   lvx     v6,r4,r9
err4;   lvx     v5,r4,r10
err4;   lvx     v4,r4,r11
err4;   lvx     v3,r4,r12
err4;   lvx     v2,r4,r14
err4;   lvx     v1,r4,r15
err4;   lvx     v0,r4,r16
        addi    r4,r4,128
err4;   stvx    v7,0,r3
err4;   stvx    v6,r3,r9
err4;   stvx    v5,r3,r10
err4;   stvx    v4,r3,r11
err4;   stvx    v3,r3,r12
err4;   stvx    v2,r3,r14
err4;   stvx    v1,r3,r15
err4;   stvx    v0,r3,r16
        addi    r3,r3,128
        bdnz    8b

        ld      r14,STK_REG(R14)(r1)
        ld      r15,STK_REG(R15)(r1)
        ld      r16,STK_REG(R16)(r1)

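        /* Up to 127B to go */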
        clrldi  r5,r5,(64-7)
        srdi    r6,r5,4
        mtocrf  0x01,r6

        bf      cr7*4+1,9f
err3;   lvx     v3,0,r4
err3;   lvx     v2,r4,r9
err3;   lvx     v1,r4,r10
err3;   lvx     v0,r4,r11
        addi    r4,r4,64
err3;   stvx    v3,0,r3
err3;   stvx    v2,r3,r9
err3;   stvx    v1,r3,r10
err3;   stvx    v0,r3,r11
        addi    r3,r3,64

9:      bf      cr7*4+2,10f
err3;   lvx     v1,0,r4
err3;   lvx     v0,r4,r9
        addi    r4,r4,32
err3;   stvx    v1,0,r3
err3;   stvx    v0,r3,r9
        addi    r3,r3,32

10:     bf      cr7*4+3,11f
err3;   lvx     v1,0,r4
        addi    r4,r4,16
err3;   stvx    v1,0,r3
        addi    r3,r3,16

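        /* Up to 15B to go */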
11:     clrldi  r5,r5,(64-4)
        mtocrf  0x01,r5
        bf      cr7*4+0,12f
err3;   ld      r0,0(r4)
        addi    r4,r4,8
err3;   std     r0,0(r3)
        addi    r3,r3,8

12:     bf      cr7*4+1,13f
err3;   lwz     r0,0(r4)
        addi    r4,r4,4
err3;   stw     r0,0(r3)
        addi    r3,r3,4

13:     bf      cr7*4+2,14f
err3;   lhz     r0,0(r4)
        addi    r4,r4,2
err3;   sth     r0,0(r3)
        addi    r3,r3,2

14:     bf      cr7*4+3,15f
err3;   lbz     r0,0(r4)
err3;   stb     r0,0(r3)

15:     addi    r1,r1,STACKFRAMESIZE
        b       exit_vmx_usercopy       /* tail call optimise */

.Lvmx_unaligned_copy:
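        /* Get the destination 16B aligned */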
        neg     r6,r3
        mtocrf  0x01,r6
        clrldi  r6,r6,(64-4)

        bf      cr7*4+3,1f
err3;   lbz     r0,0(r4)
        addi    r4,r4,1
err3;   stb     r0,0(r3)
        addi    r3,r3,1

1:      bf      cr7*4+2,2f
err3;   lhz     r0,0(r4)
        addi    r4,r4,2
err3;   sth     r0,0(r3)
        addi    r3,r3,2

2:      bf      cr7*4+1,3f
err3;   lwz     r0,0(r4)
        addi    r4,r4,4
err3;   stw     r0,0(r3)
        addi    r3,r3,4

3:      bf      cr7*4+0,4f
err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
err3;   lwz     r7,4(r4)
        addi    r4,r4,8
err3;   stw     r0,0(r3)
err3;   stw     r7,4(r3)
        addi    r3,r3,8

4:      sub     r5,r5,r6

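        /* Get the destination 128B aligned */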
        neg     r6,r3
        srdi    r7,r6,4
        mtocrf  0x01,r7
        clrldi  r6,r6,(64-7)

        li      r9,16
        li      r10,32
        li      r11,48

        LVS(v16,0,r4)           /* Setup permute control vector */
err3;   lvx     v0,0,r4
        addi    r4,r4,16

        bf      cr7*4+3,5f
err3;   lvx     v1,0,r4
        VPERM(v8,v0,v1,v16)
        addi    r4,r4,16
err3;   stvx    v8,0,r3
        addi    r3,r3,16
        vor     v0,v1,v1

5:      bf      cr7*4+2,6f
err3;   lvx     v1,0,r4
        VPERM(v8,v0,v1,v16)
err3;   lvx     v0,r4,r9
        VPERM(v9,v1,v0,v16)
        addi    r4,r4,32
err3;   stvx    v8,0,r3
err3;   stvx    v9,r3,r9
        addi    r3,r3,32

6:      bf      cr7*4+1,7f
err3;   lvx     v3,0,r4
        VPERM(v8,v0,v3,v16)
err3;   lvx     v2,r4,r9
        VPERM(v9,v3,v2,v16)
err3;   lvx     v1,r4,r10
        VPERM(v10,v2,v1,v16)
err3;   lvx     v0,r4,r11
        VPERM(v11,v1,v0,v16)
        addi    r4,r4,64
err3;   stvx    v8,0,r3
err3;   stvx    v9,r3,r9
err3;   stvx    v10,r3,r10
err3;   stvx    v11,r3,r11
        addi    r3,r3,64

7:      sub     r5,r5,r6
        srdi    r6,r5,7

        std     r14,STK_REG(R14)(r1)
        std     r15,STK_REG(R15)(r1)
        std     r16,STK_REG(R16)(r1)

        li      r12,64
        li      r14,80
        li      r15,96
        li      r16,112

        mtctr   r6

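        /*
         * Now do cacheline sized loads and stores. By this stage the
         * cacheline stores are also cacheline aligned.
         */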
        .align  5
8:
err4;   lvx     v7,0,r4
        VPERM(v8,v0,v7,v16)
err4;   lvx     v6,r4,r9
        VPERM(v9,v7,v6,v16)
err4;   lvx     v5,r4,r10
        VPERM(v10,v6,v5,v16)
err4;   lvx     v4,r4,r11
        VPERM(v11,v5,v4,v16)
err4;   lvx     v3,r4,r12
        VPERM(v12,v4,v3,v16)
err4;   lvx     v2,r4,r14
        VPERM(v13,v3,v2,v16)
err4;   lvx     v1,r4,r15
        VPERM(v14,v2,v1,v16)
err4;   lvx     v0,r4,r16
        VPERM(v15,v1,v0,v16)
        addi    r4,r4,128
err4;   stvx    v8,0,r3
err4;   stvx    v9,r3,r9
err4;   stvx    v10,r3,r10
err4;   stvx    v11,r3,r11
err4;   stvx    v12,r3,r12
err4;   stvx    v13,r3,r14
err4;   stvx    v14,r3,r15
err4;   stvx    v15,r3,r16
        addi    r3,r3,128
        bdnz    8b

        ld      r14,STK_REG(R14)(r1)
        ld      r15,STK_REG(R15)(r1)
        ld      r16,STK_REG(R16)(r1)

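        /* Up to 127B to go */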
        clrldi  r5,r5,(64-7)
        srdi    r6,r5,4
        mtocrf  0x01,r6

        bf      cr7*4+1,9f
err3;   lvx     v3,0,r4
        VPERM(v8,v0,v3,v16)
err3;   lvx     v2,r4,r9
        VPERM(v9,v3,v2,v16)
err3;   lvx     v1,r4,r10
        VPERM(v10,v2,v1,v16)
err3;   lvx     v0,r4,r11
        VPERM(v11,v1,v0,v16)
        addi    r4,r4,64
err3;   stvx    v8,0,r3
err3;   stvx    v9,r3,r9
err3;   stvx    v10,r3,r10
err3;   stvx    v11,r3,r11
        addi    r3,r3,64

9:      bf      cr7*4+2,10f
err3;   lvx     v1,0,r4
        VPERM(v8,v0,v1,v16)
err3;   lvx     v0,r4,r9
        VPERM(v9,v1,v0,v16)
        addi    r4,r4,32
err3;   stvx    v8,0,r3
err3;   stvx    v9,r3,r9
        addi    r3,r3,32

10:     bf      cr7*4+3,11f
err3;   lvx     v1,0,r4
        VPERM(v8,v0,v1,v16)
        addi    r4,r4,16
err3;   stvx    v8,0,r3
        addi    r3,r3,16

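        /* Up to 15B to go */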
11:     clrldi  r5,r5,(64-4)
        addi    r4,r4,-16       /* Unwind the +16 load offset */
        mtocrf  0x01,r5
        bf      cr7*4+0,12f
err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
err3;   lwz     r6,4(r4)
        addi    r4,r4,8
err3;   stw     r0,0(r3)
err3;   stw     r6,4(r3)
        addi    r3,r3,8

12:     bf      cr7*4+1,13f
err3;   lwz     r0,0(r4)
        addi    r4,r4,4
err3;   stw     r0,0(r3)
        addi    r3,r3,4

13:     bf      cr7*4+2,14f
err3;   lhz     r0,0(r4)
        addi    r4,r4,2
err3;   sth     r0,0(r3)
        addi    r3,r3,2

14:     bf      cr7*4+3,15f
err3;   lbz     r0,0(r4)
err3;   stb     r0,0(r3)

15:     addi    r1,r1,STACKFRAMESIZE
        b       exit_vmx_usercopy       /* tail call optimise */
#endif /* CONFIG_ALTIVEC */