arch/powerpc/lib/copyuser_64.S

   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
   4  */
   5 #include <asm/processor.h>
   6 #include <asm/ppc_asm.h>
   7 #include <asm/export.h>
   8 #include <asm/asm-compat.h>
   9 #include <asm/feature-fixups.h>
  10 
  11 #ifndef SELFTEST_CASE
  12 /* 0 == most CPUs, 1 == POWER6, 2 == Cell */
  13 #define SELFTEST_CASE   0
  14 #endif
  15 
  16 #ifdef __BIG_ENDIAN__
  17 #define sLd sld         /* Shift towards low-numbered address. */
  18 #define sHd srd         /* Shift towards high-numbered address. */
  19 #else
  20 #define sLd srd         /* Shift towards low-numbered address. */
  21 #define sHd sld         /* Shift towards high-numbered address. */
  22 #endif
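      /*
       * On big-endian the lowest-addressed byte sits at the most significant
       * end of a register, so shifting data towards lower addresses is a left
       * shift; on little-endian it is the least significant end, so the same
       * movement is a right shift.  The unaligned-source loop below relies on
       * this when stitching doublewords together with sLd/sHd.
       */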
  23 
  24 /*
  25  * These macros are used to generate exception table entries.
  26  * The exception handlers below use the original arguments
  27  * (stored on the stack) and the point where we're up to in
  28  * the destination buffer, i.e. the address of the first
  29  * unmodified byte.  Generally r3 points into the destination
  30  * buffer, but the first unmodified byte is at a variable
  31  * offset from r3.  In the code below, the symbol r3_offset
  32  * is set to indicate the current offset at each point in
  33  * the code.  This offset is then used as a negative offset
  34  * from the exception handler code, and those instructions
  35  * before the exception handlers are addi instructions that
  36  * adjust r3 to point to the correct place.
  37  */
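      /*
       * For example, at a point where r3_offset = 16 the exception-table
       * entry points at .Lld_exc - 16 (or .Lst_exc - 16); the 16 bytes of
       * code immediately before the handler are addi r3,r3,N instructions
       * (plus padding nops) that together add 16 to r3, so falling through
       * them leaves r3 pointing at the first unmodified byte before the
       * common handler code runs.
       */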
  38         .macro  lex             /* exception handler for load */
  39 100:    EX_TABLE(100b, .Lld_exc - r3_offset)
  40         .endm
  41 
  42         .macro  stex            /* exception handler for store */
  43 100:    EX_TABLE(100b, .Lst_exc - r3_offset)
  44         .endm
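      /*
       * Most loads from the source buffer below are tagged with "lex;" and
       * most stores to the destination with "stex;", so each such instruction
       * gets an exception-table entry pointing r3_offset bytes before the
       * common fixup code.  A few places that need different fixups
       * (e.g. .Ldst_unaligned) emit EX_TABLE entries directly instead.
       */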
  45 
  46         .align  7
  47 _GLOBAL_TOC(__copy_tofrom_user)
  48 #ifdef CONFIG_PPC_BOOK3S_64
  49 BEGIN_FTR_SECTION
  50         nop
  51 FTR_SECTION_ELSE
  52         b       __copy_tofrom_user_power7
  53 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
  54 #endif
  55 _GLOBAL(__copy_tofrom_user_base)
   56         /* first check for a whole-page (4kB) copy with both src and dest 4kB-aligned */
  57         cmpldi  cr1,r5,16
  58         cmpdi   cr6,r5,4096
  59         or      r0,r3,r4
  60         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
  61         andi.   r0,r0,4095
  62         std     r3,-24(r1)
  63         crand   cr0*4+2,cr0*4+2,cr6*4+2
  64         std     r4,-16(r1)
  65         std     r5,-8(r1)
  66         dcbt    0,r4
  67         beq     .Lcopy_page_4K
  68         andi.   r6,r6,7
  69         PPC_MTOCRF(0x01,r5)
  70         blt     cr1,.Lshort_copy
   71 /* Below we want to nop out the bne if we're on a CPU that has the
   72  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   73  * cleared.
   74  * At the time of writing, the only CPU with this combination
   75  * (UNALIGNED_LD_STD set, CP_USE_DCBTZ clear) is POWER6.
   76  */
  77 test_feature = (SELFTEST_CASE == 1)
  78 BEGIN_FTR_SECTION
  79         nop
  80 FTR_SECTION_ELSE
  81         bne     .Ldst_unaligned
  82 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
  83                     CPU_FTR_UNALIGNED_LD_STD)
  84 .Ldst_aligned:
  85         addi    r3,r3,-16
  86 r3_offset = 16
  87 test_feature = (SELFTEST_CASE == 0)
  88 BEGIN_FTR_SECTION
  89         andi.   r0,r4,7
  90         bne     .Lsrc_unaligned
  91 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
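      /*
       * Main aligned copy: 32 bytes per iteration (ctr = len/32), with each
       * pair of loads issued one iteration ahead of the matching stores.
       * Running the loads ahead is what lets the load-fault handler
       * (.Lld_exc) usually copy a few more bytes byte-by-byte.
       */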
  92         blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
  93         srdi    r0,r5,5
  94         cmpdi   cr1,r0,0
  95 lex;    ld      r7,0(r4)
  96 lex;    ld      r6,8(r4)
  97         addi    r4,r4,16
  98         mtctr   r0
  99         andi.   r0,r5,0x10
 100         beq     22f
 101         addi    r3,r3,16
 102 r3_offset = 0
 103         addi    r4,r4,-16
 104         mr      r9,r7
 105         mr      r8,r6
 106         beq     cr1,72f
 107 21:
 108 lex;    ld      r7,16(r4)
 109 lex;    ld      r6,24(r4)
 110         addi    r4,r4,32
 111 stex;   std     r9,0(r3)
 112 r3_offset = 8
 113 stex;   std     r8,8(r3)
 114 r3_offset = 16
 115 22:
 116 lex;    ld      r9,0(r4)
 117 lex;    ld      r8,8(r4)
 118 stex;   std     r7,16(r3)
 119 r3_offset = 24
 120 stex;   std     r6,24(r3)
 121         addi    r3,r3,32
 122 r3_offset = 0
 123         bdnz    21b
 124 72:
 125 stex;   std     r9,0(r3)
 126 r3_offset = 8
 127 stex;   std     r8,8(r3)
 128 r3_offset = 16
 129         andi.   r5,r5,0xf
 130         beq+    3f
 131         addi    r4,r4,16
 132 .Ldo_tail:
 133         addi    r3,r3,16
 134 r3_offset = 0
 135         bf      cr7*4+0,246f
 136 lex;    ld      r9,0(r4)
 137         addi    r4,r4,8
 138 stex;   std     r9,0(r3)
 139         addi    r3,r3,8
 140 246:    bf      cr7*4+1,1f
 141 lex;    lwz     r9,0(r4)
 142         addi    r4,r4,4
 143 stex;   stw     r9,0(r3)
 144         addi    r3,r3,4
 145 1:      bf      cr7*4+2,2f
 146 lex;    lhz     r9,0(r4)
 147         addi    r4,r4,2
 148 stex;   sth     r9,0(r3)
 149         addi    r3,r3,2
 150 2:      bf      cr7*4+3,3f
 151 lex;    lbz     r9,0(r4)
 152 stex;   stb     r9,0(r3)
 153 3:      li      r3,0
 154         blr
 155 
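      /*
       * Source is not 8-byte aligned (the destination is): round the source
       * pointer down to an 8-byte boundary, load whole doublewords, and
       * assemble each aligned destination doubleword from two consecutive
       * source doublewords using the sLd/sHd shifts defined above
       * (r10 = misalignment in bits, r11 = 64 - r10).
       */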
 156 .Lsrc_unaligned:
 157 r3_offset = 16
 158         srdi    r6,r5,3
 159         addi    r5,r5,-16
 160         subf    r4,r0,r4
 161         srdi    r7,r5,4
 162         sldi    r10,r0,3
 163         cmpldi  cr6,r6,3
 164         andi.   r5,r5,7
 165         mtctr   r7
 166         subfic  r11,r10,64
 167         add     r5,r5,r0
 168         bt      cr7*4+0,28f
 169 
 170 lex;    ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
 171 lex;    ld      r0,8(r4)
 172         sLd     r6,r9,r10
 173 lex;    ldu     r9,16(r4)
 174         sHd     r7,r0,r11
 175         sLd     r8,r0,r10
 176         or      r7,r7,r6
 177         blt     cr6,79f
 178 lex;    ld      r0,8(r4)
 179         b       2f
 180 
 181 28:
 182 lex;    ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
 183 lex;    ldu     r9,8(r4)
 184         sLd     r8,r0,r10
 185         addi    r3,r3,-8
 186 r3_offset = 24
 187         blt     cr6,5f
 188 lex;    ld      r0,8(r4)
 189         sHd     r12,r9,r11
 190         sLd     r6,r9,r10
 191 lex;    ldu     r9,16(r4)
 192         or      r12,r8,r12
 193         sHd     r7,r0,r11
 194         sLd     r8,r0,r10
 195         addi    r3,r3,16
 196 r3_offset = 8
 197         beq     cr6,78f
 198 
 199 1:      or      r7,r7,r6
 200 lex;    ld      r0,8(r4)
 201 stex;   std     r12,8(r3)
 202 r3_offset = 16
 203 2:      sHd     r12,r9,r11
 204         sLd     r6,r9,r10
 205 lex;    ldu     r9,16(r4)
 206         or      r12,r8,r12
 207 stex;   stdu    r7,16(r3)
 208 r3_offset = 8
 209         sHd     r7,r0,r11
 210         sLd     r8,r0,r10
 211         bdnz    1b
 212 
 213 78:
 214 stex;   std     r12,8(r3)
 215 r3_offset = 16
 216         or      r7,r7,r6
 217 79:
 218 stex;   std     r7,16(r3)
 219 r3_offset = 24
 220 5:      sHd     r12,r9,r11
 221         or      r12,r8,r12
 222 stex;   std     r12,24(r3)
 223 r3_offset = 32
 224         bne     6f
 225         li      r3,0
 226         blr
 227 6:      cmpwi   cr1,r5,8
 228         addi    r3,r3,32
 229 r3_offset = 0
 230         sLd     r9,r9,r10
 231         ble     cr1,7f
 232 lex;    ld      r0,8(r4)
 233         sHd     r7,r0,r11
 234         or      r9,r7,r9
 235 7:
 236         bf      cr7*4+1,1f
 237 #ifdef __BIG_ENDIAN__
 238         rotldi  r9,r9,32
 239 #endif
 240 stex;   stw     r9,0(r3)
 241 #ifdef __LITTLE_ENDIAN__
 242         rotrdi  r9,r9,32
 243 #endif
 244         addi    r3,r3,4
 245 1:      bf      cr7*4+2,2f
 246 #ifdef __BIG_ENDIAN__
 247         rotldi  r9,r9,16
 248 #endif
 249 stex;   sth     r9,0(r3)
 250 #ifdef __LITTLE_ENDIAN__
 251         rotrdi  r9,r9,16
 252 #endif
 253         addi    r3,r3,2
 254 2:      bf      cr7*4+3,3f
 255 #ifdef __BIG_ENDIAN__
 256         rotldi  r9,r9,8
 257 #endif
 258 stex;   stb     r9,0(r3)
 259 #ifdef __LITTLE_ENDIAN__
 260         rotrdi  r9,r9,8
 261 #endif
 262 3:      li      r3,0
 263         blr
 264 
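      /*
       * Destination is not 8-byte aligned: copy 1, 2 and/or 4 bytes (chosen
       * by the low bits of r6, the distance to the next 8-byte boundary) to
       * align it, then rejoin .Ldst_aligned.  r3 is not advanced here, so
       * these accesses use the _r7 fixup handlers, which add the running
       * byte count kept in r7 before falling into the common code.
       */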
 265 .Ldst_unaligned:
 266 r3_offset = 0
 267         PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
 268         subf    r5,r6,r5
 269         li      r7,0
 270         cmpldi  cr1,r5,16
 271         bf      cr7*4+3,1f
 272 100:    EX_TABLE(100b, .Lld_exc_r7)
 273         lbz     r0,0(r4)
 274 100:    EX_TABLE(100b, .Lst_exc_r7)
 275         stb     r0,0(r3)
 276         addi    r7,r7,1
 277 1:      bf      cr7*4+2,2f
 278 100:    EX_TABLE(100b, .Lld_exc_r7)
 279         lhzx    r0,r7,r4
 280 100:    EX_TABLE(100b, .Lst_exc_r7)
 281         sthx    r0,r7,r3
 282         addi    r7,r7,2
 283 2:      bf      cr7*4+1,3f
 284 100:    EX_TABLE(100b, .Lld_exc_r7)
 285         lwzx    r0,r7,r4
 286 100:    EX_TABLE(100b, .Lst_exc_r7)
 287         stwx    r0,r7,r3
 288 3:      PPC_MTOCRF(0x01,r5)
 289         add     r4,r6,r4
 290         add     r3,r6,r3
 291         b       .Ldst_aligned
 292 
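      /*
       * Copies of fewer than 16 bytes: cr7 already holds the low four bits
       * of the length, so the blocks below move 8, 4, 2 and/or 1 byte(s).
       * The 8-byte case is done as two word moves, presumably because
       * neither pointer is guaranteed to be aligned here.
       */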
 293 .Lshort_copy:
 294 r3_offset = 0
 295         bf      cr7*4+0,1f
 296 lex;    lwz     r0,0(r4)
 297 lex;    lwz     r9,4(r4)
 298         addi    r4,r4,8
 299 stex;   stw     r0,0(r3)
 300 stex;   stw     r9,4(r3)
 301         addi    r3,r3,8
 302 1:      bf      cr7*4+1,2f
 303 lex;    lwz     r0,0(r4)
 304         addi    r4,r4,4
 305 stex;   stw     r0,0(r3)
 306         addi    r3,r3,4
 307 2:      bf      cr7*4+2,3f
 308 lex;    lhz     r0,0(r4)
 309         addi    r4,r4,2
 310 stex;   sth     r0,0(r3)
 311         addi    r3,r3,2
 312 3:      bf      cr7*4+3,4f
 313 lex;    lbz     r0,0(r4)
 314 stex;   stb     r0,0(r3)
 315 4:      li      r3,0
 316         blr
 317 
 318 /*
 319  * exception handlers follow
 320  * we have to return the number of bytes not copied
  321  * for an exception on a load, we keep copying byte-by-byte for as long as we can
 322  * Note that the number of bytes of instructions for adjusting r3 needs
 323  * to equal the amount of the adjustment, due to the trick of using
 324  * .Lld_exc - r3_offset as the handler address.
 325  */
 326 
 327 .Lld_exc_r7:
 328         add     r3,r3,r7
 329         b       .Lld_exc
 330 
 331         /* adjust by 24 */
 332         addi    r3,r3,8
 333         nop
 334         /* adjust by 16 */
 335         addi    r3,r3,8
 336         nop
 337         /* adjust by 8 */
 338         addi    r3,r3,8
 339         nop
 340 
 341 /*
 342  * Here we have had a fault on a load and r3 points to the first
 343  * unmodified byte of the destination.  We use the original arguments
 344  * and r3 to work out how much wasn't copied.  Since we load some
 345  * distance ahead of the stores, we continue copying byte-by-byte until
 346  * we hit the load fault again in order to copy as much as possible.
 347  */
 348 .Lld_exc:
  349         ld      r6,-24(r1)      /* original destination pointer */
  350         ld      r4,-16(r1)      /* original source pointer */
  351         ld      r5,-8(r1)       /* original number of bytes */
  352         subf    r6,r6,r3        /* #bytes already copied */
  353         add     r4,r4,r6        /* advance src past the copied bytes */
 354         subf    r5,r6,r5        /* #bytes left to go */
 355 
 356 /*
 357  * first see if we can copy any more bytes before hitting another exception
 358  */
 359         mtctr   r5
 360 r3_offset = 0
 361 100:    EX_TABLE(100b, .Ldone)
 362 43:     lbz     r0,0(r4)
 363         addi    r4,r4,1
 364 stex;   stb     r0,0(r3)
 365         addi    r3,r3,1
 366         bdnz    43b
 367         li      r3,0            /* huh? all copied successfully this time? */
 368         blr
 369 
 370 /*
 371  * here we have trapped again, amount remaining is in ctr.
 372  */
 373 .Ldone:
 374         mfctr   r3
 375         blr
 376 
 377 /*
 378  * exception handlers for stores: we need to work out how many bytes
 379  * weren't copied, and we may need to copy some more.
 380  * Note that the number of bytes of instructions for adjusting r3 needs
 381  * to equal the amount of the adjustment, due to the trick of using
 382  * .Lst_exc - r3_offset as the handler address.
 383  */
 384 .Lst_exc_r7:
 385         add     r3,r3,r7
 386         b       .Lst_exc
 387 
 388         /* adjust by 24 */
 389         addi    r3,r3,8
 390         nop
 391         /* adjust by 16 */
 392         addi    r3,r3,8
 393         nop
 394         /* adjust by 8 */
 395         addi    r3,r3,4
 396         /* adjust by 4 */
 397         addi    r3,r3,4
 398 .Lst_exc:
 399         ld      r6,-24(r1)      /* original destination pointer */
 400         ld      r4,-16(r1)      /* original source pointer */
 401         ld      r5,-8(r1)       /* original number of bytes */
 402         add     r7,r6,r5
 403         /*
 404          * If the destination pointer isn't 8-byte aligned,
 405          * we may have got the exception as a result of a
 406          * store that overlapped a page boundary, so we may be
 407          * able to copy a few more bytes.
 408          */
 409 17:     andi.   r0,r3,7
 410         beq     19f
 411         subf    r8,r6,r3        /* #bytes copied */
 412 100:    EX_TABLE(100b,19f)
 413         lbzx    r0,r8,r4
 414 100:    EX_TABLE(100b,19f)
 415         stb     r0,0(r3)
 416         addi    r3,r3,1
 417         cmpld   r3,r7
 418         blt     17b
 419 19:     subf    r3,r3,r7        /* #bytes not copied in r3 */
 420         blr
 421 
 422 /*
 423  * Routine to copy a whole page of data, optimized for POWER4.
 424  * On POWER4 it is more than 50% faster than the simple loop
 425  * above (following the .Ldst_aligned label).
 426  */
 427         .macro  exc
 428 100:    EX_TABLE(100b, .Labort)
 429         .endm
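      /*
       * Rough structure of the loop below: the loads and stores run in six
       * streams spaced 128 bytes apart (presumably one POWER4 cache line
       * each, so several line fills are in flight at once).  Each pass of
       * the outer loop copies 768 bytes; the loops at 3: and 4: mop up the
       * remaining 32-byte blocks.
       */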
 430 .Lcopy_page_4K:
 431         std     r31,-32(1)
 432         std     r30,-40(1)
 433         std     r29,-48(1)
 434         std     r28,-56(1)
 435         std     r27,-64(1)
 436         std     r26,-72(1)
 437         std     r25,-80(1)
 438         std     r24,-88(1)
 439         std     r23,-96(1)
 440         std     r22,-104(1)
 441         std     r21,-112(1)
 442         std     r20,-120(1)
 443         li      r5,4096/32 - 1
 444         addi    r3,r3,-8
 445         li      r0,5
 446 0:      addi    r5,r5,-24
 447         mtctr   r0
 448 exc;    ld      r22,640(4)
 449 exc;    ld      r21,512(4)
 450 exc;    ld      r20,384(4)
 451 exc;    ld      r11,256(4)
 452 exc;    ld      r9,128(4)
 453 exc;    ld      r7,0(4)
 454 exc;    ld      r25,648(4)
 455 exc;    ld      r24,520(4)
 456 exc;    ld      r23,392(4)
 457 exc;    ld      r10,264(4)
 458 exc;    ld      r8,136(4)
 459 exc;    ldu     r6,8(4)
 460         cmpwi   r5,24
 461 1:
 462 exc;    std     r22,648(3)
 463 exc;    std     r21,520(3)
 464 exc;    std     r20,392(3)
 465 exc;    std     r11,264(3)
 466 exc;    std     r9,136(3)
 467 exc;    std     r7,8(3)
 468 exc;    ld      r28,648(4)
 469 exc;    ld      r27,520(4)
 470 exc;    ld      r26,392(4)
 471 exc;    ld      r31,264(4)
 472 exc;    ld      r30,136(4)
 473 exc;    ld      r29,8(4)
 474 exc;    std     r25,656(3)
 475 exc;    std     r24,528(3)
 476 exc;    std     r23,400(3)
 477 exc;    std     r10,272(3)
 478 exc;    std     r8,144(3)
 479 exc;    std     r6,16(3)
 480 exc;    ld      r22,656(4)
 481 exc;    ld      r21,528(4)
 482 exc;    ld      r20,400(4)
 483 exc;    ld      r11,272(4)
 484 exc;    ld      r9,144(4)
 485 exc;    ld      r7,16(4)
 486 exc;    std     r28,664(3)
 487 exc;    std     r27,536(3)
 488 exc;    std     r26,408(3)
 489 exc;    std     r31,280(3)
 490 exc;    std     r30,152(3)
 491 exc;    stdu    r29,24(3)
 492 exc;    ld      r25,664(4)
 493 exc;    ld      r24,536(4)
 494 exc;    ld      r23,408(4)
 495 exc;    ld      r10,280(4)
 496 exc;    ld      r8,152(4)
 497 exc;    ldu     r6,24(4)
 498         bdnz    1b
 499 exc;    std     r22,648(3)
 500 exc;    std     r21,520(3)
 501 exc;    std     r20,392(3)
 502 exc;    std     r11,264(3)
 503 exc;    std     r9,136(3)
 504 exc;    std     r7,8(3)
 505         addi    r4,r4,640
 506         addi    r3,r3,648
 507         bge     0b
 508         mtctr   r5
 509 exc;    ld      r7,0(4)
 510 exc;    ld      r8,8(4)
 511 exc;    ldu     r9,16(4)
 512 3:
 513 exc;    ld      r10,8(4)
 514 exc;    std     r7,8(3)
 515 exc;    ld      r7,16(4)
 516 exc;    std     r8,16(3)
 517 exc;    ld      r8,24(4)
 518 exc;    std     r9,24(3)
 519 exc;    ldu     r9,32(4)
 520 exc;    stdu    r10,32(3)
 521         bdnz    3b
 522 4:
 523 exc;    ld      r10,8(4)
 524 exc;    std     r7,8(3)
 525 exc;    std     r8,16(3)
 526 exc;    std     r9,24(3)
 527 exc;    std     r10,32(3)
 528 9:      ld      r20,-120(1)
 529         ld      r21,-112(1)
 530         ld      r22,-104(1)
 531         ld      r23,-96(1)
 532         ld      r24,-88(1)
 533         ld      r25,-80(1)
 534         ld      r26,-72(1)
 535         ld      r27,-64(1)
 536         ld      r28,-56(1)
 537         ld      r29,-48(1)
 538         ld      r30,-40(1)
 539         ld      r31,-32(1)
 540         li      r3,0
 541         blr
 542 
 543 /*
 544  * on an exception, reset to the beginning and jump back into the
 545  * standard __copy_tofrom_user
 546  */
 547 .Labort:
 548         ld      r20,-120(1)
 549         ld      r21,-112(1)
 550         ld      r22,-104(1)
 551         ld      r23,-96(1)
 552         ld      r24,-88(1)
 553         ld      r25,-80(1)
 554         ld      r26,-72(1)
 555         ld      r27,-64(1)
 556         ld      r28,-56(1)
 557         ld      r29,-48(1)
 558         ld      r30,-40(1)
 559         ld      r31,-32(1)
 560         ld      r3,-24(r1)
 561         ld      r4,-16(r1)
 562         li      r5,4096
 563         b       .Ldst_aligned
 564 EXPORT_SYMBOL(__copy_tofrom_user)
