root/arch/powerpc/lib/copy_32.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * Memory copy functions for 32-bit PowerPC.
   4  *
   5  * Copyright (C) 1996-2005 Paul Mackerras.
   6  */
   7 #include <asm/processor.h>
   8 #include <asm/cache.h>
   9 #include <asm/errno.h>
  10 #include <asm/ppc_asm.h>
  11 #include <asm/export.h>
  12 #include <asm/code-patching-asm.h>
  13 #include <asm/kasan.h>
  14 
  15 #define COPY_16_BYTES           \
  16         lwz     r7,4(r4);       \
  17         lwz     r8,8(r4);       \
  18         lwz     r9,12(r4);      \
  19         lwzu    r10,16(r4);     \
  20         stw     r7,4(r6);       \
  21         stw     r8,8(r6);       \
  22         stw     r9,12(r6);      \
  23         stwu    r10,16(r6)
  24 
  25 #define COPY_16_BYTES_WITHEX(n) \
  26 8 ## n ## 0:                    \
  27         lwz     r7,4(r4);       \
  28 8 ## n ## 1:                    \
  29         lwz     r8,8(r4);       \
  30 8 ## n ## 2:                    \
  31         lwz     r9,12(r4);      \
  32 8 ## n ## 3:                    \
  33         lwzu    r10,16(r4);     \
  34 8 ## n ## 4:                    \
  35         stw     r7,4(r6);       \
  36 8 ## n ## 5:                    \
  37         stw     r8,8(r6);       \
  38 8 ## n ## 6:                    \
  39         stw     r9,12(r6);      \
  40 8 ## n ## 7:                    \
  41         stwu    r10,16(r6)
  42 
  43 #define COPY_16_BYTES_EXCODE(n)                 \
  44 9 ## n ## 0:                                    \
  45         addi    r5,r5,-(16 * n);                \
  46         b       104f;                           \
  47 9 ## n ## 1:                                    \
  48         addi    r5,r5,-(16 * n);                \
  49         b       105f;                           \
  50         EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);    \
  51         EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);    \
  52         EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);    \
  53         EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);    \
  54         EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);    \
  55         EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);    \
  56         EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);    \
  57         EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
  58 
  59         .text
  60         .stabs  "arch/powerpc/lib/",N_SO,0,0,0f
  61         .stabs  "copy_32.S",N_SO,0,0,0f
  62 0:
  63 
  64 CACHELINE_BYTES = L1_CACHE_BYTES
  65 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
  66 CACHELINE_MASK = (L1_CACHE_BYTES-1)
  67 
  68 #ifndef CONFIG_KASAN
  69 _GLOBAL(memset16)
  70         rlwinm. r0 ,r5, 31, 1, 31
  71         addi    r6, r3, -4
  72         beq-    2f
  73         rlwimi  r4 ,r4 ,16 ,0 ,15
  74         mtctr   r0
  75 1:      stwu    r4, 4(r6)
  76         bdnz    1b
  77 2:      andi.   r0, r5, 1
  78         beqlr
  79         sth     r4, 4(r6)
  80         blr
  81 EXPORT_SYMBOL(memset16)
  82 #endif
  83 
  84 /*
  85  * Use dcbz on the complete cache lines in the destination
  86  * to set them to zero.  This requires that the destination
  87  * area is cacheable.  -- paulus
  88  *
  89  * During early init, cache might not be active yet, so dcbz cannot be used.
  90  * We therefore skip the optimised bloc that uses dcbz. This jump is
  91  * replaced by a nop once cache is active. This is done in machine_init()
  92  */
  93 _GLOBAL_KASAN(memset)
  94         cmplwi  0,r5,4
  95         blt     7f
  96 
  97         rlwimi  r4,r4,8,16,23
  98         rlwimi  r4,r4,16,0,15
  99 
 100         stw     r4,0(r3)
 101         beqlr
 102         andi.   r0,r3,3
 103         add     r5,r0,r5
 104         subf    r6,r0,r3
 105         cmplwi  0,r4,0
 106         /*
 107          * Skip optimised bloc until cache is enabled. Will be replaced
 108          * by 'bne' during boot to use normal procedure if r4 is not zero
 109          */
 110 5:      b       2f
 111         patch_site      5b, patch__memset_nocache
 112 
 113         clrlwi  r7,r6,32-LG_CACHELINE_BYTES
 114         add     r8,r7,r5
 115         srwi    r9,r8,LG_CACHELINE_BYTES
 116         addic.  r9,r9,-1        /* total number of complete cachelines */
 117         ble     2f
 118         xori    r0,r7,CACHELINE_MASK & ~3
 119         srwi.   r0,r0,2
 120         beq     3f
 121         mtctr   r0
 122 4:      stwu    r4,4(r6)
 123         bdnz    4b
 124 3:      mtctr   r9
 125         li      r7,4
 126 10:     dcbz    r7,r6
 127         addi    r6,r6,CACHELINE_BYTES
 128         bdnz    10b
 129         clrlwi  r5,r8,32-LG_CACHELINE_BYTES
 130         addi    r5,r5,4
 131 
 132 2:      srwi    r0,r5,2
 133         mtctr   r0
 134         bdz     6f
 135 1:      stwu    r4,4(r6)
 136         bdnz    1b
 137 6:      andi.   r5,r5,3
 138         beqlr
 139         mtctr   r5
 140         addi    r6,r6,3
 141 8:      stbu    r4,1(r6)
 142         bdnz    8b
 143         blr
 144 
 145 7:      cmpwi   0,r5,0
 146         beqlr
 147         mtctr   r5
 148         addi    r6,r3,-1
 149 9:      stbu    r4,1(r6)
 150         bdnz    9b
 151         blr
 152 EXPORT_SYMBOL(memset)
 153 EXPORT_SYMBOL_KASAN(memset)
 154 
 155 /*
 156  * This version uses dcbz on the complete cache lines in the
 157  * destination area to reduce memory traffic.  This requires that
 158  * the destination area is cacheable.
 159  * We only use this version if the source and dest don't overlap.
 160  * -- paulus.
 161  *
 162  * During early init, cache might not be active yet, so dcbz cannot be used.
 163  * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 164  * replaced by a nop once cache is active. This is done in machine_init()
 165  */
 166 _GLOBAL_KASAN(memmove)
 167         cmplw   0,r3,r4
 168         bgt     backwards_memcpy
 169         /* fall through */
 170 
 171 _GLOBAL_KASAN(memcpy)
 172 1:      b       generic_memcpy
 173         patch_site      1b, patch__memcpy_nocache
 174 
 175         add     r7,r3,r5                /* test if the src & dst overlap */
 176         add     r8,r4,r5
 177         cmplw   0,r4,r7
 178         cmplw   1,r3,r8
 179         crand   0,0,4                   /* cr0.lt &= cr1.lt */
 180         blt     generic_memcpy          /* if regions overlap */
 181 
 182         addi    r4,r4,-4
 183         addi    r6,r3,-4
 184         neg     r0,r3
 185         andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
 186         beq     58f
 187 
 188         cmplw   0,r5,r0                 /* is this more than total to do? */
 189         blt     63f                     /* if not much to do */
 190         andi.   r8,r0,3                 /* get it word-aligned first */
 191         subf    r5,r0,r5
 192         mtctr   r8
 193         beq+    61f
 194 70:     lbz     r9,4(r4)                /* do some bytes */
 195         addi    r4,r4,1
 196         addi    r6,r6,1
 197         stb     r9,3(r6)
 198         bdnz    70b
 199 61:     srwi.   r0,r0,2
 200         mtctr   r0
 201         beq     58f
 202 72:     lwzu    r9,4(r4)                /* do some words */
 203         stwu    r9,4(r6)
 204         bdnz    72b
 205 
 206 58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
 207         clrlwi  r5,r5,32-LG_CACHELINE_BYTES
 208         li      r11,4
 209         mtctr   r0
 210         beq     63f
 211 53:
 212         dcbz    r11,r6
 213         COPY_16_BYTES
 214 #if L1_CACHE_BYTES >= 32
 215         COPY_16_BYTES
 216 #if L1_CACHE_BYTES >= 64
 217         COPY_16_BYTES
 218         COPY_16_BYTES
 219 #if L1_CACHE_BYTES >= 128
 220         COPY_16_BYTES
 221         COPY_16_BYTES
 222         COPY_16_BYTES
 223         COPY_16_BYTES
 224 #endif
 225 #endif
 226 #endif
 227         bdnz    53b
 228 
 229 63:     srwi.   r0,r5,2
 230         mtctr   r0
 231         beq     64f
 232 30:     lwzu    r0,4(r4)
 233         stwu    r0,4(r6)
 234         bdnz    30b
 235 
 236 64:     andi.   r0,r5,3
 237         mtctr   r0
 238         beq+    65f
 239         addi    r4,r4,3
 240         addi    r6,r6,3
 241 40:     lbzu    r0,1(r4)
 242         stbu    r0,1(r6)
 243         bdnz    40b
 244 65:     blr
 245 EXPORT_SYMBOL(memcpy)
 246 EXPORT_SYMBOL(memmove)
 247 EXPORT_SYMBOL_KASAN(memcpy)
 248 EXPORT_SYMBOL_KASAN(memmove)
 249 
 250 generic_memcpy:
 251         srwi.   r7,r5,3
 252         addi    r6,r3,-4
 253         addi    r4,r4,-4
 254         beq     2f                      /* if less than 8 bytes to do */
 255         andi.   r0,r6,3                 /* get dest word aligned */
 256         mtctr   r7
 257         bne     5f
 258 1:      lwz     r7,4(r4)
 259         lwzu    r8,8(r4)
 260         stw     r7,4(r6)
 261         stwu    r8,8(r6)
 262         bdnz    1b
 263         andi.   r5,r5,7
 264 2:      cmplwi  0,r5,4
 265         blt     3f
 266         lwzu    r0,4(r4)
 267         addi    r5,r5,-4
 268         stwu    r0,4(r6)
 269 3:      cmpwi   0,r5,0
 270         beqlr
 271         mtctr   r5
 272         addi    r4,r4,3
 273         addi    r6,r6,3
 274 4:      lbzu    r0,1(r4)
 275         stbu    r0,1(r6)
 276         bdnz    4b
 277         blr
 278 5:      subfic  r0,r0,4
 279         mtctr   r0
 280 6:      lbz     r7,4(r4)
 281         addi    r4,r4,1
 282         stb     r7,4(r6)
 283         addi    r6,r6,1
 284         bdnz    6b
 285         subf    r5,r0,r5
 286         rlwinm. r7,r5,32-3,3,31
 287         beq     2b
 288         mtctr   r7
 289         b       1b
 290 
 291 _GLOBAL(backwards_memcpy)
 292         rlwinm. r7,r5,32-3,3,31         /* r0 = r5 >> 3 */
 293         add     r6,r3,r5
 294         add     r4,r4,r5
 295         beq     2f
 296         andi.   r0,r6,3
 297         mtctr   r7
 298         bne     5f
 299 1:      lwz     r7,-4(r4)
 300         lwzu    r8,-8(r4)
 301         stw     r7,-4(r6)
 302         stwu    r8,-8(r6)
 303         bdnz    1b
 304         andi.   r5,r5,7
 305 2:      cmplwi  0,r5,4
 306         blt     3f
 307         lwzu    r0,-4(r4)
 308         subi    r5,r5,4
 309         stwu    r0,-4(r6)
 310 3:      cmpwi   0,r5,0
 311         beqlr
 312         mtctr   r5
 313 4:      lbzu    r0,-1(r4)
 314         stbu    r0,-1(r6)
 315         bdnz    4b
 316         blr
 317 5:      mtctr   r0
 318 6:      lbzu    r7,-1(r4)
 319         stbu    r7,-1(r6)
 320         bdnz    6b
 321         subf    r5,r0,r5
 322         rlwinm. r7,r5,32-3,3,31
 323         beq     2b
 324         mtctr   r7
 325         b       1b
 326 
 327 _GLOBAL(__copy_tofrom_user)
 328         addi    r4,r4,-4
 329         addi    r6,r3,-4
 330         neg     r0,r3
 331         andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
 332         beq     58f
 333 
 334         cmplw   0,r5,r0                 /* is this more than total to do? */
 335         blt     63f                     /* if not much to do */
 336         andi.   r8,r0,3                 /* get it word-aligned first */
 337         mtctr   r8
 338         beq+    61f
 339 70:     lbz     r9,4(r4)                /* do some bytes */
 340 71:     stb     r9,4(r6)
 341         addi    r4,r4,1
 342         addi    r6,r6,1
 343         bdnz    70b
 344 61:     subf    r5,r0,r5
 345         srwi.   r0,r0,2
 346         mtctr   r0
 347         beq     58f
 348 72:     lwzu    r9,4(r4)                /* do some words */
 349 73:     stwu    r9,4(r6)
 350         bdnz    72b
 351 
 352         EX_TABLE(70b,100f)
 353         EX_TABLE(71b,101f)
 354         EX_TABLE(72b,102f)
 355         EX_TABLE(73b,103f)
 356 
 357 58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
 358         clrlwi  r5,r5,32-LG_CACHELINE_BYTES
 359         li      r11,4
 360         beq     63f
 361 
 362         /* Here we decide how far ahead to prefetch the source */
 363         li      r3,4
 364         cmpwi   r0,1
 365         li      r7,0
 366         ble     114f
 367         li      r7,1
 368 #if MAX_COPY_PREFETCH > 1
 369         /* Heuristically, for large transfers we prefetch
 370            MAX_COPY_PREFETCH cachelines ahead.  For small transfers
 371            we prefetch 1 cacheline ahead. */
 372         cmpwi   r0,MAX_COPY_PREFETCH
 373         ble     112f
 374         li      r7,MAX_COPY_PREFETCH
 375 112:    mtctr   r7
 376 111:    dcbt    r3,r4
 377         addi    r3,r3,CACHELINE_BYTES
 378         bdnz    111b
 379 #else
 380         dcbt    r3,r4
 381         addi    r3,r3,CACHELINE_BYTES
 382 #endif /* MAX_COPY_PREFETCH > 1 */
 383 
 384 114:    subf    r8,r7,r0
 385         mr      r0,r7
 386         mtctr   r8
 387 
 388 53:     dcbt    r3,r4
 389 54:     dcbz    r11,r6
 390         EX_TABLE(54b,105f)
 391 /* the main body of the cacheline loop */
 392         COPY_16_BYTES_WITHEX(0)
 393 #if L1_CACHE_BYTES >= 32
 394         COPY_16_BYTES_WITHEX(1)
 395 #if L1_CACHE_BYTES >= 64
 396         COPY_16_BYTES_WITHEX(2)
 397         COPY_16_BYTES_WITHEX(3)
 398 #if L1_CACHE_BYTES >= 128
 399         COPY_16_BYTES_WITHEX(4)
 400         COPY_16_BYTES_WITHEX(5)
 401         COPY_16_BYTES_WITHEX(6)
 402         COPY_16_BYTES_WITHEX(7)
 403 #endif
 404 #endif
 405 #endif
 406         bdnz    53b
 407         cmpwi   r0,0
 408         li      r3,4
 409         li      r7,0
 410         bne     114b
 411 
 412 63:     srwi.   r0,r5,2
 413         mtctr   r0
 414         beq     64f
 415 30:     lwzu    r0,4(r4)
 416 31:     stwu    r0,4(r6)
 417         bdnz    30b
 418 
 419 64:     andi.   r0,r5,3
 420         mtctr   r0
 421         beq+    65f
 422 40:     lbz     r0,4(r4)
 423 41:     stb     r0,4(r6)
 424         addi    r4,r4,1
 425         addi    r6,r6,1
 426         bdnz    40b
 427 65:     li      r3,0
 428         blr
 429 
 430 /* read fault, initial single-byte copy */
 431 100:    li      r9,0
 432         b       90f
 433 /* write fault, initial single-byte copy */
 434 101:    li      r9,1
 435 90:     subf    r5,r8,r5
 436         li      r3,0
 437         b       99f
 438 /* read fault, initial word copy */
 439 102:    li      r9,0
 440         b       91f
 441 /* write fault, initial word copy */
 442 103:    li      r9,1
 443 91:     li      r3,2
 444         b       99f
 445 
 446 /*
 447  * this stuff handles faults in the cacheline loop and branches to either
 448  * 104f (if in read part) or 105f (if in write part), after updating r5
 449  */
 450         COPY_16_BYTES_EXCODE(0)
 451 #if L1_CACHE_BYTES >= 32
 452         COPY_16_BYTES_EXCODE(1)
 453 #if L1_CACHE_BYTES >= 64
 454         COPY_16_BYTES_EXCODE(2)
 455         COPY_16_BYTES_EXCODE(3)
 456 #if L1_CACHE_BYTES >= 128
 457         COPY_16_BYTES_EXCODE(4)
 458         COPY_16_BYTES_EXCODE(5)
 459         COPY_16_BYTES_EXCODE(6)
 460         COPY_16_BYTES_EXCODE(7)
 461 #endif
 462 #endif
 463 #endif
 464 
 465 /* read fault in cacheline loop */
 466 104:    li      r9,0
 467         b       92f
 468 /* fault on dcbz (effectively a write fault) */
 469 /* or write fault in cacheline loop */
 470 105:    li      r9,1
 471 92:     li      r3,LG_CACHELINE_BYTES
 472         mfctr   r8
 473         add     r0,r0,r8
 474         b       106f
 475 /* read fault in final word loop */
 476 108:    li      r9,0
 477         b       93f
 478 /* write fault in final word loop */
 479 109:    li      r9,1
 480 93:     andi.   r5,r5,3
 481         li      r3,2
 482         b       99f
 483 /* read fault in final byte loop */
 484 110:    li      r9,0
 485         b       94f
 486 /* write fault in final byte loop */
 487 111:    li      r9,1
 488 94:     li      r5,0
 489         li      r3,0
 490 /*
 491  * At this stage the number of bytes not copied is
 492  * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 493  */
 494 99:     mfctr   r0
 495 106:    slw     r3,r0,r3
 496         add.    r3,r3,r5
 497         beq     120f                    /* shouldn't happen */
 498         cmpwi   0,r9,0
 499         bne     120f
 500 /* for a read fault, first try to continue the copy one byte at a time */
 501         mtctr   r3
 502 130:    lbz     r0,4(r4)
 503 131:    stb     r0,4(r6)
 504         addi    r4,r4,1
 505         addi    r6,r6,1
 506         bdnz    130b
 507 /* then clear out the destination: r3 bytes starting at 4(r6) */
 508 132:    mfctr   r3
 509 120:    blr
 510 
 511         EX_TABLE(30b,108b)
 512         EX_TABLE(31b,109b)
 513         EX_TABLE(40b,110b)
 514         EX_TABLE(41b,111b)
 515         EX_TABLE(130b,132b)
 516         EX_TABLE(131b,120b)
 517 
 518 EXPORT_SYMBOL(__copy_tofrom_user)

/* [<][>][^][v][top][bottom][index][help] */