
/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * Memory copy functions for 32-bit PowerPC.
   4  *
   5  * Copyright (C) 1996-2005 Paul Mackerras.
   6  */
   7 #include <asm/processor.h>
   8 #include <asm/cache.h>
   9 #include <asm/errno.h>
  10 #include <asm/ppc_asm.h>
  11 #include <asm/export.h>
  12 #include <asm/code-patching-asm.h>
  13 #include <asm/kasan.h>
  15 #define COPY_16_BYTES           \
     /* Copy 16 bytes from 4(r4) to 4(r6); r4 and r6 each end up advanced by 16. */ \
     /* All four loads are issued before the four stores; clobbers r7-r10. */ \
  16         lwz     r7,4(r4);       \
  17         lwz     r8,8(r4);       \
  18         lwz     r9,12(r4);      \
  19         lwzu    r10,16(r4);     /* load with update: r4 += 16 */ \
  20         stw     r7,4(r6);       \
  21         stw     r8,8(r6);       \
  22         stw     r9,12(r6);      \
  23         stwu    r10,16(r6)      /* store with update: r6 += 16 */
  25 #define COPY_16_BYTES_WITHEX(n) \
     /* Same 16-byte copy as COPY_16_BYTES, but every load/store gets its own */ \
     /* numeric label 8<n>0 .. 8<n>7 so that COPY_16_BYTES_EXCODE(n) can refer */ \
     /* to each instruction individually (8<n>0-3 = loads, 8<n>4-7 = stores). */ \
  26 8 ## n ## 0:                    \
  27         lwz     r7,4(r4);       \
  28 8 ## n ## 1:                    \
  29         lwz     r8,8(r4);       \
  30 8 ## n ## 2:                    \
  31         lwz     r9,12(r4);      \
  32 8 ## n ## 3:                    \
  33         lwzu    r10,16(r4);     /* r4 += 16 */ \
  34 8 ## n ## 4:                    \
  35         stw     r7,4(r6);       \
  36 8 ## n ## 5:                    \
  37         stw     r8,8(r6);       \
  38 8 ## n ## 6:                    \
  39         stw     r9,12(r6);      \
  40 8 ## n ## 7:                    \
  41         stwu    r10,16(r6)      /* r6 += 16 */
  43 #define COPY_16_BYTES_EXCODE(n)                 \
     /* Fault fixups for COPY_16_BYTES_WITHEX(n).                                */ \
     /* 9<n>0: reached for a fault on one of the loads  (8<n>0 .. 8<n>3).        */ \
     /* 9<n>1: reached for a fault on one of the stores (8<n>4 .. 8<n>7).        */ \
     /* Both subtract 16*n from r5 (the n earlier 16-byte chunks of this cache   */ \
     /* line) and then branch to the common read (104f) / write (105f) handler.  */ \
     /* EX_TABLE is defined outside this view; it is used here to pair each      */ \
     /* labelled instruction with its fixup label.                               */ \
  44 9 ## n ## 0:                                    \
  45         addi    r5,r5,-(16 * n);                \
  46         b       104f;                           \
  47 9 ## n ## 1:                                    \
  48         addi    r5,r5,-(16 * n);                \
  49         b       105f;                           \
  50         EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);    \
  51         EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);    \
  52         EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);    \
  53         EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);    \
  54         EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);    \
  55         EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);    \
  56         EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);    \
  57         EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
  59         .text                           /* the routines below are assembled into .text */
  60         .stabs  "arch/powerpc/lib/",N_SO,0,0,0f         /* N_SO stab (source directory), address = label 0 below */
  61         .stabs  "copy_32.S",N_SO,0,0,0f         /* N_SO stab (source file name), address = label 0 below */
  62 0:                                      /* target of the two 0f references above */
  68 #ifndef CONFIG_KASAN
     /*
      * memset16: fill a buffer with a 16-bit pattern.
      *
      * In:   r3 = destination, r4 = pattern (low halfword is used),
      *       r5 = number of 16-bit elements to store.
      * Out:  r3 is not modified by this routine.
      * Uses: r0, r4, r6, ctr, cr0.
      *
      * Pairs of elements are written as whole words; if the count is odd a
      * single trailing halfword is stored.  Only assembled when CONFIG_KASAN
      * is not defined.
      */
  69 _GLOBAL(memset16)
  70         rlwinm. r0 ,r5, 31, 1, 31       /* r0 = r5 >> 1 = number of whole words */
  71         addi    r6, r3, -4              /* bias by -4 for the stwu pre-increment */
  72         beq-    2f                      /* no whole word: only the odd halfword, if any */
  73         rlwimi  r4 ,r4 ,16 ,0 ,15       /* copy the low halfword of r4 into its high halfword */
  74         mtctr   r0
  75 1:      stwu    r4, 4(r6)               /* store one word, r6 += 4 */
  76         bdnz    1b
  77 2:      andi.   r0, r5, 1               /* odd element count? */
  78         beqlr                           /* no: done */
  79         sth     r4, 4(r6)               /* yes: store the final halfword */
  80         blr
  81 EXPORT_SYMBOL(memset16)
  82 #endif
  84 /*
  85  * Use dcbz on the complete cache lines in the destination
  86  * to set them to zero.  This requires that the destination
  87  * area is cacheable.  -- paulus
  88  *
  89  * During early init, cache might not be active yet, so dcbz cannot be used.
  90  * We therefore skip the optimised block that uses dcbz. This jump is
  91  * patched once cache is active (into a 'bne', see the comment at the patch
  92  * site below). This is done in machine_init()
      *
      * In:   r3 = destination, r4 = fill byte (low byte is used), r5 = byte count.
      * Out:  r3 is not modified by this routine.
      * Uses: r0, r4-r9, ctr, cr0.
      *
      * NOTE(review): CACHELINE_BYTES, LG_CACHELINE_BYTES and CACHELINE_MASK are
      * defined outside the lines visible here; the comments below assume
      * CACHELINE_MASK == CACHELINE_BYTES - 1 == (1 << LG_CACHELINE_BYTES) - 1.
  92  */
  93 _GLOBAL_KASAN(memset)
  94         cmplwi  0,r5,4
  95         blt     7f                      /* fewer than 4 bytes: plain byte loop at 7: */
  97         rlwimi  r4,r4,8,16,23           /* replicate the fill byte into byte 1 ... */
  98         rlwimi  r4,r4,16,0,15           /* ... and the low halfword into the high halfword */
 100         stw     r4,0(r3)                /* first word is stored at r3 itself */
 101         beqlr                           /* cr0 is still from the cmplwi: exactly 4 bytes, done */
 102         andi.   r0,r3,3                 /* r0 = r3 & 3 */
 103         add     r5,r0,r5                /* count is now measured from r6 */
 104         subf    r6,r0,r3                /* r6 = r3 rounded down to a multiple of 4 */
 105         cmplwi  0,r4,0                  /* cr0.eq = (fill word == 0), consumed by the branch at 5: once patched */
 106         /*
 107          * Skip optimised block until cache is enabled. Will be replaced
 108          * by 'bne' during boot to use normal procedure if r4 is not zero
 109          */
 110 5:      b       2f
 111         patch_site      5b, patch__memset_nocache

     /* dcbz path: only reached after patching, and then only when r4 == 0 */
 113         clrlwi  r7,r6,32-LG_CACHELINE_BYTES     /* r7 = offset of r6 inside its cache line */
 114         add     r8,r7,r5
 115         srwi    r9,r8,LG_CACHELINE_BYTES
 116         addic.  r9,r9,-1        /* total number of complete cachelines */
 117         ble     2f              /* none: fall back to the plain word loop */
 118         xori    r0,r7,CACHELINE_MASK & ~3
 119         srwi.   r0,r0,2         /* r0 = words to store until 4(r6) sits on a cache line boundary */
 120         beq     3f
 121         mtctr   r0
 122 4:      stwu    r4,4(r6)
 123         bdnz    4b
 124 3:      mtctr   r9
 125         li      r7,4            /* dcbz offset: r6 is biased by -4 */
 126 10:     dcbz    r7,r6           /* zero the whole cache line at r6 + 4 */
 127         addi    r6,r6,CACHELINE_BYTES
 128         bdnz    10b
 129         clrlwi  r5,r8,32-LG_CACHELINE_BYTES     /* bytes left after the last complete line */
 130         addi    r5,r5,4         /* +4: the loop at 2: stores one word fewer than r5 / 4 */

 132 2:      srwi    r0,r5,2
 133         mtctr   r0
 134         bdz     6f              /* decrements ctr first, so (r5 >> 2) - 1 words get stored */
 135 1:      stwu    r4,4(r6)
 136         bdnz    1b
 137 6:      andi.   r5,r5,3         /* trailing bytes */
 138         beqlr
 139         mtctr   r5
 140         addi    r6,r6,3         /* so that stbu 1(r6) hits the byte after the last word */
 141 8:      stbu    r4,1(r6)
 142         bdnz    8b
 143         blr

     /* short case: 0..3 bytes, stored one at a time */
 145 7:      cmpwi   0,r5,0
 146         beqlr                   /* nothing to do */
 147         mtctr   r5
 148         addi    r6,r3,-1        /* bias by -1 for the stbu pre-increment */
 149 9:      stbu    r4,1(r6)
 150         bdnz    9b
 151         blr
 152 EXPORT_SYMBOL(memset)
 155 /*
 156  * This version uses dcbz on the complete cache lines in the
 157  * destination area to reduce memory traffic.  This requires that
 158  * the destination area is cacheable.
 159  * We only use this version if the source and dest don't overlap.
 160  * -- paulus.
 161  *
 162  * During early init, cache might not be active yet, so dcbz cannot be used.
 163  * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 164  * replaced by a nop once cache is active. This is done in machine_init()
      *
      * In:   r3 = destination, r4 = source, r5 = byte count.
      * Out:  r3 is not modified by these routines.
      * Uses: r0, r4-r11, ctr, cr0, cr1.
      *
      * memmove hands off to backwards_memcpy when dest > src (unsigned compare)
      * and otherwise falls straight through into memcpy.
      *
      * NOTE(review): CACHELINE_MASK / LG_CACHELINE_BYTES / L1_CACHE_BYTES are
      * defined outside the lines visible here.
 165  */
 166 _GLOBAL_KASAN(memmove)
 167         cmplw   0,r3,r4
 168         bgt     backwards_memcpy        /* dest above source: copy from the end */
 169         /* fall through */
 171 _GLOBAL_KASAN(memcpy)
 172 1:      b       generic_memcpy          /* boot-time default, see patch site below */
 173         patch_site      1b, patch__memcpy_nocache
 175         add     r7,r3,r5                /* test if the src & dst overlap */
 176         add     r8,r4,r5
 177         cmplw   0,r4,r7                 /* cr0.lt = src < dest + n */
 178         cmplw   1,r3,r8                 /* cr1.lt = dest < src + n */
 179         crand   0,0,4                   /* cr0.lt &= cr1.lt */
 180         blt     generic_memcpy          /* if regions overlap */
 182         addi    r4,r4,-4                /* bias both pointers by -4 for the */
 183         addi    r6,r3,-4                /* update-form loads/stores below   */
 184         neg     r0,r3
 185         andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
 186         beq     58f                     /* dest already cache line aligned */
 188         cmplw   0,r5,r0                 /* is this more than total to do? */
 189         blt     63f                     /* if not much to do */
 190         andi.   r8,r0,3                 /* get it word-aligned first */
 191         subf    r5,r0,r5                /* r5 = bytes left once dest is line aligned */
 192         mtctr   r8
 193         beq+    61f
 194 70:     lbz     r9,4(r4)                /* do some bytes */
 195         addi    r4,r4,1
 196         addi    r6,r6,1
 197         stb     r9,3(r6)                /* 3(r6) after the increment == 4(r6) before it */
 198         bdnz    70b
 199 61:     srwi.   r0,r0,2
 200         mtctr   r0
 201         beq     58f
 202 72:     lwzu    r9,4(r4)                /* do some words */
 203         stwu    r9,4(r6)
 204         bdnz    72b
 206 58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
 207         clrlwi  r5,r5,32-LG_CACHELINE_BYTES     /* r5 = bytes left after those lines */
 208         li      r11,4                   /* dcbz offset: r6 is biased by -4 */
 209         mtctr   r0
 210         beq     63f                     /* no complete cache line */
 211 53:
 212         dcbz    r11,r6                  /* zero the dest line at r6 + 4 before filling it */
 213         COPY_16_BYTES
 214 #if L1_CACHE_BYTES >= 32
 215         COPY_16_BYTES
 216 #if L1_CACHE_BYTES >= 64
 217         COPY_16_BYTES
 218         COPY_16_BYTES
 219 #if L1_CACHE_BYTES >= 128
 220         COPY_16_BYTES
 221         COPY_16_BYTES
 222         COPY_16_BYTES
 223         COPY_16_BYTES
 224 #endif
 225 #endif
 226 #endif
 227         bdnz    53b
 229 63:     srwi.   r0,r5,2                 /* remaining whole words */
 230         mtctr   r0
 231         beq     64f
 232 30:     lwzu    r0,4(r4)
 233         stwu    r0,4(r6)
 234         bdnz    30b
 236 64:     andi.   r0,r5,3                 /* remaining bytes */
 237         mtctr   r0
 238         beq+    65f
 239         addi    r4,r4,3                 /* switch from the -4 bias to a -1 bias */
 240         addi    r6,r6,3
 241 40:     lbzu    r0,1(r4)
 242         stbu    r0,1(r6)
 243         bdnz    40b
 244 65:     blr
 245 EXPORT_SYMBOL(memcpy)
 246 EXPORT_SYMBOL(memmove)
 250 generic_memcpy:
     /*
      * Forward copy that does not use dcbz.
      *
      * In:   r3 = destination, r4 = source, r5 = byte count.
      * Out:  r3 is not modified by this routine.
      * Uses: r0, r4-r8, ctr, cr0.
      *
      * Copies 8 bytes per iteration once the destination is word aligned,
      * then at most one more word, then the remaining bytes.
      */
 251         srwi.   r7,r5,3                 /* r7 = number of 8-byte chunks */
 252         addi    r6,r3,-4                /* bias both pointers by -4 for the */
 253         addi    r4,r4,-4                /* update-form loads/stores below   */
 254         beq     2f                      /* if less than 8 bytes to do */
 255         andi.   r0,r6,3                 /* get dest word aligned */
 256         mtctr   r7
 257         bne     5f                      /* dest misaligned: copy leading bytes at 5: first */
 258 1:      lwz     r7,4(r4)
 259         lwzu    r8,8(r4)
 260         stw     r7,4(r6)
 261         stwu    r8,8(r6)
 262         bdnz    1b
 263         andi.   r5,r5,7                 /* bytes left after the 8-byte loop */
 264 2:      cmplwi  0,r5,4
 265         blt     3f
 266         lwzu    r0,4(r4)                /* one more whole word */
 267         addi    r5,r5,-4
 268         stwu    r0,4(r6)
 269 3:      cmpwi   0,r5,0
 270         beqlr                           /* nothing left */
 271         mtctr   r5
 272         addi    r4,r4,3                 /* switch from the -4 bias to a -1 bias */
 273         addi    r6,r6,3
 274 4:      lbzu    r0,1(r4)
 275         stbu    r0,1(r6)
 276         bdnz    4b
 277         blr
 278 5:      subfic  r0,r0,4                 /* r0 = bytes needed to word-align dest */
 279         mtctr   r0
 280 6:      lbz     r7,4(r4)
 281         addi    r4,r4,1
 282         stb     r7,4(r6)
 283         addi    r6,r6,1
 284         bdnz    6b
 285         subf    r5,r0,r5                /* account for the alignment bytes */
 286         rlwinm. r7,r5,32-3,3,31         /* r7 = r5 >> 3 */
 287         beq     2b                      /* fewer than 8 bytes left */
 288         mtctr   r7
 289         b       1b
 291 _GLOBAL(backwards_memcpy)
     /*
      * Copy r5 bytes from r4 to r3, working from the last byte down to the
      * first (memmove branches here when dest > src).
      *
      * In:   r3 = destination, r4 = source, r5 = byte count.
      * Out:  r3 is not modified by this routine.
      * Uses: r0, r4-r8, ctr, cr0.
      */
 292         rlwinm. r7,r5,32-3,3,31         /* r7 = r5 >> 3 */
 293         add     r6,r3,r5                /* r6 = one past the end of dest */
 294         add     r4,r4,r5                /* r4 = one past the end of source */
 295         beq     2f                      /* fewer than 8 bytes to do */
 296         andi.   r0,r6,3                 /* is the end of dest word aligned? */
 297         mtctr   r7
 298         bne     5f                      /* no: copy r0 trailing bytes at 5: first */
 299 1:      lwz     r7,-4(r4)
 300         lwzu    r8,-8(r4)
 301         stw     r7,-4(r6)
 302         stwu    r8,-8(r6)
 303         bdnz    1b
 304         andi.   r5,r5,7                 /* bytes left after the 8-byte loop */
 305 2:      cmplwi  0,r5,4
 306         blt     3f
 307         lwzu    r0,-4(r4)               /* one more whole word */
 308         subi    r5,r5,4
 309         stwu    r0,-4(r6)
 310 3:      cmpwi   0,r5,0
 311         beqlr                           /* nothing left */
 312         mtctr   r5
 313 4:      lbzu    r0,-1(r4)
 314         stbu    r0,-1(r6)
 315         bdnz    4b
 316         blr
 317 5:      mtctr   r0                      /* r0 = (end of dest) & 3 */
 318 6:      lbzu    r7,-1(r4)
 319         stbu    r7,-1(r6)
 320         bdnz    6b
 321         subf    r5,r0,r5                /* account for the alignment bytes */
 322         rlwinm. r7,r5,32-3,3,31         /* r7 = r5 >> 3 */
 323         beq     2b                      /* fewer than 8 bytes left */
 324         mtctr   r7
 325         b       1b
 327 _GLOBAL(__copy_tofrom_user)
     /*
      * Copy with fault handling on every load and store.
      *
      * In:   r3 = destination, r4 = source, r5 = byte count.
      * Out:  r3 = 0 when everything was copied, otherwise the number of bytes
      *       not copied as computed by the fixup code at 99:/106: below.
      * Uses: r0, r3-r11, ctr, cr0.
      *
      * EX_TABLE(insn, fixup) is defined outside this view; it is used here to
      * pair each possibly-faulting instruction with the label of its fixup.
      * NOTE(review): CACHELINE_*, LG_CACHELINE_BYTES, L1_CACHE_BYTES and
      * MAX_COPY_PREFETCH are also defined outside the lines visible here.
      */
 328         addi    r4,r4,-4                /* bias both pointers by -4 for the */
 329         addi    r6,r3,-4                /* update-form loads/stores below   */
 330         neg     r0,r3
 331         andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
 332         beq     58f                     /* dest already cache line aligned */
 334         cmplw   0,r5,r0                 /* is this more than total to do? */
 335         blt     63f                     /* if not much to do */
 336         andi.   r8,r0,3                 /* get it word-aligned first */
 337         mtctr   r8
 338         beq+    61f
 339 70:     lbz     r9,4(r4)                /* do some bytes */
 340 71:     stb     r9,4(r6)
 341         addi    r4,r4,1
 342         addi    r6,r6,1
 343         bdnz    70b
 344 61:     subf    r5,r0,r5                /* r5 = bytes left once dest is line aligned */
 345         srwi.   r0,r0,2
 346         mtctr   r0
 347         beq     58f
 348 72:     lwzu    r9,4(r4)                /* do some words */
 349 73:     stwu    r9,4(r6)
 350         bdnz    72b
 352         EX_TABLE(70b,100f)
 353         EX_TABLE(71b,101f)
 354         EX_TABLE(72b,102f)
 355         EX_TABLE(73b,103f)
 357 58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
 358         clrlwi  r5,r5,32-LG_CACHELINE_BYTES     /* r5 = bytes left after those lines */
 359         li      r11,4                   /* dcbz offset: r6 is biased by -4 */
 360         beq     63f                     /* no complete cache line */
 362         /* Here we decide how far ahead to prefetch the source */
 363         li      r3,4                    /* dcbt offset (r4 is biased by -4); r3 no longer holds dest */
 364         cmpwi   r0,1
 365         li      r7,0                    /* r7 = number of lines prefetched ahead */
 366         ble     114f                    /* a single line: no prefetch ahead */
 367         li      r7,1
 368 #if MAX_COPY_PREFETCH > 1
 369         /* Heuristically, for large transfers we prefetch
 370            MAX_COPY_PREFETCH cachelines ahead.  For small transfers
 371            we prefetch 1 cacheline ahead. */
 372         cmpwi   r0,MAX_COPY_PREFETCH
 373         ble     112f
 374         li      r7,MAX_COPY_PREFETCH
 375 112:    mtctr   r7
 376 111:    dcbt    r3,r4                   /* (label 111 is defined again in the fixup code below) */
 377         addi    r3,r3,CACHELINE_BYTES
 378         bdnz    111b
 379 #else
 380         dcbt    r3,r4
 381         addi    r3,r3,CACHELINE_BYTES
 382 #endif /* MAX_COPY_PREFETCH > 1 */
 384 114:    subf    r8,r7,r0                /* lines to copy in this pass */
 385         mr      r0,r7                   /* r0 = lines held back for a second pass */
 386         mtctr   r8
 388 53:     dcbt    r3,r4
 389 54:     dcbz    r11,r6                  /* zero the dest line at r6 + 4 before filling it */
 390         EX_TABLE(54b,105f)
 391 /* the main body of the cacheline loop */
 392         COPY_16_BYTES_WITHEX(0)
 393 #if L1_CACHE_BYTES >= 32
 394         COPY_16_BYTES_WITHEX(1)
 395 #if L1_CACHE_BYTES >= 64
 396         COPY_16_BYTES_WITHEX(2)
 397         COPY_16_BYTES_WITHEX(3)
 398 #if L1_CACHE_BYTES >= 128
 399         COPY_16_BYTES_WITHEX(4)
 400         COPY_16_BYTES_WITHEX(5)
 401         COPY_16_BYTES_WITHEX(6)
 402         COPY_16_BYTES_WITHEX(7)
 403 #endif
 404 #endif
 405 #endif
 406         bdnz    53b
 407         cmpwi   r0,0                    /* any lines held back? (li below leaves cr0 alone) */
 408         li      r3,4                    /* second pass: dcbt offset back to the current line */
 409         li      r7,0                    /* ... and hold nothing back this time */
 410         bne     114b
 412 63:     srwi.   r0,r5,2                 /* remaining whole words */
 413         mtctr   r0
 414         beq     64f
 415 30:     lwzu    r0,4(r4)
 416 31:     stwu    r0,4(r6)
 417         bdnz    30b
 419 64:     andi.   r0,r5,3                 /* remaining bytes */
 420         mtctr   r0
 421         beq+    65f
 422 40:     lbz     r0,4(r4)
 423 41:     stb     r0,4(r6)
 424         addi    r4,r4,1
 425         addi    r6,r6,1
 426         bdnz    40b
 427 65:     li      r3,0                    /* success: nothing left uncopied */
 428         blr
 430 /* read fault, initial single-byte copy */
 431 100:    li      r9,0
 432         b       90f
 433 /* write fault, initial single-byte copy */
 434 101:    li      r9,1
 435 90:     subf    r5,r8,r5                /* the r8 alignment bytes are counted through ctr instead */
 436         li      r3,0                    /* shift 0: ctr counts bytes */
 437         b       99f
 438 /* read fault, initial word copy */
 439 102:    li      r9,0
 440         b       91f
 441 /* write fault, initial word copy */
 442 103:    li      r9,1
 443 91:     li      r3,2                    /* shift 2: ctr counts words */
 444         b       99f
 446 /*
 447  * this stuff handles faults in the cacheline loop and branches to either
 448  * 104f (if in read part) or 105f (if in write part), after updating r5
 449  */
 450         COPY_16_BYTES_EXCODE(0)
 451 #if L1_CACHE_BYTES >= 32
 452         COPY_16_BYTES_EXCODE(1)
 453 #if L1_CACHE_BYTES >= 64
 454         COPY_16_BYTES_EXCODE(2)
 455         COPY_16_BYTES_EXCODE(3)
 456 #if L1_CACHE_BYTES >= 128
 457         COPY_16_BYTES_EXCODE(4)
 458         COPY_16_BYTES_EXCODE(5)
 459         COPY_16_BYTES_EXCODE(6)
 460         COPY_16_BYTES_EXCODE(7)
 461 #endif
 462 #endif
 463 #endif
 465 /* read fault in cacheline loop */
 466 104:    li      r9,0
 467         b       92f
 468 /* fault on dcbz (effectively a write fault) */
 469 /* or write fault in cacheline loop */
 470 105:    li      r9,1
 471 92:     li      r3,LG_CACHELINE_BYTES   /* shift: ctr counts cache lines */
 472         mfctr   r8
 473         add     r0,r0,r8                /* lines left in this pass + lines held back */
 474         b       106f
 475 /* read fault in final word loop */
 476 108:    li      r9,0
 477         b       93f
 478 /* write fault in final word loop */
 479 109:    li      r9,1
 480 93:     andi.   r5,r5,3                 /* keep only the trailing bytes; the words come from ctr */
 481         li      r3,2                    /* shift 2: ctr counts words */
 482         b       99f
 483 /* read fault in final byte loop */
 484 110:    li      r9,0
 485         b       94f
 486 /* write fault in final byte loop */
 487 111:    li      r9,1
 488 94:     li      r5,0
 489         li      r3,0                    /* shift 0: ctr counts bytes */
 490 /*
 491  * At this stage the number of bytes not copied is
 492  * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 493  */
 494 99:     mfctr   r0
 495 106:    slw     r3,r0,r3
 496         add.    r3,r3,r5                /* r3 = bytes not copied */
 497         beq     120f                    /* shouldn't happen */
 498         cmpwi   0,r9,0
 499         bne     120f                    /* write fault: return the count as is */
 500 /* for a read fault, first try to continue the copy one byte at a time */
 501         mtctr   r3
 502 130:    lbz     r0,4(r4)
 503 131:    stb     r0,4(r6)
 504         addi    r4,r4,1
 505         addi    r6,r6,1
 506         bdnz    130b
 507 /*
      * Reached when the byte loop finishes or when the load at 130 faults:
      * ctr holds the bytes still not copied and becomes the return value.
      * No clearing of the destination is done here.  A fault on the store at
      * 131 goes straight to 120 with r3 unchanged from the value set at 106.
      */
 508 132:    mfctr   r3
 509 120:    blr
 511         EX_TABLE(30b,108b)
 512         EX_TABLE(31b,109b)
 513         EX_TABLE(40b,110b)
 514         EX_TABLE(41b,111b)
 515         EX_TABLE(130b,132b)
 516         EX_TABLE(131b,120b)
 518 EXPORT_SYMBOL(__copy_tofrom_user)

/* [<][>][^][v][top][bottom][index][help] */