root/arch/powerpc/lib/memcpy_64.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
   4  */
   5 #include <asm/processor.h>
   6 #include <asm/ppc_asm.h>
   7 #include <asm/export.h>
   8 #include <asm/asm-compat.h>
   9 #include <asm/feature-fixups.h>
  10 #include <asm/kasan.h>
  11 
  12 #ifndef SELFTEST_CASE
  13 /* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
  14 #define SELFTEST_CASE   0
  15 #endif
  16 
/*
 * void *memcpy(void *to [r3], const void *from [r4], size_t n [r5])
 *
 * Returns the original destination pointer in r3.  The big-endian
 * path stores r3 at a negative offset from r1 on entry and reloads
 * it at every exit.  Clobbers r0, r5-r12, ctr, cr0, cr1, cr6, cr7.
 */
   17         .align  7
   18 _GLOBAL_TOC_KASAN(memcpy)
/*
 * Runtime feature dispatch: with CPU_FTR_VMX_COPY clear the inline
 * copy below runs; with it set (and CONFIG_PPC_BOOK3S_64) control
 * branches to the VMX-assisted memcpy_power7 instead.
 */
   19 BEGIN_FTR_SECTION
   20 #ifdef __LITTLE_ENDIAN__
   21         cmpdi   cr7,r5,0                /* LE: record whether n == 0 for beqlr below */
   22 #else
   23         std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* save destination pointer for return value */
   24 #endif
   25 FTR_SECTION_ELSE
   26 #ifdef CONFIG_PPC_BOOK3S_64
   27         b       memcpy_power7
   28 #endif
   29 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
   30 #ifdef __LITTLE_ENDIAN__
   31         /* dumb little-endian memcpy that will get replaced at runtime */
   32         addi r9,r3,-1                   /* r9 = dest - 1, for stbu pre-increment */
   33         addi r4,r4,-1                   /* r4 = src - 1, for lbzu pre-increment */
   34         beqlr cr7                       /* n == 0: return (cr7 set above) */
   35         mtctr r5
   36 1:      lbzu r10,1(r4)                  /* one byte per iteration, n iterations */
   37         stbu r10,1(r9)
   38         bdnz 1b
   39         blr
   40 #else
        /*
         * Big-endian path.  PPC_MTOCRF(0x01,r5) copies the low 4 bits
         * of the length into cr7, so bits cr7*4+0..3 later select the
         * 8-, 4-, 2- and 1-byte pieces of the residual length.
         */
   41         PPC_MTOCRF(0x01,r5)
   42         cmpldi  cr1,r5,16
   43         neg     r6,r3           # LS 3 bits = # bytes to 8-byte dest bdry
   44         andi.   r6,r6,7
   45         dcbt    0,r4            # prefetch first source cache block
   46         blt     cr1,.Lshort_copy
   47 /* Below we want to nop out the bne if we're on a CPU that has the
   48    CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   49    cleared.
   50    At the time of writing the only CPU that has this combination of bits
   51    set is Power6. */
   52 test_feature = (SELFTEST_CASE == 1)
   53 BEGIN_FTR_SECTION
   54         nop
   55 FTR_SECTION_ELSE
   56         bne     .Ldst_unaligned
   57 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
   58                     CPU_FTR_UNALIGNED_LD_STD)
   59 .Ldst_aligned:
   60         addi    r3,r3,-16
   61 test_feature = (SELFTEST_CASE == 0)
   62 BEGIN_FTR_SECTION
   63         andi.   r0,r4,7
   64         bne     .Lsrc_unaligned
   65 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        /*
         * Aligned copy: move two doublewords (16 bytes) per loop
         * iteration, software-pipelined so each store writes the value
         * loaded on the previous iteration.
         */
   66         srdi    r7,r5,4         # r7 = number of 16-byte chunks
   67         ld      r9,0(r4)
   68         addi    r4,r4,-8
   69         mtctr   r7
   70         andi.   r5,r5,7         # r5 = tail bytes (n & 7)
   71         bf      cr7*4+0,2f      # no odd doubleword: enter loop at 2f
   72         addi    r3,r3,8
   73         addi    r4,r4,8
   74         mr      r8,r9
   75         blt     cr1,3f          # fewer than 16 bytes: skip the loop
   76 1:      ld      r9,8(r4)
   77         std     r8,8(r3)
   78 2:      ldu     r8,16(r4)
   79         stdu    r9,16(r3)
   80         bdnz    1b
   81 3:      std     r8,8(r3)
   82         beq     3f              # (n & 7) == 0: no tail bytes, return
   83         addi    r3,r3,16
   84 .Ldo_tail:
        /*
         * Copy the final 1-7 bytes, sizes taken from the 4/2/1 bits in
         * cr7.  r4 still trails the remaining source data by 8 (from
         * the pipelined loop above), hence the 8(r4) load offsets.
         */
   85         bf      cr7*4+1,1f
   86         lwz     r9,8(r4)
   87         addi    r4,r4,4
   88         stw     r9,0(r3)
   89         addi    r3,r3,4
   90 1:      bf      cr7*4+2,2f
   91         lhz     r9,8(r4)
   92         addi    r4,r4,2
   93         sth     r9,0(r3)
   94         addi    r3,r3,2
   95 2:      bf      cr7*4+3,3f
   96         lbz     r9,8(r4)
   97         stb     r9,0(r3)
   98 3:      ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
   99         blr
  100 
        /*
         * Source not 8-byte aligned (destination is).  Read aligned
         * doublewords and merge each adjacent pair with shift-and-or:
         *   r0  = source misalignment (1-7)
         *   r10 = 8*r0   (left-shift count)
         *   r11 = 64-r10 (right-shift count)
         * cr6 = (#doublewords cmp 3) steers the early exits for short
         * inputs; cr7 bit 0 picks one of two loop preambles.
         */
  101 .Lsrc_unaligned:
  102         srdi    r6,r5,3
  103         addi    r5,r5,-16
  104         subf    r4,r0,r4        # align r4 down to an 8-byte boundary
  105         srdi    r7,r5,4
  106         sldi    r10,r0,3
  107         cmpdi   cr6,r6,3
  108         andi.   r5,r5,7
  109         mtctr   r7
  110         subfic  r11,r10,64
  111         add     r5,r5,r0        # r5 = (n & 7) + misalignment, for the tail test below
  112 
  113         bt      cr7*4+0,0f
  114 
  115         ld      r9,0(r4)        # 3+2n loads, 2+2n stores
  116         ld      r0,8(r4)
  117         sld     r6,r9,r10
  118         ldu     r9,16(r4)
  119         srd     r7,r0,r11
  120         sld     r8,r0,r10
  121         or      r7,r7,r6
  122         blt     cr6,4f
  123         ld      r0,8(r4)
  124         # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
  125         b       2f
  126 
  127 0:      ld      r0,0(r4)        # 4+2n loads, 3+2n stores
  128         ldu     r9,8(r4)
  129         sld     r8,r0,r10
  130         addi    r3,r3,-8
  131         blt     cr6,5f
  132         ld      r0,8(r4)
  133         srd     r12,r9,r11
  134         sld     r6,r9,r10
  135         ldu     r9,16(r4)
  136         or      r12,r8,r12
  137         srd     r7,r0,r11
  138         sld     r8,r0,r10
  139         addi    r3,r3,16
  140         beq     cr6,3f
  141 
  142         # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
  143 1:      or      r7,r7,r6
  144         ld      r0,8(r4)
  145         std     r12,8(r3)
  146 2:      srd     r12,r9,r11
  147         sld     r6,r9,r10
  148         ldu     r9,16(r4)
  149         or      r12,r8,r12
  150         stdu    r7,16(r3)
  151         srd     r7,r0,r11
  152         sld     r8,r0,r10
  153         bdnz    1b
  154 
  155 3:      std     r12,8(r3)
  156         or      r7,r7,r6
  157 4:      std     r7,16(r3)
  158 5:      srd     r12,r9,r11
  159         or      r12,r8,r12
  160         std     r12,24(r3)
  161         beq     4f              # (n & 7) == 0: no tail bytes, return
  162         cmpwi   cr1,r5,8
  163         addi    r3,r3,32
  164         sld     r9,r9,r10       # left-justify the leftover source bytes in r9
  165         ble     cr1,6f          # tail fits in what is already loaded
  166         ld      r0,8(r4)        # else fetch one more source doubleword
  167         srd     r7,r0,r11
  168         or      r9,r7,r9
        /*
         * Store the final 1-7 bytes from the high end of r9, rotating
         * the next piece down into store position each time.
         */
  169 6:
  170         bf      cr7*4+1,1f
  171         rotldi  r9,r9,32
  172         stw     r9,0(r3)
  173         addi    r3,r3,4
  174 1:      bf      cr7*4+2,2f
  175         rotldi  r9,r9,16
  176         sth     r9,0(r3)
  177         addi    r3,r3,2
  178 2:      bf      cr7*4+3,3f
  179         rotldi  r9,r9,8
  180         stb     r9,0(r3)
  181 3:      ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
  182         blr
  183 
        /*
         * Destination not 8-byte aligned: copy r6 (1-7) bytes — a
         * byte, halfword and/or word, per the cr7 nibble — so the
         * destination reaches an 8-byte boundary, then rejoin the
         * aligned path.
         */
  184 .Ldst_unaligned:
  185         PPC_MTOCRF(0x01,r6)             # put #bytes to 8B bdry into cr7
  186         subf    r5,r6,r5                # r5 = remaining length after alignment
  187         li      r7,0                    # r7 = running byte offset for indexed ops
  188         cmpldi  cr1,r5,16
  189         bf      cr7*4+3,1f
  190         lbz     r0,0(r4)
  191         stb     r0,0(r3)
  192         addi    r7,r7,1
  193 1:      bf      cr7*4+2,2f
  194         lhzx    r0,r7,r4
  195         sthx    r0,r7,r3
  196         addi    r7,r7,2
  197 2:      bf      cr7*4+1,3f
  198         lwzx    r0,r7,r4
  199         stwx    r0,r7,r3
  200 3:      PPC_MTOCRF(0x01,r5)             # reload cr7 with the adjusted length
  201         add     r4,r6,r4
  202         add     r3,r6,r3
  203         b       .Ldst_aligned
  204 
        /*
         * n < 16: copy 8, 4, 2 and/or 1 bytes as selected by the low
         * length bits in cr7.
         */
  205 .Lshort_copy:
  206         bf      cr7*4+0,1f
  207         lwz     r0,0(r4)        # copy 8 bytes as two words
  208         lwz     r9,4(r4)
  209         addi    r4,r4,8
  210         stw     r0,0(r3)
  211         stw     r9,4(r3)
  212         addi    r3,r3,8
  213 1:      bf      cr7*4+1,2f
  214         lwz     r0,0(r4)
  215         addi    r4,r4,4
  216         stw     r0,0(r3)
  217         addi    r3,r3,4
  218 2:      bf      cr7*4+2,3f
  219         lhz     r0,0(r4)
  220         addi    r4,r4,2
  221         sth     r0,0(r3)
  222         addi    r3,r3,2
  223 3:      bf      cr7*4+3,4f
  224         lbz     r0,0(r4)
  225         stb     r0,0(r3)
  226 4:      ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
  227         blr
  228 #endif
  229 EXPORT_SYMBOL(memcpy)
  230 EXPORT_SYMBOL_KASAN(memcpy)

/* [<][>][^][v][top][bottom][index][help] */