root/arch/sparc/lib/NGmemcpy.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /* NGmemcpy.S: Niagara optimized memcpy.
   3  *
   4  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
   5  */
   6 
   7 #ifdef __KERNEL__
   8 #include <linux/linkage.h>
   9 #include <asm/asi.h>
  10 #include <asm/thread_info.h>
  11 #define GLOBAL_SPARE    %g7
  12 #define RESTORE_ASI(TMP)        \
  13         ldub    [%g6 + TI_CURRENT_DS], TMP;  \
  14         wr      TMP, 0x0, %asi;
  15 #else
  16 #define GLOBAL_SPARE    %g5
  17 #define RESTORE_ASI(TMP)        \
  18         wr      %g0, ASI_PNF, %asi
  19 #endif
  20 
  21 #ifdef __sparc_v9__
  22 #define SAVE_AMOUNT     128
  23 #else
  24 #define SAVE_AMOUNT     64
  25 #endif
  26 
  27 #ifndef STORE_ASI
  28 #define STORE_ASI       ASI_BLK_INIT_QUAD_LDD_P
  29 #endif
  30 
  31 #ifndef EX_LD
  32 #define EX_LD(x,y)      x
  33 #endif
  34 
  35 #ifndef EX_ST
  36 #define EX_ST(x,y)      x
  37 #endif
  38 
  39 #ifndef LOAD
  40 #ifndef MEMCPY_DEBUG
  41 #define LOAD(type,addr,dest)    type [addr], dest
  42 #else
  43 #define LOAD(type,addr,dest)    type##a [addr] 0x80, dest
  44 #endif
  45 #endif
  46 
  47 #ifndef LOAD_TWIN
  48 #define LOAD_TWIN(addr_reg,dest0,dest1) \
  49         ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
  50 #endif
  51 
  52 #ifndef STORE
  53 #define STORE(type,src,addr)    type src, [addr]
  54 #endif
  55 
  56 #ifndef STORE_INIT
  57 #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
  58 #define STORE_INIT(src,addr)    stxa src, [addr] %asi
  59 #else
  60 #define STORE_INIT(src,addr)    stx src, [addr + 0x00]
  61 #endif
  62 #endif
  63 
  64 #ifndef FUNC_NAME
  65 #define FUNC_NAME       NGmemcpy
  66 #endif
  67 
  68 #ifndef PREAMBLE
  69 #define PREAMBLE
  70 #endif
  71 
  72 #ifndef XCC
  73 #define XCC xcc
  74 #endif
  75 
  76         .register       %g2,#scratch
  77         .register       %g3,#scratch
  78 
  79         .text
  80 #ifndef EX_RETVAL
  81 #define EX_RETVAL(x)    x
  82 __restore_asi:
  83         ret
  84         wr      %g0, ASI_AIUS, %asi
  85          restore
  86 ENTRY(NG_ret_i2_plus_i4_plus_1)
  87         ba,pt   %xcc, __restore_asi
  88          add    %i2, %i5, %i0
  89 ENDPROC(NG_ret_i2_plus_i4_plus_1)
  90 ENTRY(NG_ret_i2_plus_g1)
  91         ba,pt   %xcc, __restore_asi
  92          add    %i2, %g1, %i0
  93 ENDPROC(NG_ret_i2_plus_g1)
  94 ENTRY(NG_ret_i2_plus_g1_minus_8)
  95         sub     %g1, 8, %g1
  96         ba,pt   %xcc, __restore_asi
  97          add    %i2, %g1, %i0
  98 ENDPROC(NG_ret_i2_plus_g1_minus_8)
  99 ENTRY(NG_ret_i2_plus_g1_minus_16)
 100         sub     %g1, 16, %g1
 101         ba,pt   %xcc, __restore_asi
 102          add    %i2, %g1, %i0
 103 ENDPROC(NG_ret_i2_plus_g1_minus_16)
 104 ENTRY(NG_ret_i2_plus_g1_minus_24)
 105         sub     %g1, 24, %g1
 106         ba,pt   %xcc, __restore_asi
 107          add    %i2, %g1, %i0
 108 ENDPROC(NG_ret_i2_plus_g1_minus_24)
 109 ENTRY(NG_ret_i2_plus_g1_minus_32)
 110         sub     %g1, 32, %g1
 111         ba,pt   %xcc, __restore_asi
 112          add    %i2, %g1, %i0
 113 ENDPROC(NG_ret_i2_plus_g1_minus_32)
 114 ENTRY(NG_ret_i2_plus_g1_minus_40)
 115         sub     %g1, 40, %g1
 116         ba,pt   %xcc, __restore_asi
 117          add    %i2, %g1, %i0
 118 ENDPROC(NG_ret_i2_plus_g1_minus_40)
 119 ENTRY(NG_ret_i2_plus_g1_minus_48)
 120         sub     %g1, 48, %g1
 121         ba,pt   %xcc, __restore_asi
 122          add    %i2, %g1, %i0
 123 ENDPROC(NG_ret_i2_plus_g1_minus_48)
 124 ENTRY(NG_ret_i2_plus_g1_minus_56)
 125         sub     %g1, 56, %g1
 126         ba,pt   %xcc, __restore_asi
 127          add    %i2, %g1, %i0
 128 ENDPROC(NG_ret_i2_plus_g1_minus_56)
 129 ENTRY(NG_ret_i2_plus_i4)
 130         ba,pt   %xcc, __restore_asi
 131          add    %i2, %i4, %i0
 132 ENDPROC(NG_ret_i2_plus_i4)
 133 ENTRY(NG_ret_i2_plus_i4_minus_8)
 134         sub     %i4, 8, %i4
 135         ba,pt   %xcc, __restore_asi
 136          add    %i2, %i4, %i0
 137 ENDPROC(NG_ret_i2_plus_i4_minus_8)
 138 ENTRY(NG_ret_i2_plus_8)
 139         ba,pt   %xcc, __restore_asi
 140          add    %i2, 8, %i0
 141 ENDPROC(NG_ret_i2_plus_8)
 142 ENTRY(NG_ret_i2_plus_4)
 143         ba,pt   %xcc, __restore_asi
 144          add    %i2, 4, %i0
 145 ENDPROC(NG_ret_i2_plus_4)
 146 ENTRY(NG_ret_i2_plus_1)
 147         ba,pt   %xcc, __restore_asi
 148          add    %i2, 1, %i0
 149 ENDPROC(NG_ret_i2_plus_1)
 150 ENTRY(NG_ret_i2_plus_g1_plus_1)
 151         add     %g1, 1, %g1
 152         ba,pt   %xcc, __restore_asi
 153          add    %i2, %g1, %i0
 154 ENDPROC(NG_ret_i2_plus_g1_plus_1)
 155 ENTRY(NG_ret_i2)
 156         ba,pt   %xcc, __restore_asi
 157          mov    %i2, %i0
 158 ENDPROC(NG_ret_i2)
 159 ENTRY(NG_ret_i2_and_7_plus_i4)
 160         and     %i2, 7, %i2
 161         ba,pt   %xcc, __restore_asi
 162          add    %i2, %i4, %i0
 163 ENDPROC(NG_ret_i2_and_7_plus_i4)
 164 #endif
 165 
 166         .align          64
 167 
 168         .globl  FUNC_NAME
 169         .type   FUNC_NAME,#function
 170 FUNC_NAME:      /* %i0=dst, %i1=src, %i2=len */
 171         PREAMBLE
 172         save            %sp, -SAVE_AMOUNT, %sp
 173         srlx            %i2, 31, %g2
 174         cmp             %g2, 0
 175         tne             %xcc, 5
 176         mov             %i0, %o0
 177         cmp             %i2, 0
 178         be,pn           %XCC, 85f
 179          or             %o0, %i1, %i3
 180         cmp             %i2, 16
 181         blu,a,pn        %XCC, 80f
 182          or             %i3, %i2, %i3
 183 
 184         /* 2 blocks (128 bytes) is the minimum we can do the block
 185          * copy with.  We need to ensure that we'll iterate at least
 186          * once in the block copy loop.  At worst we'll need to align
 187          * the destination to a 64-byte boundary which can chew up
 188          * to (64 - 1) bytes from the length before we perform the
 189          * block copy loop.
 190          */
 191         cmp             %i2, (2 * 64)
 192         blu,pt          %XCC, 70f
 193          andcc          %i3, 0x7, %g0
 194 
 195         /* %o0: dst
 196          * %i1: src
 197          * %i2: len  (known to be >= 128)
 198          *
 199          * The block copy loops will use %i4/%i5,%g2/%g3 as
 200          * temporaries while copying the data.
 201          */
 202 
 203         LOAD(prefetch, %i1, #one_read)
 204         wr              %g0, STORE_ASI, %asi
 205 
 206         /* Align destination on 64-byte boundary.  */
 207         andcc           %o0, (64 - 1), %i4
 208         be,pt           %XCC, 2f
 209          sub            %i4, 64, %i4
 210         sub             %g0, %i4, %i4   ! bytes to align dst
 211         sub             %i2, %i4, %i2
 212 1:      subcc           %i4, 1, %i4
 213         EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
 214         EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
 215         add             %i1, 1, %i1
 216         bne,pt          %XCC, 1b
 217         add             %o0, 1, %o0
 218 
 219         /* If the source is on a 16-byte boundary we can do
 220          * the direct block copy loop.  If it is 8-byte aligned
 221          * we can do the 16-byte loads offset by -8 bytes and the
 222          * init stores offset by one register.
 223          *
 224          * If the source is not even 8-byte aligned, we need to do
 225          * shifting and masking (basically integer faligndata).
 226          *
 227          * The careful bit with init stores is that if we store
 228          * to any part of the cache line we have to store the whole
 229          * cacheline else we can end up with corrupt L2 cache line
 230          * contents.  Since the loop works on 64-bytes of 64-byte
 231          * aligned store data at a time, this is easy to ensure.
 232          */
 233 2:
 234         andcc           %i1, (16 - 1), %i4
 235         andn            %i2, (64 - 1), %g1      ! block copy loop iterator
 236         be,pt           %XCC, 50f
 237          sub            %i2, %g1, %i2           ! final sub-block copy bytes
 238 
 239         cmp             %i4, 8
 240         be,pt           %XCC, 10f
 241          sub            %i1, %i4, %i1
 242 
 243         /* Neither 8-byte nor 16-byte aligned, shift and mask.  */
 244         and             %i4, 0x7, GLOBAL_SPARE
 245         sll             GLOBAL_SPARE, 3, GLOBAL_SPARE
 246         mov             64, %i5
 247         EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
 248         sub             %i5, GLOBAL_SPARE, %i5
 249         mov             16, %o4
 250         mov             32, %o5
 251         mov             48, %o7
 252         mov             64, %i3
 253 
 254         bg,pn           %XCC, 9f
 255          nop
 256 
 257 #define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \
 258         sllx            WORD1, POST_SHIFT, WORD1; \
 259         srlx            WORD2, PRE_SHIFT, TMP; \
 260         sllx            WORD2, POST_SHIFT, WORD2; \
 261         or              WORD1, TMP, WORD1; \
 262         srlx            WORD3, PRE_SHIFT, TMP; \
 263         or              WORD2, TMP, WORD2;
 264 
 265 8:      EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
 266         MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 267         LOAD(prefetch, %i1 + %i3, #one_read)
 268 
 269         EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
 270         EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 271 
 272         EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
 273         MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 274 
 275         EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
 276         EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 277 
 278         EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 279         MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 280 
 281         EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
 282         EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 283 
 284         EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
 285         add             %i1, 64, %i1
 286         MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 287 
 288         EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
 289         EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 290 
 291         subcc           %g1, 64, %g1
 292         bne,pt          %XCC, 8b
 293          add            %o0, 64, %o0
 294 
 295         ba,pt           %XCC, 60f
 296          add            %i1, %i4, %i1
 297 
 298 9:      EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
 299         MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 300         LOAD(prefetch, %i1 + %i3, #one_read)
 301 
 302         EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
 303         EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 304 
 305         EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
 306         MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 307 
 308         EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
 309         EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 310 
 311         EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 312         MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 313 
 314         EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
 315         EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 316 
 317         EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
 318         add             %i1, 64, %i1
 319         MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 320 
 321         EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
 322         EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 323 
 324         subcc           %g1, 64, %g1
 325         bne,pt          %XCC, 9b
 326          add            %o0, 64, %o0
 327 
 328         ba,pt           %XCC, 60f
 329          add            %i1, %i4, %i1
 330 
 331 10:     /* Destination is 64-byte aligned, source was only 8-byte
 332          * aligned but it has been subtracted by 8 and we perform
 333          * one twin load ahead, then add 8 back into source when
 334          * we finish the loop.
 335          */
 336         EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
 337         mov     16, %o7
 338         mov     32, %g2
 339         mov     48, %g3
 340         mov     64, %o1
 341 1:      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
 342         LOAD(prefetch, %i1 + %o1, #one_read)
 343         EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)   ! initializes cache line
 344         EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 345         EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
 346         EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
 347         EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 348         EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 349         EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
 350         EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 351         EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
 352         add             %i1, 64, %i1
 353         EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
 354         EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 355         subcc           %g1, 64, %g1
 356         bne,pt          %XCC, 1b
 357          add            %o0, 64, %o0
 358 
 359         ba,pt           %XCC, 60f
 360          add            %i1, 0x8, %i1
 361 
 362 50:     /* Destination is 64-byte aligned, and source is 16-byte
 363          * aligned.
 364          */
 365         mov     16, %o7
 366         mov     32, %g2
 367         mov     48, %g3
 368         mov     64, %o1
 369 1:      EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
 370         EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
 371         LOAD(prefetch, %i1 + %o1, #one_read)
 372         EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)   ! initializes cache line
 373         EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 374         EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
 375         EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
 376         EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 377         EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 378         add     %i1, 64, %i1
 379         EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
 380         EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 381         EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
 382         EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 383         subcc   %g1, 64, %g1
 384         bne,pt  %XCC, 1b
 385          add    %o0, 64, %o0
 386         /* fall through */
 387 
 388 60:     
 389         membar          #Sync
 390 
 391         /* %i2 contains any final bytes still needed to be copied
 392          * over. If anything is left, we copy it one byte at a time.
 393          */
 394         RESTORE_ASI(%i3)
 395         brz,pt          %i2, 85f
 396          sub            %o0, %i1, %i3
 397         ba,a,pt         %XCC, 90f
 398          nop
 399 
 400         .align          64
 401 70: /* 16 < len <= 64 */
 402         bne,pn          %XCC, 75f
 403          sub            %o0, %i1, %i3
 404 
 405 72:
 406         andn            %i2, 0xf, %i4
 407         and             %i2, 0xf, %i2
 408 1:      subcc           %i4, 0x10, %i4
 409         EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
 410         add             %i1, 0x08, %i1
 411         EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
 412         sub             %i1, 0x08, %i1
 413         EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
 414         add             %i1, 0x8, %i1
 415         EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
 416         bgu,pt          %XCC, 1b
 417          add            %i1, 0x8, %i1
 418 73:     andcc           %i2, 0x8, %g0
 419         be,pt           %XCC, 1f
 420          nop
 421         sub             %i2, 0x8, %i2
 422         EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
 423         EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
 424         add             %i1, 0x8, %i1
 425 1:      andcc           %i2, 0x4, %g0
 426         be,pt           %XCC, 1f
 427          nop
 428         sub             %i2, 0x4, %i2
 429         EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
 430         EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
 431         add             %i1, 0x4, %i1
 432 1:      cmp             %i2, 0
 433         be,pt           %XCC, 85f
 434          nop
 435         ba,pt           %xcc, 90f
 436          nop
 437 
 438 75:
 439         andcc           %o0, 0x7, %g1
 440         sub             %g1, 0x8, %g1
 441         be,pn           %icc, 2f
 442          sub            %g0, %g1, %g1
 443         sub             %i2, %g1, %i2
 444 
 445 1:      subcc           %g1, 1, %g1
 446         EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
 447         EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
 448         bgu,pt          %icc, 1b
 449          add            %i1, 1, %i1
 450 
 451 2:      add             %i1, %i3, %o0
 452         andcc           %i1, 0x7, %g1
 453         bne,pt          %icc, 8f
 454          sll            %g1, 3, %g1
 455 
 456         cmp             %i2, 16
 457         bgeu,pt         %icc, 72b
 458          nop
 459         ba,a,pt         %xcc, 73b
 460 
 461 8:      mov             64, %i3
 462         andn            %i1, 0x7, %i1
 463         EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
 464         sub             %i3, %g1, %i3
 465         andn            %i2, 0x7, %i4
 466         sllx            %g2, %g1, %g2
 467 1:      add             %i1, 0x8, %i1
 468         EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
 469         subcc           %i4, 0x8, %i4
 470         srlx            %g3, %i3, %i5
 471         or              %i5, %g2, %i5
 472         EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
 473         add             %o0, 0x8, %o0
 474         bgu,pt          %icc, 1b
 475          sllx           %g3, %g1, %g2
 476 
 477         srl             %g1, 3, %g1
 478         andcc           %i2, 0x7, %i2
 479         be,pn           %icc, 85f
 480          add            %i1, %g1, %i1
 481         ba,pt           %xcc, 90f
 482          sub            %o0, %i1, %i3
 483 
 484         .align          64
 485 80: /* 0 < len <= 16 */
 486         andcc           %i3, 0x3, %g0
 487         bne,pn          %XCC, 90f
 488          sub            %o0, %i1, %i3
 489 
 490 1:
 491         subcc           %i2, 4, %i2
 492         EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
 493         EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
 494         bgu,pt          %XCC, 1b
 495          add            %i1, 4, %i1
 496 
 497 85:     ret
 498          restore        EX_RETVAL(%i0), %g0, %o0
 499 
 500         .align          32
 501 90:
 502         subcc           %i2, 1, %i2
 503         EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
 504         EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
 505         bgu,pt          %XCC, 90b
 506          add            %i1, 1, %i1
 507         ret
 508          restore        EX_RETVAL(%i0), %g0, %o0
 509 
 510         .size           FUNC_NAME, .-FUNC_NAME

/* [<][>][^][v][top][bottom][index][help] */