root/arch/arc/lib/memcpy-archs.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
   4  */
   5 
   6 #include <linux/linkage.h>
   7 
   8 #ifdef __LITTLE_ENDIAN__
     /*
      * Little-endian flavour of the helpers used by the unaligned-source
      * paths of memcpy below. In every helper RX is the destination
      * register, RY the source register and IMM a bit count.
      *
      *   SHIFT_1    RX = RY << IMM
      *   SHIFT_2    RX = RY >> IMM   (logical)
      *   MERGE_1    RX = RY << IMM
      *   MERGE_2    expands to nothing
      *   EXTRACT_1  RX = RY & 0xFFFF (IMM is ignored)
      *   EXTRACT_2  RX = RY >> IMM   (logical)
      */
   9 # define SHIFT_1(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
  10 # define SHIFT_2(RX,RY,IMM)     lsr     RX, RY, IMM     ; >>
  11 # define MERGE_1(RX,RY,IMM)     asl     RX, RY, IMM
  12 # define MERGE_2(RX,RY,IMM)
  13 # define EXTRACT_1(RX,RY,IMM)   and     RX, RY, 0xFFFF
  14 # define EXTRACT_2(RX,RY,IMM)   lsr     RX, RY, IMM
  15 #else
     /*
      * Big-endian flavour: the shift directions of SHIFT_1 and SHIFT_2 are
      * swapped with respect to the little-endian definitions, and MERGE_2
      * is a real shift.
      *
      *   SHIFT_1    RX = RY >> IMM   (logical)
      *   SHIFT_2    RX = RY << IMM
      *   MERGE_1    RX = RY << IMM
      *   MERGE_2    RX = RY << IMM
      *   EXTRACT_1  RX = RY >> IMM   (logical)
      *   EXTRACT_2  RX = RY >> 8     (logical, IMM is ignored)
      */
  16 # define SHIFT_1(RX,RY,IMM)     lsr     RX, RY, IMM     ; >>
  17 # define SHIFT_2(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
  18 # define MERGE_1(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
  19 # define MERGE_2(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
  20 # define EXTRACT_1(RX,RY,IMM)   lsr     RX, RY, IMM
  21 # define EXTRACT_2(RX,RY,IMM)   lsr     RX, RY, 0x08
  22 #endif
  23 
  24 #ifdef CONFIG_ARC_HAS_LL64
     /*
      * LOADX/STOREX move one unit of the aligned bulk loop in memcpy and
      * post-increment the pointer register RX by the unit size. That loop
      * issues four LOADX and four STOREX per iteration, so:
      *
      *   ZOLSHFT = log2(bytes per iteration), used to turn the byte count
      *             into an iteration count
      *   ZOLAND  = (bytes per iteration) - 1, mask giving the bytes left
      *             over for the byte-wise tail loop
      *
      * With LL64 each unit is 8 bytes (ldd.ab / std.ab), i.e. 32 bytes per
      * iteration.
      * NOTE(review): ldd/std operate on a register pair starting at
      * DST/SRC per the ARCv2 ISA; the callers only pass even registers.
      */
  25 # define LOADX(DST,RX)          ldd.ab  DST, [RX, 8]
  26 # define STOREX(SRC,RX)         std.ab  SRC, [RX, 8]
  27 # define ZOLSHFT                5
  28 # define ZOLAND                 0x1F
  29 #else
     /* Without LL64 each unit is 4 bytes, i.e. 16 bytes per iteration. */
  30 # define LOADX(DST,RX)          ld.ab   DST, [RX, 4]
  31 # define STOREX(SRC,RX)         st.ab   SRC, [RX, 4]
  32 # define ZOLSHFT                4
  33 # define ZOLAND                 0xF
  34 #endif
  35 
  36 ENTRY_CFI(memcpy)
     ;;; memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
     ;;;
     ;;; In:       r0 = destination, r1 = source, r2 = byte count
     ;;; Out:      r0 is never written, so it still holds the destination
     ;;; Modifies: r1 (running source), r2 (bytes left), r3 (running
     ;;;           destination), r4-r10, lp_count and the flags. The 64-bit
     ;;;           load/store variant also touches the odd partner of each
     ;;;           even data register (up to r11).
     ;;;
     ;;; Strategy:
     ;;;   1. count == 0: return at once.
     ;;;   2. count <= 8: plain byte loop.
     ;;;   3. otherwise copy single bytes until the destination is 32-bit
     ;;;      aligned, then dispatch on (source & 3):
     ;;;        0   -> word/double-word bulk loop (CASE 0)
     ;;;        1-3 -> read aligned words from the source and rebuild the
     ;;;               destination words with shifts and ORs, keeping the
     ;;;               bytes that were read but not yet stored in r5
     ;;;               (CASE 1, 2 and 3).
     ;;;
     ;;; All loops are zero-overhead loops: lpnz runs the body lp_count
     ;;; times and skips it when the Z flag is set. An instruction that
     ;;; follows a branch or jump with the .d suffix is its delay slot and
     ;;; executes whether or not the branch is taken.
  37         mov.f   0, r2                   ; result discarded, only sets the flags from the count
  38 ;;; if size is zero
  39         jz.d    [blink]
  40         mov     r3, r0          ; delay slot: r3 = running dst, r0 stays intact as return value
  41 
  42 ;;; if size <= 8
  43         cmp     r2, 8
  44         bls.d   @.Lsmallchunk
  45         mov.f   lp_count, r2    ; delay slot: loop count and NZ flag for the byte loop at .Lsmallchunk
  46 
     ;;; Align the destination: copy (4 - (dst & 3)) single bytes.
     ;;; When dst is already aligned and.f sets Z and lpnz skips the loop.
  47         and.f   r4, r0, 0x03
  48         rsub    lp_count, r4, 4 ; lp_count = 4 - r4; no .f, so the flags of and.f survive
  49         lpnz    @.Laligndestination
  50         ;; LOOP BEGIN
  51         ldb.ab  r5, [r1,1]
  52         sub     r2, r2, 1
  53         stb.ab  r5, [r3,1]
  54 .Laligndestination:
  55 
  56 ;;; Check the alignment of the source
  57         and.f   r4, r1, 0x03
  58         bnz.d   @.Lsourceunaligned
     ;;; NOTE: the lsr.f below is the delay slot of the bnz.d above, so it
     ;;; also runs when the branch is taken. The unaligned paths set the
     ;;; flags and lp_count again before they use them.
  59 
  60 ;;; CASE 0: Both source and destination are 32bit aligned
  61 ;;; Convert len to Dwords, unfold x4
  62         lsr.f   lp_count, r2, ZOLSHFT
  63         lpnz    @.Lcopy32_64bytes
  64         ;; LOOP START
             ;; four loads followed by four stores per iteration
  65         LOADX (r6, r1)
  66         LOADX (r8, r1)
  67         LOADX (r10, r1)
  68         LOADX (r4, r1)
  69         STOREX (r6, r3)
  70         STOREX (r8, r3)
  71         STOREX (r10, r3)
  72         STOREX (r4, r3)
  73 .Lcopy32_64bytes:
  74 
  75         and.f   lp_count, r2, ZOLAND ;tail: at most 31 bytes (15 with the 32-bit load/store variant)
  76 .Lsmallchunk:
     ;;; Byte-wise copy of lp_count bytes. Reached by fall-through (flags
     ;;; from the and.f above) or from the size <= 8 check (flags from the
     ;;; mov.f in its delay slot).
  77         lpnz    @.Lcopyremainingbytes
  78         ;; LOOP START
  79         ldb.ab  r5, [r1,1]
  80         stb.ab  r5, [r3,1]
  81 .Lcopyremainingbytes:
  82 
  83         j       [blink]
  84 ;;; END CASE 0
  85 
  86 .Lsourceunaligned:
     ;;; r4 = source & 3 and is 1, 2 or 3 here. One cmp drives a three-way
     ;;; dispatch: equal -> off by 2, higher -> off by 3, else off by 1.
     ;;; Neither delay-slot instruction has the .f suffix, so the flags of
     ;;; the cmp are still valid for the bhi.d.
  87         cmp     r4, 2
  88         beq.d   @.LunalignedOffby2
  89         sub     r2, r2, 1       ; delay slot: runs on all three paths
  90 
  91         bhi.d   @.LunalignedOffby3
  92         ldb.ab  r5, [r1, 1]     ; delay slot: first source byte for the off by 1 and off by 3 paths
  93 
  94 ;;; CASE 1: The source is unaligned, off by 1
  95         ;; Hence I need to read 1 byte for a 16bit alignment
  96         ;; and 2bytes to reach 32bit alignment
  97         ldh.ab  r6, [r1, 2]
  98         sub     r2, r2, 2       ; r2 now excludes the 3 bytes held in r5/r6
  99         ;; Convert to words, unfold x2
 100         lsr.f   lp_count, r2, 3
             ;; Pack the byte in r5 and the halfword in r6 into r5: these
             ;; are the 3 pending bytes (read but not yet stored).
 101         MERGE_1 (r6, r6, 8)
 102         MERGE_2 (r5, r5, 24)
 103         or      r5, r5, r6
 104 
 105         ;; Both src and dst are aligned
 106         lpnz    @.Lcopy8bytes_1
 107         ;; LOOP START
             ;; Per source word: output word = 3 pending bytes + first byte
             ;; of the new word; the other 3 bytes become the new pending
             ;; bytes in r5. Two words (8 bytes) per iteration.
 108         ld.ab   r6, [r1, 4]
 109         ld.ab   r8, [r1,4]
 110 
 111         SHIFT_1 (r7, r6, 24)
 112         or      r7, r7, r5
 113         SHIFT_2 (r5, r6, 8)
 114 
 115         SHIFT_1 (r9, r8, 24)
 116         or      r9, r9, r5
 117         SHIFT_2 (r5, r8, 8)
 118 
 119         st.ab   r7, [r3, 4]
 120         st.ab   r9, [r3, 4]
 121 .Lcopy8bytes_1:
 122 
             ;; Flush the 3 pending bytes from r5 as a halfword plus a byte.
 123         ;; Write back the remaining 16bits
 124         EXTRACT_1 (r6, r5, 16)
 125         sth.ab  r6, [r3, 2]
 126         ;; Write back the remaining 8bits
 127         EXTRACT_2 (r5, r5, 16)
 128         stb.ab  r5, [r3, 1]
 129 
 130         and.f   lp_count, r2, 0x07 ;tail: the 0 to 7 bytes not covered by the 8-byte loop
 131         lpnz    @.Lcopybytewise_1
 132         ;; LOOP START
 133         ldb.ab  r6, [r1,1]
 134         stb.ab  r6, [r3,1]
 135 .Lcopybytewise_1:
 136         j       [blink]
 137 
 138 .LunalignedOffby2:
 139 ;;; CASE 2: The source is unaligned, off by 2
 140         ldh.ab  r5, [r1, 2]     ; 2 pending bytes; the source is 32-bit aligned afterwards
 141         sub     r2, r2, 1       ; together with the delay-slot sub, r2 excludes both pending bytes
 142 
 143         ;; Both src and dst are aligned
 144         ;; Convert to words, unfold x2
 145         lsr.f   lp_count, r2, 3
             ;; Big endian only: when the loop is going to run (NZ from the
             ;; lsr.f above) move the pending halfword to the upper half,
             ;; where the loop expects it.
 146 #ifdef __BIG_ENDIAN__
 147         asl.nz  r5, r5, 16
 148 #endif
 149         lpnz    @.Lcopy8bytes_2
 150         ;; LOOP START
             ;; Per source word: output word = 2 pending bytes + first 2
             ;; bytes of the new word; its last 2 bytes become pending.
 151         ld.ab   r6, [r1, 4]
 152         ld.ab   r8, [r1,4]
 153 
 154         SHIFT_1 (r7, r6, 16)
 155         or      r7, r7, r5
 156         SHIFT_2 (r5, r6, 16)
 157 
 158         SHIFT_1 (r9, r8, 16)
 159         or      r9, r9, r5
 160         SHIFT_2 (r5, r8, 16)
 161 
 162         st.ab   r7, [r3, 4]
 163         st.ab   r9, [r3, 4]
 164 .Lcopy8bytes_2:
 165 
             ;; Nothing in the loop body updates the flags, so NZ here still
             ;; means that the loop ran: on big endian bring the pending
             ;; halfword back down before storing it.
 166 #ifdef __BIG_ENDIAN__
 167         lsr.nz  r5, r5, 16
 168 #endif
 169         sth.ab  r5, [r3, 2]     ; flush the 2 pending bytes
 170 
 171         and.f   lp_count, r2, 0x07 ;tail: the 0 to 7 bytes not covered by the 8-byte loop
 172         lpnz    @.Lcopybytewise_2
 173         ;; LOOP START
 174         ldb.ab  r6, [r1,1]
 175         stb.ab  r6, [r3,1]
 176 .Lcopybytewise_2:
 177         j       [blink]
 178 
 179 .LunalignedOffby3:
 180 ;;; CASE 3: The source is unaligned, off by 3
 181 ;;; Hence, I need to read 1byte for achieve the 32bit alignment
     ;;; That byte is already in r5 (loaded in the delay slot of the bhi.d)
     ;;; and r2 has already been reduced by one for it.
 182 
 183         ;; Both src and dst are aligned
 184         ;; Convert to words, unfold x2
 185         lsr.f   lp_count, r2, 3
             ;; Big endian only: when the loop is going to run move the
             ;; pending byte to the top byte, where the loop expects it.
 186 #ifdef __BIG_ENDIAN__
 187         asl.ne  r5, r5, 24
 188 #endif
 189         lpnz    @.Lcopy8bytes_3
 190         ;; LOOP START
             ;; Per source word: output word = 1 pending byte + first 3
             ;; bytes of the new word; its last byte becomes pending.
 191         ld.ab   r6, [r1, 4]
 192         ld.ab   r8, [r1,4]
 193 
 194         SHIFT_1 (r7, r6, 8)
 195         or      r7, r7, r5
 196         SHIFT_2 (r5, r6, 24)
 197 
 198         SHIFT_1 (r9, r8, 8)
 199         or      r9, r9, r5
 200         SHIFT_2 (r5, r8, 24)
 201 
 202         st.ab   r7, [r3, 4]
 203         st.ab   r9, [r3, 4]
 204 .Lcopy8bytes_3:
 205 
             ;; Flags are still those of the lsr.f above: on big endian bring
             ;; the pending byte back down if the loop ran.
 206 #ifdef __BIG_ENDIAN__
 207         lsr.nz  r5, r5, 24
 208 #endif
 209         stb.ab  r5, [r3, 1]     ; flush the pending byte
 210 
 211         and.f   lp_count, r2, 0x07 ;tail: the 0 to 7 bytes not covered by the 8-byte loop
 212         lpnz    @.Lcopybytewise_3
 213         ;; LOOP START
 214         ldb.ab  r6, [r1,1]
 215         stb.ab  r6, [r3,1]
 216 .Lcopybytewise_3:
 217         j       [blink]
 218 
 219 END_CFI(memcpy)

/* [<][>][^][v][top][bottom][index][help] */