/* root/arch/csky/abiv1/memcpy.S */

/* [<][>][^][v][top][bottom][index][help] */
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
   3 
   4 #include <linux/linkage.h>
   5 
   6 .macro  GET_FRONT_BITS rx y
   7 #ifdef  __cskyLE__
   8         lsri    \rx, \y
   9 #else
  10         lsli    \rx, \y
  11 #endif
  12 .endm
  13 
  14 .macro  GET_AFTER_BITS rx y
  15 #ifdef  __cskyLE__
  16         lsli    \rx, \y
  17 #else
  18         lsri    \rx, \y
  19 #endif
  20 .endm
  21 
  22 /* void *memcpy(void *dest, const void *src, size_t n); */
  23 ENTRY(memcpy)
  24         mov     r7, r2
  25         cmplti  r4, 4
  26         bt      .L_copy_by_byte
  27         mov     r6, r2
  28         andi    r6, 3
  29         cmpnei  r6, 0
  30         jbt     .L_dest_not_aligned
  31         mov     r6, r3
  32         andi    r6, 3
  33         cmpnei  r6, 0
  34         jbt     .L_dest_aligned_but_src_not_aligned
  35 .L0:
  36         cmplti  r4, 16
  37         jbt     .L_aligned_and_len_less_16bytes
  38         subi    sp, 8
  39         stw     r8, (sp, 0)
  40 .L_aligned_and_len_larger_16bytes:
  41         ldw     r1, (r3, 0)
  42         ldw     r5, (r3, 4)
  43         ldw     r8, (r3, 8)
  44         stw     r1, (r7, 0)
  45         ldw     r1, (r3, 12)
  46         stw     r5, (r7, 4)
  47         stw     r8, (r7, 8)
  48         stw     r1, (r7, 12)
  49         subi    r4, 16
  50         addi    r3, 16
  51         addi    r7, 16
  52         cmplti  r4, 16
  53         jbf     .L_aligned_and_len_larger_16bytes
  54         ldw     r8, (sp, 0)
  55         addi    sp, 8
  56         cmpnei  r4, 0
  57         jbf     .L_return
  58 
  59 .L_aligned_and_len_less_16bytes:
  60         cmplti  r4, 4
  61         bt      .L_copy_by_byte
  62 .L1:
  63         ldw     r1, (r3, 0)
  64         stw     r1, (r7, 0)
  65         subi    r4, 4
  66         addi    r3, 4
  67         addi    r7, 4
  68         cmplti  r4, 4
  69         jbf     .L1
  70         br      .L_copy_by_byte
  71 
  72 .L_return:
  73         rts
  74 
  75 .L_copy_by_byte:                      /* len less than 4 bytes */
  76         cmpnei  r4, 0
  77         jbf     .L_return
  78 .L4:
  79         ldb     r1, (r3, 0)
  80         stb     r1, (r7, 0)
  81         addi    r3, 1
  82         addi    r7, 1
  83         decne   r4
  84         jbt     .L4
  85         rts
  86 
  87 /*
  88  * If dest is not aligned, just copying some bytes makes the dest align.
  89  * Afther that, we judge whether the src is aligned.
  90  */
  91 .L_dest_not_aligned:
  92         mov     r5, r3
  93         rsub    r5, r5, r7
  94         abs     r5, r5
  95         cmplt   r5, r4
  96         bt      .L_copy_by_byte
  97         mov     r5, r7
  98         sub     r5, r3
  99         cmphs   r5, r4
 100         bf      .L_copy_by_byte
 101         mov     r5, r6
 102 .L5:
 103         ldb     r1, (r3, 0)              /* makes the dest align. */
 104         stb     r1, (r7, 0)
 105         addi    r5, 1
 106         subi    r4, 1
 107         addi    r3, 1
 108         addi    r7, 1
 109         cmpnei  r5, 4
 110         jbt     .L5
 111         cmplti  r4, 4
 112         jbt     .L_copy_by_byte
 113         mov     r6, r3                   /* judge whether the src is aligned. */
 114         andi    r6, 3
 115         cmpnei  r6, 0
 116         jbf     .L0
 117 
 118 /* Judge the number of misaligned, 1, 2, 3? */
 119 .L_dest_aligned_but_src_not_aligned:
 120         mov     r5, r3
 121         rsub    r5, r5, r7
 122         abs     r5, r5
 123         cmplt   r5, r4
 124         bt      .L_copy_by_byte
 125         bclri   r3, 0
 126         bclri   r3, 1
 127         ldw     r1, (r3, 0)
 128         addi    r3, 4
 129         cmpnei  r6, 2
 130         bf      .L_dest_aligned_but_src_not_aligned_2bytes
 131         cmpnei  r6, 3
 132         bf      .L_dest_aligned_but_src_not_aligned_3bytes
 133 
 134 .L_dest_aligned_but_src_not_aligned_1byte:
 135         mov     r5, r7
 136         sub     r5, r3
 137         cmphs   r5, r4
 138         bf      .L_copy_by_byte
 139         cmplti  r4, 16
 140         bf      .L11
 141 .L10:                                     /* If the len is less than 16 bytes */
 142         GET_FRONT_BITS r1 8
 143         mov     r5, r1
 144         ldw     r6, (r3, 0)
 145         mov     r1, r6
 146         GET_AFTER_BITS r6 24
 147         or      r5, r6
 148         stw     r5, (r7, 0)
 149         subi    r4, 4
 150         addi    r3, 4
 151         addi    r7, 4
 152         cmplti  r4, 4
 153         bf      .L10
 154         subi    r3, 3
 155         br      .L_copy_by_byte
 156 .L11:
 157         subi    sp, 16
 158         stw     r8, (sp, 0)
 159         stw     r9, (sp, 4)
 160         stw     r10, (sp, 8)
 161         stw     r11, (sp, 12)
 162 .L12:
 163         ldw     r5, (r3, 0)
 164         ldw     r11, (r3, 4)
 165         ldw     r8, (r3, 8)
 166         ldw     r9, (r3, 12)
 167 
 168         GET_FRONT_BITS r1 8               /* little or big endian? */
 169         mov     r10, r5
 170         GET_AFTER_BITS r5 24
 171         or      r5, r1
 172 
 173         GET_FRONT_BITS r10 8
 174         mov     r1, r11
 175         GET_AFTER_BITS r11 24
 176         or      r11, r10
 177 
 178         GET_FRONT_BITS r1 8
 179         mov     r10, r8
 180         GET_AFTER_BITS r8 24
 181         or      r8, r1
 182 
 183         GET_FRONT_BITS r10 8
 184         mov     r1, r9
 185         GET_AFTER_BITS r9 24
 186         or      r9, r10
 187 
 188         stw     r5, (r7, 0)
 189         stw     r11, (r7, 4)
 190         stw     r8, (r7, 8)
 191         stw     r9, (r7, 12)
 192         subi    r4, 16
 193         addi    r3, 16
 194         addi    r7, 16
 195         cmplti  r4, 16
 196         jbf     .L12
 197         ldw     r8, (sp, 0)
 198         ldw     r9, (sp, 4)
 199         ldw     r10, (sp, 8)
 200         ldw     r11, (sp, 12)
 201         addi    sp , 16
 202         cmplti  r4, 4
 203         bf      .L10
 204         subi    r3, 3
 205         br      .L_copy_by_byte
 206 
 207 .L_dest_aligned_but_src_not_aligned_2bytes:
 208         cmplti  r4, 16
 209         bf      .L21
 210 .L20:
 211         GET_FRONT_BITS r1 16
 212         mov     r5, r1
 213         ldw     r6, (r3, 0)
 214         mov     r1, r6
 215         GET_AFTER_BITS r6 16
 216         or      r5, r6
 217         stw     r5, (r7, 0)
 218         subi    r4, 4
 219         addi    r3, 4
 220         addi    r7, 4
 221         cmplti  r4, 4
 222         bf      .L20
 223         subi    r3, 2
 224         br      .L_copy_by_byte
 225         rts
 226 
 227 .L21:   /* n > 16 */
 228         subi    sp, 16
 229         stw     r8, (sp, 0)
 230         stw     r9, (sp, 4)
 231         stw     r10, (sp, 8)
 232         stw     r11, (sp, 12)
 233 
 234 .L22:
 235         ldw     r5, (r3, 0)
 236         ldw     r11, (r3, 4)
 237         ldw     r8, (r3, 8)
 238         ldw     r9, (r3, 12)
 239 
 240         GET_FRONT_BITS r1 16
 241         mov     r10, r5
 242         GET_AFTER_BITS r5 16
 243         or      r5, r1
 244 
 245         GET_FRONT_BITS r10 16
 246         mov     r1, r11
 247         GET_AFTER_BITS r11 16
 248         or      r11, r10
 249 
 250         GET_FRONT_BITS r1 16
 251         mov     r10, r8
 252         GET_AFTER_BITS r8 16
 253         or      r8, r1
 254 
 255         GET_FRONT_BITS r10 16
 256         mov     r1, r9
 257         GET_AFTER_BITS r9 16
 258         or      r9, r10
 259 
 260         stw     r5, (r7, 0)
 261         stw     r11, (r7, 4)
 262         stw     r8, (r7, 8)
 263         stw     r9, (r7, 12)
 264         subi    r4, 16
 265         addi    r3, 16
 266         addi    r7, 16
 267         cmplti  r4, 16
 268         jbf     .L22
 269         ldw     r8, (sp, 0)
 270         ldw     r9, (sp, 4)
 271         ldw     r10, (sp, 8)
 272         ldw     r11, (sp, 12)
 273         addi    sp, 16
 274         cmplti  r4, 4
 275         bf      .L20
 276         subi    r3, 2
 277         br      .L_copy_by_byte
 278 
 279 
 280 .L_dest_aligned_but_src_not_aligned_3bytes:
 281         cmplti  r4, 16
 282         bf      .L31
 283 .L30:
 284         GET_FRONT_BITS r1 24
 285         mov     r5, r1
 286         ldw     r6, (r3, 0)
 287         mov     r1, r6
 288         GET_AFTER_BITS r6 8
 289         or      r5, r6
 290         stw     r5, (r7, 0)
 291         subi    r4, 4
 292         addi    r3, 4
 293         addi    r7, 4
 294         cmplti  r4, 4
 295         bf      .L30
 296         subi    r3, 1
 297         br      .L_copy_by_byte
 298 .L31:
 299         subi    sp, 16
 300         stw     r8, (sp, 0)
 301         stw     r9, (sp, 4)
 302         stw     r10, (sp, 8)
 303         stw     r11, (sp, 12)
 304 .L32:
 305         ldw     r5, (r3, 0)
 306         ldw     r11, (r3, 4)
 307         ldw     r8, (r3, 8)
 308         ldw     r9, (r3, 12)
 309 
 310         GET_FRONT_BITS r1 24
 311         mov     r10, r5
 312         GET_AFTER_BITS r5 8
 313         or      r5, r1
 314 
 315         GET_FRONT_BITS r10 24
 316         mov     r1, r11
 317         GET_AFTER_BITS r11 8
 318         or      r11, r10
 319 
 320         GET_FRONT_BITS r1 24
 321         mov     r10, r8
 322         GET_AFTER_BITS r8 8
 323         or      r8, r1
 324 
 325         GET_FRONT_BITS r10 24
 326         mov     r1, r9
 327         GET_AFTER_BITS r9 8
 328         or      r9, r10
 329 
 330         stw     r5, (r7, 0)
 331         stw     r11, (r7, 4)
 332         stw     r8, (r7, 8)
 333         stw     r9, (r7, 12)
 334         subi    r4, 16
 335         addi    r3, 16
 336         addi    r7, 16
 337         cmplti  r4, 16
 338         jbf     .L32
 339         ldw     r8, (sp, 0)
 340         ldw     r9, (sp, 4)
 341         ldw     r10, (sp, 8)
 342         ldw     r11, (sp, 12)
 343         addi    sp, 16
 344         cmplti  r4, 4
 345         bf      .L30
 346         subi    r3, 1
 347         br      .L_copy_by_byte

/* [<][>][^][v][top][bottom][index][help] */