root/arch/sh/lib/memset-sh4.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * "memset" implementation for SH4
   4  *
   5  * Copyright (C) 1999  Niibe Yutaka
   6  * Copyright (c) 2009  STMicroelectronics Limited
   7  * Author: Stuart Menefy <stuart.menefy:st.com>
   8  */
   9 
  10 /*
  11  *            void *memset(void *s, int c, size_t n);
      *
      * Fills n bytes starting at s with the low byte of c and returns s.
      *
      * Register use, as seen in the code below:
      *   r4  in: s.  Immediately advanced to s + n and then walked back
      *       down, because every store uses pre-decrement addressing (@-r4)
      *       or writes a block just below the previous one.
      *   r5  in: c.  On the longword paths it is widened so that the fill
      *       byte is repeated in all four byte lanes.
      *   r6  in: n.  Afterwards the number of bytes still to be written.
      *   r0  scratch and loop counter; carries the return value on exit.
      *   r1, r2, r3  scratch for the 32-byte alignment and block loops.
      *
      * Strategy (the buffer is filled from its end towards its start):
      *   1. Fewer than 12 bytes: store single bytes only (label 40).
      *   2. Otherwise store 0-3 bytes until r4 is longword aligned.
      *   3. If at least 64 bytes remain: store longwords until r4 is
      *      32-byte aligned, then write whole 32-byte blocks, the first
      *      longword of each block with movca.l.
      *   4. Store what is left 8 bytes per iteration (label 22), then the
      *      final 0-7 bytes one at a time (label 40).
      *
      * The loops use "dt; bf/s" with a store or counter update in the branch
      * delay slot.  The delay-slot instruction executes on every pass,
      * including the last one where the branch is not taken, and dt
      * decrements before testing, so each such loop is only entered with a
      * non-zero count.
  12  */
  13 
  14 #include <linux/linkage.h>
  15 
  16 ENTRY(memset)
  17         mov     #12,r0          ! threshold below which only byte stores are used
  18         add     r6,r4           ! r4 = s + n: one past the end, filling runs backwards
  19         cmp/gt  r6,r0           ! T = (12 > n); signed, so n with bit 31 set also sets T
  20         bt/s    40f             ! if it's too small, set a byte at once
  21          mov    r4,r0           ! (delay slot) r0 = end address
  22         and     #3,r0           ! r0 = (s + n) & 3: bytes to store until r4 is longword aligned
  23         cmp/eq  #0,r0
  24         bt/s    2f              ! It's aligned
  25          sub    r0,r6           ! (delay slot) take those bytes off the remaining length
  26 1:                              ! store r0 (1..3) single bytes
  27         dt      r0
  28         bf/s    1b
  29          mov.b  r5,@-r4         ! (delay slot) one byte per pass
  30 2:                              ! make VVVV
  31         extu.b  r5,r5           ! keep only the low byte of c
  32         swap.b  r5,r0           !   V0
  33         or      r0,r5           !   VV
  34         swap.w  r5,r0           ! VV00
  35         or      r0,r5           ! VVVV
  36 
  37         ! Check if enough bytes remain to be set to be worth the big loop
  38         mov     #0x40, r0       ! (MT)
  39         cmp/gt  r6,r0           ! (MT)  64 > len => slow loop
  40 
  41         bt/s    22f             ! fewer than 64 left: skip the 32-byte block loop
  42          mov    r6,r0           ! (delay slot) r0 = remaining length, consumed at 22
  43 
  44         ! align the dst to the cache block size if necessary
  45         mov     r4, r3
  46         mov     #~(0x1f), r1    ! mask that clears the low five address bits
  47 
  48         and     r3, r1          ! r1 = r4 rounded down to a 32-byte boundary
  49         cmp/eq  r3, r1
  50 
  51         bt/s    11f             ! dst is already aligned
  52          sub    r1, r3          ! r3-r1 -> r3 (bytes above that boundary, 4..28 here)
  53         shlr2   r3              ! number of loops (one longword each)
  54 
  55 10:     mov.l   r5,@-r4         ! store longwords down to the 32-byte boundary
  56         dt      r3
  57         bf/s    10b
  58          add    #-4, r6         ! (delay slot) four fewer bytes remain
  59 
  60 11:     ! dst is 32byte aligned
  61         mov     r6,r2
  62         mov     #-5,r0          ! negative shld count: logical shift right by 5
  63         shld    r0,r2           ! number of loops (32 bytes each; at least 1, since r6 >= 36)
  64 
  65         add     #-32, r4        ! point r4 at the base of the first block to write
  66         mov     r5, r0          ! copy the pattern to r0, the source operand of movca.l below
  67 12:                             ! write one 32-byte block at r4 .. r4+31
  68         movca.l r0,@r4          ! first longword of the block (store with cache block allocation)
  69         mov.l   r5,@(4, r4)
  70         mov.l   r5,@(8, r4)
  71         mov.l   r5,@(12,r4)
  72         mov.l   r5,@(16,r4)
  73         mov.l   r5,@(20,r4)
  74         add     #-0x20, r6      ! 32 fewer bytes remain
  75         mov.l   r5,@(24,r4)
  76         dt      r2
  77         mov.l   r5,@(28,r4)
  78         bf/s    12b
  79          add    #-32, r4        ! (delay slot) step down to the next block
  80 
  81         add     #32, r4         ! undo the delay-slot step taken on the final pass
  82         mov     #8, r0
  83         cmp/ge  r0, r6          ! T = (r6 >= 8)
  84         bf      40f             ! fewer than 8 left: finish with byte stores
  85 
  86         mov     r6,r0
  87 22:                             ! r0 = remaining length, at least 8 on both entry paths
  88         shlr2   r0
  89         shlr    r0              ! r0 = r6 >> 3
  90 3:
  91         dt      r0
  92         mov.l   r5,@-r4         ! set 8-byte at once
  93         bf/s    3b
  94          mov.l  r5,@-r4         ! (delay slot) second longword of the pair
  95         !
  96         mov     #7,r0
  97         and     r0,r6           ! r6 = leftover byte count (0..7)
  98 
  99         ! fill bytes (length may be zero)
 100 40:     tst     r6,r6           ! T = (r6 == 0)
 101         bt      5f              ! nothing left to store
 102 4:
 103         dt      r6
 104         bf/s    4b
 105          mov.b  r5,@-r4         ! (delay slot) one byte per pass
 106 5:
 107         rts
 108          mov    r4,r0           ! (delay slot) return value: r4, now back at s

/* [<][>][^][v][top][bottom][index][help] */