root/arch/arc/lib/memset-archs.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
   4  */
   5 
   6 #include <linux/linkage.h>
   7 #include <asm/cache.h>
   8 
   9 /*
  10  * The memset implementation below is optimized to use the prefetchw and prealloc
  11  * instructions on CPUs with a 64B L1 data cache line (L1_CACHE_SHIFT == 6).
  12  * If you want to implement an optimized memset for other possible L1 data cache
  13  * line lengths (32B and 128B), you should rewrite the code, carefully checking
  14  * that we don't call any prefetchw/prealloc instruction for L1 cache lines which
  15  * don't belong to the memset area.
  16  */
  17 
  18 #if L1_CACHE_SHIFT == 6
  19 /* 64-byte L1 data cache line: the two helper macros emit real cache-hint instructions. */
  20 .macro PREALLOC_INSTR   reg, off
  21         prealloc        [\reg, \off]    ; hint for the line at reg + off ("alloc w/o fetching" at the call site)
  22 .endm
  23 
  24 .macro PREFETCHW_INSTR  reg, off
  25         prefetchw       [\reg, \off]    ; hint for the line at reg + off (used on the first write location)
  26 .endm
  27 
  28 #else
  29 /* Any other line size: both helpers expand to nothing, so memset issues no cache hints. */
  30 .macro PREALLOC_INSTR   reg, off
  31 .endm
  32 
  33 .macro PREFETCHW_INSTR  reg, off
  34 .endm
  35 
  36 #endif
  37 
  38 ENTRY_CFI(memset)       /* in: r0 = dest, r1 = fill byte, r2 = length; out: r0 = dest (never written here) */
  39         PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
  40         mov.f   0, r2           ; flags only: Z is set when the length is zero
  41 ;;; if size is zero, return straight away
  42         jz.d    [blink]
  43         mov     r3, r0          ; delay slot: r3 becomes the store cursor, don't clobber ret val (r0)
  44 
  45 ;;; if length <= 8 (brls = lower or same), go byte by byte
  46         brls.d.nt       r2, 8, .Lsmallchunk
  47         mov.f   lp_count,r2     ; delay slot: byte count for .Lsmallchunk; r2 != 0 here, so Z is clear
  48 
  49         and.f   r4, r0, 0x03    ; r4 = dest & 3; Z set when dest is already 4-byte aligned
  50         rsub    lp_count, r4, 4 ; lp_count = 4 - r4 leading bytes (flags untouched)
  51         lpnz    @.Laligndestination     ; loop up to the label; skipped entirely when Z is set
  52         ;; LOOP BEGIN
  53         stb.ab  r1, [r3,1]      ; store one byte, then r3 += 1
  54         sub     r2, r2, 1       ; one byte fewer left
  55 .Laligndestination:
  56 
  57 ;;; Destination is aligned: replicate the fill byte into r4 and r5
  58         and     r1, r1, 0xFF    ; keep only the low byte of the fill value
  59         asl     r4, r1, 8
  60         or      r4, r4, r1      ; r4 = fill byte in the two low bytes
  61         asl     r5, r4, 16
  62         or      r5, r5, r4      ; r5 = fill byte in all four bytes
  63         mov     r4, r5          ; r4 = r5 = 32-bit pattern (std below writes r4 together with r5)
  64 
  65         sub3    lp_count, r2, 8 ; lp_count = r2 - (8 << 3) = r2 - 64
  66         cmp     r2, 64          ; the three predicated instructions below all use these flags
  67         bmsk.hi r2, r2, 5       ; r2 > 64: r2 = r2 & 63
  68         mov.ls  lp_count, 0     ; r2 <= 64: no 64-byte rounds at all
  69         add3.hi r2, r2, 8       ; r2 > 64: r2 += 64, so 64..127 bytes stay for the tail loops
  70         ;; NOTE(review): the held-back 64 bytes presumably keep "prealloc [r3, 64]" inside the area - confirm
  71 ;;; lp_count = number of 64-byte rounds (shift right by 6); each round is fully unrolled
  72         lsr.f   lp_count, lp_count, 6   ; Z set when there is no round to run
  73 
  74         lpnz    @.Lset64bytes
  75         ;; LOOP START
  76         PREALLOC_INSTR  r3, 64  ; alloc next line w/o fetching
  77         ;; one round = 64 bytes: 8 x std (8 bytes each) with LL64, otherwise 16 x st (4 bytes each)
  78 #ifdef CONFIG_ARC_HAS_LL64
  79         std.ab  r4, [r3, 8]
  80         std.ab  r4, [r3, 8]
  81         std.ab  r4, [r3, 8]
  82         std.ab  r4, [r3, 8]
  83         std.ab  r4, [r3, 8]
  84         std.ab  r4, [r3, 8]
  85         std.ab  r4, [r3, 8]
  86         std.ab  r4, [r3, 8]
  87 #else
  88         st.ab   r4, [r3, 4]
  89         st.ab   r4, [r3, 4]
  90         st.ab   r4, [r3, 4]
  91         st.ab   r4, [r3, 4]
  92         st.ab   r4, [r3, 4]
  93         st.ab   r4, [r3, 4]
  94         st.ab   r4, [r3, 4]
  95         st.ab   r4, [r3, 4]
  96         st.ab   r4, [r3, 4]
  97         st.ab   r4, [r3, 4]
  98         st.ab   r4, [r3, 4]
  99         st.ab   r4, [r3, 4]
 100         st.ab   r4, [r3, 4]
 101         st.ab   r4, [r3, 4]
 102         st.ab   r4, [r3, 4]
 103         st.ab   r4, [r3, 4]
 104 #endif
 105 .Lset64bytes:
 106 
 107         lsr.f   lp_count, r2, 5 ; r2 < 128 here: at most 3 rounds of 32 bytes, Z set when none
 108         lpnz    .Lset32bytes
 109         ;; LOOP START
 110 #ifdef CONFIG_ARC_HAS_LL64
 111         std.ab  r4, [r3, 8]
 112         std.ab  r4, [r3, 8]
 113         std.ab  r4, [r3, 8]
 114         std.ab  r4, [r3, 8]
 115 #else
 116         st.ab   r4, [r3, 4]
 117         st.ab   r4, [r3, 4]
 118         st.ab   r4, [r3, 4]
 119         st.ab   r4, [r3, 4]
 120         st.ab   r4, [r3, 4]
 121         st.ab   r4, [r3, 4]
 122         st.ab   r4, [r3, 4]
 123         st.ab   r4, [r3, 4]
 124 #endif
 125 .Lset32bytes:
 126 
 127         and.f   lp_count, r2, 0x1F ; last 0..31 bytes (r2 & 31), Z set when none remain
 128 .Lsmallchunk:                   ; also entered directly with lp_count = length (1..8) and Z clear
 129         lpnz    .Lcopy3bytes
 130         ;; LOOP START
 131         stb.ab  r1, [r3, 1]     ; store one byte, then r3 += 1
 132 .Lcopy3bytes:
 133 
 134         j       [blink]         ; return; r0 still holds the original dest
 135 
 136 END_CFI(memset)
 137 
 138 ENTRY_CFI(memzero)      /* in: r0 = dest, r1 = length; zero-fills by handing over to memset */
 139     ; adjust bzero args to memset args
 140     mov r2, r1          ; the length moves to r2, where memset reads it
 141     b.d  memset    ;tail call (branch without link), so no need to tinker with blink
 142     mov r1, 0           ; delay slot: fill value = 0
 143 END_CFI(memzero)

/* [<][>][^][v][top][bottom][index][help] */