root/arch/hexagon/lib/memset.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright (c) 2011, The Linux Foundation. All rights reserved.
   4  */
   5 
   6 
   7 /* HEXAGON assembly optimized memset */
   8 /* Replaces the standard library function memset */
   9 
  10 
  11         .macro HEXAGON_OPT_FUNC_BEGIN name
             /* Opens the definition of the function whose symbol is passed
              * as "name"; pair it with HEXAGON_OPT_FUNC_FINISH.  */
  12         .text                     /* emit into the code section  */
  13         .p2align 4                /* align the entry to 2^4 = 16 bytes  */
  14         .globl \name              /* export the symbol  */
  15         .type  \name, @function   /* mark the symbol as a function  */
  16 \name:                            /* entry label  */
  17         .endm
  18 
  19         .macro HEXAGON_OPT_FUNC_FINISH name
             /* Closes a function opened with HEXAGON_OPT_FUNC_BEGIN.  */
  20         .size  \name, . - \name   /* symbol size = bytes from its label to here  */
  21         .endm
  22 
  23 /* FUNCTION: memset (v2 version)
      *
      * Built only when __HEXAGON_ARCH__ < 3.
      *
      * In:   r0 = start of the destination buffer
      *       r1 = fill value; vsplatb copies its low byte into every byte of r4
      *       r2 = number of bytes to set
      * Out:  r0 is never written, so the caller gets the destination back
      * Uses: r3:2   remaining count as a 64-bit pair (r3 starts at 0)
      *       r7:6   amount subtracted from r3:2 after each store (r7 stays 0)
      *       r5:4   fill pattern (r5 is loaded just before the double loop)
      *       r8     write cursor
      *       r9     bytes until double alignment, 8 - (r0 & 7)
      *       r10    iteration count of the double loop
      *       p0/p1  branch and early-return predicates
      *
      * Counts below 8 are written by the byte loop.  Larger counts store a
      * byte, a halfword and a word as selected by bits 0, 1 and 2 of r9, run
      * the doubleword loop when at least 8 bytes remain, and finish with a
      * word, halfword and byte as selected by the bits of the remaining count.
      *
      * Instructions grouped in { } form one packet.  An "if pN" inside a
      * packet tests the value pN had before the packet, so each packet
      * computes the predicate consumed by the following packet's branch.
      */
  24 #if __HEXAGON_ARCH__ < 3
  25 HEXAGON_OPT_FUNC_BEGIN memset
  26         {
  27                 r6 = #8
  28                 r7 = extractu(r0, #3 , #0)  /* r7 = r0 & 7  */
  29                 p0 = cmp.eq(r2, #0)
  30                 p1 = cmp.gtu(r2, #7)  /* p1: at least 8 bytes to set  */
  31         }
  32         {
  33                 r4 = vsplatb(r1)
  34                 r8 = r0           /* leave r0 intact for return val  */
  35                 r9 = sub(r6, r7)  /* bytes until double alignment  */
  36                 if p0 jumpr r31   /* count == 0, so return  */
  37         }
  38         {
  39                 r3 = #0           /* high word of the count pair r3:2  */
  40                 r7 = #0           /* high word of the step pair r7:6  */
  41                 p0 = tstbit(r9, #0)  /* odd distance: a byte store is needed  */
  42                 if p1 jump 2f /* skip byte loop */
  43         }
  44 
  45 /* fewer than 8 bytes to set, so just set a byte at a time and return  */
  46 
  47                 loop0(1f, r2) /* byte loop */
  48         .falign
  49 1: /* byte loop */
  50         {
  51                 memb(r8++#1) = r4
  52         }:endloop0
  53                 jumpr r31
  54         .falign
  55 2: /* skip byte loop */
  56         {
  57                 r6 = #1           /* step for the byte store below  */
  58                 p0 = tstbit(r9, #1)  /* consumed by the branch in 3:  */
  59                 p1 = cmp.eq(r2, #1)  /* would the byte store finish the job?  */
  60                 if !p0 jump 3f /* skip initial byte store */
  61         }
  62         {
  63                 memb(r8++#1) = r4
  64                 r3:2 = sub(r3:2, r7:6)  /* count -= 1  */
  65                 if p1 jumpr r31
  66         }
  67         .falign
  68 3: /* skip initial byte store */
  69         {
  70                 r6 = #2           /* step for the halfword store below  */
  71                 p0 = tstbit(r9, #2)  /* consumed by the branch in 4:  */
  72                 p1 = cmp.eq(r2, #2)
  73                 if !p0 jump 4f /* skip initial half store */
  74         }
  75         {
  76                 memh(r8++#2) = r4
  77                 r3:2 = sub(r3:2, r7:6)  /* count -= 2  */
  78                 if p1 jumpr r31
  79         }
  80         .falign
  81 4: /* skip initial half store */
  82         {
  83                 r6 = #4           /* step for the word store below  */
  84                 p0 = cmp.gtu(r2, #7)  /* consumed in 5: when the word store is skipped  */
  85                 p1 = cmp.eq(r2, #4)
  86                 if !p0 jump 5f /* skip initial word store */
  87         }
  88         {
  89                 memw(r8++#4) = r4
  90                 r3:2 = sub(r3:2, r7:6)  /* count -= 4  */
  91                 p0 = cmp.gtu(r2, #11)  /* count before the store > 11, so more than 7 remain  */
  92                 if p1 jumpr r31
  93         }
  94         .falign
  95 5: /* skip initial word store */
  96         {
  97                 r10 = lsr(r2, #3)  /* whole doublewords left  */
  98                 p1 = cmp.eq(r3, #1)  /* NOTE(review): r3 was set to 0 above; this looks like a way to clear p1 before 7: - confirm  */
  99                 if !p0 jump 7f /* skip double loop */
 100         }
 101         {
 102                 r5 = r4           /* r5:4 = eight copies of the fill byte  */
 103                 r6 = #8           /* step for the doubleword stores  */
 104                 loop0(6f, r10) /* double loop */
 105         }
 106 
 107 /* set bytes a double word at a time  */
 108 
 109         .falign
 110 6: /* double loop */
 111         {
 112                 memd(r8++#8) = r5:4
 113                 r3:2 = sub(r3:2, r7:6)  /* count -= 8  */
 114                 p1 = cmp.eq(r2, #8)  /* count before this store was exactly 8  */
 115         }:endloop0
 116         .falign
 117 7: /* skip double loop */
 118         {
 119                 p0 = tstbit(r2, #2)  /* is a final word store needed?  */
 120                 if p1 jumpr r31      /* return if the double loop consumed everything  */
 121         }
 122         {
 123                 r6 = #4
 124                 p0 = tstbit(r2, #1)  /* is a final halfword needed? consumed in 8:  */
 125                 p1 = cmp.eq(r2, #4)
 126                 if !p0 jump 8f /* skip final word store */
 127         }
 128         {
 129                 memw(r8++#4) = r4
 130                 r3:2 = sub(r3:2, r7:6)  /* count -= 4, so 8: sees what is left  */
 131                 if p1 jumpr r31
 132         }
 133         .falign
 134 8: /* skip final word store */
 135         {
 136                 p1 = cmp.eq(r2, #2)  /* a halfword would finish the job  */
 137                 if !p0 jump 9f /* skip final half store */
 138         }
 139         {
 140                 memh(r8++#2) = r4
 141                 if p1 jumpr r31
 142         }
 143         .falign
 144 9: /* skip final half store */
 145         {
 146                 memb(r8++#1) = r4  /* last remaining byte  */
 147                 jumpr r31
 148         }
 149 HEXAGON_OPT_FUNC_FINISH memset
 150 #endif
 151 
 152 
 153 /*  FUNCTION: memset (v3 and higher version)
      *
      * Built only when __HEXAGON_ARCH__ >= 3.
      *
      * In:   r0 = start of the destination buffer
      *       r1 = fill value; byte stores write its low byte and vsplatb
      *            replicates that byte into r7
      *       r2 = number of bytes to set
      * Out:  r0 is never written, so the caller gets the destination back
      * Uses: r2     remaining byte count
      *       r3     byte-loop cursor, later scratch and 32-byte block count
      *       r5:4   eight copies of the fill byte
      *       r6     write cursor for everything but the byte loop
      *       r7     four copies of the fill byte
      *       r8     doubleword loop count
      *       p0/p1  branch predicates
      *
      * Flow:
      *   count == 0        return at .L1
      *   count <= 8        byte loop .L47
      *   otherwise         .L3/.L8/.L10 store a byte, halfword and word as
      *                     needed so that r6 becomes 8-byte aligned
      *   remaining > 127   .L12 stores up to three doublewords until r6 is
      *                     32-byte aligned, then 32 bytes are handled per
      *                     iteration: dczeroa alone when r1 == 0 (.L46),
      *                     dczeroa plus four doubleword stores otherwise (.L45)
      *   remaining > 7     doubleword loop .L44
      *   tail              .L28/.L33/.L35 store a word, a halfword and a byte
      *                     for bits 2 and 1 of the count and a final count of 1
      *
      * Instructions grouped in { } form one packet.  "pN.new" in a branch
      * uses the predicate computed in the same packet; all other register
      * reads inside a packet see the values from before the packet.
      */
 154 #if __HEXAGON_ARCH__ >= 3
 155 HEXAGON_OPT_FUNC_BEGIN memset
 156         {
 157                 r7=vsplatb(r1)
 158                 r6 = r0  /* write cursor; r0 itself is left untouched  */
 159                 if (r2==#0) jump:nt .L1  /* nothing to do  */
 160         }
 161         {
 162                 r5:4=combine(r7,r7)  /* eight copies of the fill byte  */
 163                 p0 = cmp.gtu(r2,#8)
 164                 if (p0.new) jump:nt .L3  /* more than 8 bytes: aligned path  */
 165         }
 166         {
 167                 r3 = r0
 168                 loop0(.L47,r2)  /* 1 to 8 bytes, one per iteration  */
 169         }
 170         .falign
 171 .L47:
 172         {
 173                 memb(r3++#1) = r1
 174         }:endloop0 /* start=.L47 */
 175                 jumpr r31
 176 .L3:  /* align the cursor to 2 bytes  */
 177         {
 178                 p0 = tstbit(r0,#0)
 179                 if (!p0.new) jump:nt .L8
 180                 p1 = cmp.eq(r2, #1)  /* cannot be true here: .L3 is reached only with r2 > 8  */
 181         }
 182         {
 183                 r6 = add(r0, #1)
 184                 r2 = add(r2,#-1)
 185                 memb(r0) = r1
 186                 if (p1) jump .L1
 187         }
 188 .L8:  /* align the cursor to 4 bytes  */
 189         {
 190                 p0 = tstbit(r6,#1)
 191                 if (!p0.new) jump:nt .L10
 192         }
 193         {
 194                 r2 = add(r2,#-2)
 195                 memh(r6++#2) = r7
 196                 p0 = cmp.eq(r2, #2)  /* compares the count from before this packet's decrement  */
 197                 if (p0.new) jump:nt .L1
 198         }
 199 .L10:  /* align the cursor to 8 bytes  */
 200         {
 201                 p0 = tstbit(r6,#2)
 202                 if (!p0.new) jump:nt .L12
 203         }
 204         {
 205                 r2 = add(r2,#-4)
 206                 memw(r6++#4) = r7
 207                 p0 = cmp.eq(r2, #4)  /* compares the count from before this packet's decrement  */
 208                 if (p0.new) jump:nt .L1
 209         }
 210 .L12:  /* r6 is 8-byte aligned from here on  */
 211         {
 212                 p0 = cmp.gtu(r2,#127)
 213                 if (!p0.new) jump:nt .L14  /* 127 bytes or fewer: no 32-byte blocks  */
 214         }
 215                 r3 = and(r6,#31)  /* offset of the cursor within a 32-byte block  */
 216                 if (r3==#0) jump:nt .L17
 217         {
 218                 memd(r6++#8) = r5:4
 219                 r2 = add(r2,#-8)
 220         }
 221                 r3 = and(r6,#31)
 222                 if (r3==#0) jump:nt .L17
 223         {
 224                 memd(r6++#8) = r5:4
 225                 r2 = add(r2,#-8)
 226         }
 227                 r3 = and(r6,#31)
 228                 if (r3==#0) jump:nt .L17
 229         {
 230                 memd(r6++#8) = r5:4
 231                 r2 = add(r2,#-8)
 232         }
 233 .L17:  /* r6 is 32-byte aligned  */
 234         {
 235                 r3 = lsr(r2,#5)  /* whole 32-byte blocks left  */
 236                 if (r1!=#0) jump:nt .L18  /* tests all of r1, not only its low byte; either path stores the same bytes  */
 237         }
 238         {
 239                 r8 = r3  /* NOTE(review): not read before .L14 overwrites r8  */
 240                 r3 = r6  /* NOTE(review): not read again on this path  */
 241                 loop0(.L46,r3)  /* the count is r3 from before this packet  */
 242         }
 243         .falign
 244 .L46:  /* r1 == 0: each iteration relies on dczeroa alone for 32 zero bytes  */
 245         {
 246                 dczeroa(r6)
 247                 r6 = add(r6,#32)
 248                 r2 = add(r2,#-32)
 249         }:endloop0 /* start=.L46 */
 250 .L14:  /* fewer than 32 bytes remain when arriving from the block loops  */
 251         {
 252                 p0 = cmp.gtu(r2,#7)
 253                 if (!p0.new) jump:nt .L28
 254                 r8 = lsr(r2,#3)  /* whole doublewords left  */
 255         }
 256                 loop0(.L44,r8)
 257         .falign
 258 .L44:
 259         {
 260                 memd(r6++#8) = r5:4
 261                 r2 = add(r2,#-8)
 262         }:endloop0 /* start=.L44 */
 263 .L28:  /* tail: at most 7 bytes remain  */
 264         {
 265                 p0 = tstbit(r2,#2)
 266                 if (!p0.new) jump:nt .L33
 267         }
 268         {
 269                 r2 = add(r2,#-4)
 270                 memw(r6++#4) = r7
 271         }
 272 .L33:
 273         {
 274                 p0 = tstbit(r2,#1)
 275                 if (!p0.new) jump:nt .L35
 276         }
 277         {
 278                 r2 = add(r2,#-2)
 279                 memh(r6++#2) = r7
 280         }
 281 .L35:
 282                 p0 = cmp.eq(r2,#1)
 283                 if (p0) memb(r6) = r1  /* final odd byte  */
 284 .L1:  /* common return  */
 285                 jumpr r31
 286 .L18:  /* r1 != 0: dczeroa each 32-byte block, then fill it with four doublewords  */
 287                 loop0(.L45,r3)
 288         .falign
 289 .L45:
 290                 dczeroa(r6)
 291         {
 292                 memd(r6++#8) = r5:4
 293                 r2 = add(r2,#-32)  /* accounts for all four stores of this iteration  */
 294         }
 295                 memd(r6++#8) = r5:4
 296                 memd(r6++#8) = r5:4
 297         {
 298                 memd(r6++#8) = r5:4
 299         }:endloop0 /* start=.L45  */
 300                 jump .L14  /* finish with the doubleword loop and the tail  */
 301 HEXAGON_OPT_FUNC_FINISH memset
 302 #endif

/* [<][>][^][v][top][bottom][index][help] */