1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 
  30 
  31 
  32 #include <asm/regdef.h>
  33 
  34         .set noat
  35         .set noreorder
  36 
  37         .text
  38 
  39 
  40 
  41 
  42 
  43 
  44         .ent stxncpy_aligned
  45         .align 3
  46 stxncpy_aligned:
  47         .frame sp, 0, t9, 0
  48         .prologue 0
  49 
  50         /* Co-aligned copy path: __stxncpy branches here when (a0 ^ a1) & 7 == 0. */
  51         /* On entry: t1 == first source word; t0 == first destination word (loaded by __stxncpy only when DST is misaligned); */
  52         /* a1 == SRC + 8; a2 == word loop counter computed by __stxncpy; t10 == bit of the last byte allowed by the count. */
  53         /* Returns through t9. */
  54         /* Create the first output word and detect nulls in the first source word. */
  55         lda     t2, -1          # e1    : build a mask against false zero
  56         mskqh   t2, a1, t2      # e0    :   detection in the src word
  57         mskqh   t1, a1, t3      # e0    :
  58         ornot   t1, t2, t2      # .. e1 :
  59         mskql   t0, a1, t0      # e0    : assemble the first output word
  60         cmpbge  zero, t2, t8    # .. e1 : bits set iff null found
  61         or      t0, t3, t0      # e0    :
  62         beq     a2, $a_eoc      # .. e1 :
  63         bne     t8, $a_eos      # .. e1 :
  64 
  65         /* Main aligned loop. */
  66         /* On entry: t0 == the next output word, with no null found in its source bytes (t8 == 0), and a2 != 0. */
  67 
  68 $a_loop:
  69         stq_u   t0, 0(a0)       # e0    :
  70         addq    a0, 8, a0       # .. e1 :
  71         ldq_u   t0, 0(a1)       # e0    :
  72         addq    a1, 8, a1       # .. e1 :
  73         subq    a2, 1, a2       # e0    :
  74         cmpbge  zero, t0, t8    # .. e1 (stall)
  75         beq     a2, $a_eoc      # e1    :
  76         beq     t8, $a_loop     # e1    :
  77 
  78         /* Take care of the final (possibly partial) word store. */
  79         /* On entry: t0 == the word to finish with, */
  80         /* t8 == cmpbge mask of the null bytes found in it, */
  81         /* with the end-of-count bit (t10) already or'ed in when arriving via $a_eoc. */
  82         /* The lowest set bit of t8 selects the byte at which the copy stops. */
  83 
  84 
  85 $a_eos:
  86         negq    t8, t12         # e0    : find low bit set
  87         and     t8, t12, t12    # e1 (stall)
  88 
  89         /* If the stop byte is byte 7 (bit 0x80), the whole word is stored, */
  90         /* so the destination word does not need to be read. */
  91         and     t12, 0x80, t6   # e0    :
  92         bne     t6, 1f          # .. e1 (zdb)
  93 
  94         /* Partial word store: keep the source bytes up to and including the stop byte, */
  95         /* and the original destination bytes above it. */
  96         ldq_u   t1, 0(a0)       # e0    :
  97         subq    t12, 1, t6      # .. e1 :
  98         or      t12, t6, t8     # e0    :
  99         unop                    #
 100         zapnot  t0, t8, t0      # e0    : clear src bytes > null
 101         zap     t1, t8, t1      # .. e1 : clear dst bytes <= null
 102         or      t0, t1, t0      # e1    :
 103 
 104 1:      stq_u   t0, 0(a0)       # e0    :
 105         ret     (t9)            # e1    :
 106 
 107         /* End of count: add the end-of-count bit to the stop mask, then finish exactly as for end of string. */
 108 $a_eoc:
 109         or      t10, t8, t8
 110         br      $a_eos
 111 
 112         .end stxncpy_aligned
 113 
 114         .align 3
 115         .ent __stxncpy
 116         .globl __stxncpy
 117 __stxncpy:
 118         .frame sp, 0, t9, 0
 119         .prologue 0
 120 
             /* __stxncpy: copy a null-terminated string from SRC to DST one quadword at a time, */
             /* stopping after the null byte or after COUNT bytes, whichever comes first. */
             /* In:  a0 = DST, a1 = SRC, a2 = COUNT, t9 = return address (every exit is "ret (t9)"). */
             /* NOTE(review): COUNT is presumably required to be non-zero, since count - 1 is computed below; confirm with callers. */
             /* At return: t0 = last word stored, a0 = address used for that store, */
             /* t12 = single-bit mask of the byte at which the copy stopped, t10 = single-bit mask of the last COUNT byte. */
             /* NOTE(review): callers presumably consume these registers directly; confirm before changing any of them. */
 121         /* Are source and destination co-aligned? */
 122         xor     a0, a1, t1      # e0    :
 123         and     a0, 7, t0       # .. e1 : find dest misalignment
 124         and     t1, 7, t1       # e0    :
 125         addq    a2, t0, a2      # .. e1 : bias count by dest misalignment
 126         subq    a2, 1, a2       # e0    :
 127         and     a2, 7, t2       # e1    :
 128         srl     a2, 3, a2       # e0    : a2 = loop counter = (count - 1)/8
 129         addq    zero, 1, t10    # .. e1 :
 130         sll     t10, t2, t10    # e0    : t10 = bitmask of last count byte
 131         bne     t1, $unaligned  # .. e1 :
 132 
 133         /* Co-aligned: load the first source word (and the first destination word only when DST is misaligned), */
 134         /* then continue in stxncpy_aligned. */
 135         ldq_u   t1, 0(a1)       # e0    : load first src word
 136         addq    a1, 8, a1       # .. e1 :
 137 
 138         beq     t0, stxncpy_aligned     # avoid loading dest word if not needed
 139         ldq_u   t0, 0(a0)       # e0    :
 140         br      stxncpy_aligned # .. e1 :
 141 
 142 
 143         /* The source and destination are not co-aligned. */
 144         /* Output words are aligned to the destination; source bytes are shifted into place with extql/extqh. */
 145         /* NOTE(review): the load ordering appears designed so that no source word beyond the one holding the */
 146         /* terminator (or the end of count) is read, presumably to avoid faulting; confirm before reordering loads. */
 147         .align 3
 148 $u_head:
 149         /* Assemble the first full output word.  A null inside it can still prevent storing all of it. */
 150 
 151         /* On entry to this basic block: */
 152         /* t0 == the first destination word, unmasked (0 when DST is aligned) */
 153         /* t1 == the shifted low bits of the first source word */
 154         /* t6 == byte mask that is -1 in the destination bytes to preserve (0 when DST is aligned) */
 155         /* a1 == SRC minus the destination misalignment */
 156 
 157 
 158         ldq_u   t2, 8(a1)       # e0    : load second src word
 159         addq    a1, 8, a1       # .. e1 :
 160         mskql   t0, a0, t0      # e0    : mask trailing garbage in dst
 161         extqh   t2, a1, t4      # e0    :
 162         or      t1, t4, t1      # e1    : first aligned src word complete
 163         mskqh   t1, a0, t1      # e0    : mask leading garbage in src
 164         or      t0, t1, t0      # e0    : first output word complete
 165         or      t0, t6, t6      # e1    : mask original data for zero test
 166         cmpbge  zero, t6, t8    # e0    :
 167         beq     a2, $u_eocfin   # .. e1 :
 168         lda     t6, -1          # e0    :
 169         bne     t8, $u_final    # .. e1 :
 170 
 171         mskql   t6, a1, t6      # e0    : mask out bits already seen
 172         nop                     # .. e1 :
 173         stq_u   t0, 0(a0)       # e0    : store first output word
 174         or      t6, t2, t2      # .. e1 :
 175         cmpbge  zero, t2, t8    # e0    : find nulls in second partial
 176         addq    a0, 8, a0       # .. e1 :
 177         subq    a2, 1, a2       # e0    :
 178         bne     t8, $u_late_head_exit   # .. e1 :
 179 
 180         /* The leading edge cases are done; set up t0, t1 and t2 for the main loop. */
 181 
 182 
 183         extql   t2, a1, t1      # e0    : position hi-bits of lo word
 184         beq     a2, $u_eoc      # .. e1 :
 185         ldq_u   t2, 8(a1)       # e0    : read next high-order source word
 186         addq    a1, 8, a1       # .. e1 :
 187         extqh   t2, a1, t0      # e0    : position lo-bits of hi word (stall)
 188         cmpbge  zero, t2, t8    # .. e1 :
 189         nop                     # e0    :
 190         bne     t8, $u_eos      # .. e1 :
 191 
 192         /* Unaligned copy main loop. */
 193         /* Nulls are detected in whole (aligned) source words, right after each load and before the next load. */
 194 
 195         /* On entry to this basic block: */
 196         /* t0 == the shifted low-order bits from the current source word */
 197         /* t1 == the shifted high-order bits from the previous source word */
 198         /* t2 == the unshifted current source word, known to contain no null */
 199         /* a2 == remaining word count, decremented once per stored word */
 200 
 201 
 202 
 203 
 204 
 205 
 206         .align 3
 207 $u_loop:
 208         or      t0, t1, t0      # e0    : current dst word now complete
 209         subq    a2, 1, a2       # .. e1 : decrement word count
 210         stq_u   t0, 0(a0)       # e0    : save the current word
 211         addq    a0, 8, a0       # .. e1 :
 212         extql   t2, a1, t1      # e0    : extract high bits for next time
 213         beq     a2, $u_eoc      # .. e1 :
 214         ldq_u   t2, 8(a1)       # e0    : load high word for next time
 215         addq    a1, 8, a1       # .. e1 :
 216         nop                     # e0    :
 217         cmpbge  zero, t2, t8    # e1    : test new word for eos (stall)
 218         extqh   t2, a1, t0      # e0    : extract low bits for current word
 219         beq     t8, $u_loop     # .. e1 :
 220 
 221         /* A null was found somewhere in the source word just read (t2). */
 222         /* If it lies in the bytes that complete the current output word, one (probably partial) word is left to store; */
 223         /* otherwise one full word and one partial word are left. */
 224 
 225         /* On entry to this basic block: */
 226         /* t0 == the shifted low-order bits from the current source word */
 227         /* t1 == the shifted high-order bits from the previous source word */
 228         /* t2 == the unshifted current source word */
 229 
 230 $u_eos:
 231         or      t0, t1, t0      # e0    : first (partial) source word complete
 232         nop                     # .. e1 :
 233         cmpbge  zero, t0, t8    # e0    : is the null in this first bit?
 234         bne     t8, $u_final    # .. e1 (zdb)
 235 
 236         stq_u   t0, 0(a0)       # e0    : the null was in the high-order bits
 237         addq    a0, 8, a0       # .. e1 :
 238         subq    a2, 1, a2       # e1    :
 239         /* The remaining bytes of t2 form the last output word; when a2 == 0 the end-of-count bit (t10) joins the stop mask. */
 240 $u_late_head_exit:
 241         extql   t2, a1, t0      # .. e0 :
 242         cmpbge  zero, t0, t8    # e0    :
 243         or      t8, t10, t6     # e1    :
 244         cmoveq  a2, t6, t8      # e0    :
 245         nop                     # .. e1 :
 246 
 247         /* Take care of the final (probably partial) output word. */
 248         /* On entry to this basic block: */
 249         /* t0 == the assembled output word */
 250         /* t8 == stop mask (null bytes and/or the end-of-count bit); its lowest set bit selects the last byte copied */
 251 $u_final:
 252         negq    t8, t6          # e0    : isolate low bit set
 253         and     t6, t8, t12     # e1    :
 254 
 255         and     t12, 0x80, t6   # e0    : avoid dest word load if we can
 256         bne     t6, 1f          # .. e1 (zdb)
 257 
 258         ldq_u   t1, 0(a0)       # e0    :
 259         subq    t12, 1, t6      # .. e1 :
 260         or      t6, t12, t8     # e0    :
 261         zapnot  t0, t8, t0      # .. e1 : kill source bytes > null
 262         zap     t1, t8, t1      # e0    : kill dest bytes <= null
 263         or      t0, t1, t0      # e1    :
 264 
 265 1:      stq_u   t0, 0(a0)       # e0    :
 266         ret     (t9)            # .. e1 :
 267 
 268         /* Reached the end of the count before the end of the string. */
 269         /* On entry: t1 == the shifted high-order bits from the previous source word. */
 270         /* Another source word is loaded only when the end-of-count byte is not already covered by the bytes in t1. */
 271 $u_eoc:
 272         and     a1, 7, t6       # e1    :
 273         sll     t10, t6, t6     # e0    :
 274         and     t6, 0xff, t6    # e0    :
 275         bne     t6, 1f          # .. e1 :
 276 
 277         ldq_u   t2, 8(a1)       # e0    : load final src word
 278         nop                     # .. e1 :
 279         extqh   t2, a1, t0      # e0    : extract low bits for last word
 280         or      t1, t0, t1      # e1    :
 281 
 282 1:      cmpbge  zero, t1, t8
 283         mov     t1, t0
 284 
 285 $u_eocfin:                      # end-of-count, final word
 286         or      t10, t8, t8
 287         br      $u_final
 288 
 289         /* Unaligned copy entry point (reached from __stxncpy when SRC and DST offsets within a quadword differ). */
 290         .align 3
 291 $unaligned:
 292 
 293         ldq_u   t1, 0(a1)       # e0    : load first source word
 294 
 295         and     a0, 7, t4       # .. e1 : find dest misalignment
 296         and     a1, 7, t5       # e0    : find src misalignment
 297 
 298         /* When DST is misaligned, load the first destination word and build t6, a byte mask that is -1 */
 299         /* in the destination bytes below DST (which must be preserved).  Otherwise t0 and t6 stay 0. */
 300 
 301         mov     zero, t0        # .. e1 :
 302         mov     zero, t6        # e0    :
 303         beq     t4, 1f          # .. e1 :
 304         ldq_u   t0, 0(a0)       # e0    :
 305         lda     t6, -1          # .. e1 :
 306         mskql   t6, a0, t6      # e0    :
 307         subq    a1, t4, a1      # .. e1 : sub dest misalignment from src addr
 308 
 309         /* When the source misalignment is larger than the destination misalignment (t12 != 0), the first */
 310         /* source word is checked here for a null or end of count before $u_head loads the next source word. */
 311 
 312 1:      cmplt   t4, t5, t12     # e1    :
 313         extql   t1, a1, t1      # .. e0 : shift src into place
 314         lda     t2, -1          # e0    : for creating masks later
 315         beq     t12, $u_head    # .. e1 :
 316 
 317         extql   t2, a1, t2      # e0    :
 318         cmpbge  zero, t1, t8    # .. e1 : is there a zero?
 319         andnot  t2, t6, t2      # e0    : dest mask for a single word copy
 320         or      t8, t10, t5     # .. e1 : test for end-of-count too
 321         cmpbge  zero, t2, t3    # e0    :
 322         cmoveq  a2, t5, t8      # .. e1 :
 323         andnot  t8, t3, t8      # e0    :
 324         beq     t8, $u_head     # .. e1 (zdb)
 325 
 326         /* A null (or the end of count) was found in the first partial source word, so a single word is written. */
 327         /* Isolate the valid source bytes and merge them into the original destination word: */
 328         /* t2 == mask of destination bytes to be replaced, t1 == the source bytes to put there. */
 329 
 330 
 331         ldq_u   t0, 0(a0)       # e0    :
 332         negq    t8, t6          # .. e1 : build bitmask of bytes <= zero
 333         mskqh   t1, t4, t1      # e0    :
 334         and     t6, t8, t12     # .. e1 :
 335         subq    t12, 1, t6      # e0    :
 336         or      t6, t12, t8     # e1    :
 337 
 338         zapnot  t2, t8, t2      # e0    : prepare source word; mirror changes
 339         zapnot  t1, t8, t1      # .. e1 : to source validity mask
 340 
 341         andnot  t0, t2, t0      # e0    : zero place for source to reside
 342         or      t0, t1, t0      # e1    : and put it there
 343         stq_u   t0, 0(a0)       # e0    :
 344         ret     (t9)            # .. e1 :
 345 
 346         .end __stxncpy