arch/parisc/lib/lusercopy.S

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


        .text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

        /*
         * get_sr gets the appropriate space value into
         * sr1 for kernel/user space access, depending
         * on the flag stored in the task structure.
         */

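        /*
         * For illustration, roughly equivalent C (a sketch only; the
         * thread_info field name and the "zero means kernel space"
         * encoding are assumptions, not definitions taken from this file):
         *
         *	unsigned long seg = current_thread_info()->segment;
         *	sr1 = seg ? sr3 : 0;	// user: reuse sr3, kernel: space 0
         *
         * The or,<> in the macro below nullifies the following copy when
         * the segment flag is non-zero, so sr3 is kept for user accesses.
         */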
        .macro  get_sr
        mfctl       %cr30,%r1
        ldw         TI_SEGMENT(%r1),%r22
        mfsp        %sr3,%r1
        or,<>       %r22,%r0,%r0
        copy        %r0,%r1
        mtsp        %r1,%sr1
        .endm

        /*
         * unsigned long lclear_user(void *to, unsigned long n)
         *
         * Returns 0 on success; otherwise, returns the number of
         * bytes that could not be cleared.
         */
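        /*
         * Caller-side sketch, for illustration only (not part of this
         * file); the residual count is typically turned into -EFAULT:
         *
         *	unsigned long left = lclear_user(uaddr, len);
         *	if (left)
         *		return -EFAULT;	// 'left' trailing bytes were not zeroed
         */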

ENTRY_CFI(lclear_user)
        comib,=,n   0,%r25,$lclu_done
        get_sr
$lclu_loop:
        addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
        bv          %r0(%r2)
        copy        %r25,%r28

2:      b           $lclu_done
        ldo         1(%r25),%r25

        ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


        /*
         * long lstrnlen_user(char *s, long n)
         *
         * Returns 0 if an exception occurs before the zero byte or N is reached,
         *         N+1 if N would be exceeded,
         *         else strlen + 1 (i.e. includes the zero byte).
         */
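        /*
         * Return-value sketch in C, for illustration only (the fault
         * check is a hypothetical helper, not a real kernel interface):
         *
         *	long i;
         *	for (i = 0; i < n; i++) {
         *		char c;
         *		if (user_byte_faults(s + i, &c))
         *			return 0;	// exception: return 0
         *		if (c == '\0')
         *			return i + 1;	// strlen + 1
         *	}
         *	return n + 1;			// no NUL within N bytes
         */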

ENTRY_CFI(lstrnlen_user)
        comib,=     0,%r25,$lslen_nzero
        copy        %r26,%r24
        get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
        comib,=,n   0,%r1,$lslen_done
        addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
        bv          %r0(%r2)
        sub         %r26,%r24,%r28

$lslen_nzero:
        b           $lslen_done
        ldo         1(%r26),%r26 /* special case for N == 0 */

3:      b           $lslen_done
        copy        %r24,%r26    /* reset r26 so 0 is returned on fault */

        ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
        ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C-implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance under various
 * conditions. In the optimal case, we copy in loops that move 32 or 16 bytes
 * at a time using general registers.  Unaligned copies are handled either by
 * aligning the destination and then using a shift-and-write method, or in a
 * few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends credibility to
 * the idea that gcc can generate very good code as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre-PA8000 processors).
 */
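
/*
 * High-level shape of the algorithm, sketched in C for illustration only
 * (the helper names below are hypothetical, not functions in this file):
 *
 *	unsigned long pa_memcpy_sketch(void *dst, const void *src, unsigned long len)
 *	{
 *		if (len < 16)
 *			return copy_bytes(dst, src, len);
 *		if (((unsigned long)src ^ (unsigned long)dst) & 3)
 *			return copy_shift_and_write(dst, src, len);
 *		// align dst, then move 32 bytes (64-bit) or 16 bytes (32-bit)
 *		// per iteration, with word and byte loops for the tail
 *		return copy_words_unrolled(dst, src, len);
 *	}
 *
 * Every fault fixup eventually reaches .Lcopy_done, which returns
 * end - dst, i.e. the number of bytes that were not copied.
 */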

        dst = arg0
        src = arg1
        len = arg2
        end = arg3
        t1  = r19
        t2  = r20
        t3  = r21
        t4  = r22
        srcspc = sr1
        dstspc = sr2

        t0 = r1
        a1 = t1
        a2 = t2
        a3 = t3
        a0 = t4

        save_src = ret0
        save_dst = ret1
        save_len = r31

ENTRY_CFI(pa_memcpy)
        /* Last destination address */
        add     dst,len,end

        /* short copy with less than 16 bytes? */
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

        /* same alignment? */
        xor     src,dst,t0
        extru   t0,31,2,t1
        cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
        /* only do 64-bit copies if we can get aligned. */
        extru   t0,31,3,t1
        cmpib,<>,n  0,t1,.Lalign_loop32

        /* loop until we are 64-bit aligned */
.Lalign_loop64:
        extru   dst,31,3,t1
        cmpib,=,n       0,t1,.Lcopy_loop_16_start
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop64
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
        ldi     31,t0
.Lcopy_loop_16:
        cmpb,COND(>>=),n t0,len,.Lword_loop

10:     ldd     0(srcspc,src),t1
11:     ldd     8(srcspc,src),t2
        ldo     16(src),src
12:     std,ma  t1,8(dstspc,dst)
13:     std,ma  t2,8(dstspc,dst)
14:     ldd     0(srcspc,src),t1
15:     ldd     8(srcspc,src),t2
        ldo     16(src),src
16:     std,ma  t1,8(dstspc,dst)
17:     std,ma  t2,8(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_16
        ldo     -32(len),len

.Lword_loop:
        cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:     ldw,ma  4(srcspc,src),t1
21:     stw,ma  t1,4(dstspc,dst)
        b       .Lword_loop
        ldo     -4(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

        /* loop until we are 32-bit aligned */
.Lalign_loop32:
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_loop_8
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop32
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:     ldw     0(srcspc,src),t1
11:     ldw     4(srcspc,src),t2
12:     stw,ma  t1,4(dstspc,dst)
13:     stw,ma  t2,4(dstspc,dst)
14:     ldw     8(srcspc,src),t1
15:     ldw     12(srcspc,src),t2
        ldo     16(src),src
16:     stw,ma  t1,4(dstspc,dst)
17:     stw,ma  t2,4(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_8
        ldo     -16(len),len

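        /*
         * Byte-at-a-time tail copy.  The cmpclr nullifies the following
         * branch while len is still non-zero, so the loop is only left
         * through .Lcopy_done once len reaches zero (or a fault fixup
         * fires).
         */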
.Lbyte_loop:
        cmpclr,COND(<>) len,%r0,%r0
        b,n     .Lcopy_done
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lbyte_loop
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
        bv      %r0(%r2)
        sub     end,dst,ret0


        /* src and dst are not aligned the same way. */
        /* need to go the hard way */
.Lunaligned_copy:
        /* align until dst is 32bit-word-aligned */
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_dstaligned
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lunaligned_copy
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

        /* store src, dst and len in a safe place */
        copy    src,save_src
        copy    dst,save_dst
        copy    len,save_len

        /* len now needs to give the number of words to copy */
        SHRREG  len,2,len

        /*
         * Copy from a not-aligned src to an aligned dst using shifts.
         * Handles 4 words per loop.
         */

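        /*
         * Mechanism, for reference: %sar is set to 32 - 8*(src & 3), src is
         * rounded down to a word boundary, and each output word is built by
         * shrpw from two adjacent aligned source words.  Roughly, in C:
         *
         *	dst_word = (hi << (8 * (src & 3))) | (lo >> (32 - 8 * (src & 3)));
         *
         * where hi/lo stand for the earlier/later aligned source word (an
         * illustrative sketch, assuming 0 < (src & 3) < 4).  The low two
         * bits of the word count select one of the four entry points
         * .Lcase0-.Lcase3 into the unrolled loop below.
         */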
        depw,z src,28,2,t0
        subi 32,t0,t0
        mtsar t0
        extru len,31,2,t0
        cmpib,= 2,t0,.Lcase2
        /* Make src aligned by rounding it down.  */
        depi 0,31,2,src

        cmpiclr,<> 3,t0,%r0
        b,n .Lcase3
        cmpiclr,<> 1,t0,%r0
        b,n .Lcase1
.Lcase0:
        cmpb,COND(=) %r0,len,.Lcda_finish
        nop

1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b,n .Ldo3
.Lcase1:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        ldo -1(len),len
        cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a3, a0, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a0, a1, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a1, a2, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
        ldo -4(len),len
        cmpb,COND(<>) %r0,len,.Ldo4
        nop
.Ldo0:
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

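        /*
         * A faulting read above lands here with dst still pointing just
         * past the last word actually stored, so the progress made so far
         * (dst - save_dst) is used to rebuild src and len before handing
         * the remainder to the byte copier.
         */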
.Lcda_rdfault:
.Lcda_finish:
        /* calculate new src, dst and len and jump to byte-copy loop */
        sub     dst,save_dst,t0
        add     save_src,t0,src
        b       .Lbyte_loop
        sub     save_len,t0,len

.Lcase3:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo2
        ldo 1(len),len
.Lcase2:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo1
        ldo 2(len),len


        /* fault exception fixup handlers: */
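        /*
         * If the second load of a pair faults, the first word is already
         * in t1 but not yet stored; these handlers store it (in the delay
         * slot of the branch) before reaching .Lcopy_done, so the residual
         * byte count stays exact.  That store can fault as well, hence the
         * extra exception table entry.
         */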
#ifdef CONFIG_64BIT
.Lcopy16_fault:
        b       .Lcopy_done
10:     std,ma  t1,8(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
        b       .Lcopy_done
10:     stw,ma  t1,4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

        .end
