/* SPDX-License-Identifier: GPL-2.0 */
/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *      e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise the count of bytes not yet copied.
 */
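
/* In C terms the calling contract is roughly the following (sketch
 * only -- copy_checked() is a made-up name for illustration; the real
 * callers are the uaccess wrappers in arch/sparc/include/asm/uaccess_32.h):
 *
 *      static inline long copy_checked(void *to, const void *from,
 *                                      unsigned long n)
 *      {
 *              unsigned long left = __copy_user(to, from, n);
 *              return left ? -EFAULT : 0;      // 'left' tail bytes missed
 *      }
 */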

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/export.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b)                             \
98:     x,y;                                    \
        .section .fixup,ALLOC,EXECINSTR;        \
        .align  4;                              \
99:     ba fixupretl;                           \
         a, b, %g3;                             \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   98b, 99b;                       \
        .text;                                  \
        .align  4

#define EX2(x,y,c,d,e,a,b)                      \
98:     x,y;                                    \
        .section .fixup,ALLOC,EXECINSTR;        \
        .align  4;                              \
99:     c, d, e;                                \
        ba fixupretl;                           \
         a, b, %g3;                             \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   98b, 99b;                       \
        .text;                                  \
        .align  4

#define EXO2(x,y)                               \
98:     x, y;                                   \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   98b, 97f;                       \
        .text;                                  \
        .align  4

#define EXT(start,end,handler)                  \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   start, 0, end, handler;         \
        .text;                                  \
        .align  4

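/* A rough C view of the records these macros emit into __ex_table
 * (sketch only; the field names are illustrative, the actual walker
 * is the sparc32 search_extable()):
 *
 *      struct ex_entry {               // EX/EX2/EXO2: .word 98b, 99b
 *              unsigned long insn;     // address of the faulting insn
 *              unsigned long fixup;    // where to resume in .fixup
 *      };
 *
 * EXT() instead emits a two-slot range record (.word start, 0, end,
 * handler): a fault anywhere in [start, end) enters handler with
 * %g2 = (faulting_insn - start) >> 2, which the fixup code below
 * relies on.
 */
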
/* Please do not change the following macros unless you also change the
 * logic used in the .fixup section at the end of this file.
 */

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd     [%src + (offset) + 0x00], %t0; \
        ldd     [%src + (offset) + 0x08], %t2; \
        ldd     [%src + (offset) + 0x10], %t4; \
        ldd     [%src + (offset) + 0x18], %t6; \
        st      %t0, [%dst + (offset) + 0x00]; \
        st      %t1, [%dst + (offset) + 0x04]; \
        st      %t2, [%dst + (offset) + 0x08]; \
        st      %t3, [%dst + (offset) + 0x0c]; \
        st      %t4, [%dst + (offset) + 0x10]; \
        st      %t5, [%dst + (offset) + 0x14]; \
        st      %t6, [%dst + (offset) + 0x18]; \
        st      %t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd     [%src + (offset) + 0x00], %t0; \
        ldd     [%src + (offset) + 0x08], %t2; \
        ldd     [%src + (offset) + 0x10], %t4; \
        ldd     [%src + (offset) + 0x18], %t6; \
        std     %t0, [%dst + (offset) + 0x00]; \
        std     %t2, [%dst + (offset) + 0x08]; \
        std     %t4, [%dst + (offset) + 0x10]; \
        std     %t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
        ldd     [%src - (offset) - 0x10], %t0; \
        ldd     [%src - (offset) - 0x08], %t2; \
        st      %t0, [%dst - (offset) - 0x10]; \
        st      %t1, [%dst - (offset) - 0x0c]; \
        st      %t2, [%dst - (offset) - 0x08]; \
        st      %t3, [%dst - (offset) - 0x04];

#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
        lduh    [%src + (offset) + 0x00], %t0; \
        lduh    [%src + (offset) + 0x02], %t1; \
        lduh    [%src + (offset) + 0x04], %t2; \
        lduh    [%src + (offset) + 0x06], %t3; \
        sth     %t0, [%dst + (offset) + 0x00]; \
        sth     %t1, [%dst + (offset) + 0x02]; \
        sth     %t2, [%dst + (offset) + 0x04]; \
        sth     %t3, [%dst + (offset) + 0x06];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
        ldub    [%src - (offset) - 0x02], %t0; \
        ldub    [%src - (offset) - 0x01], %t1; \
        stb     %t0, [%dst - (offset) - 0x02]; \
        stb     %t1, [%dst - (offset) - 0x01];

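/* What one MOVE_BIGCHUNK expansion does, written as C (sketch only;
 * ldd/std move even/odd register pairs, which C cannot express):
 *
 *      static void move_bigchunk(const unsigned int *src, unsigned int *dst)
 *      {
 *              int i;
 *
 *              for (i = 0; i < 8; i++)         // 8 words = 32 bytes
 *                      dst[i] = src[i];
 *      }
 */
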
        .text
        .align  4

        .globl  __copy_user_begin
__copy_user_begin:

        .globl  __copy_user
        EXPORT_SYMBOL(__copy_user)
dword_align:
        andcc   %o1, 1, %g0
        be      4f
         andcc  %o1, 2, %g0

        EXO2(ldub [%o1], %g2)
        add     %o1, 1, %o1
        EXO2(stb %g2, [%o0])
        sub     %o2, 1, %o2
        bne     3f
         add    %o0, 1, %o0

        EXO2(lduh [%o1], %g2)
        add     %o1, 2, %o1
        EXO2(sth %g2, [%o0])
        sub     %o2, 2, %o2
        b       3f
         add    %o0, 2, %o0
4:
        EXO2(lduh [%o1], %g2)
        add     %o1, 2, %o1
        EXO2(sth %g2, [%o0])
        sub     %o2, 2, %o2
        b       3f
         add    %o0, 2, %o0

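/* The dword_align prologue above does, in rough C (sketch only; src and
 * dst are byte pointers, len is the %o2 count, and each test sees the
 * already-advanced src):
 *
 *      if ((unsigned long)src & 1) {           // odd address: one byte
 *              *dst++ = *src++;  len--;
 *      }
 *      if ((unsigned long)src & 2) {           // still halfword-off: two
 *              dst[0] = src[0];  dst[1] = src[1];
 *              dst += 2;  src += 2;  len -= 2;
 *      }
 */
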
__copy_user:    /* %o0=dst %o1=src %o2=len */
        xor     %o0, %o1, %o4
1:
        andcc   %o4, 3, %o5
2:
        bne     cannot_optimize
         cmp    %o2, 15

        bleu    short_aligned_end
         andcc  %o1, 3, %g0

        bne     dword_align
3:
         andcc  %o1, 4, %g0

        be      2f
         mov    %o2, %g1

        EXO2(ld [%o1], %o4)
        sub     %g1, 4, %g1
        EXO2(st %o4, [%o0])
        add     %o1, 4, %o1
        add     %o0, 4, %o0
2:
        andcc   %g1, 0xffffff80, %g7
        be      3f
         andcc  %o0, 4, %g0

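/* If the destination is doubleword aligned too ((%o0 & 4) == 0), branch
 * into ldd_std past its first instruction: the branch's delay slot
 * executes the first ldd of MOVE_BIGCHUNK below, which is the identical
 * insn that opens MOVE_BIGALIGNCHUNK -- this is why both macros must
 * begin with exactly the same instruction.
 */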
        be      ldd_std + 4
5:
        MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
        EXT(5b, 80b, 50f)
        subcc   %g7, 128, %g7
        add     %o1, 128, %o1
        bne     5b
         add    %o0, 128, %o0
3:
        andcc   %g1, 0x70, %g7
        be      copy_user_table_end
         andcc  %g1, 8, %g0

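/* Enter the table below at copy_user_table_end minus 24 bytes of code
 * for every 16 bytes still to copy: each MOVE_LASTCHUNK is six insns,
 * so the offset is (g7 / 16) * 24 = g7 + (g7 >> 1), which the srl/add
 * pair computes.
 */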
        sethi   %hi(copy_user_table_end), %o5
        srl     %g7, 1, %o4
        add     %g7, %o4, %o4
        add     %o1, %g7, %o1
        sub     %o5, %o4, %o5
        jmpl    %o5 + %lo(copy_user_table_end), %g0
         add    %o0, %g7, %o0

copy_user_table:
        MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
        EXT(copy_user_table, copy_user_table_end, 51f)
        be      copy_user_last7
         andcc  %g1, 4, %g0

        EX(ldd  [%o1], %g2, and %g1, 0xf)
        add     %o0, 8, %o0
        add     %o1, 8, %o1
        EX(st   %g2, [%o0 - 0x08], and %g1, 0xf)
        EX2(st  %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
        be      1f
         andcc  %g1, 2, %g0

        EX(ld   [%o1], %g2, and %g1, 7)
        add     %o1, 4, %o1
        EX(st   %g2, [%o0], and %g1, 7)
        add     %o0, 4, %o0
1:
        be      1f
         andcc  %g1, 1, %g0

        EX(lduh [%o1], %g2, and %g1, 3)
        add     %o1, 2, %o1
        EX(sth  %g2, [%o0], and %g1, 3)
        add     %o0, 2, %o0
1:
        be      1f
         nop

        EX(ldub [%o1], %g2, add %g0, 1)
        EX(stb  %g2, [%o0], add %g0, 1)
1:
        retl
         clr    %o0

ldd_std:
        MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
        EXT(ldd_std, 81b, 52f)
        subcc   %g7, 128, %g7
        add     %o1, 128, %o1
        bne     ldd_std
         add    %o0, 128, %o0

        andcc   %g1, 0x70, %g7
        be      copy_user_table_end
         andcc  %g1, 8, %g0

        sethi   %hi(copy_user_table_end), %o5
        srl     %g7, 1, %o4
        add     %g7, %o4, %o4
        add     %o1, %g7, %o1
        sub     %o5, %o4, %o5
        jmpl    %o5 + %lo(copy_user_table_end), %g0
         add    %o0, %g7, %o0

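/* src and dst disagree in their low two address bits (%o5 holds the
 * xor).  Lengths of 15 bytes or less go straight to short_end; if
 * %o5 == 2 the pointers can still be co-aligned on halfwords, so copy
 * halfwords, otherwise fall back to byte copies.
 */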
cannot_optimize:
        bleu    short_end
         cmp    %o5, 2

        bne     byte_chunk
         and    %o2, 0xfffffff0, %o3

        andcc   %o1, 1, %g0
        be      10f
         nop

        EXO2(ldub [%o1], %g2)
        add     %o1, 1, %o1
        EXO2(stb %g2, [%o0])
        sub     %o2, 1, %o2
        andcc   %o2, 0xfffffff0, %o3
        be      short_end
         add    %o0, 1, %o0
10:
        MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
        MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
        EXT(10b, 82b, 53f)
        subcc   %o3, 0x10, %o3
        add     %o1, 0x10, %o1
        bne     10b
         add    %o0, 0x10, %o0
        b       2f
         and    %o2, 0xe, %o3

byte_chunk:
        MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
        EXT(byte_chunk, 83b, 54f)
        subcc   %o3, 0x10, %o3
        add     %o1, 0x10, %o1
        bne     byte_chunk
         add    %o0, 0x10, %o0

short_end:
        and     %o2, 0xe, %o3
2:
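/* Enter the table below at short_table_end minus 16 bytes of code for
 * every 2 bytes still to copy: each MOVE_SHORTCHUNK is four insns, so
 * the offset is (o3 / 2) * 16 = o3 << 3.
 */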
        sethi   %hi(short_table_end), %o5
        sll     %o3, 3, %o4
        add     %o0, %o3, %o0
        sub     %o5, %o4, %o5
        add     %o1, %o3, %o1
        jmpl    %o5 + %lo(short_table_end), %g0
         andcc  %o2, 1, %g0
84:
        MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
        EXT(84b, short_table_end, 55f)
        be      1f
         nop
        EX(ldub [%o1], %g2, add %g0, 1)
        EX(stb  %g2, [%o0], add %g0, 1)
1:
        retl
         clr    %o0

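/* Fewer than 16 bytes with src and dst equally misaligned: if the
 * source is not word aligned take short_end, else peel an optional
 * 8-byte block here and let copy_user_last7 finish the 4/2/1-byte tail.
 */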
short_aligned_end:
        bne     short_end
         andcc  %o2, 8, %g0

        be      1f
         andcc  %o2, 4, %g0

        EXO2(ld [%o1 + 0x00], %g2)
        EXO2(ld [%o1 + 0x04], %g3)
        add     %o1, 8, %o1
        EXO2(st %g2, [%o0 + 0x00])
        EX(st   %g3, [%o0 + 0x04], sub %o2, 4)
        add     %o0, 8, %o0
1:
        b       copy_user_last7
         mov    %o2, %g1

        .section .fixup,#alloc,#execinstr
        .align  4
97:
        mov     %o2, %g3
fixupretl:
        retl
         mov    %g3, %o0

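/* Every numbered handler below computes the not-yet-copied byte count
 * into %g3 and funnels through fixupretl, which returns it in %o0.
 */
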
/* The exception routine sets %g2 to (faulting_insn - first_insn) >> 2 */
50:
/* This magic counts how many bytes are left when a fault happens inside
 * MOVE_BIGCHUNK.  It is derived from how much the ldds have read and the
 * sts have stored by the faulting instruction:
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 * o0 += (g2 / 12) * 32;
 */
        cmp     %g2, 12
        add     %o0, %g7, %o0
        bcs     1f
         cmp    %g2, 24
        bcs     2f
         cmp    %g2, 36
        bcs     3f
         nop
        sub     %g2, 12, %g2
        sub     %g7, 32, %g7
3:      sub     %g2, 12, %g2
        sub     %g7, 32, %g7
2:      sub     %g2, 12, %g2
        sub     %g7, 32, %g7
1:      cmp     %g2, 4
        bcs,a   60f
         clr    %g2
        sub     %g2, 4, %g2
        sll     %g2, 2, %g2
60:     and     %g1, 0x7f, %g3
        sub     %o0, %g7, %o0
        add     %g3, %g7, %g3
        ba      fixupretl
         sub    %g3, %g2, %g3
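/* The same calculation as a plain C model (illustrative only):
 *
 *      unsigned int bytes_left_50(unsigned int g1, unsigned int g7,
 *                                 unsigned int g2)
 *      {
 *              unsigned int x = g2 % 12;       // insn index in the chunk
 *              unsigned int done = (g2 / 12) * 32
 *                                + ((x < 4) ? 0 : (x - 4) * 4);
 *
 *              return (g1 & 0x7f) + g7 - done; // g7: big-chunk bytes left
 *      }
 */
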
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 -= (i / 6) * 16 + 16;
 */
        neg     %g2
        and     %g1, 0xf, %g1
        add     %g2, 41, %g2
        add     %o0, %g1, %o0
1:      cmp     %g2, 6
        bcs,a   2f
         cmp    %g2, 4
        add     %g1, 16, %g1
        b       1b
         sub    %g2, 6, %g2
2:      bcc,a   2f
         mov    16, %g2
        inc     %g2
        sll     %g2, 2, %g2
2:      add     %g1, %g2, %g3
        ba      fixupretl
         sub    %o0, %g3, %o0
52:
/* g3 = g1 + g7 - (g2 / 8) * 32 - ((g2 & 4) ? (g2 & 3) * 8 : 0);
   o0 += (g2 / 8) * 32 */
        andn    %g2, 7, %g4
        add     %o0, %g7, %o0
        andcc   %g2, 4, %g0
        and     %g2, 3, %g2
        sll     %g4, 2, %g4
        sll     %g2, 3, %g2
        bne     60b
         sub    %g7, %g4, %g7
        ba      60b
         clr    %g2
53:
/* g3 = o3 + (o2 & 15) - (g2 & 8) - ((g2 & 4) ? (g2 & 3) * 2 : 0);
   o0 += (g2 & 8) */
        and     %g2, 3, %g4
        andcc   %g2, 4, %g0
        and     %g2, 8, %g2
        sll     %g4, 1, %g4
        be      1f
         add    %o0, %g2, %o0
        add     %g2, %g4, %g2
1:      and     %o2, 0xf, %g3
        add     %g3, %o3, %g3
        ba      fixupretl
         sub    %g3, %g2, %g3
54:
/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - ((g2 & 2) ? (g2 & 1) : 0);
   o0 += (g2 / 4) * 2 */
        srl     %g2, 2, %o4
        and     %g2, 1, %o5
        srl     %g2, 1, %g2
        add     %o4, %o4, %o4
        and     %o5, %g2, %o5
        and     %o2, 0xf, %o2
        add     %o0, %o4, %o0
        sub     %o3, %o5, %o3
        sub     %o2, %o4, %o2
        ba      fixupretl
         add    %o2, %o3, %g3
55:
/* i = 27 - g2;
   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
   o0 -= i / 4 * 2 + 1 */
        neg     %g2
        and     %o2, 1, %o2
        add     %g2, 27, %g2
        srl     %g2, 2, %o5
        andcc   %g2, 3, %g0
        mov     1, %g2
        add     %o5, %o5, %o5
        be,a    1f
         clr    %g2
1:      add     %g2, %o5, %g3
        sub     %o0, %g3, %o0
        ba      fixupretl
         add    %g3, %o2, %g3

        .globl  __copy_user_end
__copy_user_end:
