/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Author: Anton Blanchard <anton@au.ibm.com>
 * Copyright 2015 IBM Corporation.
 */
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/ppc-opcode.h>

#define off8    r6
#define off16   r7
#define off24   r8

#define rA      r9
#define rB      r10
#define rC      r11
#define rD      r27
#define rE      r28
#define rF      r29
#define rG      r30
#define rH      r31
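/*
 * rA-rC alias caller-volatile GPRs; rD-rH alias the non-volatile
 * r27-r31, which the .Llong loop saves below the stack pointer
 * (in the ABI red zone) before use and restores on exit.
 */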

#ifdef __LITTLE_ENDIAN__
#define LH      lhbrx
#define LW      lwbrx
#define LD      ldbrx
#define LVS     lvsr
#define VPERM(_VRT,_VRA,_VRB,_VRC) \
        vperm _VRT,_VRB,_VRA,_VRC
#else
#define LH      lhzx
#define LW      lwzx
#define LD      ldx
#define LVS     lvsl
#define VPERM(_VRT,_VRA,_VRB,_VRC) \
        vperm _VRT,_VRA,_VRB,_VRC
#endif
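/*
 * Note: on little endian the byte-reversed loads above (lhbrx/lwbrx/
 * ldbrx) give a big-endian register image, so a single unsigned cmpld
 * orders the bytes exactly as memcmp() requires; lvsr plus the swapped
 * vperm operands achieve the same for the unaligned 16-byte VMX loads.
 */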

#define VMX_THRESH 4096
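/*
 * enter_vmx_ops()/exit_vmx_ops() are C helpers, so these macros save
 * the live argument registers r3-r5 and LR around the call.
 * ENTER_VMX_OPS leaves the enter_vmx_ops() return value tested in cr1:
 * a zero return (beq cr1 at the call sites) means VMX cannot be used
 * and the scalar path must be taken instead.
 */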
#define ENTER_VMX_OPS   \
        mflr    r0;     \
        std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
        std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
        std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
        std     r0,16(r1); \
        stdu    r1,-STACKFRAMESIZE(r1); \
        bl      enter_vmx_ops; \
        cmpwi   cr1,r3,0; \
        ld      r0,STACKFRAMESIZE+16(r1); \
        ld      r3,STK_REG(R31)(r1); \
        ld      r4,STK_REG(R30)(r1); \
        ld      r5,STK_REG(R29)(r1); \
        addi    r1,r1,STACKFRAMESIZE; \
        mtlr    r0

#define EXIT_VMX_OPS \
        mflr    r0; \
        std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
        std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
        std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
        std     r0,16(r1); \
        stdu    r1,-STACKFRAMESIZE(r1); \
        bl      exit_vmx_ops; \
        ld      r0,STACKFRAMESIZE+16(r1); \
        ld      r3,STK_REG(R31)(r1); \
        ld      r4,STK_REG(R30)(r1); \
        ld      r5,STK_REG(R29)(r1); \
        addi    r1,r1,STACKFRAMESIZE; \
        mtlr    r0

/*
 * LD_VSR_CROSS16B loads the 2nd aligned 16 bytes for _vaddr, which is
 * not on a 16-byte boundary, and permutes it with the 1st aligned 16
 * bytes to reconstruct the 16 bytes that start at _vaddr:
 *
 *   |  y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z |
 *                                ^
 *                              _vaddr
 *
 * _vmask is the permute mask generated by LVS for _vaddr.
 * _v1st_qw is the 1st aligned quadword, already loaded by the caller,
 *   e.g. 0xyyyyyyyyyyyyy012 for big endian.
 * _v2nd_qw receives the 2nd aligned quadword,
 *   e.g. 0x3456789abcdefzzz for big endian.
 * The permute result, e.g. 0x0123456789abcdef, is placed in _v_res.
 */
#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \
        lvx     _v2nd_qw,_vaddr,off16; \
        VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask)

/*
 * There are 2 categories for memcmp:
 * 1) src/dst has the same offset to the 8 bytes boundary. The handlers
 *    are named like .Lsameoffset_xxxx
 * 2) src/dst has a different offset to the 8 bytes boundary. The
 *    handlers are named like .Ldiffoffset_xxxx
 */
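/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *   r3 = s1, r4 = s2, r5 = n
 * Returns <0, 0 or >0 in r3.
 */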
_GLOBAL_TOC(memcmp)
        cmpdi   cr1,r5,0

        /* Use the short loop if src/dst do not share the same offset
         * to the 8 bytes align boundary.
         */
        xor     r6,r3,r4
        andi.   r6,r6,7

        /* Fall back to the short loop when comparing fewer than
         * 8 bytes at aligned addrs.
         */
        cmpdi   cr6,r5,7

        beq     cr1,.Lzero
        bgt     cr6,.Lno_short

.Lshort:
        /* Compare 1~7 bytes (or a <32-byte tail from .Ltail) byte by
         * byte, unrolled 4x, with CTR counting the remaining bytes.
         */
        mtctr   r5
1:      lbz     rA,0(r3)
        lbz     rB,0(r4)
        subf.   rC,rB,rA
        bne     .Lnon_zero
        bdz     .Lzero

        lbz     rA,1(r3)
        lbz     rB,1(r4)
        subf.   rC,rB,rA
        bne     .Lnon_zero
        bdz     .Lzero

        lbz     rA,2(r3)
        lbz     rB,2(r4)
        subf.   rC,rB,rA
        bne     .Lnon_zero
        bdz     .Lzero

        lbz     rA,3(r3)
        lbz     rB,3(r4)
        subf.   rC,rB,rA
        bne     .Lnon_zero

        addi    r3,r3,4
        addi    r4,r4,4

        bdnz    1b

.Lzero:
        li      r3,0
        blr

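/*
 * More than 7 bytes to compare: prefetch both buffers, then pick the
 * same-offset or different-offset path based on cr0 from the xor/andi.
 * test above.
 */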
.Lno_short:
        dcbt    0,r3
        dcbt    0,r4
        bne     .Ldiffoffset_8bytes_make_align_start


.Lsameoffset_8bytes_make_align_start:
        /* attempt to compare to the 8 bytes alignment boundary of
         * src/dst first.
         */
        andi.   r6,r3,7

        /*
         * Unaligned: round both addrs down, load the double word at
         * (src & ~7UL) and shift the leading bits out of both values
         * before comparing. r6 = (r3 & 7) * 8, the offset in bits.
         */
        rlwinm  r6,r3,3,26,28
        beq     .Lsameoffset_8bytes_aligned
        clrrdi  r3,r3,3
        clrrdi  r4,r4,3
        LD      rA,0,r3
        LD      rB,0,r4
        sld     rA,rA,r6
        sld     rB,rB,r6
        cmpld   cr0,rA,rB
        srwi    r6,r6,3
        bne     cr0,.LcmpAB_lightweight
        subfic  r6,r6,8
        subf.   r5,r6,r5
        addi    r3,r3,8
        addi    r4,r4,8
        beq     .Lzero

.Lsameoffset_8bytes_aligned:
        /* now we are aligned with 8 bytes.
         * Use the .Llong loop if 32 or more bytes are left to compare.
         */
        cmpdi   cr6,r5,31
        bgt     cr6,.Llong

.Lcmp_lt32bytes:
        /* compare 1 ~ 31 bytes; at least the r3 addr is 8 bytes aligned now */
        cmpdi   cr5,r5,7
        srdi    r0,r5,3
        ble     cr5,.Lcmp_rest_lt8bytes

        /* handle 8 ~ 31 bytes */
        clrldi  r5,r5,61
        mtctr   r0
2:
        LD      rA,0,r3
        LD      rB,0,r4
        cmpld   cr0,rA,rB
        addi    r3,r3,8
        addi    r4,r4,8
        bne     cr0,.LcmpAB_lightweight
        bdnz    2b

        cmpwi   r5,0
        beq     .Lzero

.Lcmp_rest_lt8bytes:
        /*
         * Here we have less than 8 bytes to compare. At least s1 is aligned
         * to 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't
         * cross a page boundary, otherwise we might read past the end of
         * the buffer and trigger a page fault. We use 4K as the conservative
         * minimum page size. If we detect that case we go to the byte-by-byte
         * loop.
         *
         * Otherwise the next double word is loaded from s1 and s2, and
         * shifted right to compare the appropriate bits.
         */
        clrldi  r6,r4,(64-12)   // r6 = r4 & 0xfff
        cmpdi   r6,0xff8
        bgt     .Lshort

        subfic  r6,r5,8
        slwi    r6,r6,3
        LD      rA,0,r3
        LD      rB,0,r4
        srd     rA,rA,r6
        srd     rB,rB,r6
        cmpld   cr0,rA,rB
        bne     cr0,.LcmpAB_lightweight
        b       .Lzero

.Lnon_zero:
        mr      r3,rC
        blr

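/*
 * .Llong compares 32 bytes per iteration in four double-word pairs.
 * The loads for one iteration are issued ahead of the compares of the
 * previous one, and the four compares are spread over cr0/cr1/cr6/cr7,
 * hiding load latency without reordering the miscompare checks.
 */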
.Llong:
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
        /* Try the vmx loop if the length is 4K bytes or more */
        cmpldi  cr6,r5,VMX_THRESH
        bge     cr6,.Lsameoffset_vmx_cmp
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

.Llong_novmx_cmp:
#endif
        /* At least the s1 addr is aligned with 8 bytes */
        li      off8,8
        li      off16,16
        li      off24,24

        std     r31,-8(r1)
        std     r30,-16(r1)
        std     r29,-24(r1)
        std     r28,-32(r1)
        std     r27,-40(r1)

        srdi    r0,r5,5
        mtctr   r0
        andi.   r5,r5,31

        LD      rA,0,r3
        LD      rB,0,r4

        LD      rC,off8,r3
        LD      rD,off8,r4

        LD      rE,off16,r3
        LD      rF,off16,r4

        LD      rG,off24,r3
        LD      rH,off24,r4
        cmpld   cr0,rA,rB

        addi    r3,r3,32
        addi    r4,r4,32

        bdz     .Lfirst32

        LD      rA,0,r3
        LD      rB,0,r4
        cmpld   cr1,rC,rD

        LD      rC,off8,r3
        LD      rD,off8,r4
        cmpld   cr6,rE,rF

        LD      rE,off16,r3
        LD      rF,off16,r4
        cmpld   cr7,rG,rH
        bne     cr0,.LcmpAB

        LD      rG,off24,r3
        LD      rH,off24,r4
        cmpld   cr0,rA,rB
        bne     cr1,.LcmpCD

        addi    r3,r3,32
        addi    r4,r4,32

        bdz     .Lsecond32

        .balign 16
 309 
 310 1:      LD      rA,0,r3
 311         LD      rB,0,r4
 312         cmpld   cr1,rC,rD
 313         bne     cr6,.LcmpEF
 314 
 315         LD      rC,off8,r3
 316         LD      rD,off8,r4
 317         cmpld   cr6,rE,rF
 318         bne     cr7,.LcmpGH
 319 
 320         LD      rE,off16,r3
 321         LD      rF,off16,r4
 322         cmpld   cr7,rG,rH
 323         bne     cr0,.LcmpAB
 324 
 325         LD      rG,off24,r3
 326         LD      rH,off24,r4
 327         cmpld   cr0,rA,rB
 328         bne     cr1,.LcmpCD
 329 
 330         addi    r3,r3,32
 331         addi    r4,r4,32
 332 
 333         bdnz    1b
 334 
 335 .Lsecond32:
 336         cmpld   cr1,rC,rD
 337         bne     cr6,.LcmpEF
 338 
 339         cmpld   cr6,rE,rF
 340         bne     cr7,.LcmpGH
 341 
 342         cmpld   cr7,rG,rH
 343         bne     cr0,.LcmpAB
 344 
 345         bne     cr1,.LcmpCD
 346         bne     cr6,.LcmpEF
 347         bne     cr7,.LcmpGH
 348 
 349 .Ltail:
 350         ld      r31,-8(r1)
 351         ld      r30,-16(r1)
 352         ld      r29,-24(r1)
 353         ld      r28,-32(r1)
 354         ld      r27,-40(r1)
 355 
 356         cmpdi   r5,0
 357         beq     .Lzero
 358         b       .Lshort
 359 
 360 .Lfirst32:
 361         cmpld   cr1,rC,rD
 362         cmpld   cr6,rE,rF
 363         cmpld   cr7,rG,rH
 364 
 365         bne     cr0,.LcmpAB
 366         bne     cr1,.LcmpCD
 367         bne     cr6,.LcmpEF
 368         bne     cr7,.LcmpGH
 369 
 370         b       .Ltail
 371 
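/*
 * A mismatching double-word pair was found; its unsigned compare result
 * is still in the CR field tested by the handler. Turn it into +1/-1,
 * restore the non-volatile GPRs and return.
 */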
.LcmpAB:
        li      r3,1
        bgt     cr0,.Lout
        li      r3,-1
        b       .Lout

.LcmpCD:
        li      r3,1
        bgt     cr1,.Lout
        li      r3,-1
        b       .Lout

.LcmpEF:
        li      r3,1
        bgt     cr6,.Lout
        li      r3,-1
        b       .Lout

.LcmpGH:
        li      r3,1
        bgt     cr7,.Lout
        li      r3,-1

.Lout:
        ld      r31,-8(r1)
        ld      r30,-16(r1)
        ld      r29,-24(r1)
        ld      r28,-32(r1)
        ld      r27,-40(r1)
        blr

.LcmpAB_lightweight:    /* skip the NV GPRS restore */
        li      r3,1
        bgtlr
        li      r3,-1
        blr

#ifdef CONFIG_ALTIVEC
.Lsameoffset_vmx_cmp:
        /* Entered with src/dst addrs that share the same offset to the
         * 8 bytes align boundary, and with a length of at least
         * VMX_THRESH.
         *
         * memcmp() tends to fail early, within the first 32 bytes.
         * Before paying the 32x128-bit VMX register save/restore
         * penalty, compare the first 32 bytes with scalar loads so
         * the common early-mismatch case stays cheap.
         */
        li      r0,4
        mtctr   r0
.Lsameoffset_prechk_32B_loop:
        LD      rA,0,r3
        LD      rB,0,r4
        cmpld   cr0,rA,rB
        addi    r3,r3,8
        addi    r4,r4,8
        bne     cr0,.LcmpAB_lightweight
        addi    r5,r5,-8
        bdnz    .Lsameoffset_prechk_32B_loop

        ENTER_VMX_OPS
        beq     cr1,.Llong_novmx_cmp

3:
        /* need to check whether r4 has the same offset as r3
         * to the 16 bytes boundary.
         */
        xor     r0,r3,r4
        andi.   r0,r0,0xf
        bne     .Ldiffoffset_vmx_cmp_start

        /* len is no less than 4KB; align with 16 bytes further */
        andi.   rA,r3,8
        LD      rA,0,r3
        beq     4f
        LD      rB,0,r4
        cmpld   cr0,rA,rB
        addi    r3,r3,8
        addi    r4,r4,8
        addi    r5,r5,-8

        beq     cr0,4f
        /* save and restore cr0 across EXIT_VMX_OPS */
        mfocrf  r5,128
        EXIT_VMX_OPS
        mtocrf  128,r5
        b       .LcmpAB_lightweight

4:
        /* compare 32 bytes for each loop */
        srdi    r0,r5,5
        mtctr   r0
        clrldi  r5,r5,59
        li      off16,16

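/*
 * Main same-offset VMX loop: two vcmpequd. per iteration check 32
 * bytes. The record form sets the cr6 LT bit only when all lanes are
 * equal, so "bnl cr6" branches as soon as a 16-byte chunk differs.
 */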
.balign 16
5:
        lvx     v0,0,r3
        lvx     v1,0,r4
        VCMPEQUD_RC(v0,v0,v1)
        bnl     cr6,7f
        lvx     v0,off16,r3
        lvx     v1,off16,r4
        VCMPEQUD_RC(v0,v0,v1)
        bnl     cr6,6f
        addi    r3,r3,32
        addi    r4,r4,32
        bdnz    5b

        EXIT_VMX_OPS
        cmpdi   r5,0
        beq     .Lzero
        b       .Lcmp_lt32bytes

6:
        addi    r3,r3,16
        addi    r4,r4,16

7:
        /* the diff is within the 16 bytes at r3/r4; redo them with
         * scalar loads.
         */
        EXIT_VMX_OPS
        LD      rA,0,r3
        LD      rB,0,r4
        cmpld   cr0,rA,rB
        li      off8,8
        bne     cr0,.LcmpAB_lightweight

        LD      rA,off8,r3
        LD      rB,off8,r4
        cmpld   cr0,rA,rB
        bne     cr0,.LcmpAB_lightweight
        b       .Lzero
#endif

.Ldiffoffset_8bytes_make_align_start:
        /* now try to align s1 with 8 bytes */
        rlwinm  r6,r3,3,26,28
        beq     .Ldiffoffset_align_s1_8bytes

        clrrdi  r3,r3,3
        LD      rA,0,r3
        LD      rB,0,r4         /* unaligned load */
        sld     rA,rA,r6
        srd     rA,rA,r6
        srd     rB,rB,r6
        cmpld   cr0,rA,rB
        srwi    r6,r6,3
        bne     cr0,.LcmpAB_lightweight

        subfic  r6,r6,8
        subf.   r5,r6,r5
        addi    r3,r3,8
        add     r4,r4,r6

        beq     .Lzero

.Ldiffoffset_align_s1_8bytes:
        /* now s1 is aligned with 8 bytes */
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
        /* only do vmx ops when the size is 4K bytes or more */
        cmpdi   cr5,r5,VMX_THRESH
        bge     cr5,.Ldiffoffset_vmx_cmp
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

.Ldiffoffset_novmx_cmp:
#endif

        cmpdi   cr5,r5,31
        ble     cr5,.Lcmp_lt32bytes

#ifdef CONFIG_ALTIVEC
        b       .Llong_novmx_cmp
#else
        b       .Llong
#endif

#ifdef CONFIG_ALTIVEC
.Ldiffoffset_vmx_cmp:
        /* perform a 32 bytes pre-check before enabling
         * VMX operations.
         */
        li      r0,4
        mtctr   r0
.Ldiffoffset_prechk_32B_loop:
        LD      rA,0,r3
        LD      rB,0,r4
        cmpld   cr0,rA,rB
        addi    r3,r3,8
        addi    r4,r4,8
        bne     cr0,.LcmpAB_lightweight
        addi    r5,r5,-8
        bdnz    .Ldiffoffset_prechk_32B_loop

        ENTER_VMX_OPS
        beq     cr1,.Ldiffoffset_novmx_cmp

.Ldiffoffset_vmx_cmp_start:
        /* first try to align r3 with 16 bytes */
        andi.   r6,r3,0xf
        li      off16,16
        beq     .Ldiffoffset_vmx_s1_16bytes_align

        LVS     v3,0,r3
        LVS     v4,0,r4

        lvx     v5,0,r3
        lvx     v6,0,r4
        LD_VSR_CROSS16B(r3,v3,v5,v7,v9)
        LD_VSR_CROSS16B(r4,v4,v6,v8,v10)

        VCMPEQUB_RC(v7,v9,v10)
        bnl     cr6,.Ldiffoffset_vmx_diff_found

        subfic  r6,r6,16
        subf    r5,r6,r5
        add     r3,r3,r6
        add     r4,r4,r6

.Ldiffoffset_vmx_s1_16bytes_align:
        /* now s1 is aligned with 16 bytes */
        lvx     v6,0,r4
        LVS     v4,0,r4
        srdi    r6,r5,5         /* loop for 32 bytes each */
        clrldi  r5,r5,59
        mtctr   r6

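/*
 * 32 bytes per iteration. r3 is 16-byte aligned, r4 is not, so each r4
 * chunk is rebuilt with LD_VSR_CROSS16B. The macro leaves the 2nd
 * aligned quadword in v8; "vor v6,v8,v8" carries it over as the next
 * iteration's 1st quadword, saving one lvx per 16 bytes.
 */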
.balign 16
.Ldiffoffset_vmx_32bytesloop:
        /* the first qw of r4 was saved in v6 */
        lvx     v9,0,r3
        LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
        VCMPEQUB_RC(v7,v9,v10)
        vor     v6,v8,v8
        bnl     cr6,.Ldiffoffset_vmx_diff_found

        addi    r3,r3,16
        addi    r4,r4,16

        lvx     v9,0,r3
        LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
        VCMPEQUB_RC(v7,v9,v10)
        vor     v6,v8,v8
        bnl     cr6,.Ldiffoffset_vmx_diff_found

        addi    r3,r3,16
        addi    r4,r4,16

        bdnz    .Ldiffoffset_vmx_32bytesloop

        EXIT_VMX_OPS

        cmpdi   r5,0
        beq     .Lzero
        b       .Lcmp_lt32bytes

.Ldiffoffset_vmx_diff_found:
        EXIT_VMX_OPS
        /* the diff is within the next 16 bytes; redo them with the
         * scalar path.
         */
        li      r5,16
        b       .Lcmp_lt32bytes

#endif
EXPORT_SYMBOL(memcmp)