/*
 * Author: Anton Blanchard <anton@au.ibm.com>
 * Copyright 2015 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/ppc_asm.h>

/* Index registers holding the constant byte offsets used by the indexed loads */
#define off8	r6
#define off16	r7
#define off24	r8

/*
 * Data registers. In the long path each pair holds one doubleword from the
 * first buffer (rA, rC, rE, rG) and the matching doubleword from the second
 * buffer (rB, rD, rF, rH). rD-rH map to r27-r31, which the long path saves
 * before use and restores on every exit.
 */
#define rA	r9
#define rB	r10
#define rC	r11
#define rD	r27
#define rE	r28
#define rF	r29
#define rG	r30
#define rH	r31

/*
 * On little-endian, ldbrx loads the doubleword byte-reversed, so the byte at
 * the lowest address ends up most significant. An unsigned doubleword compare
 * (cmpld) then orders the data the same way a byte-by-byte compare would.
 * On big-endian a plain ldx already has that property.
 */
#ifdef __LITTLE_ENDIAN__
#define LD	ldbrx
#else
#define LD	ldx
#endif

/*
 * memcmp: compare two byte buffers.
 *
 * In:
 *	r3 = address of the first buffer
 *	r4 = address of the second buffer
 *	r5 = number of bytes to compare
 *
 * Out:
 *	r3 = 0 if r5 is 0 or every compared byte matches.
 *	     Otherwise the sign reflects the first difference:
 *	       - short (byte) loop: (byte from first) - (byte from second)
 *	       - long (doubleword) loop: 1 if the first buffer is greater,
 *	         -1 if it is smaller
 *
 * Modifies: r0, r3-r11, ctr, cr0, cr1, cr6, cr7.
 * r27-r31 are used by the long path only and are saved/restored there.
 *
 * Strategy:
 *	.Lshort  byte-at-a-time loop, unrolled four times. Used when either
 *	         pointer is not 8B aligned, when the length is below 32B, and
 *	         for the 0-31 leftover bytes after the long loop.
 *	.Llong   compares 32B per iteration as four doubleword pairs. Loads for
 *	         the next 32B block are interleaved with the compares and
 *	         branches for the previous block, so every condition register
 *	         field must be tested before the next compare overwrites it.
 */
_GLOBAL(memcmp)
	/* cr1: is the length zero? (tested below, after cr0 and cr6 are set up) */
	cmpdi	cr1,r5,0

	/* Use the short loop unless both strings are 8B aligned */
	or	r6,r3,r4	/* r6 is scratch here; it becomes off8 in .Llong */
	andi.	r6,r6,7		/* cr0: non-zero if either pointer has low bits set */

	/* Use the short loop if length is less than 32B */
	/*
	 * NOTE(review): cmpdi is a signed compare, so a length with the top
	 * bit set would not be "greater than 31" and would take the short loop.
	 */
	cmpdi	cr6,r5,31

	beq	cr1,.Lzero	/* nothing to compare */
	bne	.Lshort		/* cr0 from the andi. above: misaligned */
	bgt	cr6,.Llong	/* aligned and at least 32B */
	/* aligned but shorter than 32B: fall through to the short loop */

.Lshort:
	/* ctr = number of bytes left to compare (non-zero on every entry) */
	mtctr	r5

	/*
	 * Four bytes per iteration. ctr is decremented once per byte: bdz after
	 * bytes 0-2 and bdnz after byte 3, so the loop stops exactly when the
	 * byte count is exhausted.
	 */
1:	lbz	rA,0(r3)
	lbz	rB,0(r4)
	subf.	rC,rB,rA	/* rC = rA - rB, cr0 set from the result */
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,1(r3)
	lbz	rB,1(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,2(r3)
	lbz	rB,2(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,3(r3)
	lbz	rB,3(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero

	addi	r3,r3,4
	addi	r4,r4,4

	bdnz	1b

	/* All bytes matched (or the length was zero): return 0 */
.Lzero:
	li	r3,0
	blr

	/* Short loop found a difference: return the byte difference */
.Lnon_zero:
	mr	r3,rC
	blr

.Llong:
	li	off8,8
	li	off16,16
	li	off24,24

	/*
	 * Save r27-r31 below the stack pointer without allocating a frame.
	 * NOTE(review): this relies on the area just below r1 being safe to
	 * use here (presumably the ABI protected zone) - confirm for the
	 * build environment.
	 */
	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)

	srdi	r0,r5,5		/* r0 = number of whole 32B blocks (at least 1) */
	mtctr	r0
	andi.	r5,r5,31	/* r5 = leftover bytes, handled via .Ltail */

	/* Load the first 32B block; only the A/B compare is issued so far */
	LD	rA,0,r3
	LD	rB,0,r4

	LD	rC,off8,r3
	LD	rD,off8,r4

	LD	rE,off16,r3
	LD	rF,off16,r4

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB

	addi	r3,r3,32
	addi	r4,r4,32

	/* Only one block: finish its compares without loading any more data */
	bdz	.Lfirst32

	/*
	 * Load the second block while comparing the first. Each register pair
	 * is compared just before it is reloaded:
	 *	cr0 = A/B, cr1 = C/D, cr6 = E/F, cr7 = G/H
	 */
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB	/* first block, A/B */

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB	/* second block, A/B */
	bne	cr1,.LcmpCD	/* first block, C/D */

	addi	r3,r3,32
	addi	r4,r4,32

	/* Exactly two blocks: drain the pending compares */
	bdz	.Lsecond32

	.balign	16

	/*
	 * Steady state. On entry cr6/cr7 hold the E/F and G/H results of the
	 * block before last, cr0 holds A/B of the last loaded block, and
	 * rC-rH still hold the last loaded block's remaining doublewords.
	 * Each branch tests a CR field before the next compare overwrites it,
	 * so differences are always reported in memory order.
	 */
1:	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdnz	1b

	/*
	 * Drain: no more blocks to load. Finish the outstanding checks for the
	 * previous block (cr6, cr7) and all four compares of the last block.
	 */
.Lsecond32:
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	/*
	 * All whole blocks matched. Restore the saved registers, then either
	 * return 0 or compare the leftover bytes with the short loop (r3/r4
	 * already point past the compared blocks, r5 = leftover count).
	 */
.Ltail:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)

	/* cr0 was overwritten by the compares, so re-test the leftover count */
	cmpdi	r5,0
	beq	.Lzero
	b	.Lshort

	/* Single-block case: cr0 (A/B) is already set, compute the other three */
.Lfirst32:
	cmpld	cr1,rC,rD
	cmpld	cr6,rE,rF
	cmpld	cr7,rG,rH

	bne	cr0,.LcmpAB
	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	b	.Ltail

	/*
	 * Difference found in the long path. Each handler reads the CR field
	 * that flagged the mismatch: return 1 if the doubleword from the first
	 * buffer is greater (unsigned), otherwise -1.
	 */
.LcmpAB:
	li	r3,1
	bgt	cr0,.Lout
	li	r3,-1
	b	.Lout

.LcmpCD:
	li	r3,1
	bgt	cr1,.Lout
	li	r3,-1
	b	.Lout

.LcmpEF:
	li	r3,1
	bgt	cr6,.Lout
	li	r3,-1
	b	.Lout

.LcmpGH:
	li	r3,1
	bgt	cr7,.Lout
	li	r3,-1
	/* fall through to .Lout */

	/* Common exit for the long path: restore r27-r31 and return r3 */
.Lout:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)
	blr