root/arch/arm64/lib/strcmp.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright (C) 2013 ARM Ltd.
   4  * Copyright (C) 2013 Linaro.
   5  *
   6  * This code is based on glibc cortex strings work originally authored by Linaro
   7  * and can be found @
   8  *
   9  * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
  10  * files/head:/src/aarch64/
  11  */
  12 
  13 #include <linux/linkage.h>
  14 #include <asm/assembler.h>
  15 
  16 /*
  17  * compare two strings
  18  *
  19  * Parameters:
  20  *      x0 - const string 1 pointer
  21  *      x1 - const string 2 pointer
  22  * Returns:
  23  *      x0 - an integer less than, equal to, or greater than zero
  24  *      if s1 is found, respectively, to be less than, to match,
  25  *      or be greater than s2.
  26  */
  27 
  28 #define REP8_01 0x0101010101010101      /* 0x01 in each byte.  */
  29 #define REP8_7f 0x7f7f7f7f7f7f7f7f      /* 0x7f in each byte.  */
  30 #define REP8_80 0x8080808080808080      /* 0x80 in each byte.  */

  32 /* Parameters and result (result shares x0 with src1).  */
  33 src1            .req    x0
  34 src2            .req    x1
  35 result          .req    x0

  37 /* Internal variables.  */
  38 data1           .req    x2      /* Bytes loaded from src1.  */
  39 data1w          .req    w2      /* 32-bit view of data1.  */
  40 data2           .req    x3      /* Bytes loaded from src2.  */
  41 data2w          .req    w3      /* 32-bit view of data2.  */
  42 has_nul         .req    x4      /* Non-zero if data1 contains a NUL byte.  */
  43 diff            .req    x5      /* data1 ^ data2.  */
  44 syndrome        .req    x6      /* diff | has_nul.  */
  45 tmp1            .req    x7
  46 tmp2            .req    x8
  47 tmp3            .req    x9
  48 zeroones        .req    x10     /* Holds REP8_01.  */
  49 pos             .req    x11
  50 
  51 WEAK(strcmp)
  52         eor     tmp1, src1, src2
  53         mov     zeroones, #REP8_01
  54         tst     tmp1, #7                /* Do the low 3 address bits differ?  */
  55         b.ne    .Lmisaligned8
  56         ands    tmp1, src1, #7          /* tmp1 = src1 & 7.  */
  57         b.ne    .Lmutual_align
  58 
  59         /*
  60         * NUL detection works on the principle that (X - 1) & (~X) & 0x80
  61         * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
  62         * can be done in parallel across the entire word.
  63         */
  64 .Lloop_aligned:
  65         ldr     data1, [src1], #8
  66         ldr     data2, [src2], #8
  67 .Lstart_realigned:
  68         sub     tmp1, data1, zeroones
  69         orr     tmp2, data1, #REP8_7f
  70         eor     diff, data1, data2      /* Non-zero if differences found.  */
  71         bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
  72         orr     syndrome, diff, has_nul
  73         cbz     syndrome, .Lloop_aligned /* No difference, no NUL: next dword.  */
  74         b       .Lcal_cmpresult
  75 
  76 .Lmutual_align:
  77         /*
  78         * Sources are mutually aligned, but are not currently at an
  79         * alignment boundary.  Round down the addresses and then mask off
  80         * the bytes that precede the start point.
  81         */
  82         bic     src1, src1, #7
  83         bic     src2, src2, #7
  84         lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
  85         ldr     data1, [src1], #8
  86         neg     tmp1, tmp1              /* Bits to alignment -64.  */
  87         ldr     data2, [src2], #8
  88         mov     tmp2, #~0
  89         /* Big-endian.  Early bytes are at MSB.  */
  90 CPU_BE( lsl     tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
  91         /* Little-endian.  Early bytes are at LSB.  */
  92 CPU_LE( lsr     tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
  93 
  94         orr     data1, data1, tmp2
  95         orr     data2, data2, tmp2
  96         b       .Lstart_realigned
  97 
  98 .Lmisaligned8:
  99         /*
 100         * Work out how many leading bytes to compare one at a time.
 101         * After that, one of the two string addresses will be 8-byte aligned.
 102         */
 103         and     tmp1, src1, #7
 104         neg     tmp1, tmp1
 105         add     tmp1, tmp1, #8
 106         and     tmp2, src2, #7
 107         neg     tmp2, tmp2
 108         add     tmp2, tmp2, #8
 109         subs    tmp3, tmp1, tmp2
 110         csel    pos, tmp1, tmp2, hi /* Choose the maximum.  */
 111 .Ltinycmp:
 112         ldrb    data1w, [src1], #1
 113         ldrb    data2w, [src2], #1
 114         subs    pos, pos, #1
 115         ccmp    data1w, #1, #0, ne  /* NZCV = 0b0000.  */
 116         ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
 117         b.eq    .Ltinycmp
 118         cbnz    pos, 1f /* Found a NUL or a mismatch.  */
 119         cmp     data1w, #1
 120         ccmp    data1w, data2w, #0, cs
 121         b.eq    .Lstart_align /* Last bytes are equal and non-NUL.  */
 122 1:
 123         sub     result, data1, data2
 124         ret
 125 
 126 .Lstart_align:
 127         ands    xzr, src1, #7
 128         b.eq    .Lrecal_offset
 129         /* Adjust both pointers by tmp3 to make src1 aligned.  */
 130         add     src1, src1, tmp3
 131         add     src2, src2, tmp3
 132         /* Load 8 bytes from aligned src1 and non-aligned src2.  */
 133         ldr     data1, [src1], #8
 134         ldr     data2, [src2], #8
 135 
 136         sub     tmp1, data1, zeroones
 137         orr     tmp2, data1, #REP8_7f
 138         bic     has_nul, tmp1, tmp2
 139         eor     diff, data1, data2 /* Non-zero if differences found.  */
 140         orr     syndrome, diff, has_nul
 141         cbnz    syndrome, .Lcal_cmpresult
 142         /* How far is the current src2 from the alignment boundary?  */
 143         and     tmp3, tmp3, #7
 144 .Lrecal_offset:
 145         neg     pos, tmp3
 146 .Lloopcmp_proc:
 147         /*
 148         * Divide the eight bytes into two parts. First, step back to the
 149         * src2 alignment boundary, load eight bytes from that boundary,
 150         * and compare them with the corresponding bytes from src1.
 151         * If all 8 bytes are equal, then start the second part's comparison.
 152         * Otherwise finish the comparison.
 153         * This special handling can guarantee all the accesses are in the
 154         * thread/task space, to avoid out-of-range accesses.
 155         */
 156         ldr     data1, [src1,pos]
 157         ldr     data2, [src2,pos]
 158         sub     tmp1, data1, zeroones
 159         orr     tmp2, data1, #REP8_7f
 160         bic     has_nul, tmp1, tmp2
 161         eor     diff, data1, data2  /* Non-zero if differences found.  */
 162         orr     syndrome, diff, has_nul
 163         cbnz    syndrome, .Lcal_cmpresult
 164 
 165         /* The second part of the comparison.  */
 166         ldr     data1, [src1], #8
 167         ldr     data2, [src2], #8
 168         sub     tmp1, data1, zeroones
 169         orr     tmp2, data1, #REP8_7f
 170         bic     has_nul, tmp1, tmp2
 171         eor     diff, data1, data2  /* Non-zero if differences found.  */
 172         orr     syndrome, diff, has_nul
 173         cbz     syndrome, .Lloopcmp_proc
 174 
 175 .Lcal_cmpresult:
 176         /*
 177         * Reverse the byte order to big-endian, so that CLZ can find the
 178         * most significant non-zero bit.
 179         */
 180 CPU_LE( rev     syndrome, syndrome )
 181 CPU_LE( rev     data1, data1 )
 182 CPU_LE( rev     data2, data2 )
 183 
 184         /*
 185         * For big-endian we cannot use the trick with the syndrome value
 186         * as carry-propagation can corrupt the upper bits if the trailing
 187         * bytes in the string contain 0x01.
 188         * However, if there is no NUL byte in the dword, we can generate
 189         * the result directly.  We cannot just subtract the bytes as the
 190         * MSB might be significant.
 191         */
 192 CPU_BE( cbnz    has_nul, 1f )
 193 CPU_BE( cmp     data1, data2 )
 194 CPU_BE( cset    result, ne )
 195 CPU_BE( cneg    result, result, lo )
 196 CPU_BE( ret )
 197 CPU_BE( 1: )
 198         /* Re-compute the NUL-byte detection, using a byte-reversed value.  */
 199 CPU_BE( rev     tmp3, data1 )
 200 CPU_BE( sub     tmp1, tmp3, zeroones )
 201 CPU_BE( orr     tmp2, tmp3, #REP8_7f )
 202 CPU_BE( bic     has_nul, tmp1, tmp2 )
 203 CPU_BE( rev     has_nul, has_nul )
 204 CPU_BE( orr     syndrome, diff, has_nul )
 205 
 206         clz     pos, syndrome
 207         /*
 208         * The MS-non-zero bit of the syndrome marks either the first bit
 209         * that is different, or the top bit of the first zero byte.
 210         * Shifting left now will bring the critical information into the
 211         * top bits.
 212         */
 213         lsl     data1, data1, pos
 214         lsl     data2, data2, pos
 215         /*
 216         * But we need to zero-extend (char is unsigned) the value and then
 217         * perform a signed 32-bit subtraction.
 218         */
 219         lsr     data1, data1, #56
 220         sub     result, data1, data2, lsr #56
 221         ret
 222 ENDPIPROC(strcmp)
 223 EXPORT_SYMBOL_NOKASAN(strcmp)

/* [<][>][^][v][top][bottom][index][help] */