root/arch/arc/lib/strcmp.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
   4  */
   5 
   6 /* This is optimized primarily for the ARC700.
   7    It would be possible to speed up the loops by one cycle / word
   8    respectively one cycle / byte by forcing double source 1 alignment, unrolling
   9    by a factor of two, and speculatively loading the second word / byte of
  10    source 1; however, that would increase the overhead for loop setup / finish,
  11    and strcmp might often terminate early.  */
  12 
  13 #include <linux/linkage.h>
  14 
/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:  r0 = s1, r1 = s2; return address in blink.
 * Out: r0 = 0 if the strings are equal, positive if s1 > s2,
 *      and a value with bit 31 set (i.e. negative) if s1 < s2.
 * Clobbers r2-r5, r12.
 */
   15 ENTRY_CFI(strcmp)
        ; If either pointer is not 32-bit aligned, fall back to the
        ; byte-at-a-time loop; otherwise compare a word at a time.
   16         or      r2,r0,r1
   17         bmsk_s  r2,r2,1                 ; keep the two low bits of r0|r1
   18         brne    r2,0,.Lcharloop         ; unaligned -> byte loop
   19         mov_s   r12,0x01010101
   20         ror     r5,r12                  ; r5 = 0x80808080
   21 .Lwordloop:
   22         ld.ab   r2,[r0,4]               ; r2 = *(u32 *)s1, s1 += 4
   23         ld.ab   r3,[r1,4]               ; r3 = *(u32 *)s2, s2 += 4
   24         nop_s                           ; NOTE(review): presumably fills the
                                              ; load-use delay on ARC700 - confirm
        ; Zero-byte detection: r4 = (r2 - 0x01010101) & ~r2 & 0x80808080
        ; is nonzero iff the s1 word contains a NUL byte.  Bytes more
        ; significant than a NUL may be flagged spuriously (see the
        ; big-endian note below).
   25         sub     r4,r2,r12
   26         bic     r4,r4,r2
   27         and     r4,r4,r5
   28         brne    r4,0,.Lfound0           ; s1 word contains a NUL
   29         breq    r2,r3,.Lwordloop        ; words equal, no NUL: keep going
        ; Words differ and neither contains a NUL (r4 == 0 here).
   30 #ifdef  __LITTLE_ENDIAN__
        ; Isolate the least significant (i.e. first-in-memory) differing
        ; byte in both words so that an unsigned compare of the masked
        ; words orders the strings correctly.
   31         xor     r0,r2,r3        ; mask for difference
   32         sub_s   r1,r0,1
   33         bic_s   r0,r0,r1        ; mask for least significant difference bit
   34         sub     r1,r5,r0
   35         xor     r0,r5,r1        ; mask for least significant difference byte
   36         and_s   r2,r2,r0
   37         and_s   r3,r3,r0
   38 #endif /* LITTLE ENDIAN */
        ; Big endian: the most significant byte is the first string byte,
        ; so the plain unsigned compare already gives the right order.
   39         cmp_s   r2,r3
   40         mov_s   r0,1                    ; assume s1 > s2
   41         j_s.d   [blink]
   42         bset.lo r0,r0,31                ; delay slot: r2 < r3 unsigned
                                              ; -> set bit 31, result < 0

   44         .balign 4
   45 #ifdef __LITTLE_ENDIAN__
        ; A NUL was found in the s1 word.  OR the NUL indicator into the
        ; difference mask so the terminator position also counts, then
        ; compare only up to the first difference/NUL byte.
   46 .Lfound0:
   47         xor     r0,r2,r3        ; mask for difference
   48         or      r0,r0,r4        ; or in zero indicator
   49         sub_s   r1,r0,1
   50         bic_s   r0,r0,r1        ; mask for least significant difference bit
   51         sub     r1,r5,r0
   52         xor     r0,r5,r1        ; mask for least significant difference byte
   53         and_s   r2,r2,r0
   54         and_s   r3,r3,r0
   55         sub.f   r0,r2,r3        ; 0 if equal up to and incl. the NUL
   56         mov.hi  r0,1            ; unsigned higher -> positive result
   57         j_s.d   [blink]
   58         bset.lo r0,r0,31        ; delay slot: unsigned lower -> negative
   59 #else /* BIG ENDIAN */
   60         /* The zero-detection above can mis-detect 0x01 bytes as zeroes
   61            because of carry-propagation from a lower significant zero byte.
   62            We can compensate for this by checking that bit0 is zero.
   63            This compensation is not necessary in the step where we
   64            get a low estimate for r2, because in any affected bytes
   65            we already have 0x00 or 0x01, which will remain unchanged
   66            when bit 7 is cleared.  */
   67         .balign 4
   68 .Lfound0:
   69         lsr     r0,r4,8         ; flags shifted onto the bytes after the NUL
   70         lsr_s   r1,r2
   71         bic_s   r2,r2,r0        ; get low estimate for r2 and get ...
   72         bic_s   r0,r0,r1        ; <this is the adjusted mask for zeros>
   73         or_s    r3,r3,r0        ; ... high estimate r3 so that r2 > r3 will ...
   74         cmp_s   r3,r2           ; ... be independent of trailing garbage
   75         or_s    r2,r2,r0        ; likewise for r3 > r2
   76         bic_s   r3,r3,r0
   77         rlc     r0,0            ; r0 := r2 > r3 ? 1 : 0
   78         cmp_s   r2,r3
   79         j_s.d   [blink]
   80         bset.lo r0,r0,31        ; delay slot: r2 < r3 -> negative result
   81 #endif /* ENDIAN */

        ; Byte-at-a-time fallback for unaligned operands.
   83         .balign 4
   84 .Lcharloop:
   85         ldb.ab  r2,[r0,1]       ; r2 = *(u8 *)s1, s1 += 1
   86         ldb.ab  r3,[r1,1]       ; r3 = *(u8 *)s2, s2 += 1
   87         nop_s                   ; NOTE(review): presumably load-delay filler
   88         breq    r2,0,.Lcmpend   ; end of s1 -> compare the final bytes
   89         breq    r2,r3,.Lcharloop ; equal and not NUL: keep going
   90 .Lcmpend:
   91         j_s.d   [blink]
   92         sub     r0,r2,r3        ; delay slot: difference of the mismatching
                                      ; (or final NUL) bytes
   93 END_CFI(strcmp)

/* [<][>][^][v][top][bottom][index][help] */