1
2
3
4
5
6
7
8
9
10
11
12
13 #include <linux/linkage.h>
14 #include <asm/assembler.h>
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Symbolic register names used by memcmp below.
 * Arguments and result: src1 and result both name x0, so writing the
 * result overwrites the first pointer argument.
 */
29 src1 .req x0
30 src2 .req x1
31 limit .req x2
32 result .req x0
33
34
/*
 * Scratch registers. data1w/data2w are the 32-bit (w) views of the same
 * registers as data1/data2 and are used for the byte loads.
 */
35 data1 .req x3
36 data1w .req w3
37 data2 .req x4
38 data2w .req w4
/* has_nul is not referenced by any instruction in the visible code. */
39 has_nul .req x5
40 diff .req x6
41 endloop .req x7
42 tmp1 .req x8
43 tmp2 .req x9
44 tmp3 .req x10
45 pos .req x11
46 limit_wd .req x12
47 mask .req x13
48
/*
 * memcmp - compare two memory areas.
 *
 * In:   x0 (src1)  - pointer to the first area
 *       x1 (src2)  - pointer to the second area
 *       x2 (limit) - number of bytes to compare
 * Out:  x0 (result) - 0 when limit is 0 or all compared bytes are equal;
 *       otherwise a non-zero value computed as (value from area 1) minus
 *       (value from area 2) at the first difference, both zero-extended.
 * Uses: x3, x4, x6-x13 as scratch and the condition flags. The stack
 *       pointer is never touched.
 *
 * WEAK(), ENDPIPROC(), EXPORT_SYMBOL_NOKASAN(), CPU_LE() and CPU_BE() are
 * macros defined outside this file (presumably in the included headers);
 * CPU_LE()/CPU_BE() presumably emit their instruction only for little-/
 * big-endian builds respectively - confirm against the headers.
 */
49 WEAK(memcmp)
/* Nothing to compare: return 0. */
50 cbz limit, .Lret0
/* Do the low three address bits of the two pointers differ? */
51 eor tmp1, src1, src2
52 tst tmp1, #7
53 b.ne .Lmisaligned8
/* Same offset within an 8-byte word; tmp1 = that offset (0 if aligned). */
54 ands tmp1, src1, #7
55 b.ne .Lmutual_align
/* limit != 0 here, so limit - 1 cannot underflow. */
56 sub limit_wd, limit, #1
/* Bytes -> 8-byte words: index of the last word to examine. */
57 lsr limit_wd, limit_wd, #3
58
59
60
61
/* Both pointers are 8-byte aligned: compare eight bytes per iteration. */
62 .Lloop_aligned:
63 ldr data1, [src1], #8
64 ldr data2, [src2], #8
65 .Lstart_realigned:
/* Carry stays set (cs) while limit_wd was non-zero before the decrement. */
66 subs limit_wd, limit_wd, #1
/* diff is non-zero if the two words differ. */
67 eor diff, data1, data2
/* endloop = diff while words remain, else all-ones to force loop exit. */
68 csinv endloop, diff, xzr, cs
69 cbz endloop, .Lloop_aligned
70
71
/* limit_wd did not go negative: the loop ended because of a difference. */
72 tbz limit_wd, #63, .Lnot_limit
73
74
/* limit is a multiple of 8: every byte of the last word is valid. */
75 ands limit, limit, #7
76 b.eq .Lnot_limit
77
78
79
80
/*
 * Only (limit % 8) bytes of the last word are valid. Build a mask
 * covering the bytes past the end, clear them in both data words and set
 * them in diff so the bit scan at .Lnot_limit stops at the end of the
 * valid data.
 */
/* Bytes -> bits. */
81 lsl limit, limit, #3
82 mov mask, #~0
83 CPU_BE( lsr mask, mask, limit )
84 CPU_LE( lsl mask, mask, limit )
85 bic data1, data1, mask
86 bic data2, data2, mask
87
88 orr diff, diff, mask
89 b .Lnot_limit
90
91 .Lmutual_align:
92
93
94
95
96
/*
 * Both pointers share the same non-zero offset (tmp1) inside an 8-byte
 * word. Round both down to the word boundary, load the first word, and
 * then neutralise the bytes that precede the real start.
 */
97 bic src1, src1, #7
98 bic src2, src2, #7
99 ldr data1, [src1], #8
100 ldr data2, [src2], #8
101
102
103
104
/*
 * Compute the word count for (limit + tmp1) without forming that sum
 * first: limit_wd = (limit-1)/8 + (((limit-1)%8) + tmp1)/8.
 */
105 sub limit_wd, limit, #1
106 and tmp3, limit_wd, #7
107 lsr limit_wd, limit_wd, #3
108 add tmp3, tmp3, tmp1
109 add limit_wd, limit_wd, tmp3, lsr #3
/* Account for the extra leading bytes in the byte limit as well. */
110 add limit, limit, tmp1
111
/* Offset in bytes -> bits, then negate. */
112 lsl tmp1, tmp1, #3
113 neg tmp1, tmp1
114 mov tmp2, #~0
115
/*
 * Register-specified shifts use the amount modulo 64, so shifting by the
 * negated bit count leaves ones only over the bytes before the start.
 */
116 CPU_BE( lsl tmp2, tmp2, tmp1 )
117
118 CPU_LE( lsr tmp2, tmp2, tmp1 )
119
/* Force the pre-start bytes to all-ones in both words so they compare equal. */
120 orr data1, data1, tmp2
121 orr data2, data2, tmp2
122 b .Lstart_realigned
123
124
/* The two pointers have different offsets within an 8-byte word. */
125 .Lmisaligned8:
/* Fewer than 8 bytes in total: compare byte by byte. */
126 cmp limit, #8
127 b.lo .Ltiny8proc
128
/* tmp1 = 8 - (src1 % 8): bytes from src1 up to its next word boundary. */
129 and tmp1, src1, #7
130 neg tmp1, tmp1
131 add tmp1, tmp1, #8
/* tmp2 = 8 - (src2 % 8): the same quantity for src2. */
132 and tmp2, src2, #7
133 neg tmp2, tmp2
134 add tmp2, tmp2, #8
/* tmp3 = tmp1 - tmp2 (kept for later); pos = the larger of tmp1/tmp2. */
135 subs tmp3, tmp1, tmp2
136 csel pos, tmp1, tmp2, hi
137
/* pos <= 8 <= limit here, so this cannot underflow. */
138 sub limit, limit, pos
139
/* Compare the first pos bytes one at a time. */
140 .Ltinycmp:
141 ldrb data1w, [src1], #1
142 ldrb data2w, [src2], #1
143 subs pos, pos, #1
/* If pos != 0 compare the bytes; otherwise set NZCV = 0 to leave the loop. */
144 ccmp data1w, data2w, #0, ne
145 b.eq .Ltinycmp
/* pos still non-zero: the loop stopped on a mismatching byte. */
146 cbnz pos, 1f
/* pos reached zero: the last byte pair has not been checked yet. */
147 cmp data1w, data2w
148 b.eq .Lstart_align
149 1:
/* ldrb zero-extends, so this is the difference of the two byte values. */
150 sub result, data1, data2
151 ret
152
153 .Lstart_align:
/* Whole 8-byte words remaining; none left -> finish with the byte loop. */
154 lsr limit_wd, limit, #3
155 cbz limit_wd, .Lremain8
156
/* Is src1 on an 8-byte boundary now? */
157 ands xzr, src1, #7
158 b.eq .Lrecal_offset
159
/*
 * src1 is not aligned, so pos was tmp2 and tmp3 (= tmp1 - tmp2) is
 * negative: adding it moves both pointers back so that src1 sits on its
 * word boundary. limit grows by the same amount.
 */
160 add src1, src1, tmp3
161 add src2, src2, tmp3
162 sub limit, limit, tmp3
163 lsr limit_wd, limit, #3
164 cbz limit_wd, .Lremain8
165
/* Compare one word starting at the now-aligned src1. */
166 ldr data1, [src1], #8
167 ldr data2, [src2], #8
168
169 subs limit_wd, limit_wd, #1
170 eor diff, data1, data2
/* endloop = diff while words remain, else all-ones. */
171 csinv endloop, diff, xzr, ne
172 cbnz endloop, .Lunequal_proc
173
/* Reduce tmp3 modulo 8: src2's offset past its own word boundary. */
174 and tmp3, tmp3, #7
175
/* src1 is 8-byte aligned from here on. */
176 .Lrecal_offset:
/* pos = negative offset that brings src2 back to a word boundary. */
177 neg pos, tmp3
178 .Lloopcmp_proc:
179
180
181
182
183
184
185
186
/*
 * Each iteration compares two overlapping words. First load from
 * src1+pos / src2+pos, which starts at src2's word boundary and overlaps
 * bytes that were already compared.
 */
187 ldr data1, [src1,pos]
188 ldr data2, [src2,pos]
189 eor diff, data1, data2
190 cbnz diff, .Lnot_limit
191
192
/* Then load the next word at the current pointers and advance them. */
193 ldr data1, [src1], #8
194 ldr data2, [src2], #8
195 eor diff, data1, data2
196 subs limit_wd, limit_wd, #1
/* endloop = diff while words remain, else all-ones to stop the loop. */
197 csinv endloop, diff, xzr, ne
198 cbz endloop, .Lloopcmp_proc
199 .Lunequal_proc:
/* No difference found: the word count ran out, handle leftover bytes. */
200 cbz diff, .Lremain8
201
202
/* A difference (or end-of-data marker) is present in diff. */
203 .Lnot_limit:
204
205
206
207
/*
 * Byte-reverse on little-endian so that the byte at the lowest address is
 * the most significant one, matching the big-endian layout.
 */
208 CPU_LE( rev diff, diff )
209 CPU_LE( rev data1, data1 )
210 CPU_LE( rev data2, data2 )
211
212
213
214
215
216
217
/*
 * pos = number of leading equal bits. Shifting both words left by pos
 * moves the first differing bit (or the end-of-data marker) to bit 63.
 */
218 clz pos, diff
219 lsl data1, data1, pos
220 lsl data2, data2, pos
221
222
223
224
/* Take the top 8 bits of each word, zero-extended, and subtract. */
225 lsr data1, data1, #56
226 sub result, data1, data2, lsr #56
227 ret
228
229 .Lremain8:
230
/* No leftover bytes: everything compared equal. */
231 ands limit, limit, #7
232 b.eq .Lret0
233
/* Byte-by-byte comparison of the remaining limit bytes (limit != 0). */
234 .Ltiny8proc:
235 ldrb data1w, [src1], #1
236 ldrb data2w, [src2], #1
237 subs limit, limit, #1
238
/* If bytes remain compare this pair; otherwise set NZCV = 0 to exit. */
239 ccmp data1w, data2w, #0, ne
240 b.eq .Ltiny8proc
/* Difference of the last byte pair loaded (0 if they were equal). */
241 sub result, data1, data2
242 ret
243 .Lret0:
244 mov result, #0
245 ret
246 ENDPIPROC(memcmp)
247 EXPORT_SYMBOL_NOKASAN(memcmp)