1
2
3
4
5
6
7
8
9
10
/*
 * HEXAGON_OPT_FUNC_BEGIN name
 *
 * Opens the definition of function \name: selects the .text section,
 * requests 2^4 = 16-byte alignment, makes \name a global symbol, marks
 * it as a function symbol and emits the \name: entry label.
 */
11 .macro HEXAGON_OPT_FUNC_BEGIN name
12 .text
13 .p2align 4 /* power-of-two alignment: 2^4 bytes */
14 .globl \name
15 .type \name, @function
16 \name: /* entry point */
17 .endm
18
/*
 * HEXAGON_OPT_FUNC_FINISH name
 *
 * Closes the definition of function \name by recording its symbol size
 * as the distance from the \name label to the current location, so it
 * must be placed directly after the last instruction of the function.
 */
19 .macro HEXAGON_OPT_FUNC_FINISH name
20 .size \name, . - \name
21 .endm
22
23
/*
 * memset -- variant assembled when __HEXAGON_ARCH__ < 3.
 *
 * Register use as read from the code below (the argument roles assume
 * the usual memset(dest, value, count) convention -- TODO confirm ABI):
 *   r0    dest; never written here, so it still holds dest on return
 *   r1    fill value; only consumed by vsplatb
 *   r3:2  remaining byte count, handled as a 64-bit pair (r3 is set to 0)
 *   r4    fill byte replicated into all four bytes; r5 copies it for memd
 *   r7:6  64-bit size of the store being issued (r7 is set to 0)
 *   r8    write cursor
 *   r9    8 - (dest & 7): distance to the next 8-byte boundary (1..8)
 *   r10   trip count of the 8-byte store loop
 *
 * Flow: count == 0 returns; count <= 7 uses a byte loop; otherwise
 * 1/2/4-byte head stores align r8 to 8 bytes, an 8-byte loop fills the
 * bulk, and 4/2/1-byte tail stores finish.
 *
 * NOTE: every "if pN ..." here is written without .new, so it tests the
 * predicate value from before its packet (Hexagon packet semantics).
 * Several packets branch on the previous predicate while loading the one
 * for the next packet, and the compares see r2 before the subtract in
 * the same packet.  Statement placement is therefore significant.
 */
24 #if __HEXAGON_ARCH__ < 3
25 HEXAGON_OPT_FUNC_BEGIN memset
26 {
27 r6 = #8
28 r7 = extractu(r0, #3 , #0) /* r7 = dest & 7 */
29 p0 = cmp.eq(r2, #0) /* p0: count == 0 */
30 p1 = cmp.gtu(r2, #7) /* p1: count > 7 (unsigned) */
31 }
32 {
33 r4 = vsplatb(r1) /* replicate the low byte of r1 into all 4 bytes */
34 r8 = r0 /* advance a copy so r0 is left untouched */
35 r9 = sub(r6, r7) /* r9 = 8 - (dest & 7) */
36 if p0 jumpr r31 /* count == 0: return */
37 }
38 {
39 r3 = #0 /* high word of the 64-bit count r3:2 */
40 r7 = #0 /* high word of the 64-bit step r7:6 */
41 p0 = tstbit(r9, #0) /* 1-byte head store needed? (tested at 2:) */
42 if p1 jump 2f /* count > 7: go to the aligned path */
43 }
44
45 /* count is 1..7 here: store it one byte at a time and return */
46
47 loop0(1f, r2) /* r2 iterations of the byte loop */
48 .falign
49 1:
50 {
51 memb(r8++#1) = r4
52 }:endloop0
53 jumpr r31
54 .falign
55 2: /* head, 1-byte step; only reached with count > 7 */
56 {
57 r6 = #1 /* step for the byte store below */
58 p0 = tstbit(r9, #1) /* preload for 3:, 2-byte head store needed? */
59 p1 = cmp.eq(r2, #1) /* would this store use up the whole count? */
60 if !p0 jump 3f /* old p0 = bit 0 of r9: clear means skip the byte */
61 }
62 {
63 memb(r8++#1) = r4
64 r3:2 = sub(r3:2, r7:6) /* count -= 1 */
65 if p1 jumpr r31
66 }
67 .falign
68 3: /* head, 2-byte step */
69 {
70 r6 = #2
71 p0 = tstbit(r9, #2) /* preload for 4:, 4-byte head store needed? */
72 p1 = cmp.eq(r2, #2)
73 if !p0 jump 4f /* old p0 = bit 1 of r9 */
74 }
75 {
76 memh(r8++#2) = r4
77 r3:2 = sub(r3:2, r7:6) /* count -= 2 */
78 if p1 jumpr r31
79 }
80 .falign
81 4: /* head, 4-byte step */
82 {
83 r6 = #4
84 p0 = cmp.gtu(r2, #7) /* preload for 5:, at least 8 bytes left? */
85 p1 = cmp.eq(r2, #4)
86 if !p0 jump 5f /* old p0 = bit 2 of r9 */
87 }
88 {
89 memw(r8++#4) = r4
90 r3:2 = sub(r3:2, r7:6) /* count -= 4 */
91 p0 = cmp.gtu(r2, #11) /* r2 before the subtract: > 7 left afterwards */
92 if p1 jumpr r31
93 }
94 .falign
95 5: /* r8 is 8-byte aligned from here on */
96 {
97 r10 = lsr(r2, #3) /* number of 8-byte stores = count / 8 */
98 p1 = cmp.eq(r3, #1) /* r3 was set to 0 above, so this presumably just clears p1 for 7: -- TODO confirm */
99 if !p0 jump 7f /* 7 or fewer bytes left: skip the 8-byte loop */
100 }
101 {
102 r5 = r4 /* r5:4 = eight copies of the fill byte */
103 r6 = #8 /* step for the loop below */
104 loop0(6f, r10)
105 }
106
107 /* bulk fill: one 8-byte store per iteration */
108
109 .falign
110 6:
111 {
112 memd(r8++#8) = r5:4
113 r3:2 = sub(r3:2, r7:6) /* count -= 8 */
114 p1 = cmp.eq(r2, #8) /* r2 before the subtract: true when nothing remains */
115 }:endloop0
116 .falign
117 7: /* tail: 0..7 bytes remain */
118 {
119 p0 = tstbit(r2, #2) /* preload: 4-byte tail store needed? */
120 if p1 jumpr r31 /* bulk loop consumed everything */
121 }
122 {
123 r6 = #4
124 p0 = tstbit(r2, #1) /* preload for 8:, 2-byte tail store needed? */
125 p1 = cmp.eq(r2, #4)
126 if !p0 jump 8f /* old p0 = bit 2 of the count */
127 }
128 {
129 memw(r8++#4) = r4
130 r3:2 = sub(r3:2, r7:6) /* count -= 4; bit 1 of the count is unchanged */
131 if p1 jumpr r31
132 }
133 .falign
134 8:
135 {
136 p1 = cmp.eq(r2, #2) /* exactly 2 bytes left? */
137 if !p0 jump 9f /* p0 = bit 1 of the count, loaded after 7: */
138 }
139 {
140 memh(r8++#2) = r4 /* count is not updated; p1 already says if a byte remains */
141 if p1 jumpr r31
142 }
143 .falign
144 9: /* one last byte */
145 {
146 memb(r8++#1) = r4
147 jumpr r31
148 }
149 HEXAGON_OPT_FUNC_FINISH memset
150 #endif
151
152
153
/*
 * memset -- variant assembled when __HEXAGON_ARCH__ >= 3.
 *
 * Register use as read from the code below (the argument roles assume
 * the usual memset(dest, value, count) convention -- TODO confirm ABI):
 *   r0    dest; never written here, so it still holds dest on return
 *   r1    fill value; byte stores write its low byte directly
 *   r2    remaining byte count
 *   r7    fill byte replicated into all four bytes
 *   r5:4  r7 in both halves, used by the 8-byte stores
 *   r6    write cursor (r3 plays that role in the short byte loop)
 *   r3    scratch: alignment test, then number of 32-byte blocks
 *   r8    trip count of the 8-byte store loop
 *
 * Flow:
 *   count == 0          -> return
 *   count <= 8          -> byte loop
 *   otherwise           -> 1/2/4-byte head stores until r6 is 8-byte
 *                          aligned
 *   count > 127         -> up to three 8-byte stores to reach a 32-byte
 *                          boundary, then one loop iteration per 32-byte
 *                          block (dczeroa alone when r1 == 0, dczeroa
 *                          plus four 8-byte stores otherwise)
 *   then                -> 8-byte loop, followed by 4/2/1-byte tail
 *
 * Inside a { } packet, plain register reads and "if (pN)" see the values
 * from before the packet; "pN.new" sees the predicate produced in the
 * same packet (Hexagon packet semantics).
 *
 * NOTE(review): .L3 is only entered with count > 8 and the head stores
 * consume at most 7 bytes, so the count == 1 / == 2 / == 4 early exits
 * in the head look unreachable -- confirm before relying on them.
 */
154 #if __HEXAGON_ARCH__ >= 3
155 HEXAGON_OPT_FUNC_BEGIN memset
156 {
157 r7=vsplatb(r1) /* replicate the low byte of r1 into all 4 bytes */
158 r6 = r0 /* write cursor; r0 itself is left untouched */
159 if (r2==#0) jump:nt .L1 /* nothing to do */
160 }
161 {
162 r5:4=combine(r7,r7) /* 8-byte fill pattern */
163 p0 = cmp.gtu(r2,#8)
164 if (p0.new) jump:nt .L3 /* count > 8: aligned path */
165 }
166 {
167 r3 = r0 /* cursor for the short byte loop */
168 loop0(.L47,r2) /* 1..8 iterations */
169 }
170 .falign
171 .L47:
172 {
173 memb(r3++#1) = r1
174 }:endloop0
175 jumpr r31
176 .L3: /* head: bring r6 up to an 8-byte boundary */
177 {
178 p0 = tstbit(r0,#0)
179 if (!p0.new) jump:nt .L8 /* dest is even: no head byte */
180 p1 = cmp.eq(r2, #1)
181 }
182 {
183 r6 = add(r0, #1)
184 r2 = add(r2,#-1)
185 memb(r0) = r1 /* head byte goes to the original dest */
186 if (p1) jump .L1
187 }
188 .L8:
189 {
190 p0 = tstbit(r6,#1)
191 if (!p0.new) jump:nt .L10 /* already 4-byte aligned */
192 }
193 {
194 r2 = add(r2,#-2)
195 memh(r6++#2) = r7
196 p0 = cmp.eq(r2, #2) /* r2 before the subtract: nothing left afterwards */
197 if (p0.new) jump:nt .L1
198 }
199 .L10:
200 {
201 p0 = tstbit(r6,#2)
202 if (!p0.new) jump:nt .L12 /* already 8-byte aligned */
203 }
204 {
205 r2 = add(r2,#-4)
206 memw(r6++#4) = r7
207 p0 = cmp.eq(r2, #4) /* r2 before the subtract: nothing left afterwards */
208 if (p0.new) jump:nt .L1
209 }
210 .L12: /* r6 is 8-byte aligned here */
211 {
212 p0 = cmp.gtu(r2,#127)
213 if (!p0.new) jump:nt .L14 /* 127 bytes or fewer: skip the 32-byte block path */
214 }
/* up to three 8-byte stores until r6 is a multiple of 32 */
215 r3 = and(r6,#31)
216 if (r3==#0) jump:nt .L17
217 {
218 memd(r6++#8) = r5:4
219 r2 = add(r2,#-8)
220 }
221 r3 = and(r6,#31)
222 if (r3==#0) jump:nt .L17
223 {
224 memd(r6++#8) = r5:4
225 r2 = add(r2,#-8)
226 }
227 r3 = and(r6,#31)
228 if (r3==#0) jump:nt .L17
229 {
230 memd(r6++#8) = r5:4
231 r2 = add(r2,#-8)
232 }
233 .L17: /* r6 is 32-byte aligned here */
234 {
235 r3 = lsr(r2,#5) /* number of whole 32-byte blocks */
236 if (r1!=#0) jump:nt .L18 /* tests all of r1, not only its low byte; .L18 stores r5:4 */
237 }
238 {
239 r8 = r3 /* NOTE(review): r8 is rewritten at .L14 before any read */
240 r3 = r6 /* NOTE(review): r3 is not read again on this path */
241 loop0(.L46,r3) /* trip count = r3 from before this packet */
242 }
243 .falign
244 .L46: /* r1 == 0: rely on dczeroa alone, advancing 32 bytes per iteration */
245 {
246 dczeroa(r6)
247 r6 = add(r6,#32)
248 r2 = add(r2,#-32)
249 }:endloop0
250 .L14: /* 8-byte stores for what is left */
251 {
252 p0 = cmp.gtu(r2,#7)
253 if (!p0.new) jump:nt .L28 /* fewer than 8 bytes left */
254 r8 = lsr(r2,#3) /* number of 8-byte stores */
255 }
256 loop0(.L44,r8)
257 .falign
258 .L44:
259 {
260 memd(r6++#8) = r5:4
261 r2 = add(r2,#-8)
262 }:endloop0
263 .L28: /* tail: 0..7 bytes remain */
264 {
265 p0 = tstbit(r2,#2)
266 if (!p0.new) jump:nt .L33 /* no 4-byte piece */
267 }
268 {
269 r2 = add(r2,#-4)
270 memw(r6++#4) = r7
271 }
272 .L33:
273 {
274 p0 = tstbit(r2,#1)
275 if (!p0.new) jump:nt .L35 /* no 2-byte piece */
276 }
277 {
278 r2 = add(r2,#-2)
279 memh(r6++#2) = r7
280 }
281 .L35:
282 p0 = cmp.eq(r2,#1)
283 if (p0) memb(r6) = r1 /* last odd byte, if any */
284 .L1: /* common return */
285 jumpr r31
286 .L18: /* nonzero r1: r3 = 32-byte block count from .L17 */
287 loop0(.L45,r3)
288 .falign
289 .L45:
290 dczeroa(r6) /* presumably sets up the line before the four stores overwrite it -- TODO confirm */
291 {
292 memd(r6++#8) = r5:4
293 r2 = add(r2,#-32) /* the whole block is accounted for here */
294 }
295 memd(r6++#8) = r5:4
296 memd(r6++#8) = r5:4
297 {
298 memd(r6++#8) = r5:4
299 }:endloop0
300 jump .L14 /* finish with the 8-byte loop and the tail */
301 HEXAGON_OPT_FUNC_FINISH memset
302 #endif