1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
/*
 * Firmware images required by the CIK-family ASICs.  Each chip is listed
 * twice: the legacy UPPERCASE-named images and the newer lowercase-named
 * images (the driver's ucode loader tries the new naming first and falls
 * back to the legacy one -- TODO confirm against cik_init_microcode()).
 */

/* Bonaire -- legacy firmware naming */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire -- new firmware naming */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

/* Hawaii -- legacy firmware naming */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii -- new firmware naming */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

/* Kaveri (APU, no MC/SMC images) -- legacy firmware naming */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri -- new firmware naming (adds a second MEC image) */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini (APU) -- legacy firmware naming */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini -- new firmware naming */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins (APU) -- legacy firmware naming */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins -- new firmware naming */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142 bool enable);
143
144 /**
145 * cik_get_allowed_info_register - fetch the register for the info ioctl
146 *
147 * @rdev: radeon_device pointer
148 * @reg: register offset in bytes
149 * @val: register value
150 *
151 * Returns 0 for success or -EINVAL for an invalid register
152 *
153 */
cik_get_allowed_info_register(struct radeon_device * rdev,u32 reg,u32 * val)154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155 u32 reg, u32 *val)
156 {
157 switch (reg) {
158 case GRBM_STATUS:
159 case GRBM_STATUS2:
160 case GRBM_STATUS_SE0:
161 case GRBM_STATUS_SE1:
162 case GRBM_STATUS_SE2:
163 case GRBM_STATUS_SE3:
164 case SRBM_STATUS:
165 case SRBM_STATUS2:
166 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168 case UVD_STATUS:
169 /* TODO VCE */
170 *val = RREG32(reg);
171 return 0;
172 default:
173 return -EINVAL;
174 }
175 }
176
177 /* get temperature in millidegrees */
ci_get_temp(struct radeon_device * rdev)178 int ci_get_temp(struct radeon_device *rdev)
179 {
180 u32 temp;
181 int actual_temp = 0;
182
183 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
184 CTF_TEMP_SHIFT;
185
186 if (temp & 0x200)
187 actual_temp = 255;
188 else
189 actual_temp = temp & 0x1ff;
190
191 actual_temp = actual_temp * 1000;
192
193 return actual_temp;
194 }
195
196 /* get temperature in millidegrees */
kv_get_temp(struct radeon_device * rdev)197 int kv_get_temp(struct radeon_device *rdev)
198 {
199 u32 temp;
200 int actual_temp = 0;
201
202 temp = RREG32_SMC(0xC0300E0C);
203
204 if (temp)
205 actual_temp = (temp / 8) - 49;
206 else
207 actual_temp = 0;
208
209 actual_temp = actual_temp * 1000;
210
211 return actual_temp;
212 }
213
214 /*
215 * Indirect registers accessor
216 */
cik_pciep_rreg(struct radeon_device * rdev,u32 reg)217 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
218 {
219 unsigned long flags;
220 u32 r;
221
222 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
223 WREG32(PCIE_INDEX, reg);
224 (void)RREG32(PCIE_INDEX);
225 r = RREG32(PCIE_DATA);
226 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
227 return r;
228 }
229
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long irq_flags;

	/* serialize use of the shared PCIE index/data pair */
	spin_lock_irqsave(&rdev->pciep_idx_lock, irq_flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read to latch the index */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* flush the write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, irq_flags);
}
241
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * The list is consumed by the RLC save/restore setup code.  Entries come
 * in pairs: a packed word whose low bits are the register dword offset
 * (byte offset >> 2) and whose high 16 bits appear to select the
 * GRBM_GFX_INDEX broadcast/instance target (TODO confirm against the RLC
 * programming docs), followed by a 0x00000000 placeholder.  The bare
 * values 0x3 and 0x5 below are section markers separating groups of
 * entries; the final five entries carry no placeholder word.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
688
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) parts.
 *
 * Same entry format as spectre_rlc_save_restore_register_list above:
 * packed (target << 16) | (register byte offset >> 2) words, each
 * followed by a 0x00000000 placeholder, with bare 0x3/0x5 section
 * markers and a trailing group of entries without placeholders.  The
 * list is shorter than the Spectre one because these smaller APUs have
 * fewer shader engines/instances to iterate.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1013
/*
 * Bonaire "golden" SPM register settings, applied at init as
 * { register byte offset, AND mask, OR value } triplets (read-modify-write).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1018
/*
 * Bonaire "golden" common register settings; same
 * { register, mask, value } triplet format as above.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1026
/*
 * Bonaire "golden" register settings; { register, mask, value } triplets
 * applied via read-modify-write during ASIC init.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1071
/*
 * Bonaire MGCG/CGCG (clock gating) init sequence.
 * Three dwords per entry (register offset, mask, value); programmed by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1157
/* Spectre (Kaveri) golden SPM register: {offset, mask, value} triplet */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1162
/* Spectre (Kaveri) golden common registers: {offset, mask, value} triplets */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1170
/*
 * Spectre (Kaveri) golden register settings.
 * Three dwords per entry (register offset, mask, value); applied by
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1199
/*
 * Spectre (Kaveri) MGCG/CGCG (clock gating) init sequence.
 * Three dwords per entry (register offset, mask, value); programmed by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1290
/* Kalindi (Kabini) golden SPM register: {offset, mask, value} triplet */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1295
/* Kalindi (Kabini) golden common registers: {offset, mask, value} triplets */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1303
/*
 * Kalindi (Kabini) golden register settings.
 * Three dwords per entry (register offset, mask, value); applied by
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1337
/*
 * Kalindi (Kabini/Mullins) MGCG/CGCG (clock gating) init sequence.
 * Three dwords per entry (register offset, mask, value); programmed by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1396
/* Hawaii golden SPM register: {offset, mask, value} triplet */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1401
/* Hawaii golden common registers: {offset, mask, value} triplets */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1410
/*
 * Hawaii golden register settings.
 * Three dwords per entry (register offset, mask, value); applied by
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1450
/*
 * Hawaii MGCG/CGCG (clock gating) init sequence.
 * Three dwords per entry (register offset, mask, value); programmed by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1561
/*
 * Godavari (Mullins) golden register settings.
 * Three dwords per entry (register offset, mask, value); applied by
 * cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned; the equivalent kalindi
	 * entry for this mask/value pair uses 0x9834 - verify upstream */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1597
1598
cik_init_golden_registers(struct radeon_device * rdev)1599 static void cik_init_golden_registers(struct radeon_device *rdev)
1600 {
1601 /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1602 mutex_lock(&rdev->grbm_idx_mutex);
1603 switch (rdev->family) {
1604 case CHIP_BONAIRE:
1605 radeon_program_register_sequence(rdev,
1606 bonaire_mgcg_cgcg_init,
1607 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1608 radeon_program_register_sequence(rdev,
1609 bonaire_golden_registers,
1610 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1611 radeon_program_register_sequence(rdev,
1612 bonaire_golden_common_registers,
1613 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1614 radeon_program_register_sequence(rdev,
1615 bonaire_golden_spm_registers,
1616 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1617 break;
1618 case CHIP_KABINI:
1619 radeon_program_register_sequence(rdev,
1620 kalindi_mgcg_cgcg_init,
1621 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1622 radeon_program_register_sequence(rdev,
1623 kalindi_golden_registers,
1624 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1625 radeon_program_register_sequence(rdev,
1626 kalindi_golden_common_registers,
1627 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1628 radeon_program_register_sequence(rdev,
1629 kalindi_golden_spm_registers,
1630 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1631 break;
1632 case CHIP_MULLINS:
1633 radeon_program_register_sequence(rdev,
1634 kalindi_mgcg_cgcg_init,
1635 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1636 radeon_program_register_sequence(rdev,
1637 godavari_golden_registers,
1638 (const u32)ARRAY_SIZE(godavari_golden_registers));
1639 radeon_program_register_sequence(rdev,
1640 kalindi_golden_common_registers,
1641 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1642 radeon_program_register_sequence(rdev,
1643 kalindi_golden_spm_registers,
1644 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1645 break;
1646 case CHIP_KAVERI:
1647 radeon_program_register_sequence(rdev,
1648 spectre_mgcg_cgcg_init,
1649 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1650 radeon_program_register_sequence(rdev,
1651 spectre_golden_registers,
1652 (const u32)ARRAY_SIZE(spectre_golden_registers));
1653 radeon_program_register_sequence(rdev,
1654 spectre_golden_common_registers,
1655 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1656 radeon_program_register_sequence(rdev,
1657 spectre_golden_spm_registers,
1658 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1659 break;
1660 case CHIP_HAWAII:
1661 radeon_program_register_sequence(rdev,
1662 hawaii_mgcg_cgcg_init,
1663 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1664 radeon_program_register_sequence(rdev,
1665 hawaii_golden_registers,
1666 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1667 radeon_program_register_sequence(rdev,
1668 hawaii_golden_common_registers,
1669 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1670 radeon_program_register_sequence(rdev,
1671 hawaii_golden_spm_registers,
1672 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1673 break;
1674 default:
1675 break;
1676 }
1677 mutex_unlock(&rdev->grbm_idx_mutex);
1678 }
1679
1680 /**
1681 * cik_get_xclk - get the xclk
1682 *
1683 * @rdev: radeon_device pointer
1684 *
1685 * Returns the reference clock used by the gfx engine
1686 * (CIK).
1687 */
cik_get_xclk(struct radeon_device * rdev)1688 u32 cik_get_xclk(struct radeon_device *rdev)
1689 {
1690 u32 reference_clock = rdev->clock.spll.reference_freq;
1691
1692 if (rdev->flags & RADEON_IS_IGP) {
1693 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1694 return reference_clock / 2;
1695 } else {
1696 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1697 return reference_clock / 4;
1698 }
1699 return reference_clock;
1700 }
1701
1702 /**
1703 * cik_mm_rdoorbell - read a doorbell dword
1704 *
1705 * @rdev: radeon_device pointer
1706 * @index: doorbell index
1707 *
1708 * Returns the value in the doorbell aperture at the
1709 * requested doorbell index (CIK).
1710 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	/* guard against reads outside the mapped doorbell aperture */
	if (index >= rdev->doorbell.num_doorbells) {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}

	return readl(rdev->doorbell.ptr + index);
}
1720
1721 /**
1722 * cik_mm_wdoorbell - write a doorbell dword
1723 *
1724 * @rdev: radeon_device pointer
1725 * @index: doorbell index
1726 * @v: value to write
1727 *
1728 * Writes @v to the doorbell aperture at the
1729 * requested doorbell index (CIK).
1730 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	/* guard against writes outside the mapped doorbell aperture */
	if (index >= rdev->doorbell.num_doorbells) {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
		return;
	}

	writel(v, rdev->doorbell.ptr + index);
}
1739
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC io debug register pairs for legacy MC firmware:
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} values written by
 * ci_mc_load_microcode() before loading the ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1781
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Hawaii MC io debug register pairs for legacy MC firmware:
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} values written by
 * ci_mc_load_microcode() before loading the ucode.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1809
1810
1811 /**
1812 * cik_srbm_select - select specific register instances
1813 *
1814 * @rdev: radeon_device pointer
1815 * @me: selected ME (micro engine)
1816 * @pipe: pipe
1817 * @queue: queue
1818 * @vmid: VMID
1819 *
1820 * Switches the currently active registers instances. Some
1821 * registers are instanced per VMID, others are instanced per
1822 * me/pipe/queue combination.
1823 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = 0;

	/* each field is masked to its register width before packing */
	srbm_gfx_cntl |= PIPEID(pipe & 0x3);
	srbm_gfx_cntl |= MEID(me & 0x3);
	srbm_gfx_cntl |= VMID(vmid & 0xf);
	srbm_gfx_cntl |= QUEUEID(queue & 0x7);

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
1833
1834 /* ucode loading */
1835 /**
1836 * ci_mc_load_microcode - load MC ucode into the hw
1837 *
1838 * @rdev: radeon_device pointer
1839 *
1840 * Load the GDDR MC ucode into the hw (CIK).
1841 * Returns 0 on success, error on failure.
1842 */
ci_mc_load_microcode(struct radeon_device * rdev)1843 int ci_mc_load_microcode(struct radeon_device *rdev)
1844 {
1845 const __be32 *fw_data = NULL;
1846 const __le32 *new_fw_data = NULL;
1847 u32 running, blackout = 0, tmp;
1848 u32 *io_mc_regs = NULL;
1849 const __le32 *new_io_mc_regs = NULL;
1850 int i, regs_size, ucode_size;
1851
1852 if (!rdev->mc_fw)
1853 return -EINVAL;
1854
1855 if (rdev->new_fw) {
1856 const struct mc_firmware_header_v1_0 *hdr =
1857 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1858
1859 radeon_ucode_print_mc_hdr(&hdr->header);
1860
1861 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1862 new_io_mc_regs = (const __le32 *)
1863 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1864 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1865 new_fw_data = (const __le32 *)
1866 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1867 } else {
1868 ucode_size = rdev->mc_fw->size / 4;
1869
1870 switch (rdev->family) {
1871 case CHIP_BONAIRE:
1872 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1873 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1874 break;
1875 case CHIP_HAWAII:
1876 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1877 regs_size = HAWAII_IO_MC_REGS_SIZE;
1878 break;
1879 default:
1880 return -EINVAL;
1881 }
1882 fw_data = (const __be32 *)rdev->mc_fw->data;
1883 }
1884
1885 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1886
1887 if (running == 0) {
1888 if (running) {
1889 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1890 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1891 }
1892
1893 /* reset the engine and set to writable */
1894 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1895 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1896
1897 /* load mc io regs */
1898 for (i = 0; i < regs_size; i++) {
1899 if (rdev->new_fw) {
1900 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1901 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1902 } else {
1903 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1904 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1905 }
1906 }
1907
1908 tmp = RREG32(MC_SEQ_MISC0);
1909 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1910 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1911 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1912 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1913 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1914 }
1915
1916 /* load the MC ucode */
1917 for (i = 0; i < ucode_size; i++) {
1918 if (rdev->new_fw)
1919 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1920 else
1921 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1922 }
1923
1924 /* put the engine back into the active state */
1925 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1926 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1927 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1928
1929 /* wait for training to complete */
1930 for (i = 0; i < rdev->usec_timeout; i++) {
1931 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1932 break;
1933 udelay(1);
1934 }
1935 for (i = 0; i < rdev->usec_timeout; i++) {
1936 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1937 break;
1938 udelay(1);
1939 }
1940
1941 if (running)
1942 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1943 }
1944
1945 return 0;
1946 }
1947
1948 /**
1949 * cik_init_microcode - load ucode images from disk
1950 *
1951 * @rdev: radeon_device pointer
1952 *
1953 * Use the firmware interface to load the ucode images into
1954 * the driver (not loaded into hw).
1955 * Returns 0 on success, error on failure.
1956 */
cik_init_microcode(struct radeon_device * rdev)1957 static int cik_init_microcode(struct radeon_device *rdev)
1958 {
1959 const char *chip_name;
1960 const char *new_chip_name;
1961 size_t pfp_req_size, me_req_size, ce_req_size,
1962 mec_req_size, rlc_req_size, mc_req_size = 0,
1963 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1964 char fw_name[30];
1965 int new_fw = 0;
1966 int err;
1967 int num_fw;
1968
1969 DRM_DEBUG("\n");
1970
1971 switch (rdev->family) {
1972 case CHIP_BONAIRE:
1973 chip_name = "BONAIRE";
1974 new_chip_name = "bonaire";
1975 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1976 me_req_size = CIK_ME_UCODE_SIZE * 4;
1977 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1978 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1979 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1980 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1981 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1982 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1983 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1984 num_fw = 8;
1985 break;
1986 case CHIP_HAWAII:
1987 chip_name = "HAWAII";
1988 new_chip_name = "hawaii";
1989 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990 me_req_size = CIK_ME_UCODE_SIZE * 4;
1991 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1995 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1996 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1998 num_fw = 8;
1999 break;
2000 case CHIP_KAVERI:
2001 chip_name = "KAVERI";
2002 new_chip_name = "kaveri";
2003 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004 me_req_size = CIK_ME_UCODE_SIZE * 4;
2005 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2008 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009 num_fw = 7;
2010 break;
2011 case CHIP_KABINI:
2012 chip_name = "KABINI";
2013 new_chip_name = "kabini";
2014 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2019 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2020 num_fw = 6;
2021 break;
2022 case CHIP_MULLINS:
2023 chip_name = "MULLINS";
2024 new_chip_name = "mullins";
2025 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2026 me_req_size = CIK_ME_UCODE_SIZE * 4;
2027 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2028 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2029 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2030 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031 num_fw = 6;
2032 break;
2033 default: BUG();
2034 }
2035
2036 DRM_INFO("Loading %s Microcode\n", new_chip_name);
2037
2038 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2039 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2040 if (err) {
2041 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2042 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2043 if (err)
2044 goto out;
2045 if (rdev->pfp_fw->size != pfp_req_size) {
2046 printk(KERN_ERR
2047 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2048 rdev->pfp_fw->size, fw_name);
2049 err = -EINVAL;
2050 goto out;
2051 }
2052 } else {
2053 err = radeon_ucode_validate(rdev->pfp_fw);
2054 if (err) {
2055 printk(KERN_ERR
2056 "cik_fw: validation failed for firmware \"%s\"\n",
2057 fw_name);
2058 goto out;
2059 } else {
2060 new_fw++;
2061 }
2062 }
2063
2064 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2065 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2066 if (err) {
2067 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2068 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2069 if (err)
2070 goto out;
2071 if (rdev->me_fw->size != me_req_size) {
2072 printk(KERN_ERR
2073 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074 rdev->me_fw->size, fw_name);
2075 err = -EINVAL;
2076 }
2077 } else {
2078 err = radeon_ucode_validate(rdev->me_fw);
2079 if (err) {
2080 printk(KERN_ERR
2081 "cik_fw: validation failed for firmware \"%s\"\n",
2082 fw_name);
2083 goto out;
2084 } else {
2085 new_fw++;
2086 }
2087 }
2088
2089 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2090 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2091 if (err) {
2092 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2093 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2094 if (err)
2095 goto out;
2096 if (rdev->ce_fw->size != ce_req_size) {
2097 printk(KERN_ERR
2098 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 rdev->ce_fw->size, fw_name);
2100 err = -EINVAL;
2101 }
2102 } else {
2103 err = radeon_ucode_validate(rdev->ce_fw);
2104 if (err) {
2105 printk(KERN_ERR
2106 "cik_fw: validation failed for firmware \"%s\"\n",
2107 fw_name);
2108 goto out;
2109 } else {
2110 new_fw++;
2111 }
2112 }
2113
2114 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2115 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2116 if (err) {
2117 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2118 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2119 if (err)
2120 goto out;
2121 if (rdev->mec_fw->size != mec_req_size) {
2122 printk(KERN_ERR
2123 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 rdev->mec_fw->size, fw_name);
2125 err = -EINVAL;
2126 }
2127 } else {
2128 err = radeon_ucode_validate(rdev->mec_fw);
2129 if (err) {
2130 printk(KERN_ERR
2131 "cik_fw: validation failed for firmware \"%s\"\n",
2132 fw_name);
2133 goto out;
2134 } else {
2135 new_fw++;
2136 }
2137 }
2138
2139 if (rdev->family == CHIP_KAVERI) {
2140 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2141 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2142 if (err) {
2143 goto out;
2144 } else {
2145 err = radeon_ucode_validate(rdev->mec2_fw);
2146 if (err) {
2147 goto out;
2148 } else {
2149 new_fw++;
2150 }
2151 }
2152 }
2153
2154 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2155 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2156 if (err) {
2157 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2158 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2159 if (err)
2160 goto out;
2161 if (rdev->rlc_fw->size != rlc_req_size) {
2162 printk(KERN_ERR
2163 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2164 rdev->rlc_fw->size, fw_name);
2165 err = -EINVAL;
2166 }
2167 } else {
2168 err = radeon_ucode_validate(rdev->rlc_fw);
2169 if (err) {
2170 printk(KERN_ERR
2171 "cik_fw: validation failed for firmware \"%s\"\n",
2172 fw_name);
2173 goto out;
2174 } else {
2175 new_fw++;
2176 }
2177 }
2178
2179 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2180 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2181 if (err) {
2182 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2183 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2184 if (err)
2185 goto out;
2186 if (rdev->sdma_fw->size != sdma_req_size) {
2187 printk(KERN_ERR
2188 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2189 rdev->sdma_fw->size, fw_name);
2190 err = -EINVAL;
2191 }
2192 } else {
2193 err = radeon_ucode_validate(rdev->sdma_fw);
2194 if (err) {
2195 printk(KERN_ERR
2196 "cik_fw: validation failed for firmware \"%s\"\n",
2197 fw_name);
2198 goto out;
2199 } else {
2200 new_fw++;
2201 }
2202 }
2203
2204 /* No SMC, MC ucode on APUs */
2205 if (!(rdev->flags & RADEON_IS_IGP)) {
2206 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2207 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2208 if (err) {
2209 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2210 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2211 if (err) {
2212 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2213 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2214 if (err)
2215 goto out;
2216 }
2217 if ((rdev->mc_fw->size != mc_req_size) &&
2218 (rdev->mc_fw->size != mc2_req_size)){
2219 printk(KERN_ERR
2220 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2221 rdev->mc_fw->size, fw_name);
2222 err = -EINVAL;
2223 }
2224 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2225 } else {
2226 err = radeon_ucode_validate(rdev->mc_fw);
2227 if (err) {
2228 printk(KERN_ERR
2229 "cik_fw: validation failed for firmware \"%s\"\n",
2230 fw_name);
2231 goto out;
2232 } else {
2233 new_fw++;
2234 }
2235 }
2236
2237 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2238 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2239 if (err) {
2240 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2241 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242 if (err) {
2243 printk(KERN_ERR
2244 "smc: error loading firmware \"%s\"\n",
2245 fw_name);
2246 release_firmware(rdev->smc_fw);
2247 rdev->smc_fw = NULL;
2248 err = 0;
2249 } else if (rdev->smc_fw->size != smc_req_size) {
2250 printk(KERN_ERR
2251 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2252 rdev->smc_fw->size, fw_name);
2253 err = -EINVAL;
2254 }
2255 } else {
2256 err = radeon_ucode_validate(rdev->smc_fw);
2257 if (err) {
2258 printk(KERN_ERR
2259 "cik_fw: validation failed for firmware \"%s\"\n",
2260 fw_name);
2261 goto out;
2262 } else {
2263 new_fw++;
2264 }
2265 }
2266 }
2267
2268 if (new_fw == 0) {
2269 rdev->new_fw = false;
2270 } else if (new_fw < num_fw) {
2271 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2272 err = -EINVAL;
2273 } else {
2274 rdev->new_fw = true;
2275 }
2276
2277 out:
2278 if (err) {
2279 if (err != -EINVAL)
2280 printk(KERN_ERR
2281 "cik_cp: Failed to load firmware \"%s\"\n",
2282 fw_name);
2283 release_firmware(rdev->pfp_fw);
2284 rdev->pfp_fw = NULL;
2285 release_firmware(rdev->me_fw);
2286 rdev->me_fw = NULL;
2287 release_firmware(rdev->ce_fw);
2288 rdev->ce_fw = NULL;
2289 release_firmware(rdev->mec_fw);
2290 rdev->mec_fw = NULL;
2291 release_firmware(rdev->mec2_fw);
2292 rdev->mec2_fw = NULL;
2293 release_firmware(rdev->rlc_fw);
2294 rdev->rlc_fw = NULL;
2295 release_firmware(rdev->sdma_fw);
2296 rdev->sdma_fw = NULL;
2297 release_firmware(rdev->mc_fw);
2298 rdev->mc_fw = NULL;
2299 release_firmware(rdev->smc_fw);
2300 rdev->smc_fw = NULL;
2301 }
2302 return err;
2303 }
2304
2305 /*
2306 * Core functions
2307 */
2308 /**
2309 * cik_tiling_mode_table_init - init the hw tiling table
2310 *
2311 * @rdev: radeon_device pointer
2312 *
2313 * Starting with SI, the tiling setup is done globally in a
2314 * set of 32 tiling modes. Rather than selecting each set of
2315 * parameters per surface as on older asics, we just select
2316 * which index in the tiling table we want to use, and the
2317 * surface uses those parameters (CIK).
2318 */
2319 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2320 {
2321 const u32 num_tile_mode_states = 32;
2322 const u32 num_secondary_tile_mode_states = 16;
2323 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2324 u32 num_pipe_configs;
2325 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2326 rdev->config.cik.max_shader_engines;
2327
2328 switch (rdev->config.cik.mem_row_size_in_kb) {
2329 case 1:
2330 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2331 break;
2332 case 2:
2333 default:
2334 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2335 break;
2336 case 4:
2337 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2338 break;
2339 }
2340
2341 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2342 if (num_pipe_configs > 8)
2343 num_pipe_configs = 16;
2344
2345 if (num_pipe_configs == 16) {
2346 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2347 switch (reg_offset) {
2348 case 0:
2349 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2353 break;
2354 case 1:
2355 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2359 break;
2360 case 2:
2361 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2365 break;
2366 case 3:
2367 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2371 break;
2372 case 4:
2373 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 TILE_SPLIT(split_equal_to_row_size));
2377 break;
2378 case 5:
2379 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 break;
2383 case 6:
2384 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2388 break;
2389 case 7:
2390 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2392 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 TILE_SPLIT(split_equal_to_row_size));
2394 break;
2395 case 8:
2396 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2397 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2398 break;
2399 case 9:
2400 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2401 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2403 break;
2404 case 10:
2405 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 break;
2410 case 11:
2411 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 break;
2416 case 12:
2417 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 break;
2422 case 13:
2423 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2426 break;
2427 case 14:
2428 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 break;
2433 case 16:
2434 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 break;
2439 case 17:
2440 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 break;
2445 case 27:
2446 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449 break;
2450 case 28:
2451 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 break;
2456 case 29:
2457 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 break;
2462 case 30:
2463 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467 break;
2468 default:
2469 gb_tile_moden = 0;
2470 break;
2471 }
2472 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2473 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474 }
2475 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2476 switch (reg_offset) {
2477 case 0:
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2482 break;
2483 case 1:
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 NUM_BANKS(ADDR_SURF_16_BANK));
2488 break;
2489 case 2:
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2494 break;
2495 case 3:
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 NUM_BANKS(ADDR_SURF_16_BANK));
2500 break;
2501 case 4:
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 NUM_BANKS(ADDR_SURF_8_BANK));
2506 break;
2507 case 5:
2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 NUM_BANKS(ADDR_SURF_4_BANK));
2512 break;
2513 case 6:
2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517 NUM_BANKS(ADDR_SURF_2_BANK));
2518 break;
2519 case 8:
2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2524 break;
2525 case 9:
2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2530 break;
2531 case 10:
2532 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535 NUM_BANKS(ADDR_SURF_16_BANK));
2536 break;
2537 case 11:
2538 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2542 break;
2543 case 12:
2544 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547 NUM_BANKS(ADDR_SURF_4_BANK));
2548 break;
2549 case 13:
2550 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553 NUM_BANKS(ADDR_SURF_2_BANK));
2554 break;
2555 case 14:
2556 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559 NUM_BANKS(ADDR_SURF_2_BANK));
2560 break;
2561 default:
2562 gb_tile_moden = 0;
2563 break;
2564 }
2565 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2566 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2567 }
2568 } else if (num_pipe_configs == 8) {
2569 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2570 switch (reg_offset) {
2571 case 0:
2572 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576 break;
2577 case 1:
2578 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2582 break;
2583 case 2:
2584 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588 break;
2589 case 3:
2590 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2594 break;
2595 case 4:
2596 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2598 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 TILE_SPLIT(split_equal_to_row_size));
2600 break;
2601 case 5:
2602 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605 break;
2606 case 6:
2607 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2611 break;
2612 case 7:
2613 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2615 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 TILE_SPLIT(split_equal_to_row_size));
2617 break;
2618 case 8:
2619 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2621 break;
2622 case 9:
2623 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2626 break;
2627 case 10:
2628 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 break;
2633 case 11:
2634 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2638 break;
2639 case 12:
2640 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 break;
2645 case 13:
2646 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649 break;
2650 case 14:
2651 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 break;
2656 case 16:
2657 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 break;
2662 case 17:
2663 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 break;
2668 case 27:
2669 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2672 break;
2673 case 28:
2674 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 break;
2679 case 29:
2680 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684 break;
2685 case 30:
2686 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690 break;
2691 default:
2692 gb_tile_moden = 0;
2693 break;
2694 }
2695 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2696 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2697 }
2698 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699 switch (reg_offset) {
2700 case 0:
2701 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704 NUM_BANKS(ADDR_SURF_16_BANK));
2705 break;
2706 case 1:
2707 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710 NUM_BANKS(ADDR_SURF_16_BANK));
2711 break;
2712 case 2:
2713 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716 NUM_BANKS(ADDR_SURF_16_BANK));
2717 break;
2718 case 3:
2719 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722 NUM_BANKS(ADDR_SURF_16_BANK));
2723 break;
2724 case 4:
2725 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728 NUM_BANKS(ADDR_SURF_8_BANK));
2729 break;
2730 case 5:
2731 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734 NUM_BANKS(ADDR_SURF_4_BANK));
2735 break;
2736 case 6:
2737 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740 NUM_BANKS(ADDR_SURF_2_BANK));
2741 break;
2742 case 8:
2743 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2747 break;
2748 case 9:
2749 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752 NUM_BANKS(ADDR_SURF_16_BANK));
2753 break;
2754 case 10:
2755 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759 break;
2760 case 11:
2761 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764 NUM_BANKS(ADDR_SURF_16_BANK));
2765 break;
2766 case 12:
2767 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770 NUM_BANKS(ADDR_SURF_8_BANK));
2771 break;
2772 case 13:
2773 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776 NUM_BANKS(ADDR_SURF_4_BANK));
2777 break;
2778 case 14:
2779 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782 NUM_BANKS(ADDR_SURF_2_BANK));
2783 break;
2784 default:
2785 gb_tile_moden = 0;
2786 break;
2787 }
2788 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790 }
2791 } else if (num_pipe_configs == 4) {
2792 if (num_rbs == 4) {
2793 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2794 switch (reg_offset) {
2795 case 0:
2796 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800 break;
2801 case 1:
2802 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2806 break;
2807 case 2:
2808 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812 break;
2813 case 3:
2814 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2818 break;
2819 case 4:
2820 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 TILE_SPLIT(split_equal_to_row_size));
2824 break;
2825 case 5:
2826 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 break;
2830 case 6:
2831 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2835 break;
2836 case 7:
2837 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2838 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2839 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840 TILE_SPLIT(split_equal_to_row_size));
2841 break;
2842 case 8:
2843 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2845 break;
2846 case 9:
2847 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2850 break;
2851 case 10:
2852 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 break;
2857 case 11:
2858 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2860 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862 break;
2863 case 12:
2864 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868 break;
2869 case 13:
2870 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2871 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2873 break;
2874 case 14:
2875 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 break;
2880 case 16:
2881 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 break;
2886 case 17:
2887 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2888 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 break;
2892 case 27:
2893 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2896 break;
2897 case 28:
2898 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902 break;
2903 case 29:
2904 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908 break;
2909 case 30:
2910 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914 break;
2915 default:
2916 gb_tile_moden = 0;
2917 break;
2918 }
2919 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2920 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2921 }
2922 } else if (num_rbs < 4) {
2923 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2924 switch (reg_offset) {
2925 case 0:
2926 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2930 break;
2931 case 1:
2932 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2936 break;
2937 case 2:
2938 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2940 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2942 break;
2943 case 3:
2944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2946 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2948 break;
2949 case 4:
2950 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2952 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2953 TILE_SPLIT(split_equal_to_row_size));
2954 break;
2955 case 5:
2956 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959 break;
2960 case 6:
2961 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2963 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2965 break;
2966 case 7:
2967 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2969 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2970 TILE_SPLIT(split_equal_to_row_size));
2971 break;
2972 case 8:
2973 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2974 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2975 break;
2976 case 9:
2977 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2980 break;
2981 case 10:
2982 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2984 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 break;
2987 case 11:
2988 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2989 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2990 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992 break;
2993 case 12:
2994 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998 break;
2999 case 13:
3000 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3003 break;
3004 case 14:
3005 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 break;
3010 case 16:
3011 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3013 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3014 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 break;
3016 case 17:
3017 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 break;
3022 case 27:
3023 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3026 break;
3027 case 28:
3028 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032 break;
3033 case 29:
3034 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3036 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3037 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038 break;
3039 case 30:
3040 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3041 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3044 break;
3045 default:
3046 gb_tile_moden = 0;
3047 break;
3048 }
3049 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3050 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3051 }
3052 }
3053 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3054 switch (reg_offset) {
3055 case 0:
3056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 NUM_BANKS(ADDR_SURF_16_BANK));
3060 break;
3061 case 1:
3062 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065 NUM_BANKS(ADDR_SURF_16_BANK));
3066 break;
3067 case 2:
3068 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071 NUM_BANKS(ADDR_SURF_16_BANK));
3072 break;
3073 case 3:
3074 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 NUM_BANKS(ADDR_SURF_16_BANK));
3078 break;
3079 case 4:
3080 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083 NUM_BANKS(ADDR_SURF_16_BANK));
3084 break;
3085 case 5:
3086 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089 NUM_BANKS(ADDR_SURF_8_BANK));
3090 break;
3091 case 6:
3092 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095 NUM_BANKS(ADDR_SURF_4_BANK));
3096 break;
3097 case 8:
3098 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101 NUM_BANKS(ADDR_SURF_16_BANK));
3102 break;
3103 case 9:
3104 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3105 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3106 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107 NUM_BANKS(ADDR_SURF_16_BANK));
3108 break;
3109 case 10:
3110 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3113 NUM_BANKS(ADDR_SURF_16_BANK));
3114 break;
3115 case 11:
3116 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119 NUM_BANKS(ADDR_SURF_16_BANK));
3120 break;
3121 case 12:
3122 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125 NUM_BANKS(ADDR_SURF_16_BANK));
3126 break;
3127 case 13:
3128 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131 NUM_BANKS(ADDR_SURF_8_BANK));
3132 break;
3133 case 14:
3134 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137 NUM_BANKS(ADDR_SURF_4_BANK));
3138 break;
3139 default:
3140 gb_tile_moden = 0;
3141 break;
3142 }
3143 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3144 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3145 }
3146 } else if (num_pipe_configs == 2) {
3147 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3148 switch (reg_offset) {
3149 case 0:
3150 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152 PIPE_CONFIG(ADDR_SURF_P2) |
3153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3154 break;
3155 case 1:
3156 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3157 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158 PIPE_CONFIG(ADDR_SURF_P2) |
3159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3160 break;
3161 case 2:
3162 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3164 PIPE_CONFIG(ADDR_SURF_P2) |
3165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3166 break;
3167 case 3:
3168 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3172 break;
3173 case 4:
3174 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3176 PIPE_CONFIG(ADDR_SURF_P2) |
3177 TILE_SPLIT(split_equal_to_row_size));
3178 break;
3179 case 5:
3180 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181 PIPE_CONFIG(ADDR_SURF_P2) |
3182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183 break;
3184 case 6:
3185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3187 PIPE_CONFIG(ADDR_SURF_P2) |
3188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3189 break;
3190 case 7:
3191 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3193 PIPE_CONFIG(ADDR_SURF_P2) |
3194 TILE_SPLIT(split_equal_to_row_size));
3195 break;
3196 case 8:
3197 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198 PIPE_CONFIG(ADDR_SURF_P2);
3199 break;
3200 case 9:
3201 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203 PIPE_CONFIG(ADDR_SURF_P2));
3204 break;
3205 case 10:
3206 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 break;
3211 case 11:
3212 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216 break;
3217 case 12:
3218 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222 break;
3223 case 13:
3224 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225 PIPE_CONFIG(ADDR_SURF_P2) |
3226 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3227 break;
3228 case 14:
3229 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231 PIPE_CONFIG(ADDR_SURF_P2) |
3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 break;
3234 case 16:
3235 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239 break;
3240 case 17:
3241 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3242 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243 PIPE_CONFIG(ADDR_SURF_P2) |
3244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245 break;
3246 case 27:
3247 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3249 PIPE_CONFIG(ADDR_SURF_P2));
3250 break;
3251 case 28:
3252 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 break;
3257 case 29:
3258 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 break;
3263 case 30:
3264 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266 PIPE_CONFIG(ADDR_SURF_P2) |
3267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268 break;
3269 default:
3270 gb_tile_moden = 0;
3271 break;
3272 }
3273 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3274 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3275 }
3276 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3277 switch (reg_offset) {
3278 case 0:
3279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282 NUM_BANKS(ADDR_SURF_16_BANK));
3283 break;
3284 case 1:
3285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288 NUM_BANKS(ADDR_SURF_16_BANK));
3289 break;
3290 case 2:
3291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294 NUM_BANKS(ADDR_SURF_16_BANK));
3295 break;
3296 case 3:
3297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300 NUM_BANKS(ADDR_SURF_16_BANK));
3301 break;
3302 case 4:
3303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306 NUM_BANKS(ADDR_SURF_16_BANK));
3307 break;
3308 case 5:
3309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312 NUM_BANKS(ADDR_SURF_16_BANK));
3313 break;
3314 case 6:
3315 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3318 NUM_BANKS(ADDR_SURF_8_BANK));
3319 break;
3320 case 8:
3321 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324 NUM_BANKS(ADDR_SURF_16_BANK));
3325 break;
3326 case 9:
3327 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330 NUM_BANKS(ADDR_SURF_16_BANK));
3331 break;
3332 case 10:
3333 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336 NUM_BANKS(ADDR_SURF_16_BANK));
3337 break;
3338 case 11:
3339 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342 NUM_BANKS(ADDR_SURF_16_BANK));
3343 break;
3344 case 12:
3345 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 NUM_BANKS(ADDR_SURF_16_BANK));
3349 break;
3350 case 13:
3351 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 NUM_BANKS(ADDR_SURF_16_BANK));
3355 break;
3356 case 14:
3357 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360 NUM_BANKS(ADDR_SURF_8_BANK));
3361 break;
3362 default:
3363 gb_tile_moden = 0;
3364 break;
3365 }
3366 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3367 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3368 }
3369 } else
3370 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3371 }
3372
3373 /**
3374 * cik_select_se_sh - select which SE, SH to address
3375 *
3376 * @rdev: radeon_device pointer
3377 * @se_num: shader engine to address
3378 * @sh_num: sh block to address
3379 *
3380 * Select which SE, SH combinations to address. Certain
3381 * registers are instanced per SE or SH. 0xffffffff means
3382 * broadcast to all SEs or SHs (CIK).
3383 */
cik_select_se_sh(struct radeon_device * rdev,u32 se_num,u32 sh_num)3384 static void cik_select_se_sh(struct radeon_device *rdev,
3385 u32 se_num, u32 sh_num)
3386 {
3387 u32 data = INSTANCE_BROADCAST_WRITES;
3388
3389 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3390 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3391 else if (se_num == 0xffffffff)
3392 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3393 else if (sh_num == 0xffffffff)
3394 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3395 else
3396 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3397 WREG32(GRBM_GFX_INDEX, data);
3398 }
3399
3400 /**
3401 * cik_create_bitmask - create a bitmask
3402 *
3403 * @bit_width: length of the mask
3404 *
3405 * create a variable length bit mask (CIK).
3406 * Returns the bitmask.
3407 */
cik_create_bitmask(u32 bit_width)3408 static u32 cik_create_bitmask(u32 bit_width)
3409 {
3410 u32 i, mask = 0;
3411
3412 for (i = 0; i < bit_width; i++) {
3413 mask <<= 1;
3414 mask |= 1;
3415 }
3416 return mask;
3417 }
3418
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs for the currently selected
 * SE/SH (see cik_select_se_sh()) (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num_per_se,
			       u32 sh_per_se)
{
	u32 data, mask;

	/* CC_* holds the harvest-level disables; only meaningful when the
	 * low enable bit is set (presumably fuse-programmed — the register
	 * name suggests chip-config, confirm against the register spec).
	 */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in driver/user-requested RB disables */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* clamp to the number of RBs that actually exist per SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3449
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK): reads back which RBs are
 * disabled (harvested), records the enabled-RB mask in
 * rdev->config.cik.backend_enable_mask, and programs
 * PA_SC_RASTER_CONFIG per SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the per-SE/SH disabled-RB bits into one packed bitmap.
	 * GRBM_GFX_INDEX is shared instanced state, so hold grbm_idx_mutex
	 * while a specific SE/SH is selected.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii packs more RB bits per SH than other CIK parts */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so later instanced writes hit every SE/SH */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* invert the disabled bitmap into an enabled-RB bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE based on which RBs survived.
	 * Each SH consumes two bits of enabled_rbs (one per RB of the pair).
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* both RBs of this pair disabled */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3522
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader core limits, SC FIFO sizes and the "golden"
	 * GB_ADDR_CONFIG value used as the tiling baseline.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs ship with different CU/RB harvesting, keyed
		 * off the PCI device id.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* NOTE(review): raw register offsets 0x2c14..0x2c24 with a 0x18
	 * stride, zeroed for 32 instances — confirm against the register
	 * spec which HDP block this clears.
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	/* enable and ack SRBM interrupts */
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * presumably kept for the register read side only; confirm.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in kb) from the number of columns */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active (non-harvested) compute units */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the configuration settle before the engine is used */
	udelay(50);
}
3805
3806 /*
3807 * GPU scratch registers helpers function.
3808 */
3809 /**
3810 * cik_scratch_init - setup driver info for CP scratch regs
3811 *
3812 * @rdev: radeon_device pointer
3813 *
3814 * Set up the number and offset of the CP scratch registers.
3815 * NOTE: use of CP scratch registers is a legacy inferface and
3816 * is not used by default on newer asics (r6xx+). On newer asics,
3817 * memory buffers are used for fences rather than scratch regs.
3818 */
cik_scratch_init(struct radeon_device * rdev)3819 static void cik_scratch_init(struct radeon_device *rdev)
3820 {
3821 int i;
3822
3823 rdev->scratch.num_reg = 7;
3824 rdev->scratch.reg_base = SCRATCH_REG0;
3825 for (i = 0; i < rdev->scratch.num_reg; i++) {
3826 rdev->scratch.free[i] = true;
3827 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3828 }
3829 }
3830
3831 /**
3832 * cik_ring_test - basic gfx ring test
3833 *
3834 * @rdev: radeon_device pointer
3835 * @ring: radeon_ring structure holding ring information
3836 *
3837 * Allocate a scratch register and write to it using the gfx ring (CIK).
3838 * Provides a basic gfx ring test to verify that the ring is working.
3839 * Used by cik_cp_gfx_resume();
3840 * Returns 0 on success, error on failure.
3841 */
cik_ring_test(struct radeon_device * rdev,struct radeon_ring * ring)3842 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3843 {
3844 uint32_t scratch;
3845 uint32_t tmp = 0;
3846 unsigned i;
3847 int r;
3848
3849 r = radeon_scratch_get(rdev, &scratch);
3850 if (r) {
3851 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3852 return r;
3853 }
3854 WREG32(scratch, 0xCAFEDEAD);
3855 r = radeon_ring_lock(rdev, ring, 3);
3856 if (r) {
3857 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3858 radeon_scratch_free(rdev, scratch);
3859 return r;
3860 }
3861 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3862 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3863 radeon_ring_write(ring, 0xDEADBEEF);
3864 radeon_ring_unlock_commit(rdev, ring, false);
3865
3866 for (i = 0; i < rdev->usec_timeout; i++) {
3867 tmp = RREG32(scratch);
3868 if (tmp == 0xDEADBEEF)
3869 break;
3870 DRM_UDELAY(1);
3871 }
3872 if (i < rdev->usec_timeout) {
3873 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3874 } else {
3875 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3876 ring->idx, scratch, tmp);
3877 r = -EINVAL;
3878 }
3879 radeon_scratch_free(rdev, scratch);
3880 return r;
3881 }
3882
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp: a WAIT_REG_MEM packet that requests
 * the flush via GPU_HDP_FLUSH_REQ and waits for the matching bit in
 * GPU_HDP_FLUSH_DONE.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_* bit for this ring: the GFX ring owns
	 * CP0, while the compute rings use a bit derived from their
	 * micro-engine (ME) and pipe.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME: no flush bit to wait on, bail out */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask); /* reference value */
	radeon_ring_write(ring, ref_and_mask); /* compare mask */
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3927
3928 /**
3929 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3930 *
3931 * @rdev: radeon_device pointer
3932 * @fence: radeon fence object
3933 *
3934 * Emits a fence sequnce number on the gfx ring and flushes
3935 * GPU caches.
3936 */
cik_fence_gfx_ring_emit(struct radeon_device * rdev,struct radeon_fence * fence)3937 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3938 struct radeon_fence *fence)
3939 {
3940 struct radeon_ring *ring = &rdev->ring[fence->ring];
3941 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3942
3943 /* Workaround for cache flush problems. First send a dummy EOP
3944 * event down the pipe with seq one below.
3945 */
3946 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3947 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3948 EOP_TC_ACTION_EN |
3949 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3950 EVENT_INDEX(5)));
3951 radeon_ring_write(ring, addr & 0xfffffffc);
3952 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3953 DATA_SEL(1) | INT_SEL(0));
3954 radeon_ring_write(ring, fence->seq - 1);
3955 radeon_ring_write(ring, 0);
3956
3957 /* Then send the real EOP event down the pipe. */
3958 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3959 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3960 EOP_TC_ACTION_EN |
3961 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3962 EVENT_INDEX(5)));
3963 radeon_ring_write(ring, addr & 0xfffffffc);
3964 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3965 radeon_ring_write(ring, fence->seq);
3966 radeon_ring_write(ring, 0);
3967 }
3968
3969 /**
3970 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3971 *
3972 * @rdev: radeon_device pointer
3973 * @fence: radeon fence object
3974 *
3975 * Emits a fence sequnce number on the compute ring and flushes
3976 * GPU caches.
3977 */
cik_fence_compute_ring_emit(struct radeon_device * rdev,struct radeon_fence * fence)3978 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3979 struct radeon_fence *fence)
3980 {
3981 struct radeon_ring *ring = &rdev->ring[fence->ring];
3982 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3983
3984 /* RELEASE_MEM - flush caches, send int */
3985 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3986 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3987 EOP_TC_ACTION_EN |
3988 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3989 EVENT_INDEX(5)));
3990 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3991 radeon_ring_write(ring, addr & 0xfffffffc);
3992 radeon_ring_write(ring, upper_32_bits(addr));
3993 radeon_ring_write(ring, fence->seq);
3994 radeon_ring_write(ring, 0);
3995 }
3996
3997 /**
3998 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3999 *
4000 * @rdev: radeon_device pointer
4001 * @ring: radeon ring buffer object
4002 * @semaphore: radeon semaphore object
4003 * @emit_wait: Is this a sempahore wait?
4004 *
4005 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4006 * from running ahead of semaphore waits.
4007 */
cik_semaphore_ring_emit(struct radeon_device * rdev,struct radeon_ring * ring,struct radeon_semaphore * semaphore,bool emit_wait)4008 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4009 struct radeon_ring *ring,
4010 struct radeon_semaphore *semaphore,
4011 bool emit_wait)
4012 {
4013 uint64_t addr = semaphore->gpu_addr;
4014 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4015
4016 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4017 radeon_ring_write(ring, lower_32_bits(addr));
4018 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4019
4020 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4021 /* Prevent the PFP from running ahead of the semaphore wait */
4022 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4023 radeon_ring_write(ring, 0x0);
4024 }
4025
4026 return true;
4027 }
4028
4029 /**
4030 * cik_copy_cpdma - copy pages using the CP DMA engine
4031 *
4032 * @rdev: radeon_device pointer
4033 * @src_offset: src GPU address
4034 * @dst_offset: dst GPU address
4035 * @num_gpu_pages: number of GPU pages to xfer
4036 * @resv: reservation object to sync to
4037 *
4038 * Copy GPU paging using the CP DMA engine (CIK+).
4039 * Used by the radeon ttm implementation to move pages if
4040 * registered as the asic copy callback.
4041 */
cik_copy_cpdma(struct radeon_device * rdev,uint64_t src_offset,uint64_t dst_offset,unsigned num_gpu_pages,struct reservation_object * resv)4042 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4043 uint64_t src_offset, uint64_t dst_offset,
4044 unsigned num_gpu_pages,
4045 struct reservation_object *resv)
4046 {
4047 struct radeon_fence *fence;
4048 struct radeon_sync sync;
4049 int ring_index = rdev->asic->copy.blit_ring_index;
4050 struct radeon_ring *ring = &rdev->ring[ring_index];
4051 u32 size_in_bytes, cur_size_in_bytes, control;
4052 int i, num_loops;
4053 int r = 0;
4054
4055 radeon_sync_create(&sync);
4056
4057 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4058 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4059 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4060 if (r) {
4061 DRM_ERROR("radeon: moving bo (%d).\n", r);
4062 radeon_sync_free(rdev, &sync, NULL);
4063 return ERR_PTR(r);
4064 }
4065
4066 radeon_sync_resv(rdev, &sync, resv, false);
4067 radeon_sync_rings(rdev, &sync, ring->idx);
4068
4069 for (i = 0; i < num_loops; i++) {
4070 cur_size_in_bytes = size_in_bytes;
4071 if (cur_size_in_bytes > 0x1fffff)
4072 cur_size_in_bytes = 0x1fffff;
4073 size_in_bytes -= cur_size_in_bytes;
4074 control = 0;
4075 if (size_in_bytes == 0)
4076 control |= PACKET3_DMA_DATA_CP_SYNC;
4077 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4078 radeon_ring_write(ring, control);
4079 radeon_ring_write(ring, lower_32_bits(src_offset));
4080 radeon_ring_write(ring, upper_32_bits(src_offset));
4081 radeon_ring_write(ring, lower_32_bits(dst_offset));
4082 radeon_ring_write(ring, upper_32_bits(dst_offset));
4083 radeon_ring_write(ring, cur_size_in_bytes);
4084 src_offset += cur_size_in_bytes;
4085 dst_offset += cur_size_in_bytes;
4086 }
4087
4088 r = radeon_fence_emit(rdev, &fence, ring->idx);
4089 if (r) {
4090 radeon_ring_unlock_undo(rdev, ring);
4091 radeon_sync_free(rdev, &sync, NULL);
4092 return ERR_PTR(r);
4093 }
4094
4095 radeon_ring_unlock_commit(rdev, ring, false);
4096 radeon_sync_free(rdev, &sync, fence);
4097
4098 return fence;
4099 }
4100
4101 /*
4102 * IB stuff
4103 */
4104 /**
4105 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4106 *
4107 * @rdev: radeon_device pointer
4108 * @ib: radeon indirect buffer object
4109 *
4110 * Emits an DE (drawing engine) or CE (constant engine) IB
4111 * on the gfx ring. IBs are usually generated by userspace
4112 * acceleration drivers and submitted to the kernel for
4113 * sheduling on the ring. This function schedules the IB
4114 * on the gfx ring for execution by the GPU.
4115 */
cik_ring_ib_execute(struct radeon_device * rdev,struct radeon_ib * ib)4116 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4117 {
4118 struct radeon_ring *ring = &rdev->ring[ib->ring];
4119 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4120 u32 header, control = INDIRECT_BUFFER_VALID;
4121
4122 if (ib->is_const_ib) {
4123 /* set switch buffer packet before const IB */
4124 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4125 radeon_ring_write(ring, 0);
4126
4127 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4128 } else {
4129 u32 next_rptr;
4130 if (ring->rptr_save_reg) {
4131 next_rptr = ring->wptr + 3 + 4;
4132 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4133 radeon_ring_write(ring, ((ring->rptr_save_reg -
4134 PACKET3_SET_UCONFIG_REG_START) >> 2));
4135 radeon_ring_write(ring, next_rptr);
4136 } else if (rdev->wb.enabled) {
4137 next_rptr = ring->wptr + 5 + 4;
4138 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4139 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4140 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4141 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4142 radeon_ring_write(ring, next_rptr);
4143 }
4144
4145 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4146 }
4147
4148 control |= ib->length_dw | (vm_id << 24);
4149
4150 radeon_ring_write(ring, header);
4151 radeon_ring_write(ring,
4152 #ifdef __BIG_ENDIAN
4153 (2 << 0) |
4154 #endif
4155 (ib->gpu_addr & 0xFFFFFFFC));
4156 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4157 radeon_ring_write(ring, control);
4158 }
4159
4160 /**
4161 * cik_ib_test - basic gfx ring IB test
4162 *
4163 * @rdev: radeon_device pointer
4164 * @ring: radeon_ring structure holding ring information
4165 *
4166 * Allocate an IB and execute it on the gfx ring (CIK).
4167 * Provides a basic gfx ring test to verify that IBs are working.
4168 * Returns 0 on success, error on failure.
4169 */
cik_ib_test(struct radeon_device * rdev,struct radeon_ring * ring)4170 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4171 {
4172 struct radeon_ib ib;
4173 uint32_t scratch;
4174 uint32_t tmp = 0;
4175 unsigned i;
4176 int r;
4177
4178 r = radeon_scratch_get(rdev, &scratch);
4179 if (r) {
4180 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4181 return r;
4182 }
4183 WREG32(scratch, 0xCAFEDEAD);
4184 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4185 if (r) {
4186 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4187 radeon_scratch_free(rdev, scratch);
4188 return r;
4189 }
4190 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4191 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4192 ib.ptr[2] = 0xDEADBEEF;
4193 ib.length_dw = 3;
4194 r = radeon_ib_schedule(rdev, &ib, NULL, false);
4195 if (r) {
4196 radeon_scratch_free(rdev, scratch);
4197 radeon_ib_free(rdev, &ib);
4198 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4199 return r;
4200 }
4201 r = radeon_fence_wait(ib.fence, false);
4202 if (r) {
4203 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4204 radeon_scratch_free(rdev, scratch);
4205 radeon_ib_free(rdev, &ib);
4206 return r;
4207 }
4208 for (i = 0; i < rdev->usec_timeout; i++) {
4209 tmp = RREG32(scratch);
4210 if (tmp == 0xDEADBEEF)
4211 break;
4212 DRM_UDELAY(1);
4213 }
4214 if (i < rdev->usec_timeout) {
4215 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4216 } else {
4217 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4218 scratch, tmp);
4219 r = -EINVAL;
4220 }
4221 radeon_scratch_free(rdev, scratch);
4222 radeon_ib_free(rdev, &ib);
4223 return r;
4224 }
4225
4226 /*
4227 * CP.
4228 * On CIK, gfx and compute now have independent command processors.
4229 *
4230 * GFX
4231 * Gfx consists of a single ring and can process both gfx jobs and
4232 * compute jobs. The gfx CP consists of three microengines (ME):
4233 * PFP - Pre-Fetch Parser
4234 * ME - Micro Engine
4235 * CE - Constant Engine
4236 * The PFP and ME make up what is considered the Drawing Engine (DE).
4237 * The CE is an asynchronous engine used for updating buffer descriptors
4238 * used by the DE so that they can be loaded into cache in parallel
4239 * while the DE is processing state update packets.
4240 *
4241 * Compute
4242 * The compute CP consists of two microengines (ME):
4243 * MEC1 - Compute MicroEngine 1
4244 * MEC2 - Compute MicroEngine 2
4245 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4246 * The queues are exposed to userspace and are programmed directly
4247 * by the compute runtime.
4248 */
4249 /**
4250 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4251 *
4252 * @rdev: radeon_device pointer
4253 * @enable: enable or disable the MEs
4254 *
4255 * Halts or unhalts the gfx MEs.
4256 */
cik_cp_gfx_enable(struct radeon_device * rdev,bool enable)4257 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4258 {
4259 if (enable)
4260 WREG32(CP_ME_CNTL, 0);
4261 else {
4262 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4263 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4264 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4265 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4266 }
4267 udelay(50);
4268 }
4269
4270 /**
4271 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4272 *
4273 * @rdev: radeon_device pointer
4274 *
4275 * Loads the gfx PFP, ME, and CE ucode.
4276 * Returns 0 for success, -EINVAL if the ucode is not available.
4277 */
cik_cp_gfx_load_microcode(struct radeon_device * rdev)4278 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4279 {
4280 int i;
4281
4282 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4283 return -EINVAL;
4284
4285 cik_cp_gfx_enable(rdev, false);
4286
4287 if (rdev->new_fw) {
4288 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4289 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4290 const struct gfx_firmware_header_v1_0 *ce_hdr =
4291 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4292 const struct gfx_firmware_header_v1_0 *me_hdr =
4293 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4294 const __le32 *fw_data;
4295 u32 fw_size;
4296
4297 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4298 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4299 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4300
4301 /* PFP */
4302 fw_data = (const __le32 *)
4303 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4304 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4305 WREG32(CP_PFP_UCODE_ADDR, 0);
4306 for (i = 0; i < fw_size; i++)
4307 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4308 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4309
4310 /* CE */
4311 fw_data = (const __le32 *)
4312 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4313 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4314 WREG32(CP_CE_UCODE_ADDR, 0);
4315 for (i = 0; i < fw_size; i++)
4316 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4317 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4318
4319 /* ME */
4320 fw_data = (const __be32 *)
4321 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4322 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4323 WREG32(CP_ME_RAM_WADDR, 0);
4324 for (i = 0; i < fw_size; i++)
4325 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4326 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4327 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4328 } else {
4329 const __be32 *fw_data;
4330
4331 /* PFP */
4332 fw_data = (const __be32 *)rdev->pfp_fw->data;
4333 WREG32(CP_PFP_UCODE_ADDR, 0);
4334 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4335 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4336 WREG32(CP_PFP_UCODE_ADDR, 0);
4337
4338 /* CE */
4339 fw_data = (const __be32 *)rdev->ce_fw->data;
4340 WREG32(CP_CE_UCODE_ADDR, 0);
4341 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4342 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4343 WREG32(CP_CE_UCODE_ADDR, 0);
4344
4345 /* ME */
4346 fw_data = (const __be32 *)rdev->me_fw->data;
4347 WREG32(CP_ME_RAM_WADDR, 0);
4348 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4349 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4350 WREG32(CP_ME_RAM_WADDR, 0);
4351 }
4352
4353 return 0;
4354 }
4355
4356 /**
4357 * cik_cp_gfx_start - start the gfx ring
4358 *
4359 * @rdev: radeon_device pointer
4360 *
4361 * Enables the ring and loads the clear state context and other
4362 * packets required to init the ring.
4363 * Returns 0 for success, error for failure.
4364 */
cik_cp_gfx_start(struct radeon_device * rdev)4365 static int cik_cp_gfx_start(struct radeon_device *rdev)
4366 {
4367 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4368 int r, i;
4369
4370 /* init the CP */
4371 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4372 WREG32(CP_ENDIAN_SWAP, 0);
4373 WREG32(CP_DEVICE_ID, 1);
4374
4375 cik_cp_gfx_enable(rdev, true);
4376
4377 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4378 if (r) {
4379 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4380 return r;
4381 }
4382
4383 /* init the CE partitions. CE only used for gfx on CIK */
4384 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4385 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4386 radeon_ring_write(ring, 0x8000);
4387 radeon_ring_write(ring, 0x8000);
4388
4389 /* setup clear context state */
4390 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4391 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4392
4393 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4394 radeon_ring_write(ring, 0x80000000);
4395 radeon_ring_write(ring, 0x80000000);
4396
4397 for (i = 0; i < cik_default_size; i++)
4398 radeon_ring_write(ring, cik_default_state[i]);
4399
4400 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4401 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4402
4403 /* set clear context state */
4404 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4405 radeon_ring_write(ring, 0);
4406
4407 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4408 radeon_ring_write(ring, 0x00000316);
4409 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4410 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4411
4412 radeon_ring_unlock_commit(rdev, ring, false);
4413
4414 return 0;
4415 }
4416
4417 /**
4418 * cik_cp_gfx_fini - stop the gfx ring
4419 *
4420 * @rdev: radeon_device pointer
4421 *
4422 * Stop the gfx ring and tear down the driver ring
4423 * info.
4424 */
cik_cp_gfx_fini(struct radeon_device * rdev)4425 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4426 {
4427 cik_cp_gfx_enable(rdev, false);
4428 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4429 }
4430
4431 /**
4432 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4433 *
4434 * @rdev: radeon_device pointer
4435 *
4436 * Program the location and size of the gfx ring buffer
4437 * and test it to make sure it's working.
4438 * Returns 0 for success, error for failure.
4439 */
cik_cp_gfx_resume(struct radeon_device * rdev)4440 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4441 {
4442 struct radeon_ring *ring;
4443 u32 tmp;
4444 u32 rb_bufsz;
4445 u64 rb_addr;
4446 int r;
4447
4448 WREG32(CP_SEM_WAIT_TIMER, 0x0);
4449 if (rdev->family != CHIP_HAWAII)
4450 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4451
4452 /* Set the write pointer delay */
4453 WREG32(CP_RB_WPTR_DELAY, 0);
4454
4455 /* set the RB to use vmid 0 */
4456 WREG32(CP_RB_VMID, 0);
4457
4458 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4459
4460 /* ring 0 - compute and gfx */
4461 /* Set ring buffer size */
4462 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4463 rb_bufsz = order_base_2(ring->ring_size / 8);
4464 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4465 #ifdef __BIG_ENDIAN
4466 tmp |= BUF_SWAP_32BIT;
4467 #endif
4468 WREG32(CP_RB0_CNTL, tmp);
4469
4470 /* Initialize the ring buffer's read and write pointers */
4471 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4472 ring->wptr = 0;
4473 WREG32(CP_RB0_WPTR, ring->wptr);
4474
4475 /* set the wb address wether it's enabled or not */
4476 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4477 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4478
4479 /* scratch register shadowing is no longer supported */
4480 WREG32(SCRATCH_UMSK, 0);
4481
4482 if (!rdev->wb.enabled)
4483 tmp |= RB_NO_UPDATE;
4484
4485 mdelay(1);
4486 WREG32(CP_RB0_CNTL, tmp);
4487
4488 rb_addr = ring->gpu_addr >> 8;
4489 WREG32(CP_RB0_BASE, rb_addr);
4490 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4491
4492 /* start the ring */
4493 cik_cp_gfx_start(rdev);
4494 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4495 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4496 if (r) {
4497 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4498 return r;
4499 }
4500
4501 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4502 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4503
4504 return 0;
4505 }
4506
cik_gfx_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)4507 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4508 struct radeon_ring *ring)
4509 {
4510 u32 rptr;
4511
4512 if (rdev->wb.enabled)
4513 rptr = rdev->wb.wb[ring->rptr_offs/4];
4514 else
4515 rptr = RREG32(CP_RB0_RPTR);
4516
4517 return rptr;
4518 }
4519
cik_gfx_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4520 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4521 struct radeon_ring *ring)
4522 {
4523 u32 wptr;
4524
4525 wptr = RREG32(CP_RB0_WPTR);
4526
4527 return wptr;
4528 }
4529
cik_gfx_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4530 void cik_gfx_set_wptr(struct radeon_device *rdev,
4531 struct radeon_ring *ring)
4532 {
4533 WREG32(CP_RB0_WPTR, ring->wptr);
4534 (void)RREG32(CP_RB0_WPTR);
4535 }
4536
cik_compute_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)4537 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4538 struct radeon_ring *ring)
4539 {
4540 u32 rptr;
4541
4542 if (rdev->wb.enabled) {
4543 rptr = rdev->wb.wb[ring->rptr_offs/4];
4544 } else {
4545 mutex_lock(&rdev->srbm_mutex);
4546 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4547 rptr = RREG32(CP_HQD_PQ_RPTR);
4548 cik_srbm_select(rdev, 0, 0, 0, 0);
4549 mutex_unlock(&rdev->srbm_mutex);
4550 }
4551
4552 return rptr;
4553 }
4554
cik_compute_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4555 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4556 struct radeon_ring *ring)
4557 {
4558 u32 wptr;
4559
4560 if (rdev->wb.enabled) {
4561 /* XXX check if swapping is necessary on BE */
4562 wptr = rdev->wb.wb[ring->wptr_offs/4];
4563 } else {
4564 mutex_lock(&rdev->srbm_mutex);
4565 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4566 wptr = RREG32(CP_HQD_PQ_WPTR);
4567 cik_srbm_select(rdev, 0, 0, 0, 0);
4568 mutex_unlock(&rdev->srbm_mutex);
4569 }
4570
4571 return wptr;
4572 }
4573
cik_compute_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4574 void cik_compute_set_wptr(struct radeon_device *rdev,
4575 struct radeon_ring *ring)
4576 {
4577 /* XXX check if swapping is necessary on BE */
4578 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4579 WDOORBELL32(ring->doorbell_index, ring->wptr);
4580 }
4581
cik_compute_stop(struct radeon_device * rdev,struct radeon_ring * ring)4582 static void cik_compute_stop(struct radeon_device *rdev,
4583 struct radeon_ring *ring)
4584 {
4585 u32 j, tmp;
4586
4587 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4588 /* Disable wptr polling. */
4589 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4590 tmp &= ~WPTR_POLL_EN;
4591 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4592 /* Disable HQD. */
4593 if (RREG32(CP_HQD_ACTIVE) & 1) {
4594 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4595 for (j = 0; j < rdev->usec_timeout; j++) {
4596 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4597 break;
4598 udelay(1);
4599 }
4600 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4601 WREG32(CP_HQD_PQ_RPTR, 0);
4602 WREG32(CP_HQD_PQ_WPTR, 0);
4603 }
4604 cik_srbm_select(rdev, 0, 0, 0, 0);
4605 }
4606
4607 /**
4608 * cik_cp_compute_enable - enable/disable the compute CP MEs
4609 *
4610 * @rdev: radeon_device pointer
4611 * @enable: enable or disable the MEs
4612 *
4613 * Halts or unhalts the compute MEs.
4614 */
cik_cp_compute_enable(struct radeon_device * rdev,bool enable)4615 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4616 {
4617 if (enable)
4618 WREG32(CP_MEC_CNTL, 0);
4619 else {
4620 /*
4621 * To make hibernation reliable we need to clear compute ring
4622 * configuration before halting the compute ring.
4623 */
4624 mutex_lock(&rdev->srbm_mutex);
4625 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4626 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4627 mutex_unlock(&rdev->srbm_mutex);
4628
4629 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4630 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4631 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4632 }
4633 udelay(50);
4634 }
4635
4636 /**
4637 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4638 *
4639 * @rdev: radeon_device pointer
4640 *
4641 * Loads the compute MEC1&2 ucode.
4642 * Returns 0 for success, -EINVAL if the ucode is not available.
4643 */
cik_cp_compute_load_microcode(struct radeon_device * rdev)4644 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4645 {
4646 int i;
4647
4648 if (!rdev->mec_fw)
4649 return -EINVAL;
4650
4651 cik_cp_compute_enable(rdev, false);
4652
4653 if (rdev->new_fw) {
4654 const struct gfx_firmware_header_v1_0 *mec_hdr =
4655 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4656 const __le32 *fw_data;
4657 u32 fw_size;
4658
4659 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4660
4661 /* MEC1 */
4662 fw_data = (const __le32 *)
4663 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4664 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4665 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4666 for (i = 0; i < fw_size; i++)
4667 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4668 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4669
4670 /* MEC2 */
4671 if (rdev->family == CHIP_KAVERI) {
4672 const struct gfx_firmware_header_v1_0 *mec2_hdr =
4673 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4674
4675 fw_data = (const __le32 *)
4676 (rdev->mec2_fw->data +
4677 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4678 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4679 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4680 for (i = 0; i < fw_size; i++)
4681 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4682 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4683 }
4684 } else {
4685 const __be32 *fw_data;
4686
4687 /* MEC1 */
4688 fw_data = (const __be32 *)rdev->mec_fw->data;
4689 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4690 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4691 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4692 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4693
4694 if (rdev->family == CHIP_KAVERI) {
4695 /* MEC2 */
4696 fw_data = (const __be32 *)rdev->mec_fw->data;
4697 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4698 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4699 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4700 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4701 }
4702 }
4703
4704 return 0;
4705 }
4706
4707 /**
4708 * cik_cp_compute_start - start the compute queues
4709 *
4710 * @rdev: radeon_device pointer
4711 *
4712 * Enable the compute queues.
4713 * Returns 0 for success, error for failure.
4714 */
cik_cp_compute_start(struct radeon_device * rdev)4715 static int cik_cp_compute_start(struct radeon_device *rdev)
4716 {
4717 cik_cp_compute_enable(rdev, true);
4718
4719 return 0;
4720 }
4721
4722 /**
4723 * cik_cp_compute_fini - stop the compute queues
4724 *
4725 * @rdev: radeon_device pointer
4726 *
4727 * Stop the compute queues and tear down the driver queue
4728 * info.
4729 */
cik_cp_compute_fini(struct radeon_device * rdev)4730 static void cik_cp_compute_fini(struct radeon_device *rdev)
4731 {
4732 int i, idx, r;
4733
4734 cik_cp_compute_enable(rdev, false);
4735
4736 for (i = 0; i < 2; i++) {
4737 if (i == 0)
4738 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4739 else
4740 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4741
4742 if (rdev->ring[idx].mqd_obj) {
4743 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4744 if (unlikely(r != 0))
4745 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4746
4747 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4748 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4751 rdev->ring[idx].mqd_obj = NULL;
4752 }
4753 }
4754 }
4755
cik_mec_fini(struct radeon_device * rdev)4756 static void cik_mec_fini(struct radeon_device *rdev)
4757 {
4758 int r;
4759
4760 if (rdev->mec.hpd_eop_obj) {
4761 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4762 if (unlikely(r != 0))
4763 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4764 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4765 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4766
4767 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4768 rdev->mec.hpd_eop_obj = NULL;
4769 }
4770 }
4771
4772 #define MEC_HPD_SIZE 2048
4773
cik_mec_init(struct radeon_device * rdev)4774 static int cik_mec_init(struct radeon_device *rdev)
4775 {
4776 int r;
4777 u32 *hpd;
4778
4779 /*
4780 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4781 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4782 * Nonetheless, we assign only 1 pipe because all other pipes will
4783 * be handled by KFD
4784 */
4785 rdev->mec.num_mec = 1;
4786 rdev->mec.num_pipe = 1;
4787 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4788
4789 if (rdev->mec.hpd_eop_obj == NULL) {
4790 r = radeon_bo_create(rdev,
4791 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4792 PAGE_SIZE, true,
4793 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4794 &rdev->mec.hpd_eop_obj);
4795 if (r) {
4796 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4797 return r;
4798 }
4799 }
4800
4801 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4802 if (unlikely(r != 0)) {
4803 cik_mec_fini(rdev);
4804 return r;
4805 }
4806 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4807 &rdev->mec.hpd_eop_gpu_addr);
4808 if (r) {
4809 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4810 cik_mec_fini(rdev);
4811 return r;
4812 }
4813 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4814 if (r) {
4815 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4816 cik_mec_fini(rdev);
4817 return r;
4818 }
4819
4820 /* clear memory. Not sure if this is required or not */
4821 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4822
4823 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4824 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4825
4826 return 0;
4827 }
4828
/*
 * CPU-side snapshot of the CP hardware queue descriptor (HQD) register
 * block for one compute queue.  cik_cp_compute_resume() fills this in
 * while it programs the corresponding CP_HQD_*/CP_MQD_* registers so the
 * MQD in memory matches what was written to the hardware.
 *
 * NOTE(review): the field order appears to mirror the register order in
 * cikd.h and this struct is embedded in struct bonaire_mqd, which the CP
 * microcode reads from memory — do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4867
/*
 * Memory queue descriptor (MQD) for a Bonaire-class compute queue.
 * One of these lives in a GTT BO per compute ring (see
 * cik_cp_compute_resume()); the queue_state member is kept in sync with
 * the CP_HQD_* registers as they are programmed.
 *
 * NOTE(review): this layout is consumed by the CP/MEC microcode, so the
 * exact field order and sizes are ABI with the firmware — do not change.
 */
struct bonaire_mqd
{
	u32 header;			/* set to 0xC0310800 at init */
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];	/* CU masks, all-ones at init */
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];	/* CU masks, all-ones at init */
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state; /* HQD register mirror */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4895
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * For each of the two kernel compute rings (CP1/CP2) this allocates
 * (if needed), pins and maps an MQD buffer object, fills it in, and
 * mirrors the values into the CP_HQD_* registers under srbm_mutex.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);	/* magic chicken bit — meaning undocumented here */
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	/* EOP buffer allocated in cik_mec_init() */
	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD BO; kept across suspend/resume */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* the HQD registers are per me/pipe/queue, so select ours */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active, polling until the
		 * dequeue request is honored (bounded by usec_timeout) */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size is 2^(QUEUE_SIZE+1) dwords */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* sanity check the queue with a ring test */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5136
/* Enable or disable both command processors: the gfx CP (ME/PFP/CE)
 * and the compute CP (MEC pipes). */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5142
/* Upload the CP microcode for both the gfx and compute engines.
 * Returns 0 on success or the first error encountered. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	/* gfx ucode first, compute ucode only if that succeeded */
	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
5156
/* Tear down both command processors (gfx ring and compute queues). */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5162
cik_cp_resume(struct radeon_device * rdev)5163 static int cik_cp_resume(struct radeon_device *rdev)
5164 {
5165 int r;
5166
5167 cik_enable_gui_idle_interrupt(rdev, false);
5168
5169 r = cik_cp_load_microcode(rdev);
5170 if (r)
5171 return r;
5172
5173 r = cik_cp_gfx_resume(rdev);
5174 if (r)
5175 return r;
5176 r = cik_cp_compute_resume(rdev);
5177 if (r)
5178 return r;
5179
5180 cik_enable_gui_idle_interrupt(rdev, true);
5181
5182 return 0;
5183 }
5184
/* Dump the main GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Used before and after a soft reset to aid hang diagnosis. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5224
5225 /**
5226 * cik_gpu_check_soft_reset - check which blocks are busy
5227 *
5228 * @rdev: radeon_device pointer
5229 *
5230 * Check which blocks are busy and return the relevant reset
5231 * mask to be used by cik_gpu_soft_reset().
5232 * Returns a mask of the blocks to be reset.
5233 */
cik_gpu_check_soft_reset(struct radeon_device * rdev)5234 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5235 {
5236 u32 reset_mask = 0;
5237 u32 tmp;
5238
5239 /* GRBM_STATUS */
5240 tmp = RREG32(GRBM_STATUS);
5241 if (tmp & (PA_BUSY | SC_BUSY |
5242 BCI_BUSY | SX_BUSY |
5243 TA_BUSY | VGT_BUSY |
5244 DB_BUSY | CB_BUSY |
5245 GDS_BUSY | SPI_BUSY |
5246 IA_BUSY | IA_BUSY_NO_DMA))
5247 reset_mask |= RADEON_RESET_GFX;
5248
5249 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5250 reset_mask |= RADEON_RESET_CP;
5251
5252 /* GRBM_STATUS2 */
5253 tmp = RREG32(GRBM_STATUS2);
5254 if (tmp & RLC_BUSY)
5255 reset_mask |= RADEON_RESET_RLC;
5256
5257 /* SDMA0_STATUS_REG */
5258 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5259 if (!(tmp & SDMA_IDLE))
5260 reset_mask |= RADEON_RESET_DMA;
5261
5262 /* SDMA1_STATUS_REG */
5263 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5264 if (!(tmp & SDMA_IDLE))
5265 reset_mask |= RADEON_RESET_DMA1;
5266
5267 /* SRBM_STATUS2 */
5268 tmp = RREG32(SRBM_STATUS2);
5269 if (tmp & SDMA_BUSY)
5270 reset_mask |= RADEON_RESET_DMA;
5271
5272 if (tmp & SDMA1_BUSY)
5273 reset_mask |= RADEON_RESET_DMA1;
5274
5275 /* SRBM_STATUS */
5276 tmp = RREG32(SRBM_STATUS);
5277
5278 if (tmp & IH_BUSY)
5279 reset_mask |= RADEON_RESET_IH;
5280
5281 if (tmp & SEM_BUSY)
5282 reset_mask |= RADEON_RESET_SEM;
5283
5284 if (tmp & GRBM_RQ_PENDING)
5285 reset_mask |= RADEON_RESET_GRBM;
5286
5287 if (tmp & VMC_BUSY)
5288 reset_mask |= RADEON_RESET_VMC;
5289
5290 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5291 MCC_BUSY | MCD_BUSY))
5292 reset_mask |= RADEON_RESET_MC;
5293
5294 if (evergreen_is_display_hung(rdev))
5295 reset_mask |= RADEON_RESET_DISPLAY;
5296
5297 /* Skip MC reset as it's mostly likely not hung, just busy */
5298 if (reset_mask & RADEON_RESET_MC) {
5299 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5300 reset_mask &= ~RADEON_RESET_MC;
5301 }
5302
5303 return reset_mask;
5304 }
5305
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Sequence: quiesce everything (CG/PG, RLC, CP, MEC, sDMA, MC),
 * translate the mask into GRBM/SRBM soft-reset bits, pulse them,
 * then restore the MC and dump status registers again.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the sDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before pulsing reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on discrete parts, never on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, post-read, settle, clear, post-read */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5436
/* GMCON registers saved around a pci config reset on Kaveri-class APUs
 * (see kv_save_regs_for_reset()/kv_restore_regs_for_reset()). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5442
/* Save the GMCON registers into @save and disable the restore engine
 * triggers (power-up / register-update execution, stutter mode) so they
 * don't fire while the asic goes through a pci config reset. */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5454
kv_restore_regs_for_reset(struct radeon_device * rdev,struct kv_reset_save_regs * save)5455 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5456 struct kv_reset_save_regs *save)
5457 {
5458 int i;
5459
5460 WREG32(GMCON_PGFSM_WRITE, 0);
5461 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5462
5463 for (i = 0; i < 5; i++)
5464 WREG32(GMCON_PGFSM_WRITE, 0);
5465
5466 WREG32(GMCON_PGFSM_WRITE, 0);
5467 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5468
5469 for (i = 0; i < 5; i++)
5470 WREG32(GMCON_PGFSM_WRITE, 0);
5471
5472 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5473 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5474
5475 for (i = 0; i < 5; i++)
5476 WREG32(GMCON_PGFSM_WRITE, 0);
5477
5478 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5479 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5480
5481 for (i = 0; i < 5; i++)
5482 WREG32(GMCON_PGFSM_WRITE, 0);
5483
5484 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5485 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5486
5487 for (i = 0; i < 5; i++)
5488 WREG32(GMCON_PGFSM_WRITE, 0);
5489
5490 WREG32(GMCON_PGFSM_WRITE, 0);
5491 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5492
5493 for (i = 0; i < 5; i++)
5494 WREG32(GMCON_PGFSM_WRITE, 0);
5495
5496 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5497 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5498
5499 for (i = 0; i < 5; i++)
5500 WREG32(GMCON_PGFSM_WRITE, 0);
5501
5502 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5503 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5504
5505 for (i = 0; i < 5; i++)
5506 WREG32(GMCON_PGFSM_WRITE, 0);
5507
5508 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5509 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5510
5511 for (i = 0; i < 5; i++)
5512 WREG32(GMCON_PGFSM_WRITE, 0);
5513
5514 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5515 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5516
5517 for (i = 0; i < 5; i++)
5518 WREG32(GMCON_PGFSM_WRITE, 0);
5519
5520 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5521 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5522
5523 WREG32(GMCON_MISC3, save->gmcon_misc3);
5524 WREG32(GMCON_MISC, save->gmcon_misc);
5525 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5526 }
5527
/* Full-asic pci config reset: quiesce every engine, save what must
 * survive, hit the reset, and wait for the asic to come back.  Used as
 * the big hammer when a soft reset did not clear the hang. */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs need their GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones until the asic is alive again */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5590
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Escalation: soft reset first, then (if still hung and the
 * radeon_hard_reset module option allows it) a pci config reset.
 * The BIOS scratch "engine hung" flag is set while a hang is
 * outstanding and cleared once the asic reports idle.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5625
5626 /**
5627 * cik_gfx_is_lockup - check if the 3D engine is locked up
5628 *
5629 * @rdev: radeon_device pointer
5630 * @ring: radeon_ring structure holding ring information
5631 *
5632 * Check if the 3D engine is locked up (CIK).
5633 * Returns true if the engine is locked, false if not.
5634 */
cik_gfx_is_lockup(struct radeon_device * rdev,struct radeon_ring * ring)5635 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5636 {
5637 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5638
5639 if (!(reset_mask & (RADEON_RESET_GFX |
5640 RADEON_RESET_COMPUTE |
5641 RADEON_RESET_CP))) {
5642 radeon_ring_lockup_update(rdev, ring);
5643 return false;
5644 }
5645 return radeon_ring_test_lockup(rdev, ring);
5646 }
5647
5648 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP — zero 32 register tuples at stride 0x18.
	 * NOTE(review): the 0x2c14..0x2c24 offsets are undocumented here;
	 * presumably the HDP surface registers — confirm against cikd.h. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic while the apertures move */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start, in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5704
5705 /**
5706 * cik_mc_init - initialize the memory controller driver params
5707 *
5708 * @rdev: radeon_device pointer
5709 *
5710 * Look up the amount of vram, vram width, and decide how to place
5711 * vram and gart within the GPU's physical address space (CIK).
5712 * Returns 0 for success.
5713 */
cik_mc_init(struct radeon_device * rdev)5714 static int cik_mc_init(struct radeon_device *rdev)
5715 {
5716 u32 tmp;
5717 int chansize, numchan;
5718
5719 /* Get VRAM informations */
5720 rdev->mc.vram_is_ddr = true;
5721 tmp = RREG32(MC_ARB_RAMCFG);
5722 if (tmp & CHANSIZE_MASK) {
5723 chansize = 64;
5724 } else {
5725 chansize = 32;
5726 }
5727 tmp = RREG32(MC_SHARED_CHMAP);
5728 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5729 case 0:
5730 default:
5731 numchan = 1;
5732 break;
5733 case 1:
5734 numchan = 2;
5735 break;
5736 case 2:
5737 numchan = 4;
5738 break;
5739 case 3:
5740 numchan = 8;
5741 break;
5742 case 4:
5743 numchan = 3;
5744 break;
5745 case 5:
5746 numchan = 6;
5747 break;
5748 case 6:
5749 numchan = 10;
5750 break;
5751 case 7:
5752 numchan = 12;
5753 break;
5754 case 8:
5755 numchan = 16;
5756 break;
5757 }
5758 rdev->mc.vram_width = numchan * chansize;
5759 /* Could aper size report 0 ? */
5760 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5761 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5762 /* size in MB on si */
5763 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5764 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5765 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5766 si_vram_gtt_location(rdev, &rdev->mc);
5767 radeon_update_bandwidth_info(rdev);
5768
5769 return 0;
5770 }
5771
5772 /*
5773 * GART
5774 * VMID 0 is the physical GPU addresses as used by the kernel.
5775 * VMIDs 1-15 are used for userspace clients and are handled
5776 * by the radeon vm/hsa code.
5777 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so pending CPU writes are visible first */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 = context 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5793
/* Initialize the SH_MEM configuration for VMIDs 8-15.
 * NOTE(review): per the comment in cik_mec_init(), these VMIDs appear
 * to be reserved for KFD-managed compute queues — confirm.  Selecting
 * each VMID via cik_srbm_select() under srbm_mutex, this programs
 * unaligned-access mode, non-cached default MTYPE, and the 0x6000
 * private/shared aperture bases (value meaning not documented here). */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5815
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the page table in VRAM so table_addr below is valid */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's own GART mapping over the GTT
	 * aperture, backed by the pinned page table above
	 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw register offsets, not named in cikd.h —
	 * confirm which registers these are before touching this.
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 (saved by cik_pcie_gart_disable()) */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 with full fault reporting */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space;
	 * programmed per-VMID via the SRBM, serialized by srbm_mutex
	 */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5938
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 * The per-VMID page table base addresses are saved first so that
 * cik_pcie_gart_enable() can restore them on resume.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save page table bases for contexts 1-15 (two register banks) */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: leave system-aperture pass-through only */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (note: ENABLE_L2_CACHE intentionally not set) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* unpin the table; counterpart of the pin in gart_enable */
	radeon_gart_table_vram_unpin(rdev);
}
5977
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK):
 * disables the hw, frees the page table BO, then the gart structures.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5991
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
6005
6006 /*
6007 * vm
6008 * VMID 0 is the physical GPU addresses as used by the kernel.
6009 * VMIDs 1-15 are used for userspace clients and are handled
6010 * by the radeon vm/hsa code.
6011 */
6012 /**
6013 * cik_vm_init - cik vm init callback
6014 *
6015 * @rdev: radeon_device pointer
6016 *
6017 * Inits cik specific vm parameters (number of VMs, base of vram for
6018 * VMIDs 1-15) (CIK).
6019 * Returns 0 for success.
6020 */
cik_vm_init(struct radeon_device * rdev)6021 int cik_vm_init(struct radeon_device *rdev)
6022 {
6023 /*
6024 * number of VMs
6025 * VMID 0 is reserved for System
6026 * radeon graphics/compute will use VMIDs 1-7
6027 * amdkfd will use VMIDs 8-15
6028 */
6029 rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6030 /* base offset of vram pages */
6031 if (rdev->flags & RADEON_IS_IGP) {
6032 u64 tmp = RREG32(MC_VM_FB_OFFSET);
6033 tmp <<= 22;
6034 rdev->vm_manager.vram_base_offset = tmp;
6035 } else
6036 rdev->vm_manager.vram_base_offset = 0;
6037
6038 return 0;
6039 }
6040
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK; the callback must still exist for the
 * asic function table.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
6051
6052 /**
6053 * cik_vm_decode_fault - print human readable fault info
6054 *
6055 * @rdev: radeon_device pointer
6056 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6057 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6058 *
6059 * Print human readable fault information (CIK).
6060 */
cik_vm_decode_fault(struct radeon_device * rdev,u32 status,u32 addr,u32 mc_client)6061 static void cik_vm_decode_fault(struct radeon_device *rdev,
6062 u32 status, u32 addr, u32 mc_client)
6063 {
6064 u32 mc_id;
6065 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6066 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6067 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6068 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6069
6070 if (rdev->family == CHIP_HAWAII)
6071 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6072 else
6073 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6074
6075 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6076 protections, vmid, addr,
6077 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6078 block, mc_client, mc_id);
6079 }
6080
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VMID (0-15) whose TLB is flushed
 * @pd_addr: new page directory base address for that VMID
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP; compute rings write via the ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base for this VMID;
	 * contexts 0-7 and 8-15 live in two separate register banks
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs: select the VMID via SRBM_GFX_CNTL ... */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* ... program its four SH_MEM registers in one burst ... */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* ... and switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6162
6163 /*
6164 * RLC
6165 * The RLC is a multi-purpose microengine that handles a
6166 * variety of functions, the most important of which is
6167 * the interrupt controller.
6168 */
/* Toggle the context busy/empty (GUI idle) interrupt sources on ring 0. */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	const u32 bits = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cntl = RREG32(CP_INT_CNTL_RING0);

	if (enable)
		cntl |= bits;
	else
		cntl &= ~bits;
	WREG32(CP_INT_CNTL_RING0, cntl);
}
6180
/* Toggle RLC load balancing (LBPW). */
static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 cntl = RREG32(RLC_LB_CNTL);

	if (enable)
		cntl |= LOAD_BALANCE_ENABLE;
	else
		cntl &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, cntl);
}
6192
cik_wait_for_rlc_serdes(struct radeon_device * rdev)6193 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6194 {
6195 u32 i, j, k;
6196 u32 mask;
6197
6198 mutex_lock(&rdev->grbm_idx_mutex);
6199 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6200 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6201 cik_select_se_sh(rdev, i, j);
6202 for (k = 0; k < rdev->usec_timeout; k++) {
6203 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6204 break;
6205 udelay(1);
6206 }
6207 }
6208 }
6209 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6210 mutex_unlock(&rdev->grbm_idx_mutex);
6211
6212 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6213 for (k = 0; k < rdev->usec_timeout; k++) {
6214 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6215 break;
6216 udelay(1);
6217 }
6218 }
6219
/* Write RLC_CNTL only if it differs from the requested value. */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	u32 cur = RREG32(RLC_CNTL);

	if (cur != rlc)
		WREG32(RLC_CNTL, rlc);
}
6228
cik_halt_rlc(struct radeon_device * rdev)6229 static u32 cik_halt_rlc(struct radeon_device *rdev)
6230 {
6231 u32 data, orig;
6232
6233 orig = data = RREG32(RLC_CNTL);
6234
6235 if (data & RLC_ENABLE) {
6236 u32 i;
6237
6238 data &= ~RLC_ENABLE;
6239 WREG32(RLC_CNTL, data);
6240
6241 for (i = 0; i < rdev->usec_timeout; i++) {
6242 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6243 break;
6244 udelay(1);
6245 }
6246
6247 cik_wait_for_rlc_serdes(rdev);
6248 }
6249
6250 return orig;
6251 }
6252
/* Request RLC safe mode and wait for the handshake to complete:
 * post MSG_ENTER_RLC_SAFE_MODE, wait for power/clock status, then
 * wait for the RLC to clear the REQ bit. Both waits are bounded by
 * rdev->usec_timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* RLC clears REQ once it has acknowledged the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6273
cik_exit_rlc_safe_mode(struct radeon_device * rdev)6274 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6275 {
6276 u32 tmp;
6277
6278 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6279 WREG32(RLC_GPR_REG2, tmp);
6280 }
6281
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK):
 * clear RLC_CNTL, mask the GUI idle interrupt, and wait for the
 * serdes masters to go idle.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
6297
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK), re-enable the GUI idle
 * interrupt, and give the engine a short settle delay.
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
6313
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG (clear the two low enable bits) */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* load-balancing setup is broadcast to all SE/SH */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware carries its own header with size,
		 * payload offset, and version; data is little-endian
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed per-family size, big-endian data */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6405
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX.
 * The enable path halts the RLC, programs the serdes override, then
 * restores the RLC before flipping the CGCG/CGLS enable bits.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SE/SH */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads of CB_CGTT_SCLK_CTRL — presumably a
		 * hw-required settle/flush sequence; TODO confirm
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6443
/* Enable/disable medium-grain clock gating (MGCG) for GFX, including
 * the optional CP/RLC memory light sleep and CGTS shader gating.
 * Both paths halt the RLC around the serdes override write and then
 * restore it; statement order is significant throughout.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS shader gating setup */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC and CP memory light sleep off */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6527
/* MC/VM hub registers whose CG/LS enable bits are toggled together by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6540
/* Toggle the light-sleep enable bit in every MC CG register. */
static void cik_enable_mc_ls(struct radeon_device *rdev,
			     bool enable)
{
	const bool ls_on = enable &&
		(rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS);
	int i;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		u32 old = RREG32(mc_cg_registers[i]);
		u32 val = ls_on ? (old | MC_LS_ENABLE) : (old & ~MC_LS_ENABLE);

		if (val != old)
			WREG32(mc_cg_registers[i], val);
	}
}
6557
/* Toggle the clock-gating enable bit in every MC CG register. */
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	const bool cg_on = enable &&
		(rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG);
	int i;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		u32 old = RREG32(mc_cg_registers[i]);
		u32 val = cg_on ? (old | MC_CG_ENABLE) : (old & ~MC_CG_ENABLE);

		if (val != old)
			WREG32(mc_cg_registers[i], val);
	}
}
6574
/* Toggle medium-grain clock gating on both SDMA engines. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		int i;

		/* disable: set the override bits on each engine in turn */
		for (i = 0; i < 2; i++) {
			u32 reg = SDMA0_CLK_CTRL +
				(i ? SDMA1_REGISTER_OFFSET : SDMA0_REGISTER_OFFSET);
			u32 old = RREG32(reg);
			u32 val = old | 0xff000000;

			if (val != old)
				WREG32(reg, val);
		}
	}
}
6595
/* Toggle memory light sleep (bit 0x100 of SDMA0_POWER_CNTL) on both
 * SDMA engines.
 */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	const bool ls_on = enable &&
		(rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS);
	int i;

	for (i = 0; i < 2; i++) {
		u32 reg = SDMA0_POWER_CNTL +
			(i ? SDMA1_REGISTER_OFFSET : SDMA0_REGISTER_OFFSET);
		u32 old = RREG32(reg);
		u32 val = ls_on ? (old | 0x100) : (old & ~0x100);

		if (val != old)
			WREG32(reg, val);
	}
}
6623
/* Enable/disable UVD medium-grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the read result is immediately overwritten
		 * by 0xfff — possibly an hw-required read or simply dead;
		 * left as-is, confirm before removing.
		 */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6649
/* Toggle the BIF (PCIe) memory light-sleep bits in PCIE_CNTL2. */
static void cik_enable_bif_mgls(struct radeon_device *rdev,
				bool enable)
{
	const u32 ls_bits = SLV_MEM_LS_EN | MST_MEM_LS_EN |
			    REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
	u32 old, val;

	old = val = RREG32_PCIE_PORT(PCIE_CNTL2);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
		val |= ls_bits;
	else
		val &= ~ls_bits;

	if (val != old)
		WREG32_PCIE_PORT(PCIE_CNTL2, val);
}
6667
/* Toggle HDP clock gating (note: enabling CG means clearing the
 * CLOCK_GATING_DIS bit).
 */
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 old, val;

	old = val = RREG32(HDP_HOST_PATH_CNTL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
		val &= ~CLOCK_GATING_DIS;
	else
		val |= CLOCK_GATING_DIS;

	if (val != old)
		WREG32(HDP_HOST_PATH_CNTL, val);
}
6683
/* Toggle HDP memory light sleep. */
static void cik_enable_hdp_ls(struct radeon_device *rdev,
			      bool enable)
{
	u32 old, val;

	old = val = RREG32(HDP_MEM_POWER_LS);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
		val |= HDP_LS_ENABLE;
	else
		val &= ~HDP_LS_ENABLE;

	if (val != old)
		WREG32(HDP_MEM_POWER_LS, val);
}
6699
/* Enable or disable clock gating for the blocks selected in @block
 * (a mask of RADEON_CG_BLOCK_* flags). For GFX the MGCG/CGCG ordering
 * differs between enable and disable and must not be changed.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC CG is handled elsewhere on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6747
/* Enable clock gating on all supported blocks: GFX first, then UVD
 * internal CG, then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6762
/* Disable clock gating on all blocks; reverse order of cik_init_cg()
 * (non-GFX blocks first, GFX last).
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6773
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL. */
static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
					  bool enable)
{
	u32 old = RREG32(RLC_PG_CNTL);
	u32 val;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		val = old | SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	else
		val = old & ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;

	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6787
/* Toggle SMU clock slowdown on power-down in RLC_PG_CNTL. */
static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
					  bool enable)
{
	u32 old = RREG32(RLC_PG_CNTL);
	u32 val;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		val = old | SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	else
		val = old & ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;

	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6801
/* Toggle CP power gating (note the inverted DISABLE_CP_PG bit:
 * enabling PG means clearing it).
 */
static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	u32 old = RREG32(RLC_PG_CNTL);
	u32 val;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
		val = old & ~DISABLE_CP_PG;
	else
		val = old | DISABLE_CP_PG;

	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6814
/* Toggle GDS power gating (inverted DISABLE_GDS_PG bit, as with
 * cik_enable_cp_pg()).
 */
static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	u32 old = RREG32(RLC_PG_CNTL);
	u32 val;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
		val = old & ~DISABLE_GDS_PG;
	else
		val = old | DISABLE_GDS_PG;

	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6827
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP jump tables out of the CE/PFP/ME/MEC(/MEC2 on Kaveri)
 * firmware images into the RLC's cp_table BO, packed back to back.
 * New-style firmware carries per-image table offset/size in its
 * header (little-endian payload); legacy firmware uses the fixed
 * CP_*_TABLE_* constants above (big-endian payload).
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri additionally has a MEC2 firmware image */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* me index selects CE, PFP, ME, MEC, MEC2 in turn */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6915
/**
 * cik_enable_gfx_cgpg - enable/disable GFX powergating in the RLC
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Sets or clears GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL.  Enabling only happens when the GFX_PG pg flag
 * is set; disabling always clears both bits.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 val, old;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		old = val = RREG32(RLC_PG_CNTL);
		val |= GFX_PG_ENABLE;
		if (val != old)
			WREG32(RLC_PG_CNTL, val);

		old = val = RREG32(RLC_AUTO_PG_CTRL);
		val |= AUTO_PG_EN;
		if (val != old)
			WREG32(RLC_AUTO_PG_CTRL, val);
	} else {
		old = val = RREG32(RLC_PG_CNTL);
		val &= ~GFX_PG_ENABLE;
		if (val != old)
			WREG32(RLC_PG_CNTL, val);

		old = val = RREG32(RLC_AUTO_PG_CTRL);
		val &= ~AUTO_PG_EN;
		if (val != old)
			WREG32(RLC_AUTO_PG_CTRL, val);

		/* NOTE(review): result intentionally unused; presumably a
		 * flushing/posting read after the disable — confirm */
		val = RREG32(DB_RENDER_CONTROL);
	}
}
6945
/**
 * cik_get_cu_active_bitmap - get the bitmap of active CUs for a SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Reads the fuse and user shader-array configuration for the given
 * SE/SH under the grbm_idx mutex and returns a bitmap in which bit k
 * set means compute unit k is active.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 disabled, user_disabled, cu_mask = 0;
	int k;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	disabled = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	user_disabled = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* the inactive-CU bits live in the upper 16 bits of each register */
	disabled &= 0xffff0000;
	disabled |= user_disabled;
	disabled >>= 16;

	/* mask of max_cu_per_sh low bits */
	for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++)
		cu_mask = (cu_mask << 1) | 1;

	return ~disabled & cu_mask;
}
6970
cik_init_ao_cu_mask(struct radeon_device * rdev)6971 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6972 {
6973 u32 i, j, k, active_cu_number = 0;
6974 u32 mask, counter, cu_bitmap;
6975 u32 tmp = 0;
6976
6977 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6978 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6979 mask = 1;
6980 cu_bitmap = 0;
6981 counter = 0;
6982 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6983 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6984 if (counter < 2)
6985 cu_bitmap |= mask;
6986 counter ++;
6987 }
6988 mask <<= 1;
6989 }
6990
6991 active_cu_number += counter;
6992 tmp |= (cu_bitmap << (i * 16 + j * 8));
6993 }
6994 }
6995
6996 WREG32(RLC_PG_AO_CU_MASK, tmp);
6997
6998 tmp = RREG32(RLC_MAX_PG_CU);
6999 tmp &= ~MAX_PU_CU_MASK;
7000 tmp |= MAX_PU_CU(active_cu_number);
7001 WREG32(RLC_MAX_PG_CU, tmp);
7002 }
7003
/**
 * cik_enable_gfx_static_mgpg - toggle static per-CU powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Sets STATIC_PER_CU_PG_ENABLE in RLC_PG_CNTL when enabling and the
 * GFX_SMG pg flag is present, clears it otherwise.  Only writes the
 * register when the value actually changes.
 */
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
				       bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
		val |= STATIC_PER_CU_PG_ENABLE;
	else
		val &= ~STATIC_PER_CU_PG_ENABLE;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
7017
/**
 * cik_enable_gfx_dynamic_mgpg - toggle dynamic per-CU powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Sets DYN_PER_CU_PG_ENABLE in RLC_PG_CNTL when enabling and the
 * GFX_DMG pg flag is present, clears it otherwise.  Only writes the
 * register when the value actually changes.
 */
static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
					bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
		val |= DYN_PER_CU_PG_ENABLE;
	else
		val &= ~DYN_PER_CU_PG_ENABLE;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
7031
7032 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7033 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
7034
/**
 * cik_init_gfx_cgpg - set up the RLC for GFX powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor and the save/restore register
 * list into RLC scratch space, points the RLC at the save/restore
 * and CP table buffers, and programs the powergating delay and
 * idle-poll parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state buffer: zero all three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* tell the RLC where to source the GFX PG state from */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* low byte of RLC_PG_DELAY_2 := 3 */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7083
/**
 * cik_update_gfx_pg - enable/disable all GFX powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles coarse-grain, static per-CU and dynamic per-CU GFX
 * powergating together; each helper checks its own pg flag.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7090
cik_get_csb_size(struct radeon_device * rdev)7091 u32 cik_get_csb_size(struct radeon_device *rdev)
7092 {
7093 u32 count = 0;
7094 const struct cs_section_def *sect = NULL;
7095 const struct cs_extent_def *ext = NULL;
7096
7097 if (rdev->rlc.cs_data == NULL)
7098 return 0;
7099
7100 /* begin clear state */
7101 count += 2;
7102 /* context control state */
7103 count += 3;
7104
7105 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7106 for (ext = sect->section; ext->extent != NULL; ++ext) {
7107 if (sect->id == SECT_CONTEXT)
7108 count += 2 + ext->reg_count;
7109 else
7110 return 0;
7111 }
7112 }
7113 /* pa_sc_raster_config/pa_sc_raster_config1 */
7114 count += 4;
7115 /* end clear state */
7116 count += 2;
7117 /* clear state */
7118 count += 2;
7119
7120 return count;
7121 }
7122
/**
 * cik_get_csb_buffer - populate the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the clear state packet stream: preamble begin, context
 * control, SET_CONTEXT_REG packets from cs_data, the per-asic
 * raster config, preamble end and a CLEAR_STATE packet.  The emitted
 * dword count must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent; only SECT_CONTEXT
	 * sections are supported */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-asic values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7187
cik_init_pg(struct radeon_device * rdev)7188 static void cik_init_pg(struct radeon_device *rdev)
7189 {
7190 if (rdev->pg_flags) {
7191 cik_enable_sck_slowdown_on_pu(rdev, true);
7192 cik_enable_sck_slowdown_on_pd(rdev, true);
7193 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7194 cik_init_gfx_cgpg(rdev);
7195 cik_enable_cp_pg(rdev, true);
7196 cik_enable_gds_pg(rdev, true);
7197 }
7198 cik_init_ao_cu_mask(rdev);
7199 cik_update_gfx_pg(rdev, true);
7200 }
7201 }
7202
cik_fini_pg(struct radeon_device * rdev)7203 static void cik_fini_pg(struct radeon_device *rdev)
7204 {
7205 if (rdev->pg_flags) {
7206 cik_update_gfx_pg(rdev, false);
7207 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7208 cik_enable_cp_pg(rdev, false);
7209 cik_enable_gds_pg(rdev, false);
7210 }
7211 }
7212 }
7213
7214 /*
7215 * Interrupts
7216 * Starting with r6xx, interrupts are handled via a ring buffer.
7217 * Ring buffers are areas of GPU accessible memory that the GPU
7218 * writes interrupt vectors into and the host reads vectors out of.
7219 * There is a rptr (read pointer) that determines where the
7220 * host is currently reading, and a wptr (write pointer)
7221 * which determines where the GPU has written. When the
7222 * pointers are equal, the ring is idle. When the GPU
7223 * writes vectors to the ring buffer, it increments the
7224 * wptr. When there is an interrupt, the host then starts
7225 * fetching commands and processing them until the pointers are
7226 * equal again at which point it updates the rptr.
7227 */
7228
7229 /**
7230 * cik_enable_interrupts - Enable the interrupt ring buffer
7231 *
7232 * @rdev: radeon_device pointer
7233 *
7234 * Enable the interrupt ring buffer (CIK).
7235 */
cik_enable_interrupts(struct radeon_device * rdev)7236 static void cik_enable_interrupts(struct radeon_device *rdev)
7237 {
7238 u32 ih_cntl = RREG32(IH_CNTL);
7239 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7240
7241 ih_cntl |= ENABLE_INTR;
7242 ih_rb_cntl |= IH_RB_ENABLE;
7243 WREG32(IH_CNTL, ih_cntl);
7244 WREG32(IH_RB_CNTL, ih_rb_cntl);
7245 rdev->ih.enabled = true;
7246 }
7247
7248 /**
7249 * cik_disable_interrupts - Disable the interrupt ring buffer
7250 *
7251 * @rdev: radeon_device pointer
7252 *
7253 * Disable the interrupt ring buffer (CIK).
7254 */
cik_disable_interrupts(struct radeon_device * rdev)7255 static void cik_disable_interrupts(struct radeon_device *rdev)
7256 {
7257 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7258 u32 ih_cntl = RREG32(IH_CNTL);
7259
7260 ih_rb_cntl &= ~IH_RB_ENABLE;
7261 ih_cntl &= ~ENABLE_INTR;
7262 WREG32(IH_RB_CNTL, ih_rb_cntl);
7263 WREG32(IH_CNTL, ih_cntl);
7264 /* set rptr, wptr to 0 */
7265 WREG32(IH_RB_RPTR, 0);
7266 WREG32(IH_RB_WPTR, 0);
7267 rdev->ih.enabled = false;
7268 rdev->ih.rptr = 0;
7269 }
7270
7271 /**
7272 * cik_disable_interrupt_state - Disable all interrupt sources
7273 *
7274 * @rdev: radeon_device pointer
7275 *
7276 * Clear all interrupt enable bits used by the driver (CIK).
7277 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes on both compute MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the configured polarity bit,
	 * clearing the enable bits */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7347
7348 /**
7349 * cik_irq_init - init and enable the interrupt ring
7350 *
7351 * @rdev: radeon_device pointer
7352 *
7353 * Allocate a ring buffer for the interrupt controller,
7354 * enable the RLC, disable interrupts, enable the IH
7355 * ring buffer and enable it (CIK).
 * Called at device load and resume.
7357 * Returns 0 for success, errors for failure.
7358 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* enable bus mastering before enabling interrupts */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7429
7430 /**
7431 * cik_irq_set - enable/disable interrupt sources
7432 *
7433 * @rdev: radeon_device pointer
7434 *
7435 * Enable interrupt sources on the GPU (vblanks, hpd,
7436 * etc.) (CIK).
7437 * Returns 0 for success, errors for failure.
7438 */
cik_irq_set(struct radeon_device * rdev)7439 int cik_irq_set(struct radeon_device *rdev)
7440 {
7441 u32 cp_int_cntl;
7442 u32 cp_m1p0;
7443 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7444 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7445 u32 grbm_int_cntl = 0;
7446 u32 dma_cntl, dma_cntl1;
7447
7448 if (!rdev->irq.installed) {
7449 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7450 return -EINVAL;
7451 }
7452 /* don't enable anything if the ih is disabled */
7453 if (!rdev->ih.enabled) {
7454 cik_disable_interrupts(rdev);
7455 /* force the active interrupt state to all disabled */
7456 cik_disable_interrupt_state(rdev);
7457 return 0;
7458 }
7459
7460 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7461 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7462 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7463
7464 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7465 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7466 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7467 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7468 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7469 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7470
7471 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7472 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7473
7474 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7475
7476 /* enable CP interrupts on all rings */
7477 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7478 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7479 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7480 }
7481 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7482 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7483 DRM_DEBUG("si_irq_set: sw int cp1\n");
7484 if (ring->me == 1) {
7485 switch (ring->pipe) {
7486 case 0:
7487 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7488 break;
7489 default:
7490 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7491 break;
7492 }
7493 } else {
7494 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7495 }
7496 }
7497 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7498 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7499 DRM_DEBUG("si_irq_set: sw int cp2\n");
7500 if (ring->me == 1) {
7501 switch (ring->pipe) {
7502 case 0:
7503 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7504 break;
7505 default:
7506 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7507 break;
7508 }
7509 } else {
7510 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7511 }
7512 }
7513
7514 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7515 DRM_DEBUG("cik_irq_set: sw int dma\n");
7516 dma_cntl |= TRAP_ENABLE;
7517 }
7518
7519 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7520 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7521 dma_cntl1 |= TRAP_ENABLE;
7522 }
7523
7524 if (rdev->irq.crtc_vblank_int[0] ||
7525 atomic_read(&rdev->irq.pflip[0])) {
7526 DRM_DEBUG("cik_irq_set: vblank 0\n");
7527 crtc1 |= VBLANK_INTERRUPT_MASK;
7528 }
7529 if (rdev->irq.crtc_vblank_int[1] ||
7530 atomic_read(&rdev->irq.pflip[1])) {
7531 DRM_DEBUG("cik_irq_set: vblank 1\n");
7532 crtc2 |= VBLANK_INTERRUPT_MASK;
7533 }
7534 if (rdev->irq.crtc_vblank_int[2] ||
7535 atomic_read(&rdev->irq.pflip[2])) {
7536 DRM_DEBUG("cik_irq_set: vblank 2\n");
7537 crtc3 |= VBLANK_INTERRUPT_MASK;
7538 }
7539 if (rdev->irq.crtc_vblank_int[3] ||
7540 atomic_read(&rdev->irq.pflip[3])) {
7541 DRM_DEBUG("cik_irq_set: vblank 3\n");
7542 crtc4 |= VBLANK_INTERRUPT_MASK;
7543 }
7544 if (rdev->irq.crtc_vblank_int[4] ||
7545 atomic_read(&rdev->irq.pflip[4])) {
7546 DRM_DEBUG("cik_irq_set: vblank 4\n");
7547 crtc5 |= VBLANK_INTERRUPT_MASK;
7548 }
7549 if (rdev->irq.crtc_vblank_int[5] ||
7550 atomic_read(&rdev->irq.pflip[5])) {
7551 DRM_DEBUG("cik_irq_set: vblank 5\n");
7552 crtc6 |= VBLANK_INTERRUPT_MASK;
7553 }
7554 if (rdev->irq.hpd[0]) {
7555 DRM_DEBUG("cik_irq_set: hpd 1\n");
7556 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7557 }
7558 if (rdev->irq.hpd[1]) {
7559 DRM_DEBUG("cik_irq_set: hpd 2\n");
7560 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7561 }
7562 if (rdev->irq.hpd[2]) {
7563 DRM_DEBUG("cik_irq_set: hpd 3\n");
7564 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7565 }
7566 if (rdev->irq.hpd[3]) {
7567 DRM_DEBUG("cik_irq_set: hpd 4\n");
7568 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7569 }
7570 if (rdev->irq.hpd[4]) {
7571 DRM_DEBUG("cik_irq_set: hpd 5\n");
7572 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7573 }
7574 if (rdev->irq.hpd[5]) {
7575 DRM_DEBUG("cik_irq_set: hpd 6\n");
7576 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7577 }
7578
7579 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7580
7581 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7582 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7583
7584 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7585
7586 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7587
7588 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7589 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7590 if (rdev->num_crtc >= 4) {
7591 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7592 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7593 }
7594 if (rdev->num_crtc >= 6) {
7595 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7596 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7597 }
7598
7599 if (rdev->num_crtc >= 2) {
7600 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7601 GRPH_PFLIP_INT_MASK);
7602 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7603 GRPH_PFLIP_INT_MASK);
7604 }
7605 if (rdev->num_crtc >= 4) {
7606 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7607 GRPH_PFLIP_INT_MASK);
7608 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7609 GRPH_PFLIP_INT_MASK);
7610 }
7611 if (rdev->num_crtc >= 6) {
7612 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7613 GRPH_PFLIP_INT_MASK);
7614 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7615 GRPH_PFLIP_INT_MASK);
7616 }
7617
7618 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7619 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7620 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7621 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7622 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7623 WREG32(DC_HPD6_INT_CONTROL, hpd6);
7624
7625 /* posting read */
7626 RREG32(SRBM_STATUS);
7627
7628 return 0;
7629 }
7630
7631 /**
7632 * cik_irq_ack - ack interrupt sources
7633 *
7634 * @rdev: radeon_device pointer
7635 *
7636 * Ack interrupt sources on the GPU (vblanks, hpd,
7637 * etc.) (CIK). Certain interrupts sources are sw
7638 * generated and do not require an explicit ack.
7639 */
cik_irq_ack(struct radeon_device * rdev)7640 static inline void cik_irq_ack(struct radeon_device *rdev)
7641 {
7642 u32 tmp;
7643
7644 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7645 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7646 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7647 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7648 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7649 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7650 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7651
7652 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7653 EVERGREEN_CRTC0_REGISTER_OFFSET);
7654 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7655 EVERGREEN_CRTC1_REGISTER_OFFSET);
7656 if (rdev->num_crtc >= 4) {
7657 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7658 EVERGREEN_CRTC2_REGISTER_OFFSET);
7659 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7660 EVERGREEN_CRTC3_REGISTER_OFFSET);
7661 }
7662 if (rdev->num_crtc >= 6) {
7663 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7664 EVERGREEN_CRTC4_REGISTER_OFFSET);
7665 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7666 EVERGREEN_CRTC5_REGISTER_OFFSET);
7667 }
7668
7669 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7670 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7671 GRPH_PFLIP_INT_CLEAR);
7672 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7673 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7674 GRPH_PFLIP_INT_CLEAR);
7675 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7676 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7677 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7678 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7679 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7680 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7681 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7682 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7683
7684 if (rdev->num_crtc >= 4) {
7685 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7686 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7687 GRPH_PFLIP_INT_CLEAR);
7688 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7689 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7690 GRPH_PFLIP_INT_CLEAR);
7691 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7692 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7693 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7694 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7695 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7696 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7697 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7698 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7699 }
7700
7701 if (rdev->num_crtc >= 6) {
7702 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7703 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7704 GRPH_PFLIP_INT_CLEAR);
7705 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7706 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7707 GRPH_PFLIP_INT_CLEAR);
7708 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7709 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7710 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7711 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7712 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7713 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7714 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7715 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7716 }
7717
7718 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7719 tmp = RREG32(DC_HPD1_INT_CONTROL);
7720 tmp |= DC_HPDx_INT_ACK;
7721 WREG32(DC_HPD1_INT_CONTROL, tmp);
7722 }
7723 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7724 tmp = RREG32(DC_HPD2_INT_CONTROL);
7725 tmp |= DC_HPDx_INT_ACK;
7726 WREG32(DC_HPD2_INT_CONTROL, tmp);
7727 }
7728 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7729 tmp = RREG32(DC_HPD3_INT_CONTROL);
7730 tmp |= DC_HPDx_INT_ACK;
7731 WREG32(DC_HPD3_INT_CONTROL, tmp);
7732 }
7733 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7734 tmp = RREG32(DC_HPD4_INT_CONTROL);
7735 tmp |= DC_HPDx_INT_ACK;
7736 WREG32(DC_HPD4_INT_CONTROL, tmp);
7737 }
7738 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7739 tmp = RREG32(DC_HPD5_INT_CONTROL);
7740 tmp |= DC_HPDx_INT_ACK;
7741 WREG32(DC_HPD5_INT_CONTROL, tmp);
7742 }
7743 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7744 tmp = RREG32(DC_HPD5_INT_CONTROL);
7745 tmp |= DC_HPDx_INT_ACK;
7746 WREG32(DC_HPD6_INT_CONTROL, tmp);
7747 }
7748 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7749 tmp = RREG32(DC_HPD1_INT_CONTROL);
7750 tmp |= DC_HPDx_RX_INT_ACK;
7751 WREG32(DC_HPD1_INT_CONTROL, tmp);
7752 }
7753 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7754 tmp = RREG32(DC_HPD2_INT_CONTROL);
7755 tmp |= DC_HPDx_RX_INT_ACK;
7756 WREG32(DC_HPD2_INT_CONTROL, tmp);
7757 }
7758 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7759 tmp = RREG32(DC_HPD3_INT_CONTROL);
7760 tmp |= DC_HPDx_RX_INT_ACK;
7761 WREG32(DC_HPD3_INT_CONTROL, tmp);
7762 }
7763 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7764 tmp = RREG32(DC_HPD4_INT_CONTROL);
7765 tmp |= DC_HPDx_RX_INT_ACK;
7766 WREG32(DC_HPD4_INT_CONTROL, tmp);
7767 }
7768 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7769 tmp = RREG32(DC_HPD5_INT_CONTROL);
7770 tmp |= DC_HPDx_RX_INT_ACK;
7771 WREG32(DC_HPD5_INT_CONTROL, tmp);
7772 }
7773 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7774 tmp = RREG32(DC_HPD5_INT_CONTROL);
7775 tmp |= DC_HPDx_RX_INT_ACK;
7776 WREG32(DC_HPD6_INT_CONTROL, tmp);
7777 }
7778 }
7779
7780 /**
7781 * cik_irq_disable - disable interrupts
7782 *
7783 * @rdev: radeon_device pointer
7784 *
7785 * Disable interrupts on the hw (CIK).
7786 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask every interrupt source */
	cik_disable_interrupt_state(rdev);
}
7795
7796 /**
 * cik_irq_suspend - disable interrupts for suspend
7798 *
7799 * @rdev: radeon_device pointer
7800 *
7801 * Disable interrupts and stop the RLC (CIK).
7802 * Used for suspend.
7803 */
cik_irq_suspend(struct radeon_device * rdev)7804 static void cik_irq_suspend(struct radeon_device *rdev)
7805 {
7806 cik_irq_disable(rdev);
7807 cik_rlc_stop(rdev);
7808 }
7809
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload and for the cik_init() error path.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7824
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK). Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy when enabled; fall back to an MMIO read */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catch up.
		 * (16 bytes == one 128-bit IV ring entry.)
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* clear the sticky overflow flag so the next overflow is detected */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
7860
7861 /* CIK IV Ring
7862 * Each IV ring entry is 128 bits:
7863 * [7:0] - interrupt source id
7864 * [31:8] - reserved
7865 * [59:32] - interrupt source data
7866 * [63:60] - reserved
7867 * [71:64] - RINGID
7868 * CP:
7869 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7870 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7871 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7872 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7873 * PIPE_ID - ME0 0=3D
7874 * - ME1&2 compute dispatcher (4 pipes each)
7875 * SDMA:
7876 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7877 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7878 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7879 * [79:72] - VMID
7880 * [95:80] - PASID
7881 * [127:96] - reserved
7882 */
7883 /**
7884 * cik_irq_process - interrupt handler
7885 *
7886 * @rdev: radeon_device pointer
7887 *
7888 * Interrupt hander (CIK). Walk the IH ring,
7889 * ack interrupts and schedule work to handle
7890 * interrupt events.
7891 * Returns irq process return code.
7892 */
cik_irq_process(struct radeon_device * rdev)7893 int cik_irq_process(struct radeon_device *rdev)
7894 {
7895 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7896 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7897 u32 wptr;
7898 u32 rptr;
7899 u32 src_id, src_data, ring_id;
7900 u8 me_id, pipe_id, queue_id;
7901 u32 ring_index;
7902 bool queue_hotplug = false;
7903 bool queue_dp = false;
7904 bool queue_reset = false;
7905 u32 addr, status, mc_client;
7906 bool queue_thermal = false;
7907
7908 if (!rdev->ih.enabled || rdev->shutdown)
7909 return IRQ_NONE;
7910
7911 wptr = cik_get_ih_wptr(rdev);
7912
7913 restart_ih:
7914 /* is somebody else already processing irqs? */
7915 if (atomic_xchg(&rdev->ih.lock, 1))
7916 return IRQ_NONE;
7917
7918 rptr = rdev->ih.rptr;
7919 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7920
7921 /* Order reading of wptr vs. reading of IH ring data */
7922 rmb();
7923
7924 /* display interrupts */
7925 cik_irq_ack(rdev);
7926
7927 while (rptr != wptr) {
7928 /* wptr/rptr are in bytes! */
7929 ring_index = rptr / 4;
7930
7931 radeon_kfd_interrupt(rdev,
7932 (const void *) &rdev->ih.ring[ring_index]);
7933
7934 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7935 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7936 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7937
7938 switch (src_id) {
7939 case 1: /* D1 vblank/vline */
7940 switch (src_data) {
7941 case 0: /* D1 vblank */
7942 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7943 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7944
7945 if (rdev->irq.crtc_vblank_int[0]) {
7946 drm_handle_vblank(rdev->ddev, 0);
7947 rdev->pm.vblank_sync = true;
7948 wake_up(&rdev->irq.vblank_queue);
7949 }
7950 if (atomic_read(&rdev->irq.pflip[0]))
7951 radeon_crtc_handle_vblank(rdev, 0);
7952 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7953 DRM_DEBUG("IH: D1 vblank\n");
7954
7955 break;
7956 case 1: /* D1 vline */
7957 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7958 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7959
7960 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7961 DRM_DEBUG("IH: D1 vline\n");
7962
7963 break;
7964 default:
7965 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7966 break;
7967 }
7968 break;
7969 case 2: /* D2 vblank/vline */
7970 switch (src_data) {
7971 case 0: /* D2 vblank */
7972 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7973 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7974
7975 if (rdev->irq.crtc_vblank_int[1]) {
7976 drm_handle_vblank(rdev->ddev, 1);
7977 rdev->pm.vblank_sync = true;
7978 wake_up(&rdev->irq.vblank_queue);
7979 }
7980 if (atomic_read(&rdev->irq.pflip[1]))
7981 radeon_crtc_handle_vblank(rdev, 1);
7982 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7983 DRM_DEBUG("IH: D2 vblank\n");
7984
7985 break;
7986 case 1: /* D2 vline */
7987 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7988 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7989
7990 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7991 DRM_DEBUG("IH: D2 vline\n");
7992
7993 break;
7994 default:
7995 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7996 break;
7997 }
7998 break;
7999 case 3: /* D3 vblank/vline */
8000 switch (src_data) {
8001 case 0: /* D3 vblank */
8002 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8003 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8004
8005 if (rdev->irq.crtc_vblank_int[2]) {
8006 drm_handle_vblank(rdev->ddev, 2);
8007 rdev->pm.vblank_sync = true;
8008 wake_up(&rdev->irq.vblank_queue);
8009 }
8010 if (atomic_read(&rdev->irq.pflip[2]))
8011 radeon_crtc_handle_vblank(rdev, 2);
8012 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8013 DRM_DEBUG("IH: D3 vblank\n");
8014
8015 break;
8016 case 1: /* D3 vline */
8017 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8018 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8019
8020 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8021 DRM_DEBUG("IH: D3 vline\n");
8022
8023 break;
8024 default:
8025 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8026 break;
8027 }
8028 break;
8029 case 4: /* D4 vblank/vline */
8030 switch (src_data) {
8031 case 0: /* D4 vblank */
8032 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8033 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8034
8035 if (rdev->irq.crtc_vblank_int[3]) {
8036 drm_handle_vblank(rdev->ddev, 3);
8037 rdev->pm.vblank_sync = true;
8038 wake_up(&rdev->irq.vblank_queue);
8039 }
8040 if (atomic_read(&rdev->irq.pflip[3]))
8041 radeon_crtc_handle_vblank(rdev, 3);
8042 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8043 DRM_DEBUG("IH: D4 vblank\n");
8044
8045 break;
8046 case 1: /* D4 vline */
8047 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8048 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8049
8050 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8051 DRM_DEBUG("IH: D4 vline\n");
8052
8053 break;
8054 default:
8055 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8056 break;
8057 }
8058 break;
8059 case 5: /* D5 vblank/vline */
8060 switch (src_data) {
8061 case 0: /* D5 vblank */
8062 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8063 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8064
8065 if (rdev->irq.crtc_vblank_int[4]) {
8066 drm_handle_vblank(rdev->ddev, 4);
8067 rdev->pm.vblank_sync = true;
8068 wake_up(&rdev->irq.vblank_queue);
8069 }
8070 if (atomic_read(&rdev->irq.pflip[4]))
8071 radeon_crtc_handle_vblank(rdev, 4);
8072 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8073 DRM_DEBUG("IH: D5 vblank\n");
8074
8075 break;
8076 case 1: /* D5 vline */
8077 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8078 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8079
8080 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8081 DRM_DEBUG("IH: D5 vline\n");
8082
8083 break;
8084 default:
8085 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8086 break;
8087 }
8088 break;
8089 case 6: /* D6 vblank/vline */
8090 switch (src_data) {
8091 case 0: /* D6 vblank */
8092 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8093 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8094
8095 if (rdev->irq.crtc_vblank_int[5]) {
8096 drm_handle_vblank(rdev->ddev, 5);
8097 rdev->pm.vblank_sync = true;
8098 wake_up(&rdev->irq.vblank_queue);
8099 }
8100 if (atomic_read(&rdev->irq.pflip[5]))
8101 radeon_crtc_handle_vblank(rdev, 5);
8102 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8103 DRM_DEBUG("IH: D6 vblank\n");
8104
8105 break;
8106 case 1: /* D6 vline */
8107 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8108 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8109
8110 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8111 DRM_DEBUG("IH: D6 vline\n");
8112
8113 break;
8114 default:
8115 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8116 break;
8117 }
8118 break;
8119 case 8: /* D1 page flip */
8120 case 10: /* D2 page flip */
8121 case 12: /* D3 page flip */
8122 case 14: /* D4 page flip */
8123 case 16: /* D5 page flip */
8124 case 18: /* D6 page flip */
8125 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8126 if (radeon_use_pflipirq > 0)
8127 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8128 break;
8129 case 42: /* HPD hotplug */
8130 switch (src_data) {
8131 case 0:
8132 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8133 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8134
8135 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8136 queue_hotplug = true;
8137 DRM_DEBUG("IH: HPD1\n");
8138
8139 break;
8140 case 1:
8141 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8142 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8143
8144 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8145 queue_hotplug = true;
8146 DRM_DEBUG("IH: HPD2\n");
8147
8148 break;
8149 case 2:
8150 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8151 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8152
8153 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8154 queue_hotplug = true;
8155 DRM_DEBUG("IH: HPD3\n");
8156
8157 break;
8158 case 3:
8159 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8160 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8161
8162 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8163 queue_hotplug = true;
8164 DRM_DEBUG("IH: HPD4\n");
8165
8166 break;
8167 case 4:
8168 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8169 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8170
8171 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8172 queue_hotplug = true;
8173 DRM_DEBUG("IH: HPD5\n");
8174
8175 break;
8176 case 5:
8177 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8178 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8179
8180 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8181 queue_hotplug = true;
8182 DRM_DEBUG("IH: HPD6\n");
8183
8184 break;
8185 case 6:
8186 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8187 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8188
8189 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8190 queue_dp = true;
8191 DRM_DEBUG("IH: HPD_RX 1\n");
8192
8193 break;
8194 case 7:
8195 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8196 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8197
8198 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8199 queue_dp = true;
8200 DRM_DEBUG("IH: HPD_RX 2\n");
8201
8202 break;
8203 case 8:
8204 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8205 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8206
8207 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8208 queue_dp = true;
8209 DRM_DEBUG("IH: HPD_RX 3\n");
8210
8211 break;
8212 case 9:
8213 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8214 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8215
8216 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8217 queue_dp = true;
8218 DRM_DEBUG("IH: HPD_RX 4\n");
8219
8220 break;
8221 case 10:
8222 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8223 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8224
8225 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8226 queue_dp = true;
8227 DRM_DEBUG("IH: HPD_RX 5\n");
8228
8229 break;
8230 case 11:
8231 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8232 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8233
8234 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8235 queue_dp = true;
8236 DRM_DEBUG("IH: HPD_RX 6\n");
8237
8238 break;
8239 default:
8240 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8241 break;
8242 }
8243 break;
8244 case 96:
8245 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8246 WREG32(SRBM_INT_ACK, 0x1);
8247 break;
8248 case 124: /* UVD */
8249 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8250 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8251 break;
8252 case 146:
8253 case 147:
8254 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8255 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8256 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8257 /* reset addr and status */
8258 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8259 if (addr == 0x0 && status == 0x0)
8260 break;
8261 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8262 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
8263 addr);
8264 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8265 status);
8266 cik_vm_decode_fault(rdev, status, addr, mc_client);
8267 break;
8268 case 167: /* VCE */
8269 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8270 switch (src_data) {
8271 case 0:
8272 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8273 break;
8274 case 1:
8275 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8276 break;
8277 default:
8278 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8279 break;
8280 }
8281 break;
8282 case 176: /* GFX RB CP_INT */
8283 case 177: /* GFX IB CP_INT */
8284 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8285 break;
8286 case 181: /* CP EOP event */
8287 DRM_DEBUG("IH: CP EOP\n");
8288 /* XXX check the bitfield order! */
8289 me_id = (ring_id & 0x60) >> 5;
8290 pipe_id = (ring_id & 0x18) >> 3;
8291 queue_id = (ring_id & 0x7) >> 0;
8292 switch (me_id) {
8293 case 0:
8294 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8295 break;
8296 case 1:
8297 case 2:
8298 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8299 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8300 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8301 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8302 break;
8303 }
8304 break;
8305 case 184: /* CP Privileged reg access */
8306 DRM_ERROR("Illegal register access in command stream\n");
8307 /* XXX check the bitfield order! */
8308 me_id = (ring_id & 0x60) >> 5;
8309 pipe_id = (ring_id & 0x18) >> 3;
8310 queue_id = (ring_id & 0x7) >> 0;
8311 switch (me_id) {
8312 case 0:
8313 /* This results in a full GPU reset, but all we need to do is soft
8314 * reset the CP for gfx
8315 */
8316 queue_reset = true;
8317 break;
8318 case 1:
8319 /* XXX compute */
8320 queue_reset = true;
8321 break;
8322 case 2:
8323 /* XXX compute */
8324 queue_reset = true;
8325 break;
8326 }
8327 break;
8328 case 185: /* CP Privileged inst */
8329 DRM_ERROR("Illegal instruction in command stream\n");
8330 /* XXX check the bitfield order! */
8331 me_id = (ring_id & 0x60) >> 5;
8332 pipe_id = (ring_id & 0x18) >> 3;
8333 queue_id = (ring_id & 0x7) >> 0;
8334 switch (me_id) {
8335 case 0:
8336 /* This results in a full GPU reset, but all we need to do is soft
8337 * reset the CP for gfx
8338 */
8339 queue_reset = true;
8340 break;
8341 case 1:
8342 /* XXX compute */
8343 queue_reset = true;
8344 break;
8345 case 2:
8346 /* XXX compute */
8347 queue_reset = true;
8348 break;
8349 }
8350 break;
8351 case 224: /* SDMA trap event */
8352 /* XXX check the bitfield order! */
8353 me_id = (ring_id & 0x3) >> 0;
8354 queue_id = (ring_id & 0xc) >> 2;
8355 DRM_DEBUG("IH: SDMA trap\n");
8356 switch (me_id) {
8357 case 0:
8358 switch (queue_id) {
8359 case 0:
8360 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8361 break;
8362 case 1:
8363 /* XXX compute */
8364 break;
8365 case 2:
8366 /* XXX compute */
8367 break;
8368 }
8369 break;
8370 case 1:
8371 switch (queue_id) {
8372 case 0:
8373 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8374 break;
8375 case 1:
8376 /* XXX compute */
8377 break;
8378 case 2:
8379 /* XXX compute */
8380 break;
8381 }
8382 break;
8383 }
8384 break;
8385 case 230: /* thermal low to high */
8386 DRM_DEBUG("IH: thermal low to high\n");
8387 rdev->pm.dpm.thermal.high_to_low = false;
8388 queue_thermal = true;
8389 break;
8390 case 231: /* thermal high to low */
8391 DRM_DEBUG("IH: thermal high to low\n");
8392 rdev->pm.dpm.thermal.high_to_low = true;
8393 queue_thermal = true;
8394 break;
8395 case 233: /* GUI IDLE */
8396 DRM_DEBUG("IH: GUI idle\n");
8397 break;
8398 case 241: /* SDMA Privileged inst */
8399 case 247: /* SDMA Privileged inst */
8400 DRM_ERROR("Illegal instruction in SDMA command stream\n");
8401 /* XXX check the bitfield order! */
8402 me_id = (ring_id & 0x3) >> 0;
8403 queue_id = (ring_id & 0xc) >> 2;
8404 switch (me_id) {
8405 case 0:
8406 switch (queue_id) {
8407 case 0:
8408 queue_reset = true;
8409 break;
8410 case 1:
8411 /* XXX compute */
8412 queue_reset = true;
8413 break;
8414 case 2:
8415 /* XXX compute */
8416 queue_reset = true;
8417 break;
8418 }
8419 break;
8420 case 1:
8421 switch (queue_id) {
8422 case 0:
8423 queue_reset = true;
8424 break;
8425 case 1:
8426 /* XXX compute */
8427 queue_reset = true;
8428 break;
8429 case 2:
8430 /* XXX compute */
8431 queue_reset = true;
8432 break;
8433 }
8434 break;
8435 }
8436 break;
8437 default:
8438 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8439 break;
8440 }
8441
8442 /* wptr/rptr are in bytes! */
8443 rptr += 16;
8444 rptr &= rdev->ih.ptr_mask;
8445 WREG32(IH_RB_RPTR, rptr);
8446 }
8447 if (queue_dp)
8448 schedule_work(&rdev->dp_work);
8449 if (queue_hotplug)
8450 schedule_work(&rdev->hotplug_work);
8451 if (queue_reset) {
8452 rdev->needs_reset = true;
8453 wake_up_all(&rdev->fence_queue);
8454 }
8455 if (queue_thermal)
8456 schedule_work(&rdev->pm.dpm.thermal.work);
8457 rdev->ih.rptr = rptr;
8458 atomic_set(&rdev->ih.lock, 0);
8459
8460 /* make sure wptr hasn't changed while processing */
8461 wptr = cik_get_ih_wptr(rdev);
8462 if (wptr != rptr)
8463 goto restart_ih;
8464
8465 return IRQ_HANDLED;
8466 }
8467
8468 /*
8469 * startup/shutdown callbacks
8470 */
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * The sequence is order-sensitive: MC/GART before engines,
 * fence drivers before ring init, IRQs before CP/SDMA resume.
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only; skipped when DPM is enabled (presumably DPM
	 * already loaded the MC ucode -- NOTE(review): confirm) */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence drivers: gfx, the two compute rings and
	 * the two SDMA engines */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal: the ring is simply disabled
	 * by zeroing its ring_size */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
					R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
					TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
					TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* older Hawaii firmware still uses the type-2 nop packet */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means the engine was disabled above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT is the sentinel for "no VCE ring was initialized" */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8725
8726 /**
8727 * cik_resume - resume the asic to a functional state
8728 *
8729 * @rdev: radeon_device pointer
8730 *
8731 * Programs the asic to a functional state (CIK).
8732 * Called at resume.
8733 * Returns 0 for success, error for failure.
8734 */
cik_resume(struct radeon_device * rdev)8735 int cik_resume(struct radeon_device *rdev)
8736 {
8737 int r;
8738
8739 /* post card */
8740 atom_asic_init(rdev->mode_info.atom_context);
8741
8742 /* init golden registers */
8743 cik_init_golden_registers(rdev);
8744
8745 if (rdev->pm.pm_method == PM_METHOD_DPM)
8746 radeon_pm_resume(rdev);
8747
8748 rdev->accel_working = true;
8749 r = cik_startup(rdev);
8750 if (r) {
8751 DRM_ERROR("cik startup failed on resume\n");
8752 rdev->accel_working = false;
8753 return r;
8754 }
8755
8756 return r;
8757
8758 }
8759
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Stops the CP and SDMA engines, suspends UVD/VCE, disables
 * power/clock gating, then tears down interrupts, writeback
 * and the GART.
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8787
8788 /* Plan is to move initialization in that function and use
8789 * helper function so that radeon_device_init pretty much
8790 * do nothing more than calling asic specific function. This
8791 * should also allow to remove a bunch of callback function
8792 * like vram_info.
8793 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message text is stale copy/paste from the
		 * cayman code; the check itself is correct for CIK */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode once; dGPUs additionally require MC firmware */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* set up the software state of each ring; the hw side is
	 * programmed later by cik_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* compute rings use doorbells for their wptr updates */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* UVD/VCE init failures are non-fatal; their rings just stay unused */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	r = radeon_vce_init(rdev);
	if (!r) {
		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);

		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* keep the driver loaded for modesetting, just without accel */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8958
8959 /**
8960 * cik_fini - asic specific driver and hw fini
8961 *
8962 * @rdev: radeon_device pointer
8963 *
8964 * Tear down the asic specific driver variables and program the hw
8965 * to an idle state (CIK).
8966 * Called at driver unload.
8967 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop the command processors and DMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* tear down pg/cg state before the IRQ and RLC go away */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	/* VM manager before IB pool: VMs hold IB resources */
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* GART last among engine teardown; BO/fence teardown after that */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL; /* avoid a dangling pointer after kfree */
}
8994
dce8_program_fmt(struct drm_encoder * encoder)8995 void dce8_program_fmt(struct drm_encoder *encoder)
8996 {
8997 struct drm_device *dev = encoder->dev;
8998 struct radeon_device *rdev = dev->dev_private;
8999 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9000 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9001 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9002 int bpc = 0;
9003 u32 tmp = 0;
9004 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9005
9006 if (connector) {
9007 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9008 bpc = radeon_get_monitor_bpc(connector);
9009 dither = radeon_connector->dither;
9010 }
9011
9012 /* LVDS/eDP FMT is set up by atom */
9013 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9014 return;
9015
9016 /* not needed for analog */
9017 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9018 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9019 return;
9020
9021 if (bpc == 0)
9022 return;
9023
9024 switch (bpc) {
9025 case 6:
9026 if (dither == RADEON_FMT_DITHER_ENABLE)
9027 /* XXX sort out optimal dither settings */
9028 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9029 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9030 else
9031 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9032 break;
9033 case 8:
9034 if (dither == RADEON_FMT_DITHER_ENABLE)
9035 /* XXX sort out optimal dither settings */
9036 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9037 FMT_RGB_RANDOM_ENABLE |
9038 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9039 else
9040 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9041 break;
9042 case 10:
9043 if (dither == RADEON_FMT_DITHER_ENABLE)
9044 /* XXX sort out optimal dither settings */
9045 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9046 FMT_RGB_RANDOM_ENABLE |
9047 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9048 else
9049 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9050 break;
9051 default:
9052 /* not needed */
9053 break;
9054 }
9055
9056 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9057 }
9058
9059 /* display watermark setup */
9060 /**
9061 * dce8_line_buffer_adjust - Set up the line buffer
9062 *
9063 * @rdev: radeon_device pointer
9064 * @radeon_crtc: the selected display controller
9065 * @mode: the current display mode on the selected display
9066 * controller
9067 *
9068 * Setup up the line buffer allocation for
9069 * the selected display controller (CIK).
9070 * Returns the line buffer size in pixels.
9071 */
dce8_line_buffer_adjust(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,struct drm_display_mode * mode)9072 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9073 struct radeon_crtc *radeon_crtc,
9074 struct drm_display_mode *mode)
9075 {
9076 u32 tmp, buffer_alloc, i;
9077 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9078 /*
9079 * Line Buffer Setup
9080 * There are 6 line buffers, one for each display controllers.
9081 * There are 3 partitions per LB. Select the number of partitions
9082 * to enable based on the display width. For display widths larger
9083 * than 4096, you need use to use 2 display controllers and combine
9084 * them using the stereo blender.
9085 */
9086 if (radeon_crtc->base.enabled && mode) {
9087 if (mode->crtc_hdisplay < 1920) {
9088 tmp = 1;
9089 buffer_alloc = 2;
9090 } else if (mode->crtc_hdisplay < 2560) {
9091 tmp = 2;
9092 buffer_alloc = 2;
9093 } else if (mode->crtc_hdisplay < 4096) {
9094 tmp = 0;
9095 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9096 } else {
9097 DRM_DEBUG_KMS("Mode too big for LB!\n");
9098 tmp = 0;
9099 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9100 }
9101 } else {
9102 tmp = 1;
9103 buffer_alloc = 0;
9104 }
9105
9106 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9107 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9108
9109 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9110 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9111 for (i = 0; i < rdev->usec_timeout; i++) {
9112 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9113 DMIF_BUFFERS_ALLOCATED_COMPLETED)
9114 break;
9115 udelay(1);
9116 }
9117
9118 if (radeon_crtc->base.enabled && mode) {
9119 switch (tmp) {
9120 case 0:
9121 default:
9122 return 4096 * 2;
9123 case 1:
9124 return 1920 * 2;
9125 case 2:
9126 return 2560 * 2;
9127 }
9128 }
9129
9130 /* controller not enabled, so no lb used */
9131 return 0;
9132 }
9133
9134 /**
9135 * cik_get_number_of_dram_channels - get the number of dram channels
9136 *
9137 * @rdev: radeon_device pointer
9138 *
9139 * Look up the number of video ram channels (CIK).
9140 * Used for display watermark bandwidth calculations
9141 * Returns the number of dram channels
9142 */
cik_get_number_of_dram_channels(struct radeon_device * rdev)9143 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9144 {
9145 u32 tmp = RREG32(MC_SHARED_CHMAP);
9146
9147 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9148 case 0:
9149 default:
9150 return 1;
9151 case 1:
9152 return 2;
9153 case 2:
9154 return 4;
9155 case 3:
9156 return 8;
9157 case 4:
9158 return 3;
9159 case 5:
9160 return 6;
9161 case 6:
9162 return 10;
9163 case 7:
9164 return 12;
9165 case 8:
9166 return 16;
9167 }
9168 }
9169
/* dce8_wm_params - inputs for the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
9185
9186 /**
9187 * dce8_dram_bandwidth - get the dram bandwidth
9188 *
9189 * @wm: watermark calculation data
9190 *
9191 * Calculate the raw dram bandwidth (CIK).
9192 * Used for display watermark bandwidth calculations
9193 * Returns the dram bandwidth in MBytes/s
9194 */
dce8_dram_bandwidth(struct dce8_wm_params * wm)9195 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9196 {
9197 /* Calculate raw DRAM Bandwidth */
9198 fixed20_12 dram_efficiency; /* 0.7 */
9199 fixed20_12 yclk, dram_channels, bandwidth;
9200 fixed20_12 a;
9201
9202 a.full = dfixed_const(1000);
9203 yclk.full = dfixed_const(wm->yclk);
9204 yclk.full = dfixed_div(yclk, a);
9205 dram_channels.full = dfixed_const(wm->dram_channels * 4);
9206 a.full = dfixed_const(10);
9207 dram_efficiency.full = dfixed_const(7);
9208 dram_efficiency.full = dfixed_div(dram_efficiency, a);
9209 bandwidth.full = dfixed_mul(dram_channels, yclk);
9210 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9211
9212 return dfixed_trunc(bandwidth);
9213 }
9214
9215 /**
9216 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9217 *
9218 * @wm: watermark calculation data
9219 *
9220 * Calculate the dram bandwidth used for display (CIK).
9221 * Used for display watermark bandwidth calculations
9222 * Returns the dram bandwidth for display in MBytes/s
9223 */
dce8_dram_bandwidth_for_display(struct dce8_wm_params * wm)9224 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9225 {
9226 /* Calculate DRAM Bandwidth and the part allocated to display. */
9227 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9228 fixed20_12 yclk, dram_channels, bandwidth;
9229 fixed20_12 a;
9230
9231 a.full = dfixed_const(1000);
9232 yclk.full = dfixed_const(wm->yclk);
9233 yclk.full = dfixed_div(yclk, a);
9234 dram_channels.full = dfixed_const(wm->dram_channels * 4);
9235 a.full = dfixed_const(10);
9236 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9237 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9238 bandwidth.full = dfixed_mul(dram_channels, yclk);
9239 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9240
9241 return dfixed_trunc(bandwidth);
9242 }
9243
9244 /**
9245 * dce8_data_return_bandwidth - get the data return bandwidth
9246 *
9247 * @wm: watermark calculation data
9248 *
9249 * Calculate the data return bandwidth used for display (CIK).
9250 * Used for display watermark bandwidth calculations
9251 * Returns the data return bandwidth in MBytes/s
9252 */
dce8_data_return_bandwidth(struct dce8_wm_params * wm)9253 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9254 {
9255 /* Calculate the display Data return Bandwidth */
9256 fixed20_12 return_efficiency; /* 0.8 */
9257 fixed20_12 sclk, bandwidth;
9258 fixed20_12 a;
9259
9260 a.full = dfixed_const(1000);
9261 sclk.full = dfixed_const(wm->sclk);
9262 sclk.full = dfixed_div(sclk, a);
9263 a.full = dfixed_const(10);
9264 return_efficiency.full = dfixed_const(8);
9265 return_efficiency.full = dfixed_div(return_efficiency, a);
9266 a.full = dfixed_const(32);
9267 bandwidth.full = dfixed_mul(a, sclk);
9268 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9269
9270 return dfixed_trunc(bandwidth);
9271 }
9272
9273 /**
9274 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9275 *
9276 * @wm: watermark calculation data
9277 *
9278 * Calculate the dmif bandwidth used for display (CIK).
9279 * Used for display watermark bandwidth calculations
9280 * Returns the dmif bandwidth in MBytes/s
9281 */
dce8_dmif_request_bandwidth(struct dce8_wm_params * wm)9282 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9283 {
9284 /* Calculate the DMIF Request Bandwidth */
9285 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9286 fixed20_12 disp_clk, bandwidth;
9287 fixed20_12 a, b;
9288
9289 a.full = dfixed_const(1000);
9290 disp_clk.full = dfixed_const(wm->disp_clk);
9291 disp_clk.full = dfixed_div(disp_clk, a);
9292 a.full = dfixed_const(32);
9293 b.full = dfixed_mul(a, disp_clk);
9294
9295 a.full = dfixed_const(10);
9296 disp_clk_request_efficiency.full = dfixed_const(8);
9297 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9298
9299 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9300
9301 return dfixed_trunc(bandwidth);
9302 }
9303
9304 /**
9305 * dce8_available_bandwidth - get the min available bandwidth
9306 *
9307 * @wm: watermark calculation data
9308 *
9309 * Calculate the min available bandwidth used for display (CIK).
9310 * Used for display watermark bandwidth calculations
9311 * Returns the min available bandwidth in MBytes/s
9312 */
dce8_available_bandwidth(struct dce8_wm_params * wm)9313 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9314 {
9315 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9316 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9317 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9318 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9319
9320 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9321 }
9322
9323 /**
9324 * dce8_average_bandwidth - get the average available bandwidth
9325 *
9326 * @wm: watermark calculation data
9327 *
9328 * Calculate the average available bandwidth used for display (CIK).
9329 * Used for display watermark bandwidth calculations
9330 * Returns the average available bandwidth in MBytes/s
9331 */
dce8_average_bandwidth(struct dce8_wm_params * wm)9332 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9333 {
9334 /* Calculate the display mode Average Bandwidth
9335 * DisplayMode should contain the source and destination dimensions,
9336 * timing, etc.
9337 */
9338 fixed20_12 bpp;
9339 fixed20_12 line_time;
9340 fixed20_12 src_width;
9341 fixed20_12 bandwidth;
9342 fixed20_12 a;
9343
9344 a.full = dfixed_const(1000);
9345 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9346 line_time.full = dfixed_div(line_time, a);
9347 bpp.full = dfixed_const(wm->bytes_per_pixel);
9348 src_width.full = dfixed_const(wm->src_width);
9349 bandwidth.full = dfixed_mul(src_width, bpp);
9350 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9351 bandwidth.full = dfixed_div(bandwidth, line_time);
9352
9353 return dfixed_trunc(bandwidth);
9354 }
9355
9356 /**
9357 * dce8_latency_watermark - get the latency watermark
9358 *
9359 * @wm: watermark calculation data
9360 *
9361 * Calculate the latency watermark (CIK).
9362 * Used for display watermark bandwidth calculations
9363 * Returns the latency watermark in ns
9364 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): divides by available_bandwidth before the
	 * num_heads == 0 early-out below; assumes it is non-zero — confirm */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* more source lines are needed per destination line when
	 * downscaling (vsc > 1/2), with >= 3 vtaps, or when interlaced */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = available bandwidth per head */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = (disp_clk / 1000) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is limited by both */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the LB cannot be filled within the active period,
	 * extend the watermark by the shortfall */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9427
9428 /**
9429 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9430 * average and available dram bandwidth
9431 *
9432 * @wm: watermark calculation data
9433 *
9434 * Check if the display average bandwidth fits in the display
9435 * dram bandwidth (CIK).
9436 * Used for display watermark bandwidth calculations
9437 * Returns true if the display fits, false if not.
9438 */
dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params * wm)9439 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9440 {
9441 if (dce8_average_bandwidth(wm) <=
9442 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9443 return true;
9444 else
9445 return false;
9446 }
9447
9448 /**
9449 * dce8_average_bandwidth_vs_available_bandwidth - check
9450 * average and available bandwidth
9451 *
9452 * @wm: watermark calculation data
9453 *
9454 * Check if the display average bandwidth fits in the display
9455 * available bandwidth (CIK).
9456 * Used for display watermark bandwidth calculations
9457 * Returns true if the display fits, false if not.
9458 */
dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params * wm)9459 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9460 {
9461 if (dce8_average_bandwidth(wm) <=
9462 (dce8_available_bandwidth(wm) / wm->num_heads))
9463 return true;
9464 else
9465 return false;
9466 }
9467
9468 /**
9469 * dce8_check_latency_hiding - check latency hiding
9470 *
9471 * @wm: watermark calculation data
9472 *
9473 * Check latency hiding (CIK).
9474 * Used for display watermark bandwidth calculations
9475 * Returns true if the display fits, false if not.
9476 */
dce8_check_latency_hiding(struct dce8_wm_params * wm)9477 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9478 {
9479 u32 lb_partitions = wm->lb_size / wm->src_width;
9480 u32 line_time = wm->active_time + wm->blank_time;
9481 u32 latency_tolerant_lines;
9482 u32 latency_hiding;
9483 fixed20_12 a;
9484
9485 a.full = dfixed_const(1);
9486 if (wm->vsc.full > a.full)
9487 latency_tolerant_lines = 1;
9488 else {
9489 if (lb_partitions <= (wm->vtaps + 1))
9490 latency_tolerant_lines = 1;
9491 else
9492 latency_tolerant_lines = 2;
9493 }
9494
9495 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9496
9497 if (dce8_latency_watermark(wm) <= latency_hiding)
9498 return true;
9499 else
9500 return false;
9501 }
9502
9503 /**
9504 * dce8_program_watermarks - program display watermarks
9505 *
9506 * @rdev: radeon_device pointer
9507 * @radeon_crtc: the selected display controller
9508 * @lb_size: line buffer size
9509 * @num_heads: number of display controllers in use
9510 *
9511 * Calculate and program the display watermarks for the
9512 * selected display controller (CIK).
9513 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns (mode->clock in kHz); line time
		 * clamped to the 16-bit register field.
		 * NOTE(review): assumes mode->clock != 0 for an enabled crtc */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9635
9636 /**
9637 * dce8_bandwidth_update - program display watermarks
9638 *
9639 * @rdev: radeon_device pointer
9640 *
9641 * Calculate and program the display watermarks and line
9642 * buffer allocation (CIK).
9643 */
dce8_bandwidth_update(struct radeon_device * rdev)9644 void dce8_bandwidth_update(struct radeon_device *rdev)
9645 {
9646 struct drm_display_mode *mode = NULL;
9647 u32 num_heads = 0, lb_size;
9648 int i;
9649
9650 if (!rdev->mode_info.mode_config_initialized)
9651 return;
9652
9653 radeon_update_display_priority(rdev);
9654
9655 for (i = 0; i < rdev->num_crtc; i++) {
9656 if (rdev->mode_info.crtcs[i]->base.enabled)
9657 num_heads++;
9658 }
9659 for (i = 0; i < rdev->num_crtc; i++) {
9660 mode = &rdev->mode_info.crtcs[i]->base.mode;
9661 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9662 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9663 }
9664 }
9665
9666 /**
9667 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9668 *
9669 * @rdev: radeon_device pointer
9670 *
9671 * Fetches a GPU clock counter snapshot (SI).
9672 * Returns the 64 bit clock counter snapshot.
9673 */
cik_get_gpu_clock_counter(struct radeon_device * rdev)9674 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9675 {
9676 uint64_t clock;
9677
9678 mutex_lock(&rdev->gpu_clock_mutex);
9679 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9680 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9681 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9682 mutex_unlock(&rdev->gpu_clock_mutex);
9683 return clock;
9684 }
9685
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	struct atom_clock_dividers dividers;
	uint32_t cntl;
	int ret, i;

	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					     clock, false, &dividers);
	if (ret)
		return ret;

	/* program the new post divider */
	cntl = RREG32_SMC(cntl_reg);
	cntl &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
	cntl |= dividers.post_divider;
	WREG32_SMC(cntl_reg, cntl);

	/* poll for up to ~1 second (100 * 10ms) for the clock to settle */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			return 0;
		mdelay(10);
	}

	return -ETIMEDOUT;
}
9713
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int ret;

	/* program VCLK first, then DCLK; bail out on the first failure */
	ret = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (ret)
		return ret;

	return cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
}
9725
cik_set_vce_clocks(struct radeon_device * rdev,u32 evclk,u32 ecclk)9726 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9727 {
9728 int r, i;
9729 struct atom_clock_dividers dividers;
9730 u32 tmp;
9731
9732 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9733 ecclk, false, ÷rs);
9734 if (r)
9735 return r;
9736
9737 for (i = 0; i < 100; i++) {
9738 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9739 break;
9740 mdelay(10);
9741 }
9742 if (i == 100)
9743 return -ETIMEDOUT;
9744
9745 tmp = RREG32_SMC(CG_ECLK_CNTL);
9746 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9747 tmp |= dividers.post_divider;
9748 WREG32_SMC(CG_ECLK_CNTL, tmp);
9749
9750 for (i = 0; i < 100; i++) {
9751 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9752 break;
9753 mdelay(10);
9754 }
9755 if (i == 100)
9756 return -ETIMEDOUT;
9757
9758 return 0;
9759 }
9760
cik_pcie_gen3_enable(struct radeon_device * rdev)9761 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9762 {
9763 struct pci_dev *root = rdev->pdev->bus->self;
9764 int bridge_pos, gpu_pos;
9765 u32 speed_cntl, mask, current_data_rate;
9766 int ret, i;
9767 u16 tmp16;
9768
9769 if (pci_is_root_bus(rdev->pdev->bus))
9770 return;
9771
9772 if (radeon_pcie_gen2 == 0)
9773 return;
9774
9775 if (rdev->flags & RADEON_IS_IGP)
9776 return;
9777
9778 if (!(rdev->flags & RADEON_IS_PCIE))
9779 return;
9780
9781 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9782 if (ret != 0)
9783 return;
9784
9785 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9786 return;
9787
9788 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9789 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9790 LC_CURRENT_DATA_RATE_SHIFT;
9791 if (mask & DRM_PCIE_SPEED_80) {
9792 if (current_data_rate == 2) {
9793 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9794 return;
9795 }
9796 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9797 } else if (mask & DRM_PCIE_SPEED_50) {
9798 if (current_data_rate == 1) {
9799 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9800 return;
9801 }
9802 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9803 }
9804
9805 bridge_pos = pci_pcie_cap(root);
9806 if (!bridge_pos)
9807 return;
9808
9809 gpu_pos = pci_pcie_cap(rdev->pdev);
9810 if (!gpu_pos)
9811 return;
9812
9813 if (mask & DRM_PCIE_SPEED_80) {
9814 /* re-try equalization if gen3 is not already enabled */
9815 if (current_data_rate != 2) {
9816 u16 bridge_cfg, gpu_cfg;
9817 u16 bridge_cfg2, gpu_cfg2;
9818 u32 max_lw, current_lw, tmp;
9819
9820 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9821 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9822
9823 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9824 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9825
9826 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9827 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9828
9829 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9830 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9831 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9832
9833 if (current_lw < max_lw) {
9834 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9835 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9836 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9837 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9838 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9839 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9840 }
9841 }
9842
9843 for (i = 0; i < 10; i++) {
9844 /* check status */
9845 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9846 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9847 break;
9848
9849 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9850 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9851
9852 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9853 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9854
9855 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9856 tmp |= LC_SET_QUIESCE;
9857 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9858
9859 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9860 tmp |= LC_REDO_EQ;
9861 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9862
9863 mdelay(100);
9864
9865 /* linkctl */
9866 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9867 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9868 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9869 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9870
9871 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9872 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9873 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9874 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9875
9876 /* linkctl2 */
9877 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9878 tmp16 &= ~((1 << 4) | (7 << 9));
9879 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9880 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9881
9882 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9883 tmp16 &= ~((1 << 4) | (7 << 9));
9884 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9885 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9886
9887 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9888 tmp &= ~LC_SET_QUIESCE;
9889 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9890 }
9891 }
9892 }
9893
9894 /* set the link speed */
9895 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9896 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9897 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9898
9899 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9900 tmp16 &= ~0xf;
9901 if (mask & DRM_PCIE_SPEED_80)
9902 tmp16 |= 3; /* gen3 */
9903 else if (mask & DRM_PCIE_SPEED_50)
9904 tmp16 |= 2; /* gen2 */
9905 else
9906 tmp16 |= 1; /* gen1 */
9907 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9908
9909 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9910 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9911 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9912
9913 for (i = 0; i < rdev->usec_timeout; i++) {
9914 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9915 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9916 break;
9917 udelay(1);
9918 }
9919 }
9920
cik_program_aspm(struct radeon_device * rdev)9921 static void cik_program_aspm(struct radeon_device *rdev)
9922 {
9923 u32 data, orig;
9924 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9925 bool disable_clkreq = false;
9926
9927 if (radeon_aspm == 0)
9928 return;
9929
9930 /* XXX double check IGPs */
9931 if (rdev->flags & RADEON_IS_IGP)
9932 return;
9933
9934 if (!(rdev->flags & RADEON_IS_PCIE))
9935 return;
9936
9937 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9938 data &= ~LC_XMIT_N_FTS_MASK;
9939 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9940 if (orig != data)
9941 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9942
9943 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9944 data |= LC_GO_TO_RECOVERY;
9945 if (orig != data)
9946 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9947
9948 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9949 data |= P_IGNORE_EDB_ERR;
9950 if (orig != data)
9951 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9952
9953 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9954 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9955 data |= LC_PMI_TO_L1_DIS;
9956 if (!disable_l0s)
9957 data |= LC_L0S_INACTIVITY(7);
9958
9959 if (!disable_l1) {
9960 data |= LC_L1_INACTIVITY(7);
9961 data &= ~LC_PMI_TO_L1_DIS;
9962 if (orig != data)
9963 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9964
9965 if (!disable_plloff_in_l1) {
9966 bool clk_req_support;
9967
9968 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9969 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9970 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9971 if (orig != data)
9972 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9973
9974 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9975 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9976 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9977 if (orig != data)
9978 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9979
9980 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9981 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9982 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9983 if (orig != data)
9984 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9985
9986 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9987 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9988 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9989 if (orig != data)
9990 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9991
9992 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9993 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9994 data |= LC_DYN_LANES_PWR_STATE(3);
9995 if (orig != data)
9996 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9997
9998 if (!disable_clkreq &&
9999 !pci_is_root_bus(rdev->pdev->bus)) {
10000 struct pci_dev *root = rdev->pdev->bus->self;
10001 u32 lnkcap;
10002
10003 clk_req_support = false;
10004 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10005 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10006 clk_req_support = true;
10007 } else {
10008 clk_req_support = false;
10009 }
10010
10011 if (clk_req_support) {
10012 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10013 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10014 if (orig != data)
10015 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10016
10017 orig = data = RREG32_SMC(THM_CLK_CNTL);
10018 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10019 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10020 if (orig != data)
10021 WREG32_SMC(THM_CLK_CNTL, data);
10022
10023 orig = data = RREG32_SMC(MISC_CLK_CTRL);
10024 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10025 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10026 if (orig != data)
10027 WREG32_SMC(MISC_CLK_CTRL, data);
10028
10029 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10030 data &= ~BCLK_AS_XCLK;
10031 if (orig != data)
10032 WREG32_SMC(CG_CLKPIN_CNTL, data);
10033
10034 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10035 data &= ~FORCE_BIF_REFCLK_EN;
10036 if (orig != data)
10037 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10038
10039 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10040 data &= ~MPLL_CLKOUT_SEL_MASK;
10041 data |= MPLL_CLKOUT_SEL(4);
10042 if (orig != data)
10043 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10044 }
10045 }
10046 } else {
10047 if (orig != data)
10048 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10049 }
10050
10051 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10052 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10053 if (orig != data)
10054 WREG32_PCIE_PORT(PCIE_CNTL2, data);
10055
10056 if (!disable_l0s) {
10057 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10058 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10059 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10060 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10061 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10062 data &= ~LC_L0S_INACTIVITY_MASK;
10063 if (orig != data)
10064 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10065 }
10066 }
10067 }
10068 }
10069