/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"
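/* Note: each ASIC is listed twice below.  The UPPERCASE names are the
 * legacy headerless firmware binaries; the lowercase names are the newer
 * images that carry a ucode header (consumed via the rdev->new_fw paths
 * later in this file, e.g. in ci_mc_load_microcode()).
 */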
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
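
/* Note on the conversions above (best-effort reading of the hardware
 * behaviour): on CI dGPUs the 9-bit CTF_TEMP field reads in degrees C and
 * bit 0x200 appears to be an out-of-range indicator, so the value is
 * clamped to 255 C; on KV/KB APUs the SMC register at 0xC0300E0C seems to
 * report (temp + 49) * 8, hence temp = val / 8 - 49.  Both paths then
 * scale to millidegrees for the hwmon interface.
 */
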
213 
214 /*
215  * Indirect registers accessor
216  */
cik_pciep_rreg(struct radeon_device * rdev,u32 reg)217 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
218 {
219 	unsigned long flags;
220 	u32 r;
221 
222 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
223 	WREG32(PCIE_INDEX, reg);
224 	(void)RREG32(PCIE_INDEX);
225 	r = RREG32(PCIE_DATA);
226 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
227 	return r;
228 }
229 
cik_pciep_wreg(struct radeon_device * rdev,u32 reg,u32 v)230 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
231 {
232 	unsigned long flags;
233 
234 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
235 	WREG32(PCIE_INDEX, reg);
236 	(void)RREG32(PCIE_INDEX);
237 	WREG32(PCIE_DATA, v);
238 	(void)RREG32(PCIE_DATA);
239 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
240 }
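
/* The accessors above use the classic index/data pattern: write the
 * register offset to PCIE_INDEX, then move data through PCIE_DATA.  The
 * (void)RREG32() after each write is a posting-flush read, making sure
 * the index (and, on write, the data) has actually landed before the
 * next access; the spinlock keeps the index/data pair atomic against
 * concurrent users of the same window.
 */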
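/* RLC save/restore register lists.  From the way the entries are built,
 * each register is encoded as (GRBM_GFX_INDEX selector << 16) | (dword
 * offset), which is why every byte offset is shifted right by 2; most
 * entries are followed by a 0x00000000 placeholder for the saved value.
 * The bare 0x3 and 0x5 words appear to be sub-list markers whose exact
 * meaning is defined by the RLC ucode, not by this driver.
 */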
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
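/* "Golden" register tables, applied by cik_init_golden_registers() below.
 * Each entry is an {offset, and_mask, or_mask} triple consumed by
 * radeon_program_register_sequence(), which (as implemented in
 * radeon_device.c) applies roughly:
 *
 *	if (and_mask == 0xffffffff)
 *		tmp = or_mask;
 *	else
 *		tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *	WREG32(reg, tmp);
 *
 * i.e. a full mask writes the value outright, anything else is a
 * read-modify-write of just the masked bits.
 */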
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
1701 
1702 /**
1703  * cik_mm_rdoorbell - read a doorbell dword
1704  *
1705  * @rdev: radeon_device pointer
1706  * @index: doorbell index
1707  *
1708  * Returns the value in the doorbell aperture at the
1709  * requested doorbell index (CIK).
1710  */
cik_mm_rdoorbell(struct radeon_device * rdev,u32 index)1711 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1712 {
1713 	if (index < rdev->doorbell.num_doorbells) {
1714 		return readl(rdev->doorbell.ptr + index);
1715 	} else {
1716 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1717 		return 0;
1718 	}
1719 }
1720 
1721 /**
1722  * cik_mm_wdoorbell - write a doorbell dword
1723  *
1724  * @rdev: radeon_device pointer
1725  * @index: doorbell index
1726  * @v: value to write
1727  *
1728  * Writes @v to the doorbell aperture at the
1729  * requested doorbell index (CIK).
1730  */
cik_mm_wdoorbell(struct radeon_device * rdev,u32 index,u32 v)1731 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1732 {
1733 	if (index < rdev->doorbell.num_doorbells) {
1734 		writel(v, rdev->doorbell.ptr + index);
1735 	} else {
1736 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1737 	}
1738 }
1739 
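/* Note: rdev->doorbell.ptr is a u32 __iomem pointer, so the pointer
 * arithmetic above indexes the doorbell BAR in dwords, matching the
 * dword doorbell indices handed out to the rings.
 */
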
1740 #define BONAIRE_IO_MC_REGS_SIZE 36
1741 
1742 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1743 {
1744 	{0x00000070, 0x04400000},
1745 	{0x00000071, 0x80c01803},
1746 	{0x00000072, 0x00004004},
1747 	{0x00000073, 0x00000100},
1748 	{0x00000074, 0x00ff0000},
1749 	{0x00000075, 0x34000000},
1750 	{0x00000076, 0x08000014},
1751 	{0x00000077, 0x00cc08ec},
1752 	{0x00000078, 0x00000400},
1753 	{0x00000079, 0x00000000},
1754 	{0x0000007a, 0x04090000},
1755 	{0x0000007c, 0x00000000},
1756 	{0x0000007e, 0x4408a8e8},
1757 	{0x0000007f, 0x00000304},
1758 	{0x00000080, 0x00000000},
1759 	{0x00000082, 0x00000001},
1760 	{0x00000083, 0x00000002},
1761 	{0x00000084, 0xf3e4f400},
1762 	{0x00000085, 0x052024e3},
1763 	{0x00000087, 0x00000000},
1764 	{0x00000088, 0x01000000},
1765 	{0x0000008a, 0x1c0a0000},
1766 	{0x0000008b, 0xff010000},
1767 	{0x0000008d, 0xffffefff},
1768 	{0x0000008e, 0xfff3efff},
1769 	{0x0000008f, 0xfff3efbf},
1770 	{0x00000092, 0xf7ffffff},
1771 	{0x00000093, 0xffffff7f},
1772 	{0x00000095, 0x00101101},
1773 	{0x00000096, 0x00000fff},
1774 	{0x00000097, 0x00116fff},
1775 	{0x00000098, 0x60010000},
1776 	{0x00000099, 0x10010000},
1777 	{0x0000009a, 0x00006000},
1778 	{0x0000009b, 0x00001000},
1779 	{0x0000009f, 0x00b48000}
1780 };
1781 
1782 #define HAWAII_IO_MC_REGS_SIZE 22
1783 
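/* same {index, data} pair layout as the Bonaire table above */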
1784 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1785 {
1786 	{0x0000007d, 0x40000000},
1787 	{0x0000007e, 0x40180304},
1788 	{0x0000007f, 0x0000ff00},
1789 	{0x00000081, 0x00000000},
1790 	{0x00000083, 0x00000800},
1791 	{0x00000086, 0x00000000},
1792 	{0x00000087, 0x00000100},
1793 	{0x00000088, 0x00020100},
1794 	{0x00000089, 0x00000000},
1795 	{0x0000008b, 0x00040000},
1796 	{0x0000008c, 0x00000100},
1797 	{0x0000008e, 0xff010000},
1798 	{0x00000090, 0xffffefff},
1799 	{0x00000091, 0xfff3efff},
1800 	{0x00000092, 0xfff3efbf},
1801 	{0x00000093, 0xf7ffffff},
1802 	{0x00000094, 0xffffff7f},
1803 	{0x00000095, 0x00000fff},
1804 	{0x00000096, 0x00116fff},
1805 	{0x00000097, 0x60010000},
1806 	{0x00000098, 0x10010000},
1807 	{0x0000009f, 0x00c79000}
1808 };
1809 
1811 /**
1812  * cik_srbm_select - select specific register instances
1813  *
1814  * @rdev: radeon_device pointer
1815  * @me: selected ME (micro engine)
1816  * @pipe: pipe
1817  * @queue: queue
1818  * @vmid: VMID
1819  *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
1823  */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
1826 {
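	/* pack the selection into SRBM_GFX_CNTL: 2-bit pipe/me, 3-bit queue, 4-bit vmid */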
1827 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1828 			     MEID(me & 0x3) |
1829 			     VMID(vmid & 0xf) |
1830 			     QUEUEID(queue & 0x7));
1831 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1832 }
1833 
1834 /* ucode loading */
1835 /**
1836  * ci_mc_load_microcode - load MC ucode into the hw
1837  *
1838  * @rdev: radeon_device pointer
1839  *
1840  * Load the GDDR MC ucode into the hw (CIK).
1841  * Returns 0 on success, error on failure.
1842  */
int ci_mc_load_microcode(struct radeon_device *rdev)
1844 {
1845 	const __be32 *fw_data = NULL;
1846 	const __le32 *new_fw_data = NULL;
1847 	u32 running, blackout = 0, tmp;
1848 	u32 *io_mc_regs = NULL;
1849 	const __le32 *new_io_mc_regs = NULL;
1850 	int i, regs_size, ucode_size;
1851 
1852 	if (!rdev->mc_fw)
1853 		return -EINVAL;
1854 
1855 	if (rdev->new_fw) {
1856 		const struct mc_firmware_header_v1_0 *hdr =
1857 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1858 
1859 		radeon_ucode_print_mc_hdr(&hdr->header);
1860 
1861 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1862 		new_io_mc_regs = (const __le32 *)
1863 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1864 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1865 		new_fw_data = (const __le32 *)
1866 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1867 	} else {
1868 		ucode_size = rdev->mc_fw->size / 4;
1869 
1870 		switch (rdev->family) {
1871 		case CHIP_BONAIRE:
1872 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1873 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1874 			break;
1875 		case CHIP_HAWAII:
1876 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1877 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1878 			break;
1879 		default:
1880 			return -EINVAL;
1881 		}
1882 		fw_data = (const __be32 *)rdev->mc_fw->data;
1883 	}
1884 
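	/* only (re)load the ucode if the MC sequencer is not already running */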
1885 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1886 
1887 	if (running == 0) {
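		/* running is always zero here, so this blackout save never executes */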
1888 		if (running) {
1889 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1890 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1891 		}
1892 
1893 		/* reset the engine and set to writable */
1894 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1895 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1896 
1897 		/* load mc io regs */
1898 		for (i = 0; i < regs_size; i++) {
1899 			if (rdev->new_fw) {
1900 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1901 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1902 			} else {
1903 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1904 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1905 			}
1906 		}
1907 
1908 		tmp = RREG32(MC_SEQ_MISC0);
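		/* extra MC overrides for 0x6649 (Bonaire) boards with this MC_SEQ_MISC0 value */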
1909 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1910 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1911 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1912 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1913 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1914 		}
1915 
1916 		/* load the MC ucode */
1917 		for (i = 0; i < ucode_size; i++) {
1918 			if (rdev->new_fw)
1919 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1920 			else
1921 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1922 		}
1923 
1924 		/* put the engine back into the active state */
1925 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1926 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1927 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1928 
1929 		/* wait for training to complete */
1930 		for (i = 0; i < rdev->usec_timeout; i++) {
1931 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1932 				break;
1933 			udelay(1);
1934 		}
1935 		for (i = 0; i < rdev->usec_timeout; i++) {
1936 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1937 				break;
1938 			udelay(1);
1939 		}
1940 
1941 		if (running)
1942 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1943 	}
1944 
1945 	return 0;
1946 }
1947 
1948 /**
1949  * cik_init_microcode - load ucode images from disk
1950  *
1951  * @rdev: radeon_device pointer
1952  *
1953  * Use the firmware interface to load the ucode images into
1954  * the driver (not loaded into hw).
1955  * Returns 0 on success, error on failure.
1956  */
static int cik_init_microcode(struct radeon_device *rdev)
1958 {
1959 	const char *chip_name;
1960 	const char *new_chip_name;
1961 	size_t pfp_req_size, me_req_size, ce_req_size,
1962 		mec_req_size, rlc_req_size, mc_req_size = 0,
1963 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1964 	char fw_name[30];
1965 	int new_fw = 0;
1966 	int err;
1967 	int num_fw;
1968 
1969 	DRM_DEBUG("\n");
1970 
1971 	switch (rdev->family) {
1972 	case CHIP_BONAIRE:
1973 		chip_name = "BONAIRE";
1974 		new_chip_name = "bonaire";
1975 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1976 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1977 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1978 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1979 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1980 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1981 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1982 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1983 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1984 		num_fw = 8;
1985 		break;
1986 	case CHIP_HAWAII:
1987 		chip_name = "HAWAII";
1988 		new_chip_name = "hawaii";
1989 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1995 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1996 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1998 		num_fw = 8;
1999 		break;
2000 	case CHIP_KAVERI:
2001 		chip_name = "KAVERI";
2002 		new_chip_name = "kaveri";
2003 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2005 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2008 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009 		num_fw = 7;
2010 		break;
2011 	case CHIP_KABINI:
2012 		chip_name = "KABINI";
2013 		new_chip_name = "kabini";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2019 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2020 		num_fw = 6;
2021 		break;
2022 	case CHIP_MULLINS:
2023 		chip_name = "MULLINS";
2024 		new_chip_name = "mullins";
2025 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2026 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2027 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2028 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2029 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2030 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031 		num_fw = 6;
2032 		break;
2033 	default: BUG();
2034 	}
2035 
2036 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2037 
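	/* try the new (lowercase) firmware name first and validate it; fall back
	 * to the legacy (uppercase) image, which is only checked by size.
	 */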
2038 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2039 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2040 	if (err) {
2041 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2042 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2043 		if (err)
2044 			goto out;
2045 		if (rdev->pfp_fw->size != pfp_req_size) {
2046 			printk(KERN_ERR
2047 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2048 			       rdev->pfp_fw->size, fw_name);
2049 			err = -EINVAL;
2050 			goto out;
2051 		}
2052 	} else {
2053 		err = radeon_ucode_validate(rdev->pfp_fw);
2054 		if (err) {
2055 			printk(KERN_ERR
2056 			       "cik_fw: validation failed for firmware \"%s\"\n",
2057 			       fw_name);
2058 			goto out;
2059 		} else {
2060 			new_fw++;
2061 		}
2062 	}
2063 
2064 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2065 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2066 	if (err) {
2067 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2068 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2069 		if (err)
2070 			goto out;
2071 		if (rdev->me_fw->size != me_req_size) {
2072 			printk(KERN_ERR
2073 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074 			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->me_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2090 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2093 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->ce_fw->size != ce_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->ce_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2115 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2118 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->mec_fw->size != mec_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->mec_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	if (rdev->family == CHIP_KAVERI) {
2140 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2141 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2142 		if (err) {
2143 			goto out;
2144 		} else {
2145 			err = radeon_ucode_validate(rdev->mec2_fw);
2146 			if (err) {
2147 				goto out;
2148 			} else {
2149 				new_fw++;
2150 			}
2151 		}
2152 	}
2153 
2154 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2155 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2156 	if (err) {
2157 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2158 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2159 		if (err)
2160 			goto out;
2161 		if (rdev->rlc_fw->size != rlc_req_size) {
2162 			printk(KERN_ERR
2163 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2164 			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2167 	} else {
2168 		err = radeon_ucode_validate(rdev->rlc_fw);
2169 		if (err) {
2170 			printk(KERN_ERR
2171 			       "cik_fw: validation failed for firmware \"%s\"\n",
2172 			       fw_name);
2173 			goto out;
2174 		} else {
2175 			new_fw++;
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2180 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2183 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->sdma_fw->size != sdma_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->sdma_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	/* No SMC, MC ucode on APUs */
2205 	if (!(rdev->flags & RADEON_IS_IGP)) {
2206 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2207 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2208 		if (err) {
2209 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2210 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2211 			if (err) {
2212 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2213 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2214 				if (err)
2215 					goto out;
2216 			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)) {
				printk(KERN_ERR
				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
				goto out;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2225 		} else {
2226 			err = radeon_ucode_validate(rdev->mc_fw);
2227 			if (err) {
2228 				printk(KERN_ERR
2229 				       "cik_fw: validation failed for firmware \"%s\"\n",
2230 				       fw_name);
2231 				goto out;
2232 			} else {
2233 				new_fw++;
2234 			}
2235 		}
2236 
2237 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2238 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2239 		if (err) {
2240 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2241 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242 			if (err) {
2243 				printk(KERN_ERR
2244 				       "smc: error loading firmware \"%s\"\n",
2245 				       fw_name);
2246 				release_firmware(rdev->smc_fw);
2247 				rdev->smc_fw = NULL;
2248 				err = 0;
2249 			} else if (rdev->smc_fw->size != smc_req_size) {
2250 				printk(KERN_ERR
2251 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2252 				       rdev->smc_fw->size, fw_name);
2253 				err = -EINVAL;
2254 			}
2255 		} else {
2256 			err = radeon_ucode_validate(rdev->smc_fw);
2257 			if (err) {
2258 				printk(KERN_ERR
2259 				       "cik_fw: validation failed for firmware \"%s\"\n",
2260 				       fw_name);
2261 				goto out;
2262 			} else {
2263 				new_fw++;
2264 			}
2265 		}
2266 	}
2267 
2268 	if (new_fw == 0) {
2269 		rdev->new_fw = false;
2270 	} else if (new_fw < num_fw) {
2271 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2272 		err = -EINVAL;
2273 	} else {
2274 		rdev->new_fw = true;
2275 	}
2276 
2277 out:
2278 	if (err) {
2279 		if (err != -EINVAL)
2280 			printk(KERN_ERR
2281 			       "cik_cp: Failed to load firmware \"%s\"\n",
2282 			       fw_name);
2283 		release_firmware(rdev->pfp_fw);
2284 		rdev->pfp_fw = NULL;
2285 		release_firmware(rdev->me_fw);
2286 		rdev->me_fw = NULL;
2287 		release_firmware(rdev->ce_fw);
2288 		rdev->ce_fw = NULL;
2289 		release_firmware(rdev->mec_fw);
2290 		rdev->mec_fw = NULL;
2291 		release_firmware(rdev->mec2_fw);
2292 		rdev->mec2_fw = NULL;
2293 		release_firmware(rdev->rlc_fw);
2294 		rdev->rlc_fw = NULL;
2295 		release_firmware(rdev->sdma_fw);
2296 		rdev->sdma_fw = NULL;
2297 		release_firmware(rdev->mc_fw);
2298 		rdev->mc_fw = NULL;
2299 		release_firmware(rdev->smc_fw);
2300 		rdev->smc_fw = NULL;
2301 	}
2302 	return err;
2303 }
2304 
2305 /*
2306  * Core functions
2307  */
2308 /**
2309  * cik_tiling_mode_table_init - init the hw tiling table
2310  *
2311  * @rdev: radeon_device pointer
2312  *
2313  * Starting with SI, the tiling setup is done globally in a
2314  * set of 32 tiling modes.  Rather than selecting each set of
2315  * parameters per surface as on older asics, we just select
2316  * which index in the tiling table we want to use, and the
2317  * surface uses those parameters (CIK).
2318  */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2320 {
2321 	const u32 num_tile_mode_states = 32;
2322 	const u32 num_secondary_tile_mode_states = 16;
2323 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2324 	u32 num_pipe_configs;
2325 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2326 		rdev->config.cik.max_shader_engines;
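	/* total render backends = backends per SE * number of SEs */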
2327 
2328 	switch (rdev->config.cik.mem_row_size_in_kb) {
2329 	case 1:
2330 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2331 		break;
2332 	case 2:
2333 	default:
2334 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2335 		break;
2336 	case 4:
2337 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2338 		break;
2339 	}
2340 
2341 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2342 	if (num_pipe_configs > 8)
2343 		num_pipe_configs = 16;
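	/* anything above 8 pipes uses the 16-pipe (Hawaii) tables */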
2344 
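	/* program GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 for the detected
	 * pipe config; the values are also cached in rdev->config.cik so they
	 * can be reported to userspace via the info ioctl.
	 */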
2345 	if (num_pipe_configs == 16) {
2346 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2347 			switch (reg_offset) {
2348 			case 0:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2353 				break;
2354 			case 1:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2359 				break;
2360 			case 2:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2365 				break;
2366 			case 3:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2368 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2371 				break;
2372 			case 4:
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 						 TILE_SPLIT(split_equal_to_row_size));
2377 				break;
2378 			case 5:
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 				break;
2383 			case 6:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2388 				break;
2389 			case 7:
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2392 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 						 TILE_SPLIT(split_equal_to_row_size));
2394 				break;
2395 			case 8:
2396 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2398 				break;
2399 			case 9:
2400 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2401 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2403 				break;
2404 			case 10:
2405 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 				break;
2410 			case 11:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2414 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 				break;
2416 			case 12:
2417 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 				break;
2422 			case 13:
2423 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2426 				break;
2427 			case 14:
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 				break;
2433 			case 16:
2434 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 				break;
2439 			case 17:
2440 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 				break;
2445 			case 27:
2446 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449 				break;
2450 			case 28:
2451 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 				break;
2456 			case 29:
2457 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2458 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2460 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 				break;
2462 			case 30:
2463 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467 				break;
2468 			default:
2469 				gb_tile_moden = 0;
2470 				break;
2471 			}
2472 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2473 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474 		}
2475 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2476 			switch (reg_offset) {
2477 			case 0:
2478 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 						 NUM_BANKS(ADDR_SURF_16_BANK));
2482 				break;
2483 			case 1:
2484 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK));
2488 				break;
2489 			case 2:
2490 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK));
2494 				break;
2495 			case 3:
2496 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 						 NUM_BANKS(ADDR_SURF_16_BANK));
2500 				break;
2501 			case 4:
2502 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 						 NUM_BANKS(ADDR_SURF_8_BANK));
2506 				break;
2507 			case 5:
2508 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 						 NUM_BANKS(ADDR_SURF_4_BANK));
2512 				break;
2513 			case 6:
2514 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517 						 NUM_BANKS(ADDR_SURF_2_BANK));
2518 				break;
2519 			case 8:
2520 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK));
2524 				break;
2525 			case 9:
2526 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK));
2530 				break;
2531 			case 10:
2532 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK));
2536 				break;
2537 			case 11:
2538 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541 						 NUM_BANKS(ADDR_SURF_8_BANK));
2542 				break;
2543 			case 12:
2544 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547 						 NUM_BANKS(ADDR_SURF_4_BANK));
2548 				break;
2549 			case 13:
2550 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553 						 NUM_BANKS(ADDR_SURF_2_BANK));
2554 				break;
2555 			case 14:
2556 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559 						 NUM_BANKS(ADDR_SURF_2_BANK));
2560 				break;
2561 			default:
2562 				gb_tile_moden = 0;
2563 				break;
2564 			}
2565 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2566 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2567 		}
2568 	} else if (num_pipe_configs == 8) {
2569 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2570 			switch (reg_offset) {
2571 			case 0:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576 				break;
2577 			case 1:
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2582 				break;
2583 			case 2:
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588 				break;
2589 			case 3:
2590 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2594 				break;
2595 			case 4:
2596 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2598 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 						 TILE_SPLIT(split_equal_to_row_size));
2600 				break;
2601 			case 5:
2602 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605 				break;
2606 			case 6:
2607 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2611 				break;
2612 			case 7:
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2615 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 						 TILE_SPLIT(split_equal_to_row_size));
2617 				break;
2618 			case 8:
2619 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2621 				break;
2622 			case 9:
2623 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2626 				break;
2627 			case 10:
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 				break;
2633 			case 11:
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2638 				break;
2639 			case 12:
2640 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2641 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 				break;
2645 			case 13:
2646 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649 				break;
2650 			case 14:
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 				break;
2656 			case 16:
2657 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 				break;
2662 			case 17:
2663 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 				break;
2668 			case 27:
2669 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2672 				break;
2673 			case 28:
2674 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 				break;
2679 			case 29:
2680 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2682 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684 				break;
2685 			case 30:
2686 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2687 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690 				break;
2691 			default:
2692 				gb_tile_moden = 0;
2693 				break;
2694 			}
2695 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2696 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2697 		}
2698 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699 			switch (reg_offset) {
2700 			case 0:
2701 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704 						 NUM_BANKS(ADDR_SURF_16_BANK));
2705 				break;
2706 			case 1:
2707 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710 						 NUM_BANKS(ADDR_SURF_16_BANK));
2711 				break;
2712 			case 2:
2713 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716 						 NUM_BANKS(ADDR_SURF_16_BANK));
2717 				break;
2718 			case 3:
2719 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722 						 NUM_BANKS(ADDR_SURF_16_BANK));
2723 				break;
2724 			case 4:
2725 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728 						 NUM_BANKS(ADDR_SURF_8_BANK));
2729 				break;
2730 			case 5:
2731 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734 						 NUM_BANKS(ADDR_SURF_4_BANK));
2735 				break;
2736 			case 6:
2737 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740 						 NUM_BANKS(ADDR_SURF_2_BANK));
2741 				break;
2742 			case 8:
2743 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 						 NUM_BANKS(ADDR_SURF_16_BANK));
2747 				break;
2748 			case 9:
2749 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752 						 NUM_BANKS(ADDR_SURF_16_BANK));
2753 				break;
2754 			case 10:
2755 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758 						 NUM_BANKS(ADDR_SURF_16_BANK));
2759 				break;
2760 			case 11:
2761 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764 						 NUM_BANKS(ADDR_SURF_16_BANK));
2765 				break;
2766 			case 12:
2767 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770 						 NUM_BANKS(ADDR_SURF_8_BANK));
2771 				break;
2772 			case 13:
2773 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776 						 NUM_BANKS(ADDR_SURF_4_BANK));
2777 				break;
2778 			case 14:
2779 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782 						 NUM_BANKS(ADDR_SURF_2_BANK));
2783 				break;
2784 			default:
2785 				gb_tile_moden = 0;
2786 				break;
2787 			}
2788 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790 		}
2791 	} else if (num_pipe_configs == 4) {
2792 		if (num_rbs == 4) {
2793 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2794 				switch (reg_offset) {
2795 				case 0:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800 					break;
2801 				case 1:
2802 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2806 					break;
2807 				case 2:
2808 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812 					break;
2813 				case 3:
2814 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2818 					break;
2819 				case 4:
2820 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2822 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 							 TILE_SPLIT(split_equal_to_row_size));
2824 					break;
2825 				case 5:
2826 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 					break;
2830 				case 6:
2831 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2835 					break;
2836 				case 7:
2837 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2838 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2839 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840 							 TILE_SPLIT(split_equal_to_row_size));
2841 					break;
2842 				case 8:
2843 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2845 					break;
2846 				case 9:
2847 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2850 					break;
2851 				case 10:
2852 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 					break;
2857 				case 11:
2858 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2860 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2861 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862 					break;
2863 				case 12:
2864 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868 					break;
2869 				case 13:
2870 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2871 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2873 					break;
2874 				case 14:
2875 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 					break;
2880 				case 16:
2881 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2882 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 					break;
2886 				case 17:
2887 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2888 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 					break;
2892 				case 27:
2893 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2896 					break;
2897 				case 28:
2898 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902 					break;
2903 				case 29:
2904 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908 					break;
2909 				case 30:
2910 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2911 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914 					break;
2915 				default:
2916 					gb_tile_moden = 0;
2917 					break;
2918 				}
2919 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2920 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2921 			}
2922 		} else if (num_rbs < 4) {
2923 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2924 				switch (reg_offset) {
2925 				case 0:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2930 					break;
2931 				case 1:
2932 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2936 					break;
2937 				case 2:
2938 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2940 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2942 					break;
2943 				case 3:
2944 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2946 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2947 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2948 					break;
2949 				case 4:
2950 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2952 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2953 							 TILE_SPLIT(split_equal_to_row_size));
2954 					break;
2955 				case 5:
2956 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959 					break;
2960 				case 6:
2961 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2963 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2965 					break;
2966 				case 7:
2967 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2969 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2970 							 TILE_SPLIT(split_equal_to_row_size));
2971 					break;
2972 				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2975 					break;
2976 				case 9:
2977 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2980 					break;
2981 				case 10:
2982 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2984 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 					break;
2987 				case 11:
2988 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2989 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2990 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992 					break;
2993 				case 12:
2994 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998 					break;
2999 				case 13:
3000 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3003 					break;
3004 				case 14:
3005 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 					break;
3010 				case 16:
3011 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3013 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3014 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 					break;
3016 				case 17:
3017 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3018 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3020 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 					break;
3022 				case 27:
3023 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3026 					break;
3027 				case 28:
3028 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032 					break;
3033 				case 29:
3034 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3035 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3036 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3037 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038 					break;
3039 				case 30:
3040 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3041 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3043 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3044 					break;
3045 				default:
3046 					gb_tile_moden = 0;
3047 					break;
3048 				}
3049 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3050 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3051 			}
3052 		}
3053 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3054 			switch (reg_offset) {
3055 			case 0:
3056 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 						 NUM_BANKS(ADDR_SURF_16_BANK));
3060 				break;
3061 			case 1:
3062 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065 						 NUM_BANKS(ADDR_SURF_16_BANK));
3066 				break;
3067 			case 2:
3068 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071 						 NUM_BANKS(ADDR_SURF_16_BANK));
3072 				break;
3073 			case 3:
3074 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 						 NUM_BANKS(ADDR_SURF_16_BANK));
3078 				break;
3079 			case 4:
3080 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083 						 NUM_BANKS(ADDR_SURF_16_BANK));
3084 				break;
3085 			case 5:
3086 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089 						 NUM_BANKS(ADDR_SURF_8_BANK));
3090 				break;
3091 			case 6:
3092 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095 						 NUM_BANKS(ADDR_SURF_4_BANK));
3096 				break;
3097 			case 8:
3098 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3099 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3100 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101 						 NUM_BANKS(ADDR_SURF_16_BANK));
3102 				break;
3103 			case 9:
3104 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3105 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3106 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107 						 NUM_BANKS(ADDR_SURF_16_BANK));
3108 				break;
3109 			case 10:
3110 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3112 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3113 						 NUM_BANKS(ADDR_SURF_16_BANK));
3114 				break;
3115 			case 11:
3116 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3118 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119 						 NUM_BANKS(ADDR_SURF_16_BANK));
3120 				break;
3121 			case 12:
3122 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125 						 NUM_BANKS(ADDR_SURF_16_BANK));
3126 				break;
3127 			case 13:
3128 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131 						 NUM_BANKS(ADDR_SURF_8_BANK));
3132 				break;
3133 			case 14:
3134 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137 						 NUM_BANKS(ADDR_SURF_4_BANK));
3138 				break;
3139 			default:
3140 				gb_tile_moden = 0;
3141 				break;
3142 			}
3143 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3144 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3145 		}
3146 	} else if (num_pipe_configs == 2) {
3147 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3148 			switch (reg_offset) {
3149 			case 0:
3150 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152 						 PIPE_CONFIG(ADDR_SURF_P2) |
3153 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3154 				break;
3155 			case 1:
3156 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3157 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158 						 PIPE_CONFIG(ADDR_SURF_P2) |
3159 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3160 				break;
3161 			case 2:
3162 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3164 						 PIPE_CONFIG(ADDR_SURF_P2) |
3165 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3166 				break;
3167 			case 3:
3168 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3170 						 PIPE_CONFIG(ADDR_SURF_P2) |
3171 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3172 				break;
3173 			case 4:
3174 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3176 						 PIPE_CONFIG(ADDR_SURF_P2) |
3177 						 TILE_SPLIT(split_equal_to_row_size));
3178 				break;
3179 			case 5:
3180 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181 						 PIPE_CONFIG(ADDR_SURF_P2) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183 				break;
3184 			case 6:
3185 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3187 						 PIPE_CONFIG(ADDR_SURF_P2) |
3188 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3189 				break;
3190 			case 7:
3191 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3192 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3193 						 PIPE_CONFIG(ADDR_SURF_P2) |
3194 						 TILE_SPLIT(split_equal_to_row_size));
3195 				break;
3196 			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P2));
3199 				break;
3200 			case 9:
3201 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203 						 PIPE_CONFIG(ADDR_SURF_P2));
3204 				break;
3205 			case 10:
3206 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3208 						 PIPE_CONFIG(ADDR_SURF_P2) |
3209 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 				break;
3211 			case 11:
3212 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3214 						 PIPE_CONFIG(ADDR_SURF_P2) |
3215 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216 				break;
3217 			case 12:
3218 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3220 						 PIPE_CONFIG(ADDR_SURF_P2) |
3221 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222 				break;
3223 			case 13:
3224 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225 						 PIPE_CONFIG(ADDR_SURF_P2) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3227 				break;
3228 			case 14:
3229 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231 						 PIPE_CONFIG(ADDR_SURF_P2) |
3232 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 				break;
3234 			case 16:
3235 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237 						 PIPE_CONFIG(ADDR_SURF_P2) |
3238 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239 				break;
3240 			case 17:
3241 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3242 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243 						 PIPE_CONFIG(ADDR_SURF_P2) |
3244 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245 				break;
3246 			case 27:
3247 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3249 						 PIPE_CONFIG(ADDR_SURF_P2));
3250 				break;
3251 			case 28:
3252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 						 PIPE_CONFIG(ADDR_SURF_P2) |
3255 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 				break;
3257 			case 29:
3258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260 						 PIPE_CONFIG(ADDR_SURF_P2) |
3261 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 				break;
3263 			case 30:
3264 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3265 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266 						 PIPE_CONFIG(ADDR_SURF_P2) |
3267 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268 				break;
3269 			default:
3270 				gb_tile_moden = 0;
3271 				break;
3272 			}
3273 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3274 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3275 		}
3276 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3277 			switch (reg_offset) {
3278 			case 0:
3279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282 						 NUM_BANKS(ADDR_SURF_16_BANK));
3283 				break;
3284 			case 1:
3285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288 						 NUM_BANKS(ADDR_SURF_16_BANK));
3289 				break;
3290 			case 2:
3291 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294 						 NUM_BANKS(ADDR_SURF_16_BANK));
3295 				break;
3296 			case 3:
3297 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300 						 NUM_BANKS(ADDR_SURF_16_BANK));
3301 				break;
3302 			case 4:
3303 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306 						 NUM_BANKS(ADDR_SURF_16_BANK));
3307 				break;
3308 			case 5:
3309 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312 						 NUM_BANKS(ADDR_SURF_16_BANK));
3313 				break;
3314 			case 6:
3315 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3317 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3318 						 NUM_BANKS(ADDR_SURF_8_BANK));
3319 				break;
3320 			case 8:
3321 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3323 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324 						 NUM_BANKS(ADDR_SURF_16_BANK));
3325 				break;
3326 			case 9:
3327 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3328 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330 						 NUM_BANKS(ADDR_SURF_16_BANK));
3331 				break;
3332 			case 10:
3333 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3334 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3335 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336 						 NUM_BANKS(ADDR_SURF_16_BANK));
3337 				break;
3338 			case 11:
3339 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3340 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3341 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342 						 NUM_BANKS(ADDR_SURF_16_BANK));
3343 				break;
3344 			case 12:
3345 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3347 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 						 NUM_BANKS(ADDR_SURF_16_BANK));
3349 				break;
3350 			case 13:
3351 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 						 NUM_BANKS(ADDR_SURF_16_BANK));
3355 				break;
3356 			case 14:
3357 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360 						 NUM_BANKS(ADDR_SURF_8_BANK));
3361 				break;
3362 			default:
3363 				gb_tile_moden = 0;
3364 				break;
3365 			}
3366 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3367 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3368 		}
3369 	} else
3370 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3371 }
3372 
3373 /**
3374  * cik_select_se_sh - select which SE, SH to address
3375  *
3376  * @rdev: radeon_device pointer
3377  * @se_num: shader engine to address
3378  * @sh_num: sh block to address
3379  *
3380  * Select which SE, SH combinations to address. Certain
3381  * registers are instanced per SE or SH.  0xffffffff means
3382  * broadcast to all SEs or SHs (CIK).
3383  */
3384 static void cik_select_se_sh(struct radeon_device *rdev,
3385 			     u32 se_num, u32 sh_num)
3386 {
3387 	u32 data = INSTANCE_BROADCAST_WRITES;
3388 
3389 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3390 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3391 	else if (se_num == 0xffffffff)
3392 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3393 	else if (sh_num == 0xffffffff)
3394 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3395 	else
3396 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3397 	WREG32(GRBM_GFX_INDEX, data);
3398 }
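
/*
 * Illustrative sketch (not part of the driver): per-SE/SH instanced
 * registers are typically accessed by selecting one instance, doing the
 * access, and then restoring broadcast mode, all under grbm_idx_mutex.
 * This mirrors how cik_get_rb_disabled() is used later in this file.
 */
#if 0
static u32 cik_read_instanced_reg(struct radeon_device *rdev,
				  u32 se, u32 sh, u32 reg)
{
	u32 val;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);	/* address one SE/SH pair */
	val = RREG32(reg);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); /* back to broadcast */
	mutex_unlock(&rdev->grbm_idx_mutex);
	return val;
}
#endif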
3399 
3400 /**
3401  * cik_create_bitmask - create a bitmask
3402  *
3403  * @bit_width: length of the mask
3404  *
3405  * create a variable length bit mask (CIK).
3406  * Returns the bitmask.
3407  */
3408 static u32 cik_create_bitmask(u32 bit_width)
3409 {
3410 	u32 i, mask = 0;
3411 
3412 	for (i = 0; i < bit_width; i++) {
3413 		mask <<= 1;
3414 		mask |= 1;
3415 	}
3416 	return mask;
3417 }
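
/*
 * Equivalent closed form (an illustrative note, not driver code): for
 * bit_width < 32 the loop above computes (1U << bit_width) - 1, i.e.
 * bit_width consecutive low-order 1 bits; cik_create_bitmask(4) == 0xf.
 */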
3418 
3419 /**
3420  * cik_get_rb_disabled - computes the mask of disabled RBs
3421  *
3422  * @rdev: radeon_device pointer
3423  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3424  * @sh_per_se: number of SH blocks per SE for the asic
3426  *
3427  * Calculates the bitmask of disabled RBs (CIK).
3428  * Returns the disabled RB bitmask.
3429  */
3430 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3431 			      u32 max_rb_num_per_se,
3432 			      u32 sh_per_se)
3433 {
3434 	u32 data, mask;
3435 
3436 	data = RREG32(CC_RB_BACKEND_DISABLE);
3437 	if (data & 1)
3438 		data &= BACKEND_DISABLE_MASK;
3439 	else
3440 		data = 0;
3441 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3442 
3443 	data >>= BACKEND_DISABLE_SHIFT;
3444 
3445 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3446 
3447 	return data & mask;
3448 }
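
/*
 * Worked example (illustrative): with max_rb_num_per_se = 4 and
 * sh_per_se = 1, mask = cik_create_bitmask(4) = 0xf, so only the low
 * four harvest bits of the combined CC/GC_USER disable fields are
 * reported for the currently selected SE/SH.
 */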
3449 
3450 /**
3451  * cik_setup_rb - setup the RBs on the asic
3452  *
3453  * @rdev: radeon_device pointer
3454  * @se_num: number of SEs (shader engines) for the asic
3455  * @sh_per_se: number of SH blocks per SE for the asic
3456  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3457  *
3458  * Configures per-SE/SH RB registers (CIK).
3459  */
3460 static void cik_setup_rb(struct radeon_device *rdev,
3461 			 u32 se_num, u32 sh_per_se,
3462 			 u32 max_rb_num_per_se)
3463 {
3464 	int i, j;
3465 	u32 data, mask;
3466 	u32 disabled_rbs = 0;
3467 	u32 enabled_rbs = 0;
3468 
3469 	mutex_lock(&rdev->grbm_idx_mutex);
3470 	for (i = 0; i < se_num; i++) {
3471 		for (j = 0; j < sh_per_se; j++) {
3472 			cik_select_se_sh(rdev, i, j);
3473 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3474 			if (rdev->family == CHIP_HAWAII)
3475 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3476 			else
3477 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3478 		}
3479 	}
3480 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3481 	mutex_unlock(&rdev->grbm_idx_mutex);
3482 
3483 	mask = 1;
3484 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3485 		if (!(disabled_rbs & mask))
3486 			enabled_rbs |= mask;
3487 		mask <<= 1;
3488 	}
3489 
3490 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3491 
3492 	mutex_lock(&rdev->grbm_idx_mutex);
3493 	for (i = 0; i < se_num; i++) {
3494 		cik_select_se_sh(rdev, i, 0xffffffff);
3495 		data = 0;
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			switch (enabled_rbs & 3) {
3498 			case 0:
3499 				if (j == 0)
3500 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3501 				else
3502 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3503 				break;
3504 			case 1:
3505 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3506 				break;
3507 			case 2:
3508 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3509 				break;
3510 			case 3:
3511 			default:
3512 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3513 				break;
3514 			}
3515 			enabled_rbs >>= 2;
3516 		}
3517 		WREG32(PA_SC_RASTER_CONFIG, data);
3518 	}
3519 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3520 	mutex_unlock(&rdev->grbm_idx_mutex);
3521 }
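
/*
 * Worked example (illustrative): on a 2-SE part with 1 SH/SE and
 * 2 RBs/SE (e.g. Bonaire), the loop above scans 4 RB bits; if
 * disabled_rbs = 0b0100 then enabled_rbs = 0b1011, and each SH then
 * consumes two bits of enabled_rbs when choosing its RASTER_CONFIG
 * RB mapping.
 */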
3522 
3523 /**
3524  * cik_gpu_init - setup the 3D engine
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Configures the 3D engine and tiling configuration
3529  * registers so that the 3D engine is usable.
3530  */
3531 static void cik_gpu_init(struct radeon_device *rdev)
3532 {
3533 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3534 	u32 mc_shared_chmap, mc_arb_ramcfg;
3535 	u32 hdp_host_path_cntl;
3536 	u32 tmp;
3537 	int i, j;
3538 
3539 	switch (rdev->family) {
3540 	case CHIP_BONAIRE:
3541 		rdev->config.cik.max_shader_engines = 2;
3542 		rdev->config.cik.max_tile_pipes = 4;
3543 		rdev->config.cik.max_cu_per_sh = 7;
3544 		rdev->config.cik.max_sh_per_se = 1;
3545 		rdev->config.cik.max_backends_per_se = 2;
3546 		rdev->config.cik.max_texture_channel_caches = 4;
3547 		rdev->config.cik.max_gprs = 256;
3548 		rdev->config.cik.max_gs_threads = 32;
3549 		rdev->config.cik.max_hw_contexts = 8;
3550 
3551 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3552 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3553 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3554 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3555 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3556 		break;
3557 	case CHIP_HAWAII:
3558 		rdev->config.cik.max_shader_engines = 4;
3559 		rdev->config.cik.max_tile_pipes = 16;
3560 		rdev->config.cik.max_cu_per_sh = 11;
3561 		rdev->config.cik.max_sh_per_se = 1;
3562 		rdev->config.cik.max_backends_per_se = 4;
3563 		rdev->config.cik.max_texture_channel_caches = 16;
3564 		rdev->config.cik.max_gprs = 256;
3565 		rdev->config.cik.max_gs_threads = 32;
3566 		rdev->config.cik.max_hw_contexts = 8;
3567 
3568 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3569 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3570 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3571 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3572 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3573 		break;
3574 	case CHIP_KAVERI:
3575 		rdev->config.cik.max_shader_engines = 1;
3576 		rdev->config.cik.max_tile_pipes = 4;
3577 		if ((rdev->pdev->device == 0x1304) ||
3578 		    (rdev->pdev->device == 0x1305) ||
3579 		    (rdev->pdev->device == 0x130C) ||
3580 		    (rdev->pdev->device == 0x130F) ||
3581 		    (rdev->pdev->device == 0x1310) ||
3582 		    (rdev->pdev->device == 0x1311) ||
3583 		    (rdev->pdev->device == 0x131C)) {
3584 			rdev->config.cik.max_cu_per_sh = 8;
3585 			rdev->config.cik.max_backends_per_se = 2;
3586 		} else if ((rdev->pdev->device == 0x1309) ||
3587 			   (rdev->pdev->device == 0x130A) ||
3588 			   (rdev->pdev->device == 0x130D) ||
3589 			   (rdev->pdev->device == 0x1313) ||
3590 			   (rdev->pdev->device == 0x131D)) {
3591 			rdev->config.cik.max_cu_per_sh = 6;
3592 			rdev->config.cik.max_backends_per_se = 2;
3593 		} else if ((rdev->pdev->device == 0x1306) ||
3594 			   (rdev->pdev->device == 0x1307) ||
3595 			   (rdev->pdev->device == 0x130B) ||
3596 			   (rdev->pdev->device == 0x130E) ||
3597 			   (rdev->pdev->device == 0x1315) ||
3598 			   (rdev->pdev->device == 0x1318) ||
3599 			   (rdev->pdev->device == 0x131B)) {
3600 			rdev->config.cik.max_cu_per_sh = 4;
3601 			rdev->config.cik.max_backends_per_se = 1;
3602 		} else {
3603 			rdev->config.cik.max_cu_per_sh = 3;
3604 			rdev->config.cik.max_backends_per_se = 1;
3605 		}
3606 		rdev->config.cik.max_sh_per_se = 1;
3607 		rdev->config.cik.max_texture_channel_caches = 4;
3608 		rdev->config.cik.max_gprs = 256;
3609 		rdev->config.cik.max_gs_threads = 16;
3610 		rdev->config.cik.max_hw_contexts = 8;
3611 
3612 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3613 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3614 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3615 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3616 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3617 		break;
3618 	case CHIP_KABINI:
3619 	case CHIP_MULLINS:
3620 	default:
3621 		rdev->config.cik.max_shader_engines = 1;
3622 		rdev->config.cik.max_tile_pipes = 2;
3623 		rdev->config.cik.max_cu_per_sh = 2;
3624 		rdev->config.cik.max_sh_per_se = 1;
3625 		rdev->config.cik.max_backends_per_se = 1;
3626 		rdev->config.cik.max_texture_channel_caches = 2;
3627 		rdev->config.cik.max_gprs = 256;
3628 		rdev->config.cik.max_gs_threads = 16;
3629 		rdev->config.cik.max_hw_contexts = 8;
3630 
3631 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3632 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3633 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3634 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3635 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3636 		break;
3637 	}
3638 
3639 	/* Initialize HDP */
3640 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3641 		WREG32((0x2c14 + j), 0x00000000);
3642 		WREG32((0x2c18 + j), 0x00000000);
3643 		WREG32((0x2c1c + j), 0x00000000);
3644 		WREG32((0x2c20 + j), 0x00000000);
3645 		WREG32((0x2c24 + j), 0x00000000);
3646 	}
3647 
3648 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3649 	WREG32(SRBM_INT_CNTL, 0x1);
3650 	WREG32(SRBM_INT_ACK, 0x1);
3651 
3652 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3653 
3654 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3655 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3656 
3657 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3658 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3659 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3660 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3661 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3662 		rdev->config.cik.mem_row_size_in_kb = 4;
3663 	/* XXX use MC settings? */
3664 	rdev->config.cik.shader_engine_tile_size = 32;
3665 	rdev->config.cik.num_gpus = 1;
3666 	rdev->config.cik.multi_gpu_tile_size = 64;
3667 
3668 	/* fix up row size */
3669 	gb_addr_config &= ~ROW_SIZE_MASK;
3670 	switch (rdev->config.cik.mem_row_size_in_kb) {
3671 	case 1:
3672 	default:
3673 		gb_addr_config |= ROW_SIZE(0);
3674 		break;
3675 	case 2:
3676 		gb_addr_config |= ROW_SIZE(1);
3677 		break;
3678 	case 4:
3679 		gb_addr_config |= ROW_SIZE(2);
3680 		break;
3681 	}
3682 
3683 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3684 	 * not have bank info, so create a custom tiling dword.
3685 	 * bits 3:0   num_pipes
3686 	 * bits 7:4   num_banks
3687 	 * bits 11:8  group_size
3688 	 * bits 15:12 row_size
3689 	 */
3690 	rdev->config.cik.tile_config = 0;
3691 	switch (rdev->config.cik.num_tile_pipes) {
3692 	case 1:
3693 		rdev->config.cik.tile_config |= (0 << 0);
3694 		break;
3695 	case 2:
3696 		rdev->config.cik.tile_config |= (1 << 0);
3697 		break;
3698 	case 4:
3699 		rdev->config.cik.tile_config |= (2 << 0);
3700 		break;
3701 	case 8:
3702 	default:
3703 		/* XXX what about 12? */
3704 		rdev->config.cik.tile_config |= (3 << 0);
3705 		break;
3706 	}
3707 	rdev->config.cik.tile_config |=
3708 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3709 	rdev->config.cik.tile_config |=
3710 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3711 	rdev->config.cik.tile_config |=
3712 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
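
	/*
	 * Illustrative decode of the dword assembled above, mirroring the
	 * bit layout documented before the switch:
	 *   num_pipes  =  tile_config        & 0xf;
	 *   num_banks  = (tile_config >> 4)  & 0xf;
	 *   group_size = (tile_config >> 8)  & 0xf;
	 *   row_size   = (tile_config >> 12) & 0xf;
	 */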
3713 
3714 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3715 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3716 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3717 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3718 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3719 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3720 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3721 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3722 
3723 	cik_tiling_mode_table_init(rdev);
3724 
3725 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3726 		     rdev->config.cik.max_sh_per_se,
3727 		     rdev->config.cik.max_backends_per_se);
3728 
3729 	rdev->config.cik.active_cus = 0;
3730 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3731 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3732 			rdev->config.cik.active_cus +=
3733 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3734 		}
3735 	}
3736 
3737 	/* set HW defaults for 3D engine */
3738 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3739 
3740 	mutex_lock(&rdev->grbm_idx_mutex);
3741 	/*
3742 	 * making sure that the following register writes will be broadcasted
3743 	 * to all the shaders
3744 	 */
3745 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3746 	WREG32(SX_DEBUG_1, 0x20);
3747 
3748 	WREG32(TA_CNTL_AUX, 0x00010000);
3749 
3750 	tmp = RREG32(SPI_CONFIG_CNTL);
3751 	tmp |= 0x03000000;
3752 	WREG32(SPI_CONFIG_CNTL, tmp);
3753 
3754 	WREG32(SQ_CONFIG, 1);
3755 
3756 	WREG32(DB_DEBUG, 0);
3757 
3758 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3759 	tmp |= 0x00000400;
3760 	WREG32(DB_DEBUG2, tmp);
3761 
3762 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3763 	tmp |= 0x00020200;
3764 	WREG32(DB_DEBUG3, tmp);
3765 
3766 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3767 	tmp |= 0x00018208;
3768 	WREG32(CB_HW_CONTROL, tmp);
3769 
3770 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3771 
3772 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3773 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3774 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3775 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3776 
3777 	WREG32(VGT_NUM_INSTANCES, 1);
3778 
3779 	WREG32(CP_PERFMON_CNTL, 0);
3780 
3781 	WREG32(SQ_CONFIG, 0);
3782 
3783 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3784 					  FORCE_EOV_MAX_REZ_CNT(255)));
3785 
3786 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3787 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3788 
3789 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3790 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3791 
3792 	tmp = RREG32(HDP_MISC_CNTL);
3793 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3794 	WREG32(HDP_MISC_CNTL, tmp);
3795 
3796 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3797 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3798 
3799 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3800 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3801 	mutex_unlock(&rdev->grbm_idx_mutex);
3802 
3803 	udelay(50);
3804 }
3805 
3806 /*
3807  * GPU scratch registers helpers function.
3808  */
3809 /**
3810  * cik_scratch_init - setup driver info for CP scratch regs
3811  *
3812  * @rdev: radeon_device pointer
3813  *
3814  * Set up the number and offset of the CP scratch registers.
3815  * NOTE: use of CP scratch registers is a legacy interface and
3816  * is not used by default on newer asics (r6xx+).  On newer asics,
3817  * memory buffers are used for fences rather than scratch regs.
3818  */
3819 static void cik_scratch_init(struct radeon_device *rdev)
3820 {
3821 	int i;
3822 
3823 	rdev->scratch.num_reg = 7;
3824 	rdev->scratch.reg_base = SCRATCH_REG0;
3825 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3826 		rdev->scratch.free[i] = true;
3827 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3828 	}
3829 }
3830 
3831 /**
3832  * cik_ring_test - basic gfx ring test
3833  *
3834  * @rdev: radeon_device pointer
3835  * @ring: radeon_ring structure holding ring information
3836  *
3837  * Allocate a scratch register and write to it using the gfx ring (CIK).
3838  * Provides a basic gfx ring test to verify that the ring is working.
3839  * Used by cik_cp_gfx_resume().
3840  * Returns 0 on success, error on failure.
3841  */
3842 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3843 {
3844 	uint32_t scratch;
3845 	uint32_t tmp = 0;
3846 	unsigned i;
3847 	int r;
3848 
3849 	r = radeon_scratch_get(rdev, &scratch);
3850 	if (r) {
3851 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3852 		return r;
3853 	}
3854 	WREG32(scratch, 0xCAFEDEAD);
3855 	r = radeon_ring_lock(rdev, ring, 3);
3856 	if (r) {
3857 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3858 		radeon_scratch_free(rdev, scratch);
3859 		return r;
3860 	}
3861 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3862 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3863 	radeon_ring_write(ring, 0xDEADBEEF);
3864 	radeon_ring_unlock_commit(rdev, ring, false);
3865 
3866 	for (i = 0; i < rdev->usec_timeout; i++) {
3867 		tmp = RREG32(scratch);
3868 		if (tmp == 0xDEADBEEF)
3869 			break;
3870 		DRM_UDELAY(1);
3871 	}
3872 	if (i < rdev->usec_timeout) {
3873 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3874 	} else {
3875 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3876 			  ring->idx, scratch, tmp);
3877 		r = -EINVAL;
3878 	}
3879 	radeon_scratch_free(rdev, scratch);
3880 	return r;
3881 }
3882 
3883 /**
3884  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3885  *
3886  * @rdev: radeon_device pointer
3887  * @ridx: radeon ring index
3888  *
3889  * Emits an hdp flush on the cp.
3890  */
3891 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3892 				       int ridx)
3893 {
3894 	struct radeon_ring *ring = &rdev->ring[ridx];
3895 	u32 ref_and_mask;
3896 
3897 	switch (ring->idx) {
3898 	case CAYMAN_RING_TYPE_CP1_INDEX:
3899 	case CAYMAN_RING_TYPE_CP2_INDEX:
3900 	default:
3901 		switch (ring->me) {
3902 		case 0:
3903 			ref_and_mask = CP2 << ring->pipe;
3904 			break;
3905 		case 1:
3906 			ref_and_mask = CP6 << ring->pipe;
3907 			break;
3908 		default:
3909 			return;
3910 		}
3911 		break;
3912 	case RADEON_RING_TYPE_GFX_INDEX:
3913 		ref_and_mask = CP0;
3914 		break;
3915 	}
3916 
3917 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3918 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3919 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3920 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3921 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3922 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3923 	radeon_ring_write(ring, ref_and_mask);
3924 	radeon_ring_write(ring, ref_and_mask);
3925 	radeon_ring_write(ring, 0x20); /* poll interval */
3926 }
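
/*
 * For reference (illustrative): the sequence above emits one 7-dword
 * WAIT_REG_MEM packet: a header plus six data dwords (control flags for
 * write/wait/write with an equality check on the PFP engine, the flush
 * request and done register offsets, the reference value, the mask, and
 * a 0x20 poll interval), so the CP itself performs the HDP flush
 * handshake.
 */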
3927 
3928 /**
3929  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3930  *
3931  * @rdev: radeon_device pointer
3932  * @fence: radeon fence object
3933  *
3934  * Emits a fence sequence number on the gfx ring and flushes
3935  * GPU caches.
3936  */
3937 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3938 			     struct radeon_fence *fence)
3939 {
3940 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3941 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3942 
3943 	/* Workaround for cache flush problems. First send a dummy EOP
3944 	 * event down the pipe with seq one below.
3945 	 */
3946 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3947 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3948 				 EOP_TC_ACTION_EN |
3949 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3950 				 EVENT_INDEX(5)));
3951 	radeon_ring_write(ring, addr & 0xfffffffc);
3952 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3953 				DATA_SEL(1) | INT_SEL(0));
3954 	radeon_ring_write(ring, fence->seq - 1);
3955 	radeon_ring_write(ring, 0);
3956 
3957 	/* Then send the real EOP event down the pipe. */
3958 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3959 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3960 				 EOP_TC_ACTION_EN |
3961 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3962 				 EVENT_INDEX(5)));
3963 	radeon_ring_write(ring, addr & 0xfffffffc);
3964 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3965 	radeon_ring_write(ring, fence->seq);
3966 	radeon_ring_write(ring, 0);
3967 }
3968 
3969 /**
3970  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3971  *
3972  * @rdev: radeon_device pointer
3973  * @fence: radeon fence object
3974  *
3975  * Emits a fence sequence number on the compute ring and flushes
3976  * GPU caches.
3977  */
3978 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3979 				 struct radeon_fence *fence)
3980 {
3981 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3982 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3983 
3984 	/* RELEASE_MEM - flush caches, send int */
3985 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3986 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3987 				 EOP_TC_ACTION_EN |
3988 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3989 				 EVENT_INDEX(5)));
3990 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3991 	radeon_ring_write(ring, addr & 0xfffffffc);
3992 	radeon_ring_write(ring, upper_32_bits(addr));
3993 	radeon_ring_write(ring, fence->seq);
3994 	radeon_ring_write(ring, 0);
3995 }
3996 
3997 /**
3998  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3999  *
4000  * @rdev: radeon_device pointer
4001  * @ring: radeon ring buffer object
4002  * @semaphore: radeon semaphore object
4003  * @emit_wait: Is this a semaphore wait?
4004  *
4005  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4006  * from running ahead of semaphore waits.
4007  */
4008 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4009 			     struct radeon_ring *ring,
4010 			     struct radeon_semaphore *semaphore,
4011 			     bool emit_wait)
4012 {
4013 	uint64_t addr = semaphore->gpu_addr;
4014 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4015 
4016 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4017 	radeon_ring_write(ring, lower_32_bits(addr));
4018 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4019 
4020 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4021 		/* Prevent the PFP from running ahead of the semaphore wait */
4022 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4023 		radeon_ring_write(ring, 0x0);
4024 	}
4025 
4026 	return true;
4027 }
4028 
4029 /**
4030  * cik_copy_cpdma - copy pages using the CP DMA engine
4031  *
4032  * @rdev: radeon_device pointer
4033  * @src_offset: src GPU address
4034  * @dst_offset: dst GPU address
4035  * @num_gpu_pages: number of GPU pages to xfer
4036  * @resv: reservation object to sync to
4037  *
4038  * Copy GPU pages using the CP DMA engine (CIK+).
4039  * Used by the radeon ttm implementation to move pages if
4040  * registered as the asic copy callback.
4041  */
4042 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4043 				    uint64_t src_offset, uint64_t dst_offset,
4044 				    unsigned num_gpu_pages,
4045 				    struct reservation_object *resv)
4046 {
4047 	struct radeon_fence *fence;
4048 	struct radeon_sync sync;
4049 	int ring_index = rdev->asic->copy.blit_ring_index;
4050 	struct radeon_ring *ring = &rdev->ring[ring_index];
4051 	u32 size_in_bytes, cur_size_in_bytes, control;
4052 	int i, num_loops;
4053 	int r = 0;
4054 
4055 	radeon_sync_create(&sync);
4056 
4057 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4058 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4059 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4060 	if (r) {
4061 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4062 		radeon_sync_free(rdev, &sync, NULL);
4063 		return ERR_PTR(r);
4064 	}
4065 
4066 	radeon_sync_resv(rdev, &sync, resv, false);
4067 	radeon_sync_rings(rdev, &sync, ring->idx);
4068 
4069 	for (i = 0; i < num_loops; i++) {
4070 		cur_size_in_bytes = size_in_bytes;
4071 		if (cur_size_in_bytes > 0x1fffff)
4072 			cur_size_in_bytes = 0x1fffff;
4073 		size_in_bytes -= cur_size_in_bytes;
4074 		control = 0;
4075 		if (size_in_bytes == 0)
4076 			control |= PACKET3_DMA_DATA_CP_SYNC;
4077 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4078 		radeon_ring_write(ring, control);
4079 		radeon_ring_write(ring, lower_32_bits(src_offset));
4080 		radeon_ring_write(ring, upper_32_bits(src_offset));
4081 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4082 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4083 		radeon_ring_write(ring, cur_size_in_bytes);
4084 		src_offset += cur_size_in_bytes;
4085 		dst_offset += cur_size_in_bytes;
4086 	}
4087 
4088 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4089 	if (r) {
4090 		radeon_ring_unlock_undo(rdev, ring);
4091 		radeon_sync_free(rdev, &sync, NULL);
4092 		return ERR_PTR(r);
4093 	}
4094 
4095 	radeon_ring_unlock_commit(rdev, ring, false);
4096 	radeon_sync_free(rdev, &sync, fence);
4097 
4098 	return fence;
4099 }
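
/*
 * Worked sizing example (illustrative): each DMA_DATA packet moves at
 * most 0x1fffff bytes, so copying 1024 GPU pages (4 MiB) needs
 * DIV_ROUND_UP(0x400000, 0x1fffff) = 3 packets, and the ring lock above
 * reserves 3 * 7 + 18 = 39 dwords for them.
 */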
4100 
4101 /*
4102  * IB stuff
4103  */
4104 /**
4105  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ib: radeon indirect buffer object
4109  *
4110  * Emits a DE (drawing engine) or CE (constant engine) IB
4111  * on the gfx ring.  IBs are usually generated by userspace
4112  * acceleration drivers and submitted to the kernel for
4113  * scheduling on the ring.  This function schedules the IB
4114  * on the gfx ring for execution by the GPU.
4115  */
4116 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4117 {
4118 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4119 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4120 	u32 header, control = INDIRECT_BUFFER_VALID;
4121 
4122 	if (ib->is_const_ib) {
4123 		/* set switch buffer packet before const IB */
4124 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4125 		radeon_ring_write(ring, 0);
4126 
4127 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4128 	} else {
4129 		u32 next_rptr;
4130 		if (ring->rptr_save_reg) {
4131 			next_rptr = ring->wptr + 3 + 4;
4132 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4133 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4134 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4135 			radeon_ring_write(ring, next_rptr);
4136 		} else if (rdev->wb.enabled) {
4137 			next_rptr = ring->wptr + 5 + 4;
4138 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4139 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4140 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4141 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4142 			radeon_ring_write(ring, next_rptr);
4143 		}
4144 
4145 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4146 	}
4147 
4148 	control |= ib->length_dw | (vm_id << 24);
4149 
4150 	radeon_ring_write(ring, header);
4151 	radeon_ring_write(ring,
4152 #ifdef __BIG_ENDIAN
4153 			  (2 << 0) |
4154 #endif
4155 			  (ib->gpu_addr & 0xFFFFFFFC));
4156 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4157 	radeon_ring_write(ring, control);
4158 }
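
/*
 * For reference (illustrative): the control dword built above packs the
 * IB length in dwords into the low bits, the VM id into bits 31:24, and
 * INDIRECT_BUFFER_VALID; e.g. a 256-dword IB in vmid 1 yields
 * control = INDIRECT_BUFFER_VALID | 256 | (1 << 24).
 */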
4159 
4160 /**
4161  * cik_ib_test - basic gfx ring IB test
4162  *
4163  * @rdev: radeon_device pointer
4164  * @ring: radeon_ring structure holding ring information
4165  *
4166  * Allocate an IB and execute it on the gfx ring (CIK).
4167  * Provides a basic gfx ring test to verify that IBs are working.
4168  * Returns 0 on success, error on failure.
4169  */
4170 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4171 {
4172 	struct radeon_ib ib;
4173 	uint32_t scratch;
4174 	uint32_t tmp = 0;
4175 	unsigned i;
4176 	int r;
4177 
4178 	r = radeon_scratch_get(rdev, &scratch);
4179 	if (r) {
4180 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4181 		return r;
4182 	}
4183 	WREG32(scratch, 0xCAFEDEAD);
4184 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4185 	if (r) {
4186 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4187 		radeon_scratch_free(rdev, scratch);
4188 		return r;
4189 	}
4190 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4191 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4192 	ib.ptr[2] = 0xDEADBEEF;
4193 	ib.length_dw = 3;
4194 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4195 	if (r) {
4196 		radeon_scratch_free(rdev, scratch);
4197 		radeon_ib_free(rdev, &ib);
4198 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4199 		return r;
4200 	}
4201 	r = radeon_fence_wait(ib.fence, false);
4202 	if (r) {
4203 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4204 		radeon_scratch_free(rdev, scratch);
4205 		radeon_ib_free(rdev, &ib);
4206 		return r;
4207 	}
4208 	for (i = 0; i < rdev->usec_timeout; i++) {
4209 		tmp = RREG32(scratch);
4210 		if (tmp == 0xDEADBEEF)
4211 			break;
4212 		DRM_UDELAY(1);
4213 	}
4214 	if (i < rdev->usec_timeout) {
4215 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4216 	} else {
4217 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4218 			  scratch, tmp);
4219 		r = -EINVAL;
4220 	}
4221 	radeon_scratch_free(rdev, scratch);
4222 	radeon_ib_free(rdev, &ib);
4223 	return r;
4224 }
4225 
4226 /*
4227  * CP.
4228  * On CIK, gfx and compute now have independent command processors.
4229  *
4230  * GFX
4231  * Gfx consists of a single ring and can process both gfx jobs and
4232  * compute jobs.  The gfx CP consists of three microengines (ME):
4233  * PFP - Pre-Fetch Parser
4234  * ME - Micro Engine
4235  * CE - Constant Engine
4236  * The PFP and ME make up what is considered the Drawing Engine (DE).
4237  * The CE is an asynchronous engine used for updating buffer descriptors
4238  * used by the DE so that they can be loaded into cache in parallel
4239  * while the DE is processing state update packets.
4240  *
4241  * Compute
4242  * The compute CP consists of two microengines (ME):
4243  * MEC1 - Compute MicroEngine 1
4244  * MEC2 - Compute MicroEngine 2
4245  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4246  * The queues are exposed to userspace and are programmed directly
4247  * by the compute runtime.
4248  */
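
/*
 * Sketch (illustrative, matching the pattern used later in this file): a
 * compute queue is addressed by its (ME, pipe, queue) triple through
 * cik_srbm_select() before its CP_HQD_* registers may be touched:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
 *	... access CP_HQD_* registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */
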
4249 /**
4250  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the gfx MEs.
4256  */
4257 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4258 {
4259 	if (enable)
4260 		WREG32(CP_ME_CNTL, 0);
4261 	else {
4262 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4263 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4264 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4265 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4266 	}
4267 	udelay(50);
4268 }
4269 
4270 /**
4271  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4272  *
4273  * @rdev: radeon_device pointer
4274  *
4275  * Loads the gfx PFP, ME, and CE ucode.
4276  * Returns 0 for success, -EINVAL if the ucode is not available.
4277  */
4278 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4279 {
4280 	int i;
4281 
4282 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4283 		return -EINVAL;
4284 
4285 	cik_cp_gfx_enable(rdev, false);
4286 
4287 	if (rdev->new_fw) {
4288 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4289 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4290 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4291 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4292 		const struct gfx_firmware_header_v1_0 *me_hdr =
4293 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4294 		const __le32 *fw_data;
4295 		u32 fw_size;
4296 
4297 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4298 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4299 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4300 
4301 		/* PFP */
4302 		fw_data = (const __le32 *)
4303 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4304 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4305 		WREG32(CP_PFP_UCODE_ADDR, 0);
4306 		for (i = 0; i < fw_size; i++)
4307 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4308 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4309 
4310 		/* CE */
4311 		fw_data = (const __le32 *)
4312 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4313 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4314 		WREG32(CP_CE_UCODE_ADDR, 0);
4315 		for (i = 0; i < fw_size; i++)
4316 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4317 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4318 
4319 		/* ME */
4320 		fw_data = (const __le32 *)
4321 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4322 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4323 		WREG32(CP_ME_RAM_WADDR, 0);
4324 		for (i = 0; i < fw_size; i++)
4325 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4326 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4327 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4328 	} else {
4329 		const __be32 *fw_data;
4330 
4331 		/* PFP */
4332 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4333 		WREG32(CP_PFP_UCODE_ADDR, 0);
4334 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4335 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4336 		WREG32(CP_PFP_UCODE_ADDR, 0);
4337 
4338 		/* CE */
4339 		fw_data = (const __be32 *)rdev->ce_fw->data;
4340 		WREG32(CP_CE_UCODE_ADDR, 0);
4341 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4342 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4343 		WREG32(CP_CE_UCODE_ADDR, 0);
4344 
4345 		/* ME */
4346 		fw_data = (const __be32 *)rdev->me_fw->data;
4347 		WREG32(CP_ME_RAM_WADDR, 0);
4348 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4349 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4350 		WREG32(CP_ME_RAM_WADDR, 0);
4351 	}
4352 
4353 	return 0;
4354 }
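
/*
 * Note (illustrative): with the new firmware layout each blob carries a
 * gfx_firmware_header_v1_0, so the ucode payload is located and sized
 * from the header itself, e.g.:
 *
 *	fw_data = data + le32_to_cpu(hdr->header.ucode_array_offset_bytes);
 *	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; (in dwords)
 *
 * whereas the legacy big-endian blobs rely on the fixed CIK_*_UCODE_SIZE
 * dword counts.
 */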
4355 
4356 /**
4357  * cik_cp_gfx_start - start the gfx ring
4358  *
4359  * @rdev: radeon_device pointer
4360  *
4361  * Enables the ring and loads the clear state context and other
4362  * packets required to init the ring.
4363  * Returns 0 for success, error for failure.
4364  */
4365 static int cik_cp_gfx_start(struct radeon_device *rdev)
4366 {
4367 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4368 	int r, i;
4369 
4370 	/* init the CP */
4371 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4372 	WREG32(CP_ENDIAN_SWAP, 0);
4373 	WREG32(CP_DEVICE_ID, 1);
4374 
4375 	cik_cp_gfx_enable(rdev, true);
4376 
4377 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4378 	if (r) {
4379 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4380 		return r;
4381 	}
4382 
4383 	/* init the CE partitions.  CE only used for gfx on CIK */
4384 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4385 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4386 	radeon_ring_write(ring, 0x8000);
4387 	radeon_ring_write(ring, 0x8000);
4388 
4389 	/* setup clear context state */
4390 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4391 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4392 
4393 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4394 	radeon_ring_write(ring, 0x80000000);
4395 	radeon_ring_write(ring, 0x80000000);
4396 
4397 	for (i = 0; i < cik_default_size; i++)
4398 		radeon_ring_write(ring, cik_default_state[i]);
4399 
4400 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4401 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4402 
4403 	/* set clear context state */
4404 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4405 	radeon_ring_write(ring, 0);
4406 
4407 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4408 	radeon_ring_write(ring, 0x00000316);
4409 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4410 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4411 
4412 	radeon_ring_unlock_commit(rdev, ring, false);
4413 
4414 	return 0;
4415 }
4416 
4417 /**
4418  * cik_cp_gfx_fini - stop the gfx ring
4419  *
4420  * @rdev: radeon_device pointer
4421  *
4422  * Stop the gfx ring and tear down the driver ring
4423  * info.
4424  */
4425 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4426 {
4427 	cik_cp_gfx_enable(rdev, false);
4428 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4429 }
4430 
4431 /**
4432  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4433  *
4434  * @rdev: radeon_device pointer
4435  *
4436  * Program the location and size of the gfx ring buffer
4437  * and test it to make sure it's working.
4438  * Returns 0 for success, error for failure.
4439  */
4440 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4441 {
4442 	struct radeon_ring *ring;
4443 	u32 tmp;
4444 	u32 rb_bufsz;
4445 	u64 rb_addr;
4446 	int r;
4447 
4448 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4449 	if (rdev->family != CHIP_HAWAII)
4450 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4451 
4452 	/* Set the write pointer delay */
4453 	WREG32(CP_RB_WPTR_DELAY, 0);
4454 
4455 	/* set the RB to use vmid 0 */
4456 	WREG32(CP_RB_VMID, 0);
4457 
4458 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4459 
4460 	/* ring 0 - compute and gfx */
4461 	/* Set ring buffer size */
4462 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4463 	rb_bufsz = order_base_2(ring->ring_size / 8);
4464 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4465 #ifdef __BIG_ENDIAN
4466 	tmp |= BUF_SWAP_32BIT;
4467 #endif
4468 	WREG32(CP_RB0_CNTL, tmp);
4469 
4470 	/* Initialize the ring buffer's read and write pointers */
4471 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4472 	ring->wptr = 0;
4473 	WREG32(CP_RB0_WPTR, ring->wptr);
4474 
4475 	/* set the wb address whether it's enabled or not */
4476 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4477 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4478 
4479 	/* scratch register shadowing is no longer supported */
4480 	WREG32(SCRATCH_UMSK, 0);
4481 
4482 	if (!rdev->wb.enabled)
4483 		tmp |= RB_NO_UPDATE;
4484 
4485 	mdelay(1);
4486 	WREG32(CP_RB0_CNTL, tmp);
4487 
4488 	rb_addr = ring->gpu_addr >> 8;
4489 	WREG32(CP_RB0_BASE, rb_addr);
4490 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4491 
4492 	/* start the ring */
4493 	cik_cp_gfx_start(rdev);
4494 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4495 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4496 	if (r) {
4497 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4498 		return r;
4499 	}
4500 
4501 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4502 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4503 
4504 	return 0;
4505 }
4506 
4507 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4508 		     struct radeon_ring *ring)
4509 {
4510 	u32 rptr;
4511 
4512 	if (rdev->wb.enabled)
4513 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4514 	else
4515 		rptr = RREG32(CP_RB0_RPTR);
4516 
4517 	return rptr;
4518 }
4519 
4520 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4521 		     struct radeon_ring *ring)
4522 {
4523 	u32 wptr;
4524 
4525 	wptr = RREG32(CP_RB0_WPTR);
4526 
4527 	return wptr;
4528 }
4529 
4530 void cik_gfx_set_wptr(struct radeon_device *rdev,
4531 		      struct radeon_ring *ring)
4532 {
4533 	WREG32(CP_RB0_WPTR, ring->wptr);
4534 	(void)RREG32(CP_RB0_WPTR);
4535 }
4536 
4537 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4538 			 struct radeon_ring *ring)
4539 {
4540 	u32 rptr;
4541 
4542 	if (rdev->wb.enabled) {
4543 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4544 	} else {
4545 		mutex_lock(&rdev->srbm_mutex);
4546 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4547 		rptr = RREG32(CP_HQD_PQ_RPTR);
4548 		cik_srbm_select(rdev, 0, 0, 0, 0);
4549 		mutex_unlock(&rdev->srbm_mutex);
4550 	}
4551 
4552 	return rptr;
4553 }
4554 
4555 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4556 			 struct radeon_ring *ring)
4557 {
4558 	u32 wptr;
4559 
4560 	if (rdev->wb.enabled) {
4561 		/* XXX check if swapping is necessary on BE */
4562 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4563 	} else {
4564 		mutex_lock(&rdev->srbm_mutex);
4565 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4566 		wptr = RREG32(CP_HQD_PQ_WPTR);
4567 		cik_srbm_select(rdev, 0, 0, 0, 0);
4568 		mutex_unlock(&rdev->srbm_mutex);
4569 	}
4570 
4571 	return wptr;
4572 }
4573 
4574 void cik_compute_set_wptr(struct radeon_device *rdev,
4575 			  struct radeon_ring *ring)
4576 {
4577 	/* XXX check if swapping is necessary on BE */
4578 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4579 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4580 }
4581 
4582 static void cik_compute_stop(struct radeon_device *rdev,
4583 			     struct radeon_ring *ring)
4584 {
4585 	u32 j, tmp;
4586 
4587 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4588 	/* Disable wptr polling. */
4589 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4590 	tmp &= ~WPTR_POLL_EN;
4591 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4592 	/* Disable HQD. */
4593 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4594 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4595 		for (j = 0; j < rdev->usec_timeout; j++) {
4596 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4597 				break;
4598 			udelay(1);
4599 		}
4600 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4601 		WREG32(CP_HQD_PQ_RPTR, 0);
4602 		WREG32(CP_HQD_PQ_WPTR, 0);
4603 	}
4604 	cik_srbm_select(rdev, 0, 0, 0, 0);
4605 }
4606 
4607 /**
4608  * cik_cp_compute_enable - enable/disable the compute CP MEs
4609  *
4610  * @rdev: radeon_device pointer
4611  * @enable: enable or disable the MEs
4612  *
4613  * Halts or unhalts the compute MEs.
4614  */
4615 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4616 {
4617 	if (enable)
4618 		WREG32(CP_MEC_CNTL, 0);
4619 	else {
4620 		/*
4621 		 * To make hibernation reliable we need to clear compute ring
4622 		 * configuration before halting the compute ring.
4623 		 */
4624 		mutex_lock(&rdev->srbm_mutex);
4625 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4626 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4627 		mutex_unlock(&rdev->srbm_mutex);
4628 
4629 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4630 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4631 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4632 	}
4633 	udelay(50);
4634 }
4635 
4636 /**
4637  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4638  *
4639  * @rdev: radeon_device pointer
4640  *
4641  * Loads the compute MEC1&2 ucode.
4642  * Returns 0 for success, -EINVAL if the ucode is not available.
4643  */
4644 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4645 {
4646 	int i;
4647 
4648 	if (!rdev->mec_fw)
4649 		return -EINVAL;
4650 
4651 	cik_cp_compute_enable(rdev, false);
4652 
4653 	if (rdev->new_fw) {
4654 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4655 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4656 		const __le32 *fw_data;
4657 		u32 fw_size;
4658 
4659 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4660 
4661 		/* MEC1 */
4662 		fw_data = (const __le32 *)
4663 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4664 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4665 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4666 		for (i = 0; i < fw_size; i++)
4667 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4668 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4669 
4670 		/* MEC2 */
4671 		if (rdev->family == CHIP_KAVERI) {
4672 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4673 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4674 
4675 			fw_data = (const __le32 *)
4676 				(rdev->mec2_fw->data +
4677 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4678 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4679 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4680 			for (i = 0; i < fw_size; i++)
4681 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4682 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4683 		}
4684 	} else {
4685 		const __be32 *fw_data;
4686 
4687 		/* MEC1 */
4688 		fw_data = (const __be32 *)rdev->mec_fw->data;
4689 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4690 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4691 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4692 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4693 
4694 		if (rdev->family == CHIP_KAVERI) {
4695 			/* MEC2 */
4696 			fw_data = (const __be32 *)rdev->mec_fw->data;
4697 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4698 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4699 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4700 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4701 		}
4702 	}
4703 
4704 	return 0;
4705 }
4706 
4707 /**
4708  * cik_cp_compute_start - start the compute queues
4709  *
4710  * @rdev: radeon_device pointer
4711  *
4712  * Enable the compute queues.
4713  * Returns 0 for success, error for failure.
4714  */
4715 static int cik_cp_compute_start(struct radeon_device *rdev)
4716 {
4717 	cik_cp_compute_enable(rdev, true);
4718 
4719 	return 0;
4720 }
4721 
4722 /**
4723  * cik_cp_compute_fini - stop the compute queues
4724  *
4725  * @rdev: radeon_device pointer
4726  *
4727  * Stop the compute queues and tear down the driver queue
4728  * info.
4729  */
4730 static void cik_cp_compute_fini(struct radeon_device *rdev)
4731 {
4732 	int i, idx, r;
4733 
4734 	cik_cp_compute_enable(rdev, false);
4735 
4736 	for (i = 0; i < 2; i++) {
4737 		if (i == 0)
4738 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4739 		else
4740 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4741 
4742 		if (rdev->ring[idx].mqd_obj) {
4743 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4744 			if (unlikely(r != 0))
4745 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4746 
4747 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4748 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749 
4750 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4751 			rdev->ring[idx].mqd_obj = NULL;
4752 		}
4753 	}
4754 }
4755 
4756 static void cik_mec_fini(struct radeon_device *rdev)
4757 {
4758 	int r;
4759 
4760 	if (rdev->mec.hpd_eop_obj) {
4761 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4762 		if (unlikely(r != 0))
4763 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4764 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4765 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4766 
4767 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4768 		rdev->mec.hpd_eop_obj = NULL;
4769 	}
4770 }
4771 
4772 #define MEC_HPD_SIZE 2048
4773 
4774 static int cik_mec_init(struct radeon_device *rdev)
4775 {
4776 	int r;
4777 	u32 *hpd;
4778 
4779 	/*
4780 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4781 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4782 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4783 	 * be handled by KFD
4784 	 */
4785 	rdev->mec.num_mec = 1;
4786 	rdev->mec.num_pipe = 1;
4787 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4788 
4789 	if (rdev->mec.hpd_eop_obj == NULL) {
4790 		r = radeon_bo_create(rdev,
4791 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4792 				     PAGE_SIZE, true,
4793 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4794 				     &rdev->mec.hpd_eop_obj);
4795 		if (r) {
4796 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4797 			return r;
4798 		}
4799 	}
4800 
4801 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4802 	if (unlikely(r != 0)) {
4803 		cik_mec_fini(rdev);
4804 		return r;
4805 	}
4806 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4807 			  &rdev->mec.hpd_eop_gpu_addr);
4808 	if (r) {
4809 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4810 		cik_mec_fini(rdev);
4811 		return r;
4812 	}
4813 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4814 	if (r) {
4815 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4816 		cik_mec_fini(rdev);
4817 		return r;
4818 	}
4819 
4820 	/* clear memory.  Not sure if this is required or not */
4821 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4822 
4823 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4824 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4825 
4826 	return 0;
4827 }
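
/*
 * Worked sizing example (illustrative): with num_mec = 1 and num_pipe = 1
 * as set above, the HPD EOP buffer is 1 * 1 * MEC_HPD_SIZE * 2 =
 * 4096 bytes, i.e. exactly one 4 KiB page.
 */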
4828 
4829 struct hqd_registers
4830 {
4831 	u32 cp_mqd_base_addr;
4832 	u32 cp_mqd_base_addr_hi;
4833 	u32 cp_hqd_active;
4834 	u32 cp_hqd_vmid;
4835 	u32 cp_hqd_persistent_state;
4836 	u32 cp_hqd_pipe_priority;
4837 	u32 cp_hqd_queue_priority;
4838 	u32 cp_hqd_quantum;
4839 	u32 cp_hqd_pq_base;
4840 	u32 cp_hqd_pq_base_hi;
4841 	u32 cp_hqd_pq_rptr;
4842 	u32 cp_hqd_pq_rptr_report_addr;
4843 	u32 cp_hqd_pq_rptr_report_addr_hi;
4844 	u32 cp_hqd_pq_wptr_poll_addr;
4845 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4846 	u32 cp_hqd_pq_doorbell_control;
4847 	u32 cp_hqd_pq_wptr;
4848 	u32 cp_hqd_pq_control;
4849 	u32 cp_hqd_ib_base_addr;
4850 	u32 cp_hqd_ib_base_addr_hi;
4851 	u32 cp_hqd_ib_rptr;
4852 	u32 cp_hqd_ib_control;
4853 	u32 cp_hqd_iq_timer;
4854 	u32 cp_hqd_iq_rptr;
4855 	u32 cp_hqd_dequeue_request;
4856 	u32 cp_hqd_dma_offload;
4857 	u32 cp_hqd_sema_cmd;
4858 	u32 cp_hqd_msg_type;
4859 	u32 cp_hqd_atomic0_preop_lo;
4860 	u32 cp_hqd_atomic0_preop_hi;
4861 	u32 cp_hqd_atomic1_preop_lo;
4862 	u32 cp_hqd_atomic1_preop_hi;
4863 	u32 cp_hqd_hq_scheduler0;
4864 	u32 cp_hqd_hq_scheduler1;
4865 	u32 cp_mqd_control;
4866 };
4867 
4868 struct bonaire_mqd
4869 {
4870 	u32 header;
4871 	u32 dispatch_initiator;
4872 	u32 dimensions[3];
4873 	u32 start_idx[3];
4874 	u32 num_threads[3];
4875 	u32 pipeline_stat_enable;
4876 	u32 perf_counter_enable;
4877 	u32 pgm[2];
4878 	u32 tba[2];
4879 	u32 tma[2];
4880 	u32 pgm_rsrc[2];
4881 	u32 vmid;
4882 	u32 resource_limits;
4883 	u32 static_thread_mgmt01[2];
4884 	u32 tmp_ring_size;
4885 	u32 static_thread_mgmt23[2];
4886 	u32 restart[3];
4887 	u32 thread_trace_enable;
4888 	u32 reserved1;
4889 	u32 user_data[16];
4890 	u32 vgtcs_invoke_count[2];
4891 	struct hqd_registers queue_state;
4892 	u32 dequeue_cntr;
4893 	u32 interrupt_queue[64];
4894 };
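/*
 * Editor's note: the queue_state member mirrors the CP_HQD_* register file
 * one-for-one, so the CP can snapshot a queue into the MQD and restore it
 * later; cik_cp_compute_resume() below fills these fields and writes them
 * through to the registers while the queue's SRBM bank is selected.
 */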
4895 
4896 /**
4897  * cik_cp_compute_resume - setup the compute queue registers
4898  *
4899  * @rdev: radeon_device pointer
4900  *
4901  * Program the compute queues and test them to make sure they
4902  * are working.
4903  * Returns 0 for success, error for failure.
4904  */
4905 static int cik_cp_compute_resume(struct radeon_device *rdev)
4906 {
4907 	int r, i, j, idx;
4908 	u32 tmp;
4909 	bool use_doorbell = true;
4910 	u64 hqd_gpu_addr;
4911 	u64 mqd_gpu_addr;
4912 	u64 eop_gpu_addr;
4913 	u64 wb_gpu_addr;
4914 	u32 *buf;
4915 	struct bonaire_mqd *mqd;
4916 
4917 	r = cik_cp_compute_start(rdev);
4918 	if (r)
4919 		return r;
4920 
4921 	/* fix up chicken bits */
4922 	tmp = RREG32(CP_CPF_DEBUG);
4923 	tmp |= (1 << 23);
4924 	WREG32(CP_CPF_DEBUG, tmp);
4925 
4926 	/* init the pipes */
4927 	mutex_lock(&rdev->srbm_mutex);
4928 
4929 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4930 
4931 	cik_srbm_select(rdev, 0, 0, 0, 0);
4932 
4933 	/* write the EOP addr */
4934 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4935 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4936 
4937 	/* set the VMID assigned */
4938 	WREG32(CP_HPD_EOP_VMID, 0);
4939 
4940 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4941 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4942 	tmp &= ~EOP_SIZE_MASK;
4943 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4944 	WREG32(CP_HPD_EOP_CONTROL, tmp);
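	/*
	 * Worked example (editorial): MEC_HPD_SIZE = 2048 bytes = 512 dwords,
	 * so order_base_2(2048 / 8) = order_base_2(256) = 8, and the hardware
	 * decodes 2^(8+1) = 512 dwords, matching the buffer size.
	 */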
4945 
4946 	mutex_unlock(&rdev->srbm_mutex);
4947 
4948 	/* init the queues.  Just two for now. */
4949 	for (i = 0; i < 2; i++) {
4950 		if (i == 0)
4951 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4952 		else
4953 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4954 
4955 		if (rdev->ring[idx].mqd_obj == NULL) {
4956 			r = radeon_bo_create(rdev,
4957 					     sizeof(struct bonaire_mqd),
4958 					     PAGE_SIZE, true,
4959 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4960 					     NULL, &rdev->ring[idx].mqd_obj);
4961 			if (r) {
4962 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4963 				return r;
4964 			}
4965 		}
4966 
4967 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4968 		if (unlikely(r != 0)) {
4969 			cik_cp_compute_fini(rdev);
4970 			return r;
4971 		}
4972 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4973 				  &mqd_gpu_addr);
4974 		if (r) {
4975 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4976 			cik_cp_compute_fini(rdev);
4977 			return r;
4978 		}
4979 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4980 		if (r) {
4981 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4982 			cik_cp_compute_fini(rdev);
4983 			return r;
4984 		}
4985 
4986 		/* init the mqd struct */
4987 		memset(buf, 0, sizeof(struct bonaire_mqd));
4988 
4989 		mqd = (struct bonaire_mqd *)buf;
4990 		mqd->header = 0xC0310800;
4991 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4992 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4993 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4994 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4995 
4996 		mutex_lock(&rdev->srbm_mutex);
4997 		cik_srbm_select(rdev, rdev->ring[idx].me,
4998 				rdev->ring[idx].pipe,
4999 				rdev->ring[idx].queue, 0);
5000 
5001 		/* disable wptr polling */
5002 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5003 		tmp &= ~WPTR_POLL_EN;
5004 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5005 
5006 		/* enable doorbell? */
5007 		mqd->queue_state.cp_hqd_pq_doorbell_control =
5008 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5009 		if (use_doorbell)
5010 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5011 		else
5012 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5013 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5014 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5015 
5016 		/* disable the queue if it's active */
5017 		mqd->queue_state.cp_hqd_dequeue_request = 0;
5018 		mqd->queue_state.cp_hqd_pq_rptr = 0;
5019 		mqd->queue_state.cp_hqd_pq_wptr = 0;
5020 		if (RREG32(CP_HQD_ACTIVE) & 1) {
5021 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5022 			for (j = 0; j < rdev->usec_timeout; j++) {
5023 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5024 					break;
5025 				udelay(1);
5026 			}
5027 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5028 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5029 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5030 		}
5031 
5032 		/* set the pointer to the MQD */
5033 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5034 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5035 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5036 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5037 		/* set MQD vmid to 0 */
5038 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5039 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5040 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5041 
5042 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5043 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5044 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5045 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5046 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5047 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5048 
5049 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5050 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5051 		mqd->queue_state.cp_hqd_pq_control &=
5052 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5053 
5054 		mqd->queue_state.cp_hqd_pq_control |=
5055 			order_base_2(rdev->ring[idx].ring_size / 8);
5056 		mqd->queue_state.cp_hqd_pq_control |=
5057 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5058 #ifdef __BIG_ENDIAN
5059 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5060 #endif
5061 		mqd->queue_state.cp_hqd_pq_control &=
5062 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5063 		mqd->queue_state.cp_hqd_pq_control |=
5064 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5065 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
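		/*
		 * Worked example (editorial, assuming a 256 KiB compute ring):
		 * QUEUE_SIZE = order_base_2(262144 / 8) = 15, and
		 * RPTR_BLOCK_SIZE = order_base_2(4096 / 8) = 9 for the usual
		 * 4 KiB RADEON_GPU_PAGE_SIZE.
		 */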
5066 
5067 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5068 		if (i == 0)
5069 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5070 		else
5071 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5072 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5073 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5074 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5075 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5076 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5077 
5078 		/* set the wb address whether it's enabled or not */
5079 		if (i == 0)
5080 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5081 		else
5082 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5083 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5084 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5085 			upper_32_bits(wb_gpu_addr) & 0xffff;
5086 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5087 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5088 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5089 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5090 
5091 		/* enable the doorbell if requested */
5092 		if (use_doorbell) {
5093 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5094 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5095 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5096 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5097 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5098 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5099 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5100 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5101 
5102 		} else {
5103 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5104 		}
5105 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5106 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
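		/*
		 * Editor's note: with DOORBELL_EN set, submissions can bump
		 * the write pointer through the doorbell aperture at
		 * doorbell_index instead of a CP_HQD_PQ_WPTR MMIO write; the
		 * wptr polling address programmed above is only consulted
		 * when WPTR_POLL_EN is set, which this function leaves clear.
		 */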
5107 
5108 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5109 		rdev->ring[idx].wptr = 0;
5110 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5111 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5112 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5113 
5114 		/* set the vmid for the queue */
5115 		mqd->queue_state.cp_hqd_vmid = 0;
5116 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5117 
5118 		/* activate the queue */
5119 		mqd->queue_state.cp_hqd_active = 1;
5120 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5121 
5122 		cik_srbm_select(rdev, 0, 0, 0, 0);
5123 		mutex_unlock(&rdev->srbm_mutex);
5124 
5125 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5126 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5127 
5128 		rdev->ring[idx].ready = true;
5129 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5130 		if (r)
5131 			rdev->ring[idx].ready = false;
5132 	}
5133 
5134 	return 0;
5135 }
5136 
5137 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5138 {
5139 	cik_cp_gfx_enable(rdev, enable);
5140 	cik_cp_compute_enable(rdev, enable);
5141 }
5142 
5143 static int cik_cp_load_microcode(struct radeon_device *rdev)
5144 {
5145 	int r;
5146 
5147 	r = cik_cp_gfx_load_microcode(rdev);
5148 	if (r)
5149 		return r;
5150 	r = cik_cp_compute_load_microcode(rdev);
5151 	if (r)
5152 		return r;
5153 
5154 	return 0;
5155 }
5156 
5157 static void cik_cp_fini(struct radeon_device *rdev)
5158 {
5159 	cik_cp_gfx_fini(rdev);
5160 	cik_cp_compute_fini(rdev);
5161 }
5162 
5163 static int cik_cp_resume(struct radeon_device *rdev)
5164 {
5165 	int r;
5166 
5167 	cik_enable_gui_idle_interrupt(rdev, false);
5168 
5169 	r = cik_cp_load_microcode(rdev);
5170 	if (r)
5171 		return r;
5172 
5173 	r = cik_cp_gfx_resume(rdev);
5174 	if (r)
5175 		return r;
5176 	r = cik_cp_compute_resume(rdev);
5177 	if (r)
5178 		return r;
5179 
5180 	cik_enable_gui_idle_interrupt(rdev, true);
5181 
5182 	return 0;
5183 }
5184 
5185 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5186 {
5187 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5188 		RREG32(GRBM_STATUS));
5189 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5190 		RREG32(GRBM_STATUS2));
5191 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5192 		RREG32(GRBM_STATUS_SE0));
5193 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5194 		RREG32(GRBM_STATUS_SE1));
5195 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5196 		RREG32(GRBM_STATUS_SE2));
5197 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5198 		RREG32(GRBM_STATUS_SE3));
5199 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5200 		RREG32(SRBM_STATUS));
5201 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5202 		RREG32(SRBM_STATUS2));
5203 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5204 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5205 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5206 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5207 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5208 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5209 		 RREG32(CP_STALLED_STAT1));
5210 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5211 		 RREG32(CP_STALLED_STAT2));
5212 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5213 		 RREG32(CP_STALLED_STAT3));
5214 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5215 		 RREG32(CP_CPF_BUSY_STAT));
5216 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5217 		 RREG32(CP_CPF_STALLED_STAT1));
5218 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5219 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5220 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5221 		 RREG32(CP_CPC_STALLED_STAT1));
5222 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5223 }
5224 
5225 /**
5226  * cik_gpu_check_soft_reset - check which blocks are busy
5227  *
5228  * @rdev: radeon_device pointer
5229  *
5230  * Check which blocks are busy and return the relevant reset
5231  * mask to be used by cik_gpu_soft_reset().
5232  * Returns a mask of the blocks to be reset.
5233  */
5234 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5235 {
5236 	u32 reset_mask = 0;
5237 	u32 tmp;
5238 
5239 	/* GRBM_STATUS */
5240 	tmp = RREG32(GRBM_STATUS);
5241 	if (tmp & (PA_BUSY | SC_BUSY |
5242 		   BCI_BUSY | SX_BUSY |
5243 		   TA_BUSY | VGT_BUSY |
5244 		   DB_BUSY | CB_BUSY |
5245 		   GDS_BUSY | SPI_BUSY |
5246 		   IA_BUSY | IA_BUSY_NO_DMA))
5247 		reset_mask |= RADEON_RESET_GFX;
5248 
5249 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5250 		reset_mask |= RADEON_RESET_CP;
5251 
5252 	/* GRBM_STATUS2 */
5253 	tmp = RREG32(GRBM_STATUS2);
5254 	if (tmp & RLC_BUSY)
5255 		reset_mask |= RADEON_RESET_RLC;
5256 
5257 	/* SDMA0_STATUS_REG */
5258 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5259 	if (!(tmp & SDMA_IDLE))
5260 		reset_mask |= RADEON_RESET_DMA;
5261 
5262 	/* SDMA1_STATUS_REG */
5263 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5264 	if (!(tmp & SDMA_IDLE))
5265 		reset_mask |= RADEON_RESET_DMA1;
5266 
5267 	/* SRBM_STATUS2 */
5268 	tmp = RREG32(SRBM_STATUS2);
5269 	if (tmp & SDMA_BUSY)
5270 		reset_mask |= RADEON_RESET_DMA;
5271 
5272 	if (tmp & SDMA1_BUSY)
5273 		reset_mask |= RADEON_RESET_DMA1;
5274 
5275 	/* SRBM_STATUS */
5276 	tmp = RREG32(SRBM_STATUS);
5277 
5278 	if (tmp & IH_BUSY)
5279 		reset_mask |= RADEON_RESET_IH;
5280 
5281 	if (tmp & SEM_BUSY)
5282 		reset_mask |= RADEON_RESET_SEM;
5283 
5284 	if (tmp & GRBM_RQ_PENDING)
5285 		reset_mask |= RADEON_RESET_GRBM;
5286 
5287 	if (tmp & VMC_BUSY)
5288 		reset_mask |= RADEON_RESET_VMC;
5289 
5290 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5291 		   MCC_BUSY | MCD_BUSY))
5292 		reset_mask |= RADEON_RESET_MC;
5293 
5294 	if (evergreen_is_display_hung(rdev))
5295 		reset_mask |= RADEON_RESET_DISPLAY;
5296 
5297 	/* Skip MC reset as it's most likely not hung, just busy */
5298 	if (reset_mask & RADEON_RESET_MC) {
5299 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5300 		reset_mask &= ~RADEON_RESET_MC;
5301 	}
5302 
5303 	return reset_mask;
5304 }
5305 
5306 /**
5307  * cik_gpu_soft_reset - soft reset GPU
5308  *
5309  * @rdev: radeon_device pointer
5310  * @reset_mask: mask of which blocks to reset
5311  *
5312  * Soft reset the blocks specified in @reset_mask.
5313  */
5314 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5315 {
5316 	struct evergreen_mc_save save;
5317 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5318 	u32 tmp;
5319 
5320 	if (reset_mask == 0)
5321 		return;
5322 
5323 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5324 
5325 	cik_print_gpu_status_regs(rdev);
5326 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5327 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5328 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5329 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5330 
5331 	/* disable CG/PG */
5332 	cik_fini_pg(rdev);
5333 	cik_fini_cg(rdev);
5334 
5335 	/* stop the rlc */
5336 	cik_rlc_stop(rdev);
5337 
5338 	/* Disable GFX parsing/prefetching */
5339 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5340 
5341 	/* Disable MEC parsing/prefetching */
5342 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5343 
5344 	if (reset_mask & RADEON_RESET_DMA) {
5345 		/* sdma0 */
5346 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5347 		tmp |= SDMA_HALT;
5348 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5349 	}
5350 	if (reset_mask & RADEON_RESET_DMA1) {
5351 		/* sdma1 */
5352 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5353 		tmp |= SDMA_HALT;
5354 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5355 	}
5356 
5357 	evergreen_mc_stop(rdev, &save);
5358 	if (evergreen_mc_wait_for_idle(rdev)) {
5359 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5360 	}
5361 
5362 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5363 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5364 
5365 	if (reset_mask & RADEON_RESET_CP) {
5366 		grbm_soft_reset |= SOFT_RESET_CP;
5367 
5368 		srbm_soft_reset |= SOFT_RESET_GRBM;
5369 	}
5370 
5371 	if (reset_mask & RADEON_RESET_DMA)
5372 		srbm_soft_reset |= SOFT_RESET_SDMA;
5373 
5374 	if (reset_mask & RADEON_RESET_DMA1)
5375 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5376 
5377 	if (reset_mask & RADEON_RESET_DISPLAY)
5378 		srbm_soft_reset |= SOFT_RESET_DC;
5379 
5380 	if (reset_mask & RADEON_RESET_RLC)
5381 		grbm_soft_reset |= SOFT_RESET_RLC;
5382 
5383 	if (reset_mask & RADEON_RESET_SEM)
5384 		srbm_soft_reset |= SOFT_RESET_SEM;
5385 
5386 	if (reset_mask & RADEON_RESET_IH)
5387 		srbm_soft_reset |= SOFT_RESET_IH;
5388 
5389 	if (reset_mask & RADEON_RESET_GRBM)
5390 		srbm_soft_reset |= SOFT_RESET_GRBM;
5391 
5392 	if (reset_mask & RADEON_RESET_VMC)
5393 		srbm_soft_reset |= SOFT_RESET_VMC;
5394 
5395 	if (!(rdev->flags & RADEON_IS_IGP)) {
5396 		if (reset_mask & RADEON_RESET_MC)
5397 			srbm_soft_reset |= SOFT_RESET_MC;
5398 	}
5399 
5400 	if (grbm_soft_reset) {
5401 		tmp = RREG32(GRBM_SOFT_RESET);
5402 		tmp |= grbm_soft_reset;
5403 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5404 		WREG32(GRBM_SOFT_RESET, tmp);
5405 		tmp = RREG32(GRBM_SOFT_RESET);
5406 
5407 		udelay(50);
5408 
5409 		tmp &= ~grbm_soft_reset;
5410 		WREG32(GRBM_SOFT_RESET, tmp);
5411 		tmp = RREG32(GRBM_SOFT_RESET);
5412 	}
5413 
5414 	if (srbm_soft_reset) {
5415 		tmp = RREG32(SRBM_SOFT_RESET);
5416 		tmp |= srbm_soft_reset;
5417 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5418 		WREG32(SRBM_SOFT_RESET, tmp);
5419 		tmp = RREG32(SRBM_SOFT_RESET);
5420 
5421 		udelay(50);
5422 
5423 		tmp &= ~srbm_soft_reset;
5424 		WREG32(SRBM_SOFT_RESET, tmp);
5425 		tmp = RREG32(SRBM_SOFT_RESET);
5426 	}
5427 
5428 	/* Wait a little for things to settle down */
5429 	udelay(50);
5430 
5431 	evergreen_mc_resume(rdev, &save);
5432 	udelay(50);
5433 
5434 	cik_print_gpu_status_regs(rdev);
5435 }
5436 
5437 struct kv_reset_save_regs {
5438 	u32 gmcon_reng_execute;
5439 	u32 gmcon_misc;
5440 	u32 gmcon_misc3;
5441 };
5442 
5443 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5444 				   struct kv_reset_save_regs *save)
5445 {
5446 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5447 	save->gmcon_misc = RREG32(GMCON_MISC);
5448 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5449 
5450 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5451 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5452 						STCTRL_STUTTER_EN));
5453 }
5454 
5455 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5456 				      struct kv_reset_save_regs *save)
5457 {
5458 	int i;
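	/*
	 * Editor's note on the sequence below: each GMCON_PGFSM_CONFIG write
	 * appears to select one memory power-gating FSM, the paired
	 * GMCON_PGFSM_WRITE values feed it state, and the runs of five dummy
	 * writes act as settle time.  The magic numbers are undocumented;
	 * this reading is an inference, not vendor documentation.
	 */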
5459 
5460 	WREG32(GMCON_PGFSM_WRITE, 0);
5461 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5462 
5463 	for (i = 0; i < 5; i++)
5464 		WREG32(GMCON_PGFSM_WRITE, 0);
5465 
5466 	WREG32(GMCON_PGFSM_WRITE, 0);
5467 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5468 
5469 	for (i = 0; i < 5; i++)
5470 		WREG32(GMCON_PGFSM_WRITE, 0);
5471 
5472 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5473 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5474 
5475 	for (i = 0; i < 5; i++)
5476 		WREG32(GMCON_PGFSM_WRITE, 0);
5477 
5478 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5479 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5480 
5481 	for (i = 0; i < 5; i++)
5482 		WREG32(GMCON_PGFSM_WRITE, 0);
5483 
5484 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5485 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5486 
5487 	for (i = 0; i < 5; i++)
5488 		WREG32(GMCON_PGFSM_WRITE, 0);
5489 
5490 	WREG32(GMCON_PGFSM_WRITE, 0);
5491 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5492 
5493 	for (i = 0; i < 5; i++)
5494 		WREG32(GMCON_PGFSM_WRITE, 0);
5495 
5496 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5497 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5498 
5499 	for (i = 0; i < 5; i++)
5500 		WREG32(GMCON_PGFSM_WRITE, 0);
5501 
5502 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5503 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5504 
5505 	for (i = 0; i < 5; i++)
5506 		WREG32(GMCON_PGFSM_WRITE, 0);
5507 
5508 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5509 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5510 
5511 	for (i = 0; i < 5; i++)
5512 		WREG32(GMCON_PGFSM_WRITE, 0);
5513 
5514 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5515 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5516 
5517 	for (i = 0; i < 5; i++)
5518 		WREG32(GMCON_PGFSM_WRITE, 0);
5519 
5520 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5521 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5522 
5523 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5524 	WREG32(GMCON_MISC, save->gmcon_misc);
5525 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5526 }
5527 
5528 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5529 {
5530 	struct evergreen_mc_save save;
5531 	struct kv_reset_save_regs kv_save = { 0 };
5532 	u32 tmp, i;
5533 
5534 	dev_info(rdev->dev, "GPU pci config reset\n");
5535 
5536 	/* disable dpm? */
5537 
5538 	/* disable cg/pg */
5539 	cik_fini_pg(rdev);
5540 	cik_fini_cg(rdev);
5541 
5542 	/* Disable GFX parsing/prefetching */
5543 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5544 
5545 	/* Disable MEC parsing/prefetching */
5546 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5547 
5548 	/* sdma0 */
5549 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5550 	tmp |= SDMA_HALT;
5551 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5552 	/* sdma1 */
5553 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5554 	tmp |= SDMA_HALT;
5555 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5556 	/* XXX other engines? */
5557 
5558 	/* halt the rlc, disable cp internal ints */
5559 	cik_rlc_stop(rdev);
5560 
5561 	udelay(50);
5562 
5563 	/* disable mem access */
5564 	evergreen_mc_stop(rdev, &save);
5565 	if (evergreen_mc_wait_for_idle(rdev)) {
5566 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5567 	}
5568 
5569 	if (rdev->flags & RADEON_IS_IGP)
5570 		kv_save_regs_for_reset(rdev, &kv_save);
5571 
5572 	/* disable BM */
5573 	pci_clear_master(rdev->pdev);
5574 	/* reset */
5575 	radeon_pci_config_reset(rdev);
5576 
5577 	udelay(100);
5578 
5579 	/* wait for asic to come out of reset */
5580 	for (i = 0; i < rdev->usec_timeout; i++) {
5581 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5582 			break;
5583 		udelay(1);
5584 	}
5585 
5586 	/* does asic init need to be run first??? */
5587 	if (rdev->flags & RADEON_IS_IGP)
5588 		kv_restore_regs_for_reset(rdev, &kv_save);
5589 }
5590 
5591 /**
5592  * cik_asic_reset - soft reset GPU
5593  *
5594  * @rdev: radeon_device pointer
5595  *
5596  * Look up which blocks are hung and attempt
5597  * to reset them.
5598  * Returns 0 for success.
5599  */
5600 int cik_asic_reset(struct radeon_device *rdev)
5601 {
5602 	u32 reset_mask;
5603 
5604 	reset_mask = cik_gpu_check_soft_reset(rdev);
5605 
5606 	if (reset_mask)
5607 		r600_set_bios_scratch_engine_hung(rdev, true);
5608 
5609 	/* try soft reset */
5610 	cik_gpu_soft_reset(rdev, reset_mask);
5611 
5612 	reset_mask = cik_gpu_check_soft_reset(rdev);
5613 
5614 	/* try pci config reset */
5615 	if (reset_mask && radeon_hard_reset)
5616 		cik_gpu_pci_config_reset(rdev);
5617 
5618 	reset_mask = cik_gpu_check_soft_reset(rdev);
5619 
5620 	if (!reset_mask)
5621 		r600_set_bios_scratch_engine_hung(rdev, false);
5622 
5623 	return 0;
5624 }
5625 
5626 /**
5627  * cik_gfx_is_lockup - check if the 3D engine is locked up
5628  *
5629  * @rdev: radeon_device pointer
5630  * @ring: radeon_ring structure holding ring information
5631  *
5632  * Check if the 3D engine is locked up (CIK).
5633  * Returns true if the engine is locked, false if not.
5634  */
5635 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5636 {
5637 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5638 
5639 	if (!(reset_mask & (RADEON_RESET_GFX |
5640 			    RADEON_RESET_COMPUTE |
5641 			    RADEON_RESET_CP))) {
5642 		radeon_ring_lockup_update(rdev, ring);
5643 		return false;
5644 	}
5645 	return radeon_ring_test_lockup(rdev, ring);
5646 }
5647 
5648 /* MC */
5649 /**
5650  * cik_mc_program - program the GPU memory controller
5651  *
5652  * @rdev: radeon_device pointer
5653  *
5654  * Set the location of vram, gart, and AGP in the GPU's
5655  * physical address space (CIK).
5656  */
5657 static void cik_mc_program(struct radeon_device *rdev)
5658 {
5659 	struct evergreen_mc_save save;
5660 	u32 tmp;
5661 	int i, j;
5662 
5663 	/* Initialize HDP */
5664 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5665 		WREG32((0x2c14 + j), 0x00000000);
5666 		WREG32((0x2c18 + j), 0x00000000);
5667 		WREG32((0x2c1c + j), 0x00000000);
5668 		WREG32((0x2c20 + j), 0x00000000);
5669 		WREG32((0x2c24 + j), 0x00000000);
5670 	}
5671 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5672 
5673 	evergreen_mc_stop(rdev, &save);
5674 	if (radeon_mc_wait_for_idle(rdev)) {
5675 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5676 	}
5677 	/* Lock out access through the VGA aperture */
5678 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5679 	/* Update configuration */
5680 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5681 	       rdev->mc.vram_start >> 12);
5682 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5683 	       rdev->mc.vram_end >> 12);
5684 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5685 	       rdev->vram_scratch.gpu_addr >> 12);
5686 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5687 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5688 	WREG32(MC_VM_FB_LOCATION, tmp);
5689 	/* XXX double check these! */
5690 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5691 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5692 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5693 	WREG32(MC_VM_AGP_BASE, 0);
5694 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5695 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5696 	if (radeon_mc_wait_for_idle(rdev)) {
5697 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5698 	}
5699 	evergreen_mc_resume(rdev, &save);
5700 	/* we need to own VRAM, so turn off the VGA renderer here
5701 	 * to stop it from overwriting our objects */
5702 	rv515_vga_render_disable(rdev);
5703 }
5704 
5705 /**
5706  * cik_mc_init - initialize the memory controller driver params
5707  *
5708  * @rdev: radeon_device pointer
5709  *
5710  * Look up the amount of vram, vram width, and decide how to place
5711  * vram and gart within the GPU's physical address space (CIK).
5712  * Returns 0 for success.
5713  */
5714 static int cik_mc_init(struct radeon_device *rdev)
5715 {
5716 	u32 tmp;
5717 	int chansize, numchan;
5718 
5719 	/* Get VRAM information */
5720 	rdev->mc.vram_is_ddr = true;
5721 	tmp = RREG32(MC_ARB_RAMCFG);
5722 	if (tmp & CHANSIZE_MASK) {
5723 		chansize = 64;
5724 	} else {
5725 		chansize = 32;
5726 	}
5727 	tmp = RREG32(MC_SHARED_CHMAP);
5728 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5729 	case 0:
5730 	default:
5731 		numchan = 1;
5732 		break;
5733 	case 1:
5734 		numchan = 2;
5735 		break;
5736 	case 2:
5737 		numchan = 4;
5738 		break;
5739 	case 3:
5740 		numchan = 8;
5741 		break;
5742 	case 4:
5743 		numchan = 3;
5744 		break;
5745 	case 5:
5746 		numchan = 6;
5747 		break;
5748 	case 6:
5749 		numchan = 10;
5750 		break;
5751 	case 7:
5752 		numchan = 12;
5753 		break;
5754 	case 8:
5755 		numchan = 16;
5756 		break;
5757 	}
5758 	rdev->mc.vram_width = numchan * chansize;
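	/*
	 * Worked example (editorial): NOOFCHAN = 3 decodes to 8 channels, so
	 * with 64-bit channels vram_width = 8 * 64 = 512 bits, as on Hawaii.
	 */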
5759 	/* Could the aperture size report 0? */
5760 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5761 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5762 	/* size in MB on CIK */
5763 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5764 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5765 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5766 	si_vram_gtt_location(rdev, &rdev->mc);
5767 	radeon_update_bandwidth_info(rdev);
5768 
5769 	return 0;
5770 }
5771 
5772 /*
5773  * GART
5774  * VMID 0 is the physical GPU addresses as used by the kernel.
5775  * VMIDs 1-15 are used for userspace clients and are handled
5776  * by the radeon vm/hsa code.
5777  */
5778 /**
5779  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5780  *
5781  * @rdev: radeon_device pointer
5782  *
5783  * Flush the TLB for the VMID 0 page table (CIK).
5784  */
5785 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5786 {
5787 	/* flush hdp cache */
5788 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5789 
5790 	/* bits 0-15 are the VM contexts 0-15 */
5791 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5792 }
5793 
5794 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5795 {
5796 	int i;
5797 	uint32_t sh_mem_bases, sh_mem_config;
5798 
5799 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5800 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5801 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
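	/*
	 * Editor's note: SH_MEM_BASES packs the private and shared aperture
	 * bases into the low and high 16 bits; writing 0x6000 to both fields
	 * here is presumably the aperture layout amdkfd expects for the
	 * compute VMIDs 8-15 initialized below.
	 */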
5802 
5803 	mutex_lock(&rdev->srbm_mutex);
5804 	for (i = 8; i < 16; i++) {
5805 		cik_srbm_select(rdev, 0, 0, 0, i);
5806 		/* CP and shaders */
5807 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5808 		WREG32(SH_MEM_APE1_BASE, 1);
5809 		WREG32(SH_MEM_APE1_LIMIT, 0);
5810 		WREG32(SH_MEM_BASES, sh_mem_bases);
5811 	}
5812 	cik_srbm_select(rdev, 0, 0, 0, 0);
5813 	mutex_unlock(&rdev->srbm_mutex);
5814 }
5815 
5816 /**
5817  * cik_pcie_gart_enable - gart enable
5818  *
5819  * @rdev: radeon_device pointer
5820  *
5821  * This sets up the TLBs, programs the page tables for VMID0,
5822  * sets up the hw for VMIDs 1-15 which are allocated on
5823  * demand, and sets up the global locations for the LDS, GDS,
5824  * and GPUVM for FSA64 clients (CIK).
5825  * Returns 0 for success, errors for failure.
5826  */
5827 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5828 {
5829 	int r, i;
5830 
5831 	if (rdev->gart.robj == NULL) {
5832 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5833 		return -EINVAL;
5834 	}
5835 	r = radeon_gart_table_vram_pin(rdev);
5836 	if (r)
5837 		return r;
5838 	/* Setup TLB control */
5839 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5840 	       (0xA << 7) |
5841 	       ENABLE_L1_TLB |
5842 	       ENABLE_L1_FRAGMENT_PROCESSING |
5843 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5844 	       ENABLE_ADVANCED_DRIVER_MODEL |
5845 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5846 	/* Setup L2 cache */
5847 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5848 	       ENABLE_L2_FRAGMENT_PROCESSING |
5849 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5850 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5851 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5852 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5853 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5854 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5855 	       BANK_SELECT(4) |
5856 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5857 	/* setup context0 */
5858 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5859 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5860 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5861 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5862 			(u32)(rdev->dummy_page.addr >> 12));
5863 	WREG32(VM_CONTEXT0_CNTL2, 0);
5864 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5865 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5866 
5867 	WREG32(0x15D4, 0);
5868 	WREG32(0x15D8, 0);
5869 	WREG32(0x15DC, 0);
5870 
5871 	/* restore context1-15 */
5872 	/* set vm size, must be a multiple of 4 */
5873 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5874 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5875 	for (i = 1; i < 16; i++) {
5876 		if (i < 8)
5877 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5878 			       rdev->vm_manager.saved_table_addr[i]);
5879 		else
5880 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5881 			       rdev->vm_manager.saved_table_addr[i]);
5882 	}
5883 
5884 	/* enable context1-15 */
5885 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5886 	       (u32)(rdev->dummy_page.addr >> 12));
5887 	WREG32(VM_CONTEXT1_CNTL2, 4);
5888 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5889 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5890 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5891 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5892 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5893 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5894 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5895 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5896 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5897 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5898 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5899 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5900 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5901 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5902 
5903 	if (rdev->family == CHIP_KAVERI) {
5904 		u32 tmp = RREG32(CHUB_CONTROL);
5905 		tmp &= ~BYPASS_VM;
5906 		WREG32(CHUB_CONTROL, tmp);
5907 	}
5908 
5909 	/* XXX SH_MEM regs */
5910 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5911 	mutex_lock(&rdev->srbm_mutex);
5912 	for (i = 0; i < 16; i++) {
5913 		cik_srbm_select(rdev, 0, 0, 0, i);
5914 		/* CP and shaders */
5915 		WREG32(SH_MEM_CONFIG, 0);
5916 		WREG32(SH_MEM_APE1_BASE, 1);
5917 		WREG32(SH_MEM_APE1_LIMIT, 0);
5918 		WREG32(SH_MEM_BASES, 0);
5919 		/* SDMA GFX */
5920 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5921 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5922 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5923 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5924 		/* XXX SDMA RLC - todo */
5925 	}
5926 	cik_srbm_select(rdev, 0, 0, 0, 0);
5927 	mutex_unlock(&rdev->srbm_mutex);
5928 
5929 	cik_pcie_init_compute_vmid(rdev);
5930 
5931 	cik_pcie_gart_tlb_flush(rdev);
5932 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5933 		 (unsigned)(rdev->mc.gtt_size >> 20),
5934 		 (unsigned long long)rdev->gart.table_addr);
5935 	rdev->gart.ready = true;
5936 	return 0;
5937 }
5938 
5939 /**
5940  * cik_pcie_gart_disable - gart disable
5941  *
5942  * @rdev: radeon_device pointer
5943  *
5944  * This disables all VM page tables (CIK).
5945  */
5946 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5947 {
5948 	unsigned i;
5949 
5950 	for (i = 1; i < 16; ++i) {
5951 		uint32_t reg;
5952 		if (i < 8)
5953 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5954 		else
5955 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5956 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5957 	}
5958 
5959 	/* Disable all tables */
5960 	WREG32(VM_CONTEXT0_CNTL, 0);
5961 	WREG32(VM_CONTEXT1_CNTL, 0);
5962 	/* Setup TLB control */
5963 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5964 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5965 	/* Setup L2 cache */
5966 	WREG32(VM_L2_CNTL,
5967 	       ENABLE_L2_FRAGMENT_PROCESSING |
5968 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5969 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5970 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5971 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5972 	WREG32(VM_L2_CNTL2, 0);
5973 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5974 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5975 	radeon_gart_table_vram_unpin(rdev);
5976 }
5977 
5978 /**
5979  * cik_pcie_gart_fini - vm fini callback
5980  *
5981  * @rdev: radeon_device pointer
5982  *
5983  * Tears down the driver GART/VM setup (CIK).
5984  */
5985 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5986 {
5987 	cik_pcie_gart_disable(rdev);
5988 	radeon_gart_table_vram_free(rdev);
5989 	radeon_gart_fini(rdev);
5990 }
5991 
5992 /* vm parser */
5993 /**
5994  * cik_ib_parse - vm ib_parse callback
5995  *
5996  * @rdev: radeon_device pointer
5997  * @ib: indirect buffer pointer
5998  *
5999  * CIK uses hw IB checking so this is a nop.
6000  */
6001 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6002 {
6003 	return 0;
6004 }
6005 
6006 /*
6007  * vm
6008  * VMID 0 is the physical GPU addresses as used by the kernel.
6009  * VMIDs 1-15 are used for userspace clients and are handled
6010  * by the radeon vm/hsa code.
6011  */
6012 /**
6013  * cik_vm_init - cik vm init callback
6014  *
6015  * @rdev: radeon_device pointer
6016  *
6017  * Inits CIK-specific VM parameters (number of VMs, base of vram for
6018  * VMIDs 1-15).
6019  * Returns 0 for success.
6020  */
6021 int cik_vm_init(struct radeon_device *rdev)
6022 {
6023 	/*
6024 	 * number of VMs
6025 	 * VMID 0 is reserved for System
6026 	 * radeon graphics/compute will use VMIDs 1-7
6027 	 * amdkfd will use VMIDs 8-15
6028 	 */
6029 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6030 	/* base offset of vram pages */
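	/*
	 * Editor's note: MC_VM_FB_OFFSET holds the framebuffer offset in
	 * 4 MiB units, hence the << 22 below; e.g. a register value of 0x300
	 * would place the base at 0x300 << 22 = 3 GiB.
	 */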
6031 	if (rdev->flags & RADEON_IS_IGP) {
6032 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6033 		tmp <<= 22;
6034 		rdev->vm_manager.vram_base_offset = tmp;
6035 	} else
6036 		rdev->vm_manager.vram_base_offset = 0;
6037 
6038 	return 0;
6039 }
6040 
6041 /**
6042  * cik_vm_fini - cik vm fini callback
6043  *
6044  * @rdev: radeon_device pointer
6045  *
6046  * Tear down any asic specific VM setup (CIK).
6047  */
6048 void cik_vm_fini(struct radeon_device *rdev)
6049 {
6050 }
6051 
6052 /**
6053  * cik_vm_decode_fault - print human readable fault info
6054  *
6055  * @rdev: radeon_device pointer
6056  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6057  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6058  *
6059  * Print human readable fault information (CIK).
6060  */
6061 static void cik_vm_decode_fault(struct radeon_device *rdev,
6062 				u32 status, u32 addr, u32 mc_client)
6063 {
6064 	u32 mc_id;
6065 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6066 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6067 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6068 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6069 
6070 	if (rdev->family == CHIP_HAWAII)
6071 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6072 	else
6073 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6074 
6075 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6076 	       protections, vmid, addr,
6077 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6078 	       block, mc_client, mc_id);
6079 }
6080 
6081 /**
6082  * cik_vm_flush - cik vm flush using the CP
6083  *
6084  * @rdev: radeon_device pointer
6085  *
6086  * Update the page table base and flush the VM TLB
6087  * using the CP (CIK).
6088  */
6089 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6090 		  unsigned vm_id, uint64_t pd_addr)
6091 {
6092 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6093 
6094 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6095 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6096 				 WRITE_DATA_DST_SEL(0)));
6097 	if (vm_id < 8) {
6098 		radeon_ring_write(ring,
6099 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6100 	} else {
6101 		radeon_ring_write(ring,
6102 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6103 	}
6104 	radeon_ring_write(ring, 0);
6105 	radeon_ring_write(ring, pd_addr >> 12);
6106 
6107 	/* update SH_MEM_* regs */
6108 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6109 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6110 				 WRITE_DATA_DST_SEL(0)));
6111 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6112 	radeon_ring_write(ring, 0);
6113 	radeon_ring_write(ring, VMID(vm_id));
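	/*
	 * Editor's note: the SRBM_GFX_CNTL write above banks the following
	 * SH_MEM_* register writes to @vm_id; the second SRBM_GFX_CNTL write
	 * below restores VMID 0 once they have been emitted.
	 */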
6114 
6115 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6116 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6117 				 WRITE_DATA_DST_SEL(0)));
6118 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6119 	radeon_ring_write(ring, 0);
6120 
6121 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6122 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6123 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6124 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6125 
6126 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6127 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6128 				 WRITE_DATA_DST_SEL(0)));
6129 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6130 	radeon_ring_write(ring, 0);
6131 	radeon_ring_write(ring, VMID(0));
6132 
6133 	/* HDP flush */
6134 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6135 
6136 	/* bits 0-15 are the VM contexts 0-15 */
6137 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6138 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6139 				 WRITE_DATA_DST_SEL(0)));
6140 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6141 	radeon_ring_write(ring, 0);
6142 	radeon_ring_write(ring, 1 << vm_id);
6143 
6144 	/* wait for the invalidate to complete */
6145 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6146 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6147 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6148 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6149 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6150 	radeon_ring_write(ring, 0);
6151 	radeon_ring_write(ring, 0); /* ref */
6152 	radeon_ring_write(ring, 0); /* mask */
6153 	radeon_ring_write(ring, 0x20); /* poll interval */
6154 
6155 	/* compute doesn't have PFP */
6156 	if (usepfp) {
6157 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6158 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6159 		radeon_ring_write(ring, 0x0);
6160 	}
6161 }
6162 
6163 /*
6164  * RLC
6165  * The RLC is a multi-purpose microengine that handles a
6166  * variety of functions, the most important of which is
6167  * the interrupt controller.
6168  */
6169 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6170 					  bool enable)
6171 {
6172 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6173 
6174 	if (enable)
6175 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6176 	else
6177 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6178 	WREG32(CP_INT_CNTL_RING0, tmp);
6179 }
6180 
6181 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6182 {
6183 	u32 tmp;
6184 
6185 	tmp = RREG32(RLC_LB_CNTL);
6186 	if (enable)
6187 		tmp |= LOAD_BALANCE_ENABLE;
6188 	else
6189 		tmp &= ~LOAD_BALANCE_ENABLE;
6190 	WREG32(RLC_LB_CNTL, tmp);
6191 }
6192 
6193 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6194 {
6195 	u32 i, j, k;
6196 	u32 mask;
6197 
6198 	mutex_lock(&rdev->grbm_idx_mutex);
6199 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6200 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6201 			cik_select_se_sh(rdev, i, j);
6202 			for (k = 0; k < rdev->usec_timeout; k++) {
6203 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6204 					break;
6205 				udelay(1);
6206 			}
6207 		}
6208 	}
6209 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6210 	mutex_unlock(&rdev->grbm_idx_mutex);
6211 
6212 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6213 	for (k = 0; k < rdev->usec_timeout; k++) {
6214 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6215 			break;
6216 		udelay(1);
6217 	}
6218 }
6219 
6220 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6221 {
6222 	u32 tmp;
6223 
6224 	tmp = RREG32(RLC_CNTL);
6225 	if (tmp != rlc)
6226 		WREG32(RLC_CNTL, rlc);
6227 }
6228 
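/*
 * Editorial note on the helper below: cik_halt_rlc() disables the RLC if it
 * is running, waits for it to go idle, and returns the previous RLC_CNTL
 * value so callers can hand it back to cik_update_rlc() once they are done
 * poking the serdes registers.
 */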
6229 static u32 cik_halt_rlc(struct radeon_device *rdev)
6230 {
6231 	u32 data, orig;
6232 
6233 	orig = data = RREG32(RLC_CNTL);
6234 
6235 	if (data & RLC_ENABLE) {
6236 		u32 i;
6237 
6238 		data &= ~RLC_ENABLE;
6239 		WREG32(RLC_CNTL, data);
6240 
6241 		for (i = 0; i < rdev->usec_timeout; i++) {
6242 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6243 				break;
6244 			udelay(1);
6245 		}
6246 
6247 		cik_wait_for_rlc_serdes(rdev);
6248 	}
6249 
6250 	return orig;
6251 }
6252 
6253 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6254 {
6255 	u32 tmp, i, mask;
6256 
6257 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6258 	WREG32(RLC_GPR_REG2, tmp);
6259 
6260 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6261 	for (i = 0; i < rdev->usec_timeout; i++) {
6262 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6263 			break;
6264 		udelay(1);
6265 	}
6266 
6267 	for (i = 0; i < rdev->usec_timeout; i++) {
6268 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6269 			break;
6270 		udelay(1);
6271 	}
6272 }
6273 
6274 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6275 {
6276 	u32 tmp;
6277 
6278 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6279 	WREG32(RLC_GPR_REG2, tmp);
6280 }
6281 
6282 /**
6283  * cik_rlc_stop - stop the RLC ME
6284  *
6285  * @rdev: radeon_device pointer
6286  *
6287  * Halt the RLC ME (MicroEngine) (CIK).
6288  */
6289 static void cik_rlc_stop(struct radeon_device *rdev)
6290 {
6291 	WREG32(RLC_CNTL, 0);
6292 
6293 	cik_enable_gui_idle_interrupt(rdev, false);
6294 
6295 	cik_wait_for_rlc_serdes(rdev);
6296 }
6297 
6298 /**
6299  * cik_rlc_start - start the RLC ME
6300  *
6301  * @rdev: radeon_device pointer
6302  *
6303  * Unhalt the RLC ME (MicroEngine) (CIK).
6304  */
6305 static void cik_rlc_start(struct radeon_device *rdev)
6306 {
6307 	WREG32(RLC_CNTL, RLC_ENABLE);
6308 
6309 	cik_enable_gui_idle_interrupt(rdev, true);
6310 
6311 	udelay(50);
6312 }
6313 
6314 /**
6315  * cik_rlc_resume - setup the RLC hw
6316  *
6317  * @rdev: radeon_device pointer
6318  *
6319  * Initialize the RLC registers, load the ucode,
6320  * and start the RLC (CIK).
6321  * Returns 0 for success, -EINVAL if the ucode is not available.
6322  */
6323 static int cik_rlc_resume(struct radeon_device *rdev)
6324 {
6325 	u32 i, size, tmp;
6326 
6327 	if (!rdev->rlc_fw)
6328 		return -EINVAL;
6329 
6330 	cik_rlc_stop(rdev);
6331 
6332 	/* disable CG */
6333 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6334 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6335 
6336 	si_rlc_reset(rdev);
6337 
6338 	cik_init_pg(rdev);
6339 
6340 	cik_init_cg(rdev);
6341 
6342 	WREG32(RLC_LB_CNTR_INIT, 0);
6343 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6344 
6345 	mutex_lock(&rdev->grbm_idx_mutex);
6346 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6347 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6348 	WREG32(RLC_LB_PARAMS, 0x00600408);
6349 	WREG32(RLC_LB_CNTL, 0x80000004);
6350 	mutex_unlock(&rdev->grbm_idx_mutex);
6351 
6352 	WREG32(RLC_MC_CNTL, 0);
6353 	WREG32(RLC_UCODE_CNTL, 0);
6354 
6355 	if (rdev->new_fw) {
6356 		const struct rlc_firmware_header_v1_0 *hdr =
6357 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6358 		const __le32 *fw_data = (const __le32 *)
6359 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6360 
6361 		radeon_ucode_print_rlc_hdr(&hdr->header);
6362 
6363 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6364 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6365 		for (i = 0; i < size; i++)
6366 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6367 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6368 	} else {
6369 		const __be32 *fw_data;
6370 
6371 		switch (rdev->family) {
6372 		case CHIP_BONAIRE:
6373 		case CHIP_HAWAII:
6374 		default:
6375 			size = BONAIRE_RLC_UCODE_SIZE;
6376 			break;
6377 		case CHIP_KAVERI:
6378 			size = KV_RLC_UCODE_SIZE;
6379 			break;
6380 		case CHIP_KABINI:
6381 			size = KB_RLC_UCODE_SIZE;
6382 			break;
6383 		case CHIP_MULLINS:
6384 			size = ML_RLC_UCODE_SIZE;
6385 			break;
6386 		}
6387 
6388 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6389 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6390 		for (i = 0; i < size; i++)
6391 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6392 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6393 	}
6394 
6395 	/* XXX - find out what chips support lbpw */
6396 	cik_enable_lbpw(rdev, false);
6397 
6398 	if (rdev->family == CHIP_BONAIRE)
6399 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6400 
6401 	cik_rlc_start(rdev);
6402 
6403 	return 0;
6404 }
6405 
6406 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6407 {
6408 	u32 data, orig, tmp, tmp2;
6409 
6410 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6411 
6412 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6413 		cik_enable_gui_idle_interrupt(rdev, true);
6414 
6415 		tmp = cik_halt_rlc(rdev);
6416 
6417 		mutex_lock(&rdev->grbm_idx_mutex);
6418 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6419 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6420 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6421 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6422 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6423 		mutex_unlock(&rdev->grbm_idx_mutex);
6424 
6425 		cik_update_rlc(rdev, tmp);
6426 
6427 		data |= CGCG_EN | CGLS_EN;
6428 	} else {
6429 		cik_enable_gui_idle_interrupt(rdev, false);
6430 
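		/*
		 * Editor's note: the four back-to-back reads below look like
		 * deliberate dummy reads, presumably to let the clock-gating
		 * state settle before CGCG/CGLS are turned off; treat that
		 * as an inference, the source carries no comment here.
		 */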
6431 		RREG32(CB_CGTT_SCLK_CTRL);
6432 		RREG32(CB_CGTT_SCLK_CTRL);
6433 		RREG32(CB_CGTT_SCLK_CTRL);
6434 		RREG32(CB_CGTT_SCLK_CTRL);
6435 
6436 		data &= ~(CGCG_EN | CGLS_EN);
6437 	}
6438 
6439 	if (orig != data)
6440 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6441 
6442 }
6443 
6444 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6445 {
6446 	u32 data, orig, tmp = 0;
6447 
6448 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6449 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6450 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6451 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6452 				data |= CP_MEM_LS_EN;
6453 				if (orig != data)
6454 					WREG32(CP_MEM_SLP_CNTL, data);
6455 			}
6456 		}
6457 
6458 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6459 		data |= 0x00000001;
6460 		data &= 0xfffffffd;
6461 		if (orig != data)
6462 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6463 
6464 		tmp = cik_halt_rlc(rdev);
6465 
6466 		mutex_lock(&rdev->grbm_idx_mutex);
6467 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6468 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6469 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6470 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6471 		WREG32(RLC_SERDES_WR_CTRL, data);
6472 		mutex_unlock(&rdev->grbm_idx_mutex);
6473 
6474 		cik_update_rlc(rdev, tmp);
6475 
6476 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6477 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6478 			data &= ~SM_MODE_MASK;
6479 			data |= SM_MODE(0x2);
6480 			data |= SM_MODE_ENABLE;
6481 			data &= ~CGTS_OVERRIDE;
6482 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6483 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6484 				data &= ~CGTS_LS_OVERRIDE;
6485 			data &= ~ON_MONITOR_ADD_MASK;
6486 			data |= ON_MONITOR_ADD_EN;
6487 			data |= ON_MONITOR_ADD(0x96);
6488 			if (orig != data)
6489 				WREG32(CGTS_SM_CTRL_REG, data);
6490 		}
6491 	} else {
6492 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6493 		data |= 0x00000003;
6494 		if (orig != data)
6495 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6496 
6497 		data = RREG32(RLC_MEM_SLP_CNTL);
6498 		if (data & RLC_MEM_LS_EN) {
6499 			data &= ~RLC_MEM_LS_EN;
6500 			WREG32(RLC_MEM_SLP_CNTL, data);
6501 		}
6502 
6503 		data = RREG32(CP_MEM_SLP_CNTL);
6504 		if (data & CP_MEM_LS_EN) {
6505 			data &= ~CP_MEM_LS_EN;
6506 			WREG32(CP_MEM_SLP_CNTL, data);
6507 		}
6508 
6509 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6510 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6511 		if (orig != data)
6512 			WREG32(CGTS_SM_CTRL_REG, data);
6513 
6514 		tmp = cik_halt_rlc(rdev);
6515 
6516 		mutex_lock(&rdev->grbm_idx_mutex);
6517 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6518 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6519 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6520 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6521 		WREG32(RLC_SERDES_WR_CTRL, data);
6522 		mutex_unlock(&rdev->grbm_idx_mutex);
6523 
6524 		cik_update_rlc(rdev, tmp);
6525 	}
6526 }
6527 
6528 static const u32 mc_cg_registers[] =
6529 {
6530 	MC_HUB_MISC_HUB_CG,
6531 	MC_HUB_MISC_SIP_CG,
6532 	MC_HUB_MISC_VM_CG,
6533 	MC_XPB_CLK_GAT,
6534 	ATC_MISC_CG,
6535 	MC_CITF_MISC_WR_CG,
6536 	MC_CITF_MISC_RD_CG,
6537 	MC_CITF_MISC_VM_CG,
6538 	VM_L2_CG,
6539 };
6540 
6541 static void cik_enable_mc_ls(struct radeon_device *rdev,
6542 			     bool enable)
6543 {
6544 	int i;
6545 	u32 orig, data;
6546 
6547 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6548 		orig = data = RREG32(mc_cg_registers[i]);
6549 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6550 			data |= MC_LS_ENABLE;
6551 		else
6552 			data &= ~MC_LS_ENABLE;
6553 		if (data != orig)
6554 			WREG32(mc_cg_registers[i], data);
6555 	}
6556 }
6557 
6558 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6559 			       bool enable)
6560 {
6561 	int i;
6562 	u32 orig, data;
6563 
6564 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6565 		orig = data = RREG32(mc_cg_registers[i]);
6566 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6567 			data |= MC_CG_ENABLE;
6568 		else
6569 			data &= ~MC_CG_ENABLE;
6570 		if (data != orig)
6571 			WREG32(mc_cg_registers[i], data);
6572 	}
6573 }
6574 
6575 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6576 				 bool enable)
6577 {
6578 	u32 orig, data;
6579 
6580 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6581 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6582 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6583 	} else {
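		/* the 0xff000000 written below presumably sets the clock
		 * soft-override bits, forcing the SDMA clocks on (MGCG off);
		 * the enable path above leaves them cleared
		 */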
6584 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6585 		data |= 0xff000000;
6586 		if (data != orig)
6587 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6588 
6589 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6590 		data |= 0xff000000;
6591 		if (data != orig)
6592 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6593 	}
6594 }
6595 
6596 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6597 				 bool enable)
6598 {
6599 	u32 orig, data;
6600 
6601 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6602 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6603 		data |= 0x100;
6604 		if (orig != data)
6605 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6606 
6607 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6608 		data |= 0x100;
6609 		if (orig != data)
6610 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6611 	} else {
6612 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6613 		data &= ~0x100;
6614 		if (orig != data)
6615 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6616 
6617 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6618 		data &= ~0x100;
6619 		if (orig != data)
6620 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6621 	}
6622 }
6623 
6624 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6625 				bool enable)
6626 {
6627 	u32 orig, data;
6628 
6629 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6630 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6631 		data = 0xfff;
6632 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6633 
6634 		orig = data = RREG32(UVD_CGC_CTRL);
6635 		data |= DCM;
6636 		if (orig != data)
6637 			WREG32(UVD_CGC_CTRL, data);
6638 	} else {
6639 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6640 		data &= ~0xfff;
6641 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6642 
6643 		orig = data = RREG32(UVD_CGC_CTRL);
6644 		data &= ~DCM;
6645 		if (orig != data)
6646 			WREG32(UVD_CGC_CTRL, data);
6647 	}
6648 }
6649 
6650 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6651 			       bool enable)
6652 {
6653 	u32 orig, data;
6654 
6655 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6656 
6657 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6658 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6659 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6660 	else
6661 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6662 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6663 
6664 	if (orig != data)
6665 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6666 }
6667 
6668 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6669 				bool enable)
6670 {
6671 	u32 orig, data;
6672 
6673 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6674 
6675 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6676 		data &= ~CLOCK_GATING_DIS;
6677 	else
6678 		data |= CLOCK_GATING_DIS;
6679 
6680 	if (orig != data)
6681 		WREG32(HDP_HOST_PATH_CNTL, data);
6682 }
6683 
6684 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6685 			      bool enable)
6686 {
6687 	u32 orig, data;
6688 
6689 	orig = data = RREG32(HDP_MEM_POWER_LS);
6690 
6691 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6692 		data |= HDP_LS_ENABLE;
6693 	else
6694 		data &= ~HDP_LS_ENABLE;
6695 
6696 	if (orig != data)
6697 		WREG32(HDP_MEM_POWER_LS, data);
6698 }
6699 
6700 void cik_update_cg(struct radeon_device *rdev,
6701 		   u32 block, bool enable)
6702 {
6703 
6704 	if (block & RADEON_CG_BLOCK_GFX) {
6705 		cik_enable_gui_idle_interrupt(rdev, false);
6706 		/* order matters! */
6707 		if (enable) {
6708 			cik_enable_mgcg(rdev, true);
6709 			cik_enable_cgcg(rdev, true);
6710 		} else {
6711 			cik_enable_cgcg(rdev, false);
6712 			cik_enable_mgcg(rdev, false);
6713 		}
6714 		cik_enable_gui_idle_interrupt(rdev, true);
6715 	}
6716 
6717 	if (block & RADEON_CG_BLOCK_MC) {
6718 		if (!(rdev->flags & RADEON_IS_IGP)) {
6719 			cik_enable_mc_mgcg(rdev, enable);
6720 			cik_enable_mc_ls(rdev, enable);
6721 		}
6722 	}
6723 
6724 	if (block & RADEON_CG_BLOCK_SDMA) {
6725 		cik_enable_sdma_mgcg(rdev, enable);
6726 		cik_enable_sdma_mgls(rdev, enable);
6727 	}
6728 
6729 	if (block & RADEON_CG_BLOCK_BIF) {
6730 		cik_enable_bif_mgls(rdev, enable);
6731 	}
6732 
6733 	if (block & RADEON_CG_BLOCK_UVD) {
6734 		if (rdev->has_uvd)
6735 			cik_enable_uvd_mgcg(rdev, enable);
6736 	}
6737 
6738 	if (block & RADEON_CG_BLOCK_HDP) {
6739 		cik_enable_hdp_mgcg(rdev, enable);
6740 		cik_enable_hdp_ls(rdev, enable);
6741 	}
6742 
6743 	if (block & RADEON_CG_BLOCK_VCE) {
6744 		vce_v2_0_enable_mgcg(rdev, enable);
6745 	}
6746 }
6747 
6748 static void cik_init_cg(struct radeon_device *rdev)
6749 {
6750 
6751 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6752 
6753 	if (rdev->has_uvd)
6754 		si_init_uvd_internal_cg(rdev);
6755 
6756 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6757 			     RADEON_CG_BLOCK_SDMA |
6758 			     RADEON_CG_BLOCK_BIF |
6759 			     RADEON_CG_BLOCK_UVD |
6760 			     RADEON_CG_BLOCK_HDP), true);
6761 }
6762 
6763 static void cik_fini_cg(struct radeon_device *rdev)
6764 {
6765 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6766 			     RADEON_CG_BLOCK_SDMA |
6767 			     RADEON_CG_BLOCK_BIF |
6768 			     RADEON_CG_BLOCK_UVD |
6769 			     RADEON_CG_BLOCK_HDP), false);
6770 
6771 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6772 }
6773 
6774 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6775 					  bool enable)
6776 {
6777 	u32 data, orig;
6778 
6779 	orig = data = RREG32(RLC_PG_CNTL);
6780 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6781 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6782 	else
6783 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6784 	if (orig != data)
6785 		WREG32(RLC_PG_CNTL, data);
6786 }
6787 
6788 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6789 					  bool enable)
6790 {
6791 	u32 data, orig;
6792 
6793 	orig = data = RREG32(RLC_PG_CNTL);
6794 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6795 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6796 	else
6797 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6798 	if (orig != data)
6799 		WREG32(RLC_PG_CNTL, data);
6800 }
6801 
6802 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6803 {
6804 	u32 data, orig;
6805 
6806 	orig = data = RREG32(RLC_PG_CNTL);
6807 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6808 		data &= ~DISABLE_CP_PG;
6809 	else
6810 		data |= DISABLE_CP_PG;
6811 	if (orig != data)
6812 		WREG32(RLC_PG_CNTL, data);
6813 }
6814 
6815 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6816 {
6817 	u32 data, orig;
6818 
6819 	orig = data = RREG32(RLC_PG_CNTL);
6820 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6821 		data &= ~DISABLE_GDS_PG;
6822 	else
6823 		data |= DISABLE_GDS_PG;
6824 	if (orig != data)
6825 		WREG32(RLC_PG_CNTL, data);
6826 }
6827 
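/* legacy (pre-new_fw) firmware images keep the CP jump tables at fixed
 * dword offsets; newer images describe them via jt_offset/jt_size in
 * the gfx firmware header (see cik_init_cp_pg_table() below)
 */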
6828 #define CP_ME_TABLE_SIZE    96
6829 #define CP_ME_TABLE_OFFSET  2048
6830 #define CP_MEC_TABLE_OFFSET 4096
6831 
6832 void cik_init_cp_pg_table(struct radeon_device *rdev)
6833 {
6834 	volatile u32 *dst_ptr;
6835 	int me, i, max_me = 4;
6836 	u32 bo_offset = 0;
6837 	u32 table_offset, table_size;
6838 
6839 	if (rdev->family == CHIP_KAVERI)
6840 		max_me = 5;
6841 
6842 	if (rdev->rlc.cp_table_ptr == NULL)
6843 		return;
6844 
6845 	/* write the cp table buffer */
6846 	dst_ptr = rdev->rlc.cp_table_ptr;
6847 	for (me = 0; me < max_me; me++) {
6848 		if (rdev->new_fw) {
6849 			const __le32 *fw_data;
6850 			const struct gfx_firmware_header_v1_0 *hdr;
6851 
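			/* new-style firmware: the jump table location is read
			 * from the gfx firmware header (jt_offset/jt_size)
			 */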
6852 			if (me == 0) {
6853 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6854 				fw_data = (const __le32 *)
6855 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6856 				table_offset = le32_to_cpu(hdr->jt_offset);
6857 				table_size = le32_to_cpu(hdr->jt_size);
6858 			} else if (me == 1) {
6859 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6860 				fw_data = (const __le32 *)
6861 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6862 				table_offset = le32_to_cpu(hdr->jt_offset);
6863 				table_size = le32_to_cpu(hdr->jt_size);
6864 			} else if (me == 2) {
6865 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6866 				fw_data = (const __le32 *)
6867 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6868 				table_offset = le32_to_cpu(hdr->jt_offset);
6869 				table_size = le32_to_cpu(hdr->jt_size);
6870 			} else if (me == 3) {
6871 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6872 				fw_data = (const __le32 *)
6873 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6874 				table_offset = le32_to_cpu(hdr->jt_offset);
6875 				table_size = le32_to_cpu(hdr->jt_size);
6876 			} else {
6877 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6878 				fw_data = (const __le32 *)
6879 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6880 				table_offset = le32_to_cpu(hdr->jt_offset);
6881 				table_size = le32_to_cpu(hdr->jt_size);
6882 			}
6883 
6884 			for (i = 0; i < table_size; i++) {
6885 				dst_ptr[bo_offset + i] =
6886 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6887 			}
6888 			bo_offset += table_size;
6889 		} else {
6890 			const __be32 *fw_data;
6891 			table_size = CP_ME_TABLE_SIZE;
6892 
6893 			if (me == 0) {
6894 				fw_data = (const __be32 *)rdev->ce_fw->data;
6895 				table_offset = CP_ME_TABLE_OFFSET;
6896 			} else if (me == 1) {
6897 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6898 				table_offset = CP_ME_TABLE_OFFSET;
6899 			} else if (me == 2) {
6900 				fw_data = (const __be32 *)rdev->me_fw->data;
6901 				table_offset = CP_ME_TABLE_OFFSET;
6902 			} else {
6903 				fw_data = (const __be32 *)rdev->mec_fw->data;
6904 				table_offset = CP_MEC_TABLE_OFFSET;
6905 			}
6906 
6907 			for (i = 0; i < table_size; i++) {
6908 				dst_ptr[bo_offset + i] =
6909 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6910 			}
6911 			bo_offset += table_size;
6912 		}
6913 	}
6914 }
6915 
6916 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6917 				bool enable)
6918 {
6919 	u32 data, orig;
6920 
6921 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6922 		orig = data = RREG32(RLC_PG_CNTL);
6923 		data |= GFX_PG_ENABLE;
6924 		if (orig != data)
6925 			WREG32(RLC_PG_CNTL, data);
6926 
6927 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6928 		data |= AUTO_PG_EN;
6929 		if (orig != data)
6930 			WREG32(RLC_AUTO_PG_CTRL, data);
6931 	} else {
6932 		orig = data = RREG32(RLC_PG_CNTL);
6933 		data &= ~GFX_PG_ENABLE;
6934 		if (orig != data)
6935 			WREG32(RLC_PG_CNTL, data);
6936 
6937 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6938 		data &= ~AUTO_PG_EN;
6939 		if (orig != data)
6940 			WREG32(RLC_AUTO_PG_CTRL, data);
6941 
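		/* dummy read of DB_RENDER_CONTROL; the value is discarded */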
6942 		data = RREG32(DB_RENDER_CONTROL);
6943 	}
6944 }
6945 
6946 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6947 {
6948 	u32 mask = 0, tmp, tmp1;
6949 	int i;
6950 
6951 	mutex_lock(&rdev->grbm_idx_mutex);
6952 	cik_select_se_sh(rdev, se, sh);
6953 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6954 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6955 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6956 	mutex_unlock(&rdev->grbm_idx_mutex);
6957 
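	/* the upper 16 bits of both CONFIG registers hold the inactive-CU
	 * bitfields; merge them and shift down to one bit per CU
	 */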
6958 	tmp &= 0xffff0000;
6959 
6960 	tmp |= tmp1;
6961 	tmp >>= 16;
6962 
6963 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6964 		mask <<= 1;
6965 		mask |= 1;
6966 	}
6967 
6968 	return (~tmp) & mask;
6969 }
6970 
6971 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6972 {
6973 	u32 i, j, k, active_cu_number = 0;
6974 	u32 mask, counter, cu_bitmap;
6975 	u32 tmp = 0;
6976 
6977 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6978 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6979 			mask = 1;
6980 			cu_bitmap = 0;
6981 			counter = 0;
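			/* count active CUs; the first two per SH are marked
			 * always-on
			 */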
6982 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6983 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6984 					if (counter < 2)
6985 						cu_bitmap |= mask;
6986 					counter++;
6987 				}
6988 				mask <<= 1;
6989 			}
6990 
6991 			active_cu_number += counter;
6992 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6993 		}
6994 	}
6995 
6996 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6997 
6998 	tmp = RREG32(RLC_MAX_PG_CU);
6999 	tmp &= ~MAX_PU_CU_MASK;
7000 	tmp |= MAX_PU_CU(active_cu_number);
7001 	WREG32(RLC_MAX_PG_CU, tmp);
7002 }
7003 
7004 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7005 				       bool enable)
7006 {
7007 	u32 data, orig;
7008 
7009 	orig = data = RREG32(RLC_PG_CNTL);
7010 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7011 		data |= STATIC_PER_CU_PG_ENABLE;
7012 	else
7013 		data &= ~STATIC_PER_CU_PG_ENABLE;
7014 	if (orig != data)
7015 		WREG32(RLC_PG_CNTL, data);
7016 }
7017 
7018 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7019 					bool enable)
7020 {
7021 	u32 data, orig;
7022 
7023 	orig = data = RREG32(RLC_PG_CNTL);
7024 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7025 		data |= DYN_PER_CU_PG_ENABLE;
7026 	else
7027 		data &= ~DYN_PER_CU_PG_ENABLE;
7028 	if (orig != data)
7029 		WREG32(RLC_PG_CNTL, data);
7030 }
7031 
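/* dword offsets into RLC GPM scratch space where the clear-state
 * descriptor and the save/restore register list are written
 * (see cik_init_gfx_cgpg() below)
 */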
7032 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7033 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7034 
7035 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7036 {
7037 	u32 data, orig;
7038 	u32 i;
7039 
7040 	if (rdev->rlc.cs_data) {
7041 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7042 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7043 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7044 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7045 	} else {
7046 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7047 		for (i = 0; i < 3; i++)
7048 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7049 	}
7050 	if (rdev->rlc.reg_list) {
7051 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7052 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7053 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7054 	}
7055 
7056 	orig = data = RREG32(RLC_PG_CNTL);
7057 	data |= GFX_PG_SRC;
7058 	if (orig != data)
7059 		WREG32(RLC_PG_CNTL, data);
7060 
7061 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7062 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7063 
7064 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7065 	data &= ~IDLE_POLL_COUNT_MASK;
7066 	data |= IDLE_POLL_COUNT(0x60);
7067 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7068 
7069 	data = 0x10101010;
7070 	WREG32(RLC_PG_DELAY, data);
7071 
7072 	data = RREG32(RLC_PG_DELAY_2);
7073 	data &= ~0xff;
7074 	data |= 0x3;
7075 	WREG32(RLC_PG_DELAY_2, data);
7076 
7077 	data = RREG32(RLC_AUTO_PG_CTRL);
7078 	data &= ~GRBM_REG_SGIT_MASK;
7079 	data |= GRBM_REG_SGIT(0x700);
7080 	WREG32(RLC_AUTO_PG_CTRL, data);
7081 
7082 }
7083 
7084 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7085 {
7086 	cik_enable_gfx_cgpg(rdev, enable);
7087 	cik_enable_gfx_static_mgpg(rdev, enable);
7088 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7089 }
7090 
7091 u32 cik_get_csb_size(struct radeon_device *rdev)
7092 {
7093 	u32 count = 0;
7094 	const struct cs_section_def *sect = NULL;
7095 	const struct cs_extent_def *ext = NULL;
7096 
7097 	if (rdev->rlc.cs_data == NULL)
7098 		return 0;
7099 
7100 	/* begin clear state */
7101 	count += 2;
7102 	/* context control state */
7103 	count += 3;
7104 
7105 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7106 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7107 			if (sect->id == SECT_CONTEXT)
7108 				count += 2 + ext->reg_count;
7109 			else
7110 				return 0;
7111 		}
7112 	}
7113 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7114 	count += 4;
7115 	/* end clear state */
7116 	count += 2;
7117 	/* clear state */
7118 	count += 2;
7119 
7120 	return count;
7121 }
7122 
7123 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7124 {
7125 	u32 count = 0, i;
7126 	const struct cs_section_def *sect = NULL;
7127 	const struct cs_extent_def *ext = NULL;
7128 
7129 	if (rdev->rlc.cs_data == NULL)
7130 		return;
7131 	if (buffer == NULL)
7132 		return;
7133 
7134 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7135 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7136 
7137 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7138 	buffer[count++] = cpu_to_le32(0x80000000);
7139 	buffer[count++] = cpu_to_le32(0x80000000);
7140 
7141 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7142 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7143 			if (sect->id == SECT_CONTEXT) {
7144 				buffer[count++] =
7145 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7146 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7147 				for (i = 0; i < ext->reg_count; i++)
7148 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7149 			} else {
7150 				return;
7151 			}
7152 		}
7153 	}
7154 
7155 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7156 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7157 	switch (rdev->family) {
7158 	case CHIP_BONAIRE:
7159 		buffer[count++] = cpu_to_le32(0x16000012);
7160 		buffer[count++] = cpu_to_le32(0x00000000);
7161 		break;
7162 	case CHIP_KAVERI:
7163 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7164 		buffer[count++] = cpu_to_le32(0x00000000);
7165 		break;
7166 	case CHIP_KABINI:
7167 	case CHIP_MULLINS:
7168 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7169 		buffer[count++] = cpu_to_le32(0x00000000);
7170 		break;
7171 	case CHIP_HAWAII:
7172 		buffer[count++] = cpu_to_le32(0x3a00161a);
7173 		buffer[count++] = cpu_to_le32(0x0000002e);
7174 		break;
7175 	default:
7176 		buffer[count++] = cpu_to_le32(0x00000000);
7177 		buffer[count++] = cpu_to_le32(0x00000000);
7178 		break;
7179 	}
7180 
7181 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7182 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7183 
7184 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7185 	buffer[count++] = cpu_to_le32(0);
7186 }
7187 
7188 static void cik_init_pg(struct radeon_device *rdev)
7189 {
7190 	if (rdev->pg_flags) {
7191 		cik_enable_sck_slowdown_on_pu(rdev, true);
7192 		cik_enable_sck_slowdown_on_pd(rdev, true);
7193 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7194 			cik_init_gfx_cgpg(rdev);
7195 			cik_enable_cp_pg(rdev, true);
7196 			cik_enable_gds_pg(rdev, true);
7197 		}
7198 		cik_init_ao_cu_mask(rdev);
7199 		cik_update_gfx_pg(rdev, true);
7200 	}
7201 }
7202 
7203 static void cik_fini_pg(struct radeon_device *rdev)
7204 {
7205 	if (rdev->pg_flags) {
7206 		cik_update_gfx_pg(rdev, false);
7207 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7208 			cik_enable_cp_pg(rdev, false);
7209 			cik_enable_gds_pg(rdev, false);
7210 		}
7211 	}
7212 }
7213 
7214 /*
7215  * Interrupts
7216  * Starting with r6xx, interrupts are handled via a ring buffer.
7217  * Ring buffers are areas of GPU accessible memory that the GPU
7218  * writes interrupt vectors into and the host reads vectors out of.
7219  * There is a rptr (read pointer) that determines where the
7220  * host is currently reading, and a wptr (write pointer)
7221  * which determines where the GPU has written.  When the
7222  * pointers are equal, the ring is idle.  When the GPU
7223  * writes vectors to the ring buffer, it increments the
7224  * wptr.  When there is an interrupt, the host then starts
7225  * fetching vectors and processing them until the pointers are
7226  * equal again at which point it updates the rptr.
7227  */
7228 
7229 /**
7230  * cik_enable_interrupts - Enable the interrupt ring buffer
7231  *
7232  * @rdev: radeon_device pointer
7233  *
7234  * Enable the interrupt ring buffer (CIK).
7235  */
7236 static void cik_enable_interrupts(struct radeon_device *rdev)
7237 {
7238 	u32 ih_cntl = RREG32(IH_CNTL);
7239 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7240 
7241 	ih_cntl |= ENABLE_INTR;
7242 	ih_rb_cntl |= IH_RB_ENABLE;
7243 	WREG32(IH_CNTL, ih_cntl);
7244 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7245 	rdev->ih.enabled = true;
7246 }
7247 
7248 /**
7249  * cik_disable_interrupts - Disable the interrupt ring buffer
7250  *
7251  * @rdev: radeon_device pointer
7252  *
7253  * Disable the interrupt ring buffer (CIK).
7254  */
7255 static void cik_disable_interrupts(struct radeon_device *rdev)
7256 {
7257 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7258 	u32 ih_cntl = RREG32(IH_CNTL);
7259 
7260 	ih_rb_cntl &= ~IH_RB_ENABLE;
7261 	ih_cntl &= ~ENABLE_INTR;
7262 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7263 	WREG32(IH_CNTL, ih_cntl);
7264 	/* set rptr, wptr to 0 */
7265 	WREG32(IH_RB_RPTR, 0);
7266 	WREG32(IH_RB_WPTR, 0);
7267 	rdev->ih.enabled = false;
7268 	rdev->ih.rptr = 0;
7269 }
7270 
7271 /**
7272  * cik_disable_interrupt_state - Disable all interrupt sources
7273  *
7274  * @rdev: radeon_device pointer
7275  *
7276  * Clear all interrupt enable bits used by the driver (CIK).
7277  */
7278 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7279 {
7280 	u32 tmp;
7281 
7282 	/* gfx ring */
7283 	tmp = RREG32(CP_INT_CNTL_RING0) &
7284 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7285 	WREG32(CP_INT_CNTL_RING0, tmp);
7286 	/* sdma */
7287 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7288 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7289 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7290 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7291 	/* compute queues */
7292 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7293 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7294 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7295 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7296 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7297 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7298 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7299 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7300 	/* grbm */
7301 	WREG32(GRBM_INT_CNTL, 0);
7302 	/* SRBM */
7303 	WREG32(SRBM_INT_CNTL, 0);
7304 	/* vline/vblank, etc. */
7305 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7306 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7307 	if (rdev->num_crtc >= 4) {
7308 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7309 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7310 	}
7311 	if (rdev->num_crtc >= 6) {
7312 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7313 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7314 	}
7315 	/* pflip */
7316 	if (rdev->num_crtc >= 2) {
7317 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7318 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7319 	}
7320 	if (rdev->num_crtc >= 4) {
7321 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7322 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7323 	}
7324 	if (rdev->num_crtc >= 6) {
7325 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7326 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7327 	}
7328 
7329 	/* dac hotplug */
7330 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7331 
7332 	/* digital hotplug */
7333 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7334 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7335 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7336 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7337 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7338 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7339 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7340 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7341 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7342 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7343 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7344 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7345 
7346 }
7347 
7348 /**
7349  * cik_irq_init - init and enable the interrupt ring
7350  *
7351  * @rdev: radeon_device pointer
7352  *
7353  * Allocate a ring buffer for the interrupt controller,
7354  * enable the RLC, disable interrupts, enable the IH
7355  * ring buffer and enable it (CIK).
7356  * Called at device load and resume.
7357  * Returns 0 for success, errors for failure.
7358  */
7359 static int cik_irq_init(struct radeon_device *rdev)
7360 {
7361 	int ret = 0;
7362 	int rb_bufsz;
7363 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7364 
7365 	/* allocate ring */
7366 	ret = r600_ih_ring_alloc(rdev);
7367 	if (ret)
7368 		return ret;
7369 
7370 	/* disable irqs */
7371 	cik_disable_interrupts(rdev);
7372 
7373 	/* init rlc */
7374 	ret = cik_rlc_resume(rdev);
7375 	if (ret) {
7376 		r600_ih_ring_fini(rdev);
7377 		return ret;
7378 	}
7379 
7380 	/* setup interrupt control */
7381 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7382 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7383 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7384 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7385 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7386 	 */
7387 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7388 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7389 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7390 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7391 
7392 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7393 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7394 
7395 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7396 		      IH_WPTR_OVERFLOW_CLEAR |
7397 		      (rb_bufsz << 1));
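	/* rb_bufsz is log2 of the ring size in dwords; the register encodes
	 * it in a size field starting at bit 1, hence the shift
	 */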
7398 
7399 	if (rdev->wb.enabled)
7400 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7401 
7402 	/* set the writeback address whether it's enabled or not */
7403 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7404 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7405 
7406 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7407 
7408 	/* set rptr, wptr to 0 */
7409 	WREG32(IH_RB_RPTR, 0);
7410 	WREG32(IH_RB_WPTR, 0);
7411 
7412 	/* Default settings for IH_CNTL (disabled at first) */
7413 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7414 	/* RPTR_REARM only works if msi's are enabled */
7415 	if (rdev->msi_enabled)
7416 		ih_cntl |= RPTR_REARM;
7417 	WREG32(IH_CNTL, ih_cntl);
7418 
7419 	/* force the active interrupt state to all disabled */
7420 	cik_disable_interrupt_state(rdev);
7421 
7422 	pci_set_master(rdev->pdev);
7423 
7424 	/* enable irqs */
7425 	cik_enable_interrupts(rdev);
7426 
7427 	return ret;
7428 }
7429 
7430 /**
7431  * cik_irq_set - enable/disable interrupt sources
7432  *
7433  * @rdev: radeon_device pointer
7434  *
7435  * Enable interrupt sources on the GPU (vblanks, hpd,
7436  * etc.) (CIK).
7437  * Returns 0 for success, errors for failure.
7438  */
7439 int cik_irq_set(struct radeon_device *rdev)
7440 {
7441 	u32 cp_int_cntl;
7442 	u32 cp_m1p0;
7443 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7444 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7445 	u32 grbm_int_cntl = 0;
7446 	u32 dma_cntl, dma_cntl1;
7447 
7448 	if (!rdev->irq.installed) {
7449 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7450 		return -EINVAL;
7451 	}
7452 	/* don't enable anything if the ih is disabled */
7453 	if (!rdev->ih.enabled) {
7454 		cik_disable_interrupts(rdev);
7455 		/* force the active interrupt state to all disabled */
7456 		cik_disable_interrupt_state(rdev);
7457 		return 0;
7458 	}
7459 
7460 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7461 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7462 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7463 
7464 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7465 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7466 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7467 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7468 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7469 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7470 
7471 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7472 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7473 
7474 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7475 
7476 	/* enable CP interrupts on all rings */
7477 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7478 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7479 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7480 	}
7481 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7482 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7483 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7484 		if (ring->me == 1) {
7485 			switch (ring->pipe) {
7486 			case 0:
7487 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7488 				break;
7489 			default:
7490 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7491 				break;
7492 			}
7493 		} else {
7494 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7495 		}
7496 	}
7497 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7498 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7499 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7500 		if (ring->me == 1) {
7501 			switch (ring->pipe) {
7502 			case 0:
7503 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7504 				break;
7505 			default:
7506 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7507 				break;
7508 			}
7509 		} else {
7510 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7511 		}
7512 	}
7513 
7514 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7515 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7516 		dma_cntl |= TRAP_ENABLE;
7517 	}
7518 
7519 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7520 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7521 		dma_cntl1 |= TRAP_ENABLE;
7522 	}
7523 
7524 	if (rdev->irq.crtc_vblank_int[0] ||
7525 	    atomic_read(&rdev->irq.pflip[0])) {
7526 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7527 		crtc1 |= VBLANK_INTERRUPT_MASK;
7528 	}
7529 	if (rdev->irq.crtc_vblank_int[1] ||
7530 	    atomic_read(&rdev->irq.pflip[1])) {
7531 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7532 		crtc2 |= VBLANK_INTERRUPT_MASK;
7533 	}
7534 	if (rdev->irq.crtc_vblank_int[2] ||
7535 	    atomic_read(&rdev->irq.pflip[2])) {
7536 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7537 		crtc3 |= VBLANK_INTERRUPT_MASK;
7538 	}
7539 	if (rdev->irq.crtc_vblank_int[3] ||
7540 	    atomic_read(&rdev->irq.pflip[3])) {
7541 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7542 		crtc4 |= VBLANK_INTERRUPT_MASK;
7543 	}
7544 	if (rdev->irq.crtc_vblank_int[4] ||
7545 	    atomic_read(&rdev->irq.pflip[4])) {
7546 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7547 		crtc5 |= VBLANK_INTERRUPT_MASK;
7548 	}
7549 	if (rdev->irq.crtc_vblank_int[5] ||
7550 	    atomic_read(&rdev->irq.pflip[5])) {
7551 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7552 		crtc6 |= VBLANK_INTERRUPT_MASK;
7553 	}
7554 	if (rdev->irq.hpd[0]) {
7555 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7556 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7557 	}
7558 	if (rdev->irq.hpd[1]) {
7559 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7560 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7561 	}
7562 	if (rdev->irq.hpd[2]) {
7563 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7564 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7565 	}
7566 	if (rdev->irq.hpd[3]) {
7567 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7568 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7569 	}
7570 	if (rdev->irq.hpd[4]) {
7571 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7572 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7573 	}
7574 	if (rdev->irq.hpd[5]) {
7575 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7576 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7577 	}
7578 
7579 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7580 
7581 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7582 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7583 
7584 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7585 
7586 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7587 
7588 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7589 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7590 	if (rdev->num_crtc >= 4) {
7591 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7592 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7593 	}
7594 	if (rdev->num_crtc >= 6) {
7595 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7596 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7597 	}
7598 
7599 	if (rdev->num_crtc >= 2) {
7600 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7601 		       GRPH_PFLIP_INT_MASK);
7602 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7603 		       GRPH_PFLIP_INT_MASK);
7604 	}
7605 	if (rdev->num_crtc >= 4) {
7606 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7607 		       GRPH_PFLIP_INT_MASK);
7608 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7609 		       GRPH_PFLIP_INT_MASK);
7610 	}
7611 	if (rdev->num_crtc >= 6) {
7612 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7613 		       GRPH_PFLIP_INT_MASK);
7614 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7615 		       GRPH_PFLIP_INT_MASK);
7616 	}
7617 
7618 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7619 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7620 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7621 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7622 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7623 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7624 
7625 	/* posting read */
7626 	RREG32(SRBM_STATUS);
7627 
7628 	return 0;
7629 }
7630 
7631 /**
7632  * cik_irq_ack - ack interrupt sources
7633  *
7634  * @rdev: radeon_device pointer
7635  *
7636  * Ack interrupt sources on the GPU (vblanks, hpd,
7637  * etc.) (CIK).  Certain interrupt sources are sw
7638  * generated and do not require an explicit ack.
7639  */
7640 static inline void cik_irq_ack(struct radeon_device *rdev)
7641 {
7642 	u32 tmp;
7643 
7644 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7645 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7646 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7647 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7648 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7649 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7650 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7651 
7652 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7653 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7654 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7655 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7656 	if (rdev->num_crtc >= 4) {
7657 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7658 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7659 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7660 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7661 	}
7662 	if (rdev->num_crtc >= 6) {
7663 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7664 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7665 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7666 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7667 	}
7668 
7669 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7670 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7671 		       GRPH_PFLIP_INT_CLEAR);
7672 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7673 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7674 		       GRPH_PFLIP_INT_CLEAR);
7675 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7676 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7677 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7678 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7679 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7680 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7681 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7682 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7683 
7684 	if (rdev->num_crtc >= 4) {
7685 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7686 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7687 			       GRPH_PFLIP_INT_CLEAR);
7688 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7689 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7690 			       GRPH_PFLIP_INT_CLEAR);
7691 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7692 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7693 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7694 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7695 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7696 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7697 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7698 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7699 	}
7700 
7701 	if (rdev->num_crtc >= 6) {
7702 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7703 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7704 			       GRPH_PFLIP_INT_CLEAR);
7705 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7706 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7707 			       GRPH_PFLIP_INT_CLEAR);
7708 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7709 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7710 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7711 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7712 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7713 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7714 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7715 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7716 	}
7717 
7718 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7719 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7720 		tmp |= DC_HPDx_INT_ACK;
7721 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7722 	}
7723 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7724 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7725 		tmp |= DC_HPDx_INT_ACK;
7726 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7727 	}
7728 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7729 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7730 		tmp |= DC_HPDx_INT_ACK;
7731 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7732 	}
7733 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7734 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7735 		tmp |= DC_HPDx_INT_ACK;
7736 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7737 	}
7738 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7739 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7740 		tmp |= DC_HPDx_INT_ACK;
7741 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7742 	}
7743 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7744 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7745 		tmp |= DC_HPDx_INT_ACK;
7746 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7747 	}
7748 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7749 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7750 		tmp |= DC_HPDx_RX_INT_ACK;
7751 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7752 	}
7753 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7754 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7755 		tmp |= DC_HPDx_RX_INT_ACK;
7756 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7757 	}
7758 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7759 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7760 		tmp |= DC_HPDx_RX_INT_ACK;
7761 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7762 	}
7763 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7764 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7765 		tmp |= DC_HPDx_RX_INT_ACK;
7766 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7767 	}
7768 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7769 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7770 		tmp |= DC_HPDx_RX_INT_ACK;
7771 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7772 	}
7773 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7774 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7775 		tmp |= DC_HPDx_RX_INT_ACK;
7776 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7777 	}
7778 }
7779 
7780 /**
7781  * cik_irq_disable - disable interrupts
7782  *
7783  * @rdev: radeon_device pointer
7784  *
7785  * Disable interrupts on the hw (CIK).
7786  */
7787 static void cik_irq_disable(struct radeon_device *rdev)
7788 {
7789 	cik_disable_interrupts(rdev);
7790 	/* Wait and acknowledge irq */
7791 	mdelay(1);
7792 	cik_irq_ack(rdev);
7793 	cik_disable_interrupt_state(rdev);
7794 }
7795 
7796 /**
7797  * cik_irq_suspend - disable interrupts for suspend
7798  *
7799  * @rdev: radeon_device pointer
7800  *
7801  * Disable interrupts and stop the RLC (CIK).
7802  * Used for suspend.
7803  */
7804 static void cik_irq_suspend(struct radeon_device *rdev)
7805 {
7806 	cik_irq_disable(rdev);
7807 	cik_rlc_stop(rdev);
7808 }
7809 
7810 /**
7811  * cik_irq_fini - tear down interrupt support
7812  *
7813  * @rdev: radeon_device pointer
7814  *
7815  * Disable interrupts on the hw and free the IH ring
7816  * buffer (CIK).
7817  * Used for driver unload.
7818  */
7819 static void cik_irq_fini(struct radeon_device *rdev)
7820 {
7821 	cik_irq_suspend(rdev);
7822 	r600_ih_ring_fini(rdev);
7823 }
7824 
7825 /**
7826  * cik_get_ih_wptr - get the IH ring buffer wptr
7827  *
7828  * @rdev: radeon_device pointer
7829  *
7830  * Get the IH ring buffer wptr from either the register
7831  * or the writeback memory buffer (CIK).  Also check for
7832  * ring buffer overflow and deal with it.
7833  * Used by cik_irq_process().
7834  * Returns the value of the wptr.
7835  */
7836 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7837 {
7838 	u32 wptr, tmp;
7839 
7840 	if (rdev->wb.enabled)
7841 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7842 	else
7843 		wptr = RREG32(IH_RB_WPTR);
7844 
7845 	if (wptr & RB_OVERFLOW) {
7846 		wptr &= ~RB_OVERFLOW;
7847 		/* When a ring buffer overflow happens, start parsing interrupts
7848 		 * from the last vector that was not overwritten (wptr + 16).
7849 		 * Hopefully this allows us to catch up.
7850 		 */
7851 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7852 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7853 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7854 		tmp = RREG32(IH_RB_CNTL);
7855 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7856 		WREG32(IH_RB_CNTL, tmp);
7857 	}
7858 	return (wptr & rdev->ih.ptr_mask);
7859 }
7860 
7861 /*        CIK IV Ring
7862  * Each IV ring entry is 128 bits:
7863  * [7:0]    - interrupt source id
7864  * [31:8]   - reserved
7865  * [59:32]  - interrupt source data
7866  * [63:60]  - reserved
7867  * [71:64]  - RINGID
7868  *            CP:
7869  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7870  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7871  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7872  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7873  *            PIPE_ID - ME0 0=3D
7874  *                    - ME1&2 compute dispatcher (4 pipes each)
7875  *            SDMA:
7876  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7877  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7878  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7879  * [79:72]  - VMID
7880  * [95:80]  - PASID
7881  * [127:96] - reserved
7882  */
7883 /**
7884  * cik_irq_process - interrupt handler
7885  *
7886  * @rdev: radeon_device pointer
7887  *
7888  * Interrupt handler (CIK).  Walk the IH ring,
7889  * ack interrupts and schedule work to handle
7890  * interrupt events.
7891  * Returns irq process return code.
7892  */
7893 int cik_irq_process(struct radeon_device *rdev)
7894 {
7895 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7896 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7897 	u32 wptr;
7898 	u32 rptr;
7899 	u32 src_id, src_data, ring_id;
7900 	u8 me_id, pipe_id, queue_id;
7901 	u32 ring_index;
7902 	bool queue_hotplug = false;
7903 	bool queue_dp = false;
7904 	bool queue_reset = false;
7905 	u32 addr, status, mc_client;
7906 	bool queue_thermal = false;
7907 
7908 	if (!rdev->ih.enabled || rdev->shutdown)
7909 		return IRQ_NONE;
7910 
7911 	wptr = cik_get_ih_wptr(rdev);
7912 
7913 restart_ih:
7914 	/* is somebody else already processing irqs? */
7915 	if (atomic_xchg(&rdev->ih.lock, 1))
7916 		return IRQ_NONE;
7917 
7918 	rptr = rdev->ih.rptr;
7919 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7920 
7921 	/* Order reading of wptr vs. reading of IH ring data */
7922 	rmb();
7923 
7924 	/* display interrupts */
7925 	cik_irq_ack(rdev);
7926 
7927 	while (rptr != wptr) {
7928 		/* wptr/rptr are in bytes! */
7929 		ring_index = rptr / 4;
7930 
7931 		radeon_kfd_interrupt(rdev,
7932 				(const void *) &rdev->ih.ring[ring_index]);
7933 
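		/* each IV entry is four dwords; decode source id, source data
		 * and ring id per the layout documented above
		 */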
7934 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7935 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7936 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7937 
7938 		switch (src_id) {
7939 		case 1: /* D1 vblank/vline */
7940 			switch (src_data) {
7941 			case 0: /* D1 vblank */
7942 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7943 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7944 
7945 				if (rdev->irq.crtc_vblank_int[0]) {
7946 					drm_handle_vblank(rdev->ddev, 0);
7947 					rdev->pm.vblank_sync = true;
7948 					wake_up(&rdev->irq.vblank_queue);
7949 				}
7950 				if (atomic_read(&rdev->irq.pflip[0]))
7951 					radeon_crtc_handle_vblank(rdev, 0);
7952 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7953 				DRM_DEBUG("IH: D1 vblank\n");
7954 
7955 				break;
7956 			case 1: /* D1 vline */
7957 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7958 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7959 
7960 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7961 				DRM_DEBUG("IH: D1 vline\n");
7962 
7963 				break;
7964 			default:
7965 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7966 				break;
7967 			}
7968 			break;
7969 		case 2: /* D2 vblank/vline */
7970 			switch (src_data) {
7971 			case 0: /* D2 vblank */
7972 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7973 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7974 
7975 				if (rdev->irq.crtc_vblank_int[1]) {
7976 					drm_handle_vblank(rdev->ddev, 1);
7977 					rdev->pm.vblank_sync = true;
7978 					wake_up(&rdev->irq.vblank_queue);
7979 				}
7980 				if (atomic_read(&rdev->irq.pflip[1]))
7981 					radeon_crtc_handle_vblank(rdev, 1);
7982 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7983 				DRM_DEBUG("IH: D2 vblank\n");
7984 
7985 				break;
7986 			case 1: /* D2 vline */
7987 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7988 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7989 
7990 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7991 				DRM_DEBUG("IH: D2 vline\n");
7992 
7993 				break;
7994 			default:
7995 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7996 				break;
7997 			}
7998 			break;
7999 		case 3: /* D3 vblank/vline */
8000 			switch (src_data) {
8001 			case 0: /* D3 vblank */
8002 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8003 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8004 
8005 				if (rdev->irq.crtc_vblank_int[2]) {
8006 					drm_handle_vblank(rdev->ddev, 2);
8007 					rdev->pm.vblank_sync = true;
8008 					wake_up(&rdev->irq.vblank_queue);
8009 				}
8010 				if (atomic_read(&rdev->irq.pflip[2]))
8011 					radeon_crtc_handle_vblank(rdev, 2);
8012 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8013 				DRM_DEBUG("IH: D3 vblank\n");
8014 
8015 				break;
8016 			case 1: /* D3 vline */
8017 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8018 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8019 
8020 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8021 				DRM_DEBUG("IH: D3 vline\n");
8022 
8023 				break;
8024 			default:
8025 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8026 				break;
8027 			}
8028 			break;
8029 		case 4: /* D4 vblank/vline */
8030 			switch (src_data) {
8031 			case 0: /* D4 vblank */
8032 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8033 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8034 
8035 				if (rdev->irq.crtc_vblank_int[3]) {
8036 					drm_handle_vblank(rdev->ddev, 3);
8037 					rdev->pm.vblank_sync = true;
8038 					wake_up(&rdev->irq.vblank_queue);
8039 				}
8040 				if (atomic_read(&rdev->irq.pflip[3]))
8041 					radeon_crtc_handle_vblank(rdev, 3);
8042 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8043 				DRM_DEBUG("IH: D4 vblank\n");
8044 
8045 				break;
8046 			case 1: /* D4 vline */
8047 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8048 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8049 
8050 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8051 				DRM_DEBUG("IH: D4 vline\n");
8052 
8053 				break;
8054 			default:
8055 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8056 				break;
8057 			}
8058 			break;
8059 		case 5: /* D5 vblank/vline */
8060 			switch (src_data) {
8061 			case 0: /* D5 vblank */
8062 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8063 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8064 
8065 				if (rdev->irq.crtc_vblank_int[4]) {
8066 					drm_handle_vblank(rdev->ddev, 4);
8067 					rdev->pm.vblank_sync = true;
8068 					wake_up(&rdev->irq.vblank_queue);
8069 				}
8070 				if (atomic_read(&rdev->irq.pflip[4]))
8071 					radeon_crtc_handle_vblank(rdev, 4);
8072 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8073 				DRM_DEBUG("IH: D5 vblank\n");
8074 
8075 				break;
8076 			case 1: /* D5 vline */
8077 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8078 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8079 
8080 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8081 				DRM_DEBUG("IH: D5 vline\n");
8082 
8083 				break;
8084 			default:
8085 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8086 				break;
8087 			}
8088 			break;
8089 		case 6: /* D6 vblank/vline */
8090 			switch (src_data) {
8091 			case 0: /* D6 vblank */
8092 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8093 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8094 
8095 				if (rdev->irq.crtc_vblank_int[5]) {
8096 					drm_handle_vblank(rdev->ddev, 5);
8097 					rdev->pm.vblank_sync = true;
8098 					wake_up(&rdev->irq.vblank_queue);
8099 				}
8100 				if (atomic_read(&rdev->irq.pflip[5]))
8101 					radeon_crtc_handle_vblank(rdev, 5);
8102 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8103 				DRM_DEBUG("IH: D6 vblank\n");
8104 
8105 				break;
8106 			case 1: /* D6 vline */
8107 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8108 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8109 
8110 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8111 				DRM_DEBUG("IH: D6 vline\n");
8112 
8113 				break;
8114 			default:
8115 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8116 				break;
8117 			}
8118 			break;
8119 		case 8: /* D1 page flip */
8120 		case 10: /* D2 page flip */
8121 		case 12: /* D3 page flip */
8122 		case 14: /* D4 page flip */
8123 		case 16: /* D5 page flip */
8124 		case 18: /* D6 page flip */
8125 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8126 			if (radeon_use_pflipirq > 0)
8127 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8128 			break;
8129 		case 42: /* HPD hotplug */
8130 			switch (src_data) {
8131 			case 0:
8132 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8133 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8134 
8135 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8136 				queue_hotplug = true;
8137 				DRM_DEBUG("IH: HPD1\n");
8138 
8139 				break;
8140 			case 1:
8141 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8142 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8143 
8144 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8145 				queue_hotplug = true;
8146 				DRM_DEBUG("IH: HPD2\n");
8147 
8148 				break;
8149 			case 2:
8150 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8151 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8152 
8153 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8154 				queue_hotplug = true;
8155 				DRM_DEBUG("IH: HPD3\n");
8156 
8157 				break;
8158 			case 3:
8159 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8160 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8161 
8162 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8163 				queue_hotplug = true;
8164 				DRM_DEBUG("IH: HPD4\n");
8165 
8166 				break;
8167 			case 4:
8168 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8169 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8170 
8171 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8172 				queue_hotplug = true;
8173 				DRM_DEBUG("IH: HPD5\n");
8174 
8175 				break;
8176 			case 5:
8177 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8178 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8179 
8180 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8181 				queue_hotplug = true;
8182 				DRM_DEBUG("IH: HPD6\n");
8183 
8184 				break;
8185 			case 6:
8186 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8187 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8188 
8189 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8190 				queue_dp = true;
8191 				DRM_DEBUG("IH: HPD_RX 1\n");
8192 
8193 				break;
8194 			case 7:
8195 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8196 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8197 
8198 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8199 				queue_dp = true;
8200 				DRM_DEBUG("IH: HPD_RX 2\n");
8201 
8202 				break;
8203 			case 8:
8204 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8205 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8206 
8207 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8208 				queue_dp = true;
8209 				DRM_DEBUG("IH: HPD_RX 3\n");
8210 
8211 				break;
8212 			case 9:
8213 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8214 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8215 
8216 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8217 				queue_dp = true;
8218 				DRM_DEBUG("IH: HPD_RX 4\n");
8219 
8220 				break;
8221 			case 10:
8222 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8223 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8224 
8225 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8226 				queue_dp = true;
8227 				DRM_DEBUG("IH: HPD_RX 5\n");
8228 
8229 				break;
8230 			case 11:
8231 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8232 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8233 
8234 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8235 				queue_dp = true;
8236 				DRM_DEBUG("IH: HPD_RX 6\n");
8237 
8238 				break;
8239 			default:
8240 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8241 				break;
8242 			}
8243 			break;
8244 		case 96:
8245 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8246 			WREG32(SRBM_INT_ACK, 0x1);
8247 			break;
8248 		case 124: /* UVD */
8249 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8250 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8251 			break;
8252 		case 146:
8253 		case 147:
8254 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8255 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8256 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8257 			/* reset addr and status */
8258 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8259 			if (addr == 0x0 && status == 0x0)
8260 				break;
8261 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8262 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8263 				addr);
8264 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8265 				status);
8266 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8267 			break;
8268 		case 167: /* VCE */
8269 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8270 			switch (src_data) {
8271 			case 0:
8272 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8273 				break;
8274 			case 1:
8275 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8276 				break;
8277 			default:
8278 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8279 				break;
8280 			}
8281 			break;
8282 		case 176: /* GFX RB CP_INT */
8283 		case 177: /* GFX IB CP_INT */
8284 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8285 			break;
8286 		case 181: /* CP EOP event */
8287 			DRM_DEBUG("IH: CP EOP\n");
8288 			/* XXX check the bitfield order! */
8289 			me_id = (ring_id & 0x60) >> 5;
8290 			pipe_id = (ring_id & 0x18) >> 3;
8291 			queue_id = (ring_id & 0x7) >> 0;
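			/* Editor's note (illustrative, assuming the bit layout
			 * above): ring_id = 0x2b decodes as
			 *   me_id    = (0x2b & 0x60) >> 5 = 1
			 *   pipe_id  = (0x2b & 0x18) >> 3 = 1
			 *   queue_id =  0x2b & 0x7       = 3
			 * i.e. MEC 1, pipe 1, queue 3.
			 */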
8292 			switch (me_id) {
8293 			case 0:
8294 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8295 				break;
8296 			case 1:
8297 			case 2:
8298 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8299 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8300 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8301 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8302 				break;
8303 			}
8304 			break;
8305 		case 184: /* CP Privileged reg access */
8306 			DRM_ERROR("Illegal register access in command stream\n");
8307 			/* XXX check the bitfield order! */
8308 			me_id = (ring_id & 0x60) >> 5;
8309 			pipe_id = (ring_id & 0x18) >> 3;
8310 			queue_id = (ring_id & 0x7) >> 0;
8311 			switch (me_id) {
8312 			case 0:
8313 				/* This results in a full GPU reset, but all we need to do is soft
8314 				 * reset the CP for gfx
8315 				 */
8316 				queue_reset = true;
8317 				break;
8318 			case 1:
8319 				/* XXX compute */
8320 				queue_reset = true;
8321 				break;
8322 			case 2:
8323 				/* XXX compute */
8324 				queue_reset = true;
8325 				break;
8326 			}
8327 			break;
8328 		case 185: /* CP Privileged inst */
8329 			DRM_ERROR("Illegal instruction in command stream\n");
8330 			/* XXX check the bitfield order! */
8331 			me_id = (ring_id & 0x60) >> 5;
8332 			pipe_id = (ring_id & 0x18) >> 3;
8333 			queue_id = (ring_id & 0x7) >> 0;
8334 			switch (me_id) {
8335 			case 0:
8336 				/* This results in a full GPU reset, but all we need to do is soft
8337 				 * reset the CP for gfx
8338 				 */
8339 				queue_reset = true;
8340 				break;
8341 			case 1:
8342 				/* XXX compute */
8343 				queue_reset = true;
8344 				break;
8345 			case 2:
8346 				/* XXX compute */
8347 				queue_reset = true;
8348 				break;
8349 			}
8350 			break;
8351 		case 224: /* SDMA trap event */
8352 			/* XXX check the bitfield order! */
8353 			me_id = (ring_id & 0x3) >> 0;
8354 			queue_id = (ring_id & 0xc) >> 2;
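			/* Editor's note: with this layout, ring_id = 0x5 would give
			 * me_id = 0x5 & 0x3 = 1 (SDMA1) and
			 * queue_id = (0x5 & 0xc) >> 2 = 1 (illustrative only).
			 */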
8355 			DRM_DEBUG("IH: SDMA trap\n");
8356 			switch (me_id) {
8357 			case 0:
8358 				switch (queue_id) {
8359 				case 0:
8360 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8361 					break;
8362 				case 1:
8363 					/* XXX compute */
8364 					break;
8365 				case 2:
8366 					/* XXX compute */
8367 					break;
8368 				}
8369 				break;
8370 			case 1:
8371 				switch (queue_id) {
8372 				case 0:
8373 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8374 					break;
8375 				case 1:
8376 					/* XXX compute */
8377 					break;
8378 				case 2:
8379 					/* XXX compute */
8380 					break;
8381 				}
8382 				break;
8383 			}
8384 			break;
8385 		case 230: /* thermal low to high */
8386 			DRM_DEBUG("IH: thermal low to high\n");
8387 			rdev->pm.dpm.thermal.high_to_low = false;
8388 			queue_thermal = true;
8389 			break;
8390 		case 231: /* thermal high to low */
8391 			DRM_DEBUG("IH: thermal high to low\n");
8392 			rdev->pm.dpm.thermal.high_to_low = true;
8393 			queue_thermal = true;
8394 			break;
8395 		case 233: /* GUI IDLE */
8396 			DRM_DEBUG("IH: GUI idle\n");
8397 			break;
8398 		case 241: /* SDMA Privileged inst */
8399 		case 247: /* SDMA Privileged inst */
8400 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8401 			/* XXX check the bitfield order! */
8402 			me_id = (ring_id & 0x3) >> 0;
8403 			queue_id = (ring_id & 0xc) >> 2;
8404 			switch (me_id) {
8405 			case 0:
8406 				switch (queue_id) {
8407 				case 0:
8408 					queue_reset = true;
8409 					break;
8410 				case 1:
8411 					/* XXX compute */
8412 					queue_reset = true;
8413 					break;
8414 				case 2:
8415 					/* XXX compute */
8416 					queue_reset = true;
8417 					break;
8418 				}
8419 				break;
8420 			case 1:
8421 				switch (queue_id) {
8422 				case 0:
8423 					queue_reset = true;
8424 					break;
8425 				case 1:
8426 					/* XXX compute */
8427 					queue_reset = true;
8428 					break;
8429 				case 2:
8430 					/* XXX compute */
8431 					queue_reset = true;
8432 					break;
8433 				}
8434 				break;
8435 			}
8436 			break;
8437 		default:
8438 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8439 			break;
8440 		}
8441 
8442 		/* wptr/rptr are in bytes! */
8443 		rptr += 16;
8444 		rptr &= rdev->ih.ptr_mask;
8445 		WREG32(IH_RB_RPTR, rptr);
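		/* Editor's note: each IH ring entry is 128 bits (4 dwords),
		 * hence the 16-byte stride above.
		 */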
8446 	}
8447 	if (queue_dp)
8448 		schedule_work(&rdev->dp_work);
8449 	if (queue_hotplug)
8450 		schedule_work(&rdev->hotplug_work);
8451 	if (queue_reset) {
8452 		rdev->needs_reset = true;
8453 		wake_up_all(&rdev->fence_queue);
8454 	}
8455 	if (queue_thermal)
8456 		schedule_work(&rdev->pm.dpm.thermal.work);
8457 	rdev->ih.rptr = rptr;
8458 	atomic_set(&rdev->ih.lock, 0);
8459 
8460 	/* make sure wptr hasn't changed while processing */
8461 	wptr = cik_get_ih_wptr(rdev);
8462 	if (wptr != rptr)
8463 		goto restart_ih;
8464 
8465 	return IRQ_HANDLED;
8466 }
8467 
8468 /*
8469  * startup/shutdown callbacks
8470  */
8471 /**
8472  * cik_startup - program the asic to a functional state
8473  *
8474  * @rdev: radeon_device pointer
8475  *
8476  * Programs the asic to a functional state (CIK).
8477  * Called by cik_init() and cik_resume().
8478  * Returns 0 for success, error for failure.
8479  */
8480 static int cik_startup(struct radeon_device *rdev)
8481 {
8482 	struct radeon_ring *ring;
8483 	u32 nop;
8484 	int r;
8485 
8486 	/* enable pcie gen2/3 link */
8487 	cik_pcie_gen3_enable(rdev);
8488 	/* enable aspm */
8489 	cik_program_aspm(rdev);
8490 
8491 	/* scratch needs to be initialized before MC */
8492 	r = r600_vram_scratch_init(rdev);
8493 	if (r)
8494 		return r;
8495 
8496 	cik_mc_program(rdev);
8497 
8498 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8499 		r = ci_mc_load_microcode(rdev);
8500 		if (r) {
8501 			DRM_ERROR("Failed to load MC firmware!\n");
8502 			return r;
8503 		}
8504 	}
8505 
8506 	r = cik_pcie_gart_enable(rdev);
8507 	if (r)
8508 		return r;
8509 	cik_gpu_init(rdev);
8510 
8511 	/* allocate rlc buffers */
8512 	if (rdev->flags & RADEON_IS_IGP) {
8513 		if (rdev->family == CHIP_KAVERI) {
8514 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8515 			rdev->rlc.reg_list_size =
8516 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8517 		} else {
8518 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8519 			rdev->rlc.reg_list_size =
8520 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8521 		}
8522 	}
8523 	rdev->rlc.cs_data = ci_cs_data;
8524 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8525 	r = sumo_rlc_init(rdev);
8526 	if (r) {
8527 		DRM_ERROR("Failed to init rlc BOs!\n");
8528 		return r;
8529 	}
8530 
8531 	/* allocate wb buffer */
8532 	r = radeon_wb_init(rdev);
8533 	if (r)
8534 		return r;
8535 
8536 	/* allocate mec buffers */
8537 	r = cik_mec_init(rdev);
8538 	if (r) {
8539 		DRM_ERROR("Failed to init MEC BOs!\n");
8540 		return r;
8541 	}
8542 
8543 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8544 	if (r) {
8545 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8546 		return r;
8547 	}
8548 
8549 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8550 	if (r) {
8551 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8552 		return r;
8553 	}
8554 
8555 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8556 	if (r) {
8557 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8558 		return r;
8559 	}
8560 
8561 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8562 	if (r) {
8563 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8564 		return r;
8565 	}
8566 
8567 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8568 	if (r) {
8569 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8570 		return r;
8571 	}
8572 
8573 	r = radeon_uvd_resume(rdev);
8574 	if (!r) {
8575 		r = uvd_v4_2_resume(rdev);
8576 		if (!r) {
8577 			r = radeon_fence_driver_start_ring(rdev,
8578 							   R600_RING_TYPE_UVD_INDEX);
8579 			if (r)
8580 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8581 		}
8582 	}
8583 	if (r)
8584 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8585 
8586 	r = radeon_vce_resume(rdev);
8587 	if (!r) {
8588 		r = vce_v2_0_resume(rdev);
8589 		if (!r)
8590 			r = radeon_fence_driver_start_ring(rdev,
8591 							   TN_RING_TYPE_VCE1_INDEX);
8592 		if (!r)
8593 			r = radeon_fence_driver_start_ring(rdev,
8594 							   TN_RING_TYPE_VCE2_INDEX);
8595 	}
8596 	if (r) {
8597 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8598 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8599 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8600 	}
8601 
8602 	/* Enable IRQ */
8603 	if (!rdev->irq.installed) {
8604 		r = radeon_irq_kms_init(rdev);
8605 		if (r)
8606 			return r;
8607 	}
8608 
8609 	r = cik_irq_init(rdev);
8610 	if (r) {
8611 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8612 		radeon_irq_kms_fini(rdev);
8613 		return r;
8614 	}
8615 	cik_irq_set(rdev);
8616 
8617 	if (rdev->family == CHIP_HAWAII) {
8618 		if (rdev->new_fw)
8619 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8620 		else
8621 			nop = RADEON_CP_PACKET2;
8622 	} else {
8623 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8624 	}
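	/* Editor's note: older Hawaii CP firmware reportedly mishandles
	 * type-3 NOP padding, hence the type-2 fallback above; new firmware
	 * (rdev->new_fw) takes the common type-3 path.
	 */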
8625 
8626 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8627 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8628 			     nop);
8629 	if (r)
8630 		return r;
8631 
8632 	/* set up the compute queues */
8633 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8634 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8635 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8636 			     nop);
8637 	if (r)
8638 		return r;
8639 	ring->me = 1; /* first MEC */
8640 	ring->pipe = 0; /* first pipe */
8641 	ring->queue = 0; /* first queue */
8642 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8643 
8644 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8645 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8646 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8647 			     nop);
8648 	if (r)
8649 		return r;
8650 	/* dGPUs only have 1 MEC */
8651 	ring->me = 1; /* first MEC */
8652 	ring->pipe = 0; /* first pipe */
8653 	ring->queue = 1; /* second queue */
8654 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8655 
8656 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8657 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8658 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8659 	if (r)
8660 		return r;
8661 
8662 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8663 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8664 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8665 	if (r)
8666 		return r;
8667 
8668 	r = cik_cp_resume(rdev);
8669 	if (r)
8670 		return r;
8671 
8672 	r = cik_sdma_resume(rdev);
8673 	if (r)
8674 		return r;
8675 
8676 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8677 	if (ring->ring_size) {
8678 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8679 				     RADEON_CP_PACKET2);
8680 		if (!r)
8681 			r = uvd_v1_0_init(rdev);
8682 		if (r)
8683 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8684 	}
8685 
8686 	r = -ENOENT;
8687 
8688 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8689 	if (ring->ring_size)
8690 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8691 				     VCE_CMD_NO_OP);
8692 
8693 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8694 	if (ring->ring_size)
8695 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8696 				     VCE_CMD_NO_OP);
8697 
8698 	if (!r)
8699 		r = vce_v1_0_init(rdev);
8700 	else if (r != -ENOENT)
8701 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8702 
8703 	r = radeon_ib_pool_init(rdev);
8704 	if (r) {
8705 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8706 		return r;
8707 	}
8708 
8709 	r = radeon_vm_manager_init(rdev);
8710 	if (r) {
8711 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8712 		return r;
8713 	}
8714 
8715 	r = radeon_audio_init(rdev);
8716 	if (r)
8717 		return r;
8718 
8719 	r = radeon_kfd_resume(rdev);
8720 	if (r)
8721 		return r;
8722 
8723 	return 0;
8724 }
8725 
8726 /**
8727  * cik_resume - resume the asic to a functional state
8728  *
8729  * @rdev: radeon_device pointer
8730  *
8731  * Programs the asic to a functional state (CIK).
8732  * Called at resume.
8733  * Returns 0 for success, error for failure.
8734  */
8735 int cik_resume(struct radeon_device *rdev)
8736 {
8737 	int r;
8738 
8739 	/* post card */
8740 	atom_asic_init(rdev->mode_info.atom_context);
8741 
8742 	/* init golden registers */
8743 	cik_init_golden_registers(rdev);
8744 
8745 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8746 		radeon_pm_resume(rdev);
8747 
8748 	rdev->accel_working = true;
8749 	r = cik_startup(rdev);
8750 	if (r) {
8751 		DRM_ERROR("cik startup failed on resume\n");
8752 		rdev->accel_working = false;
8753 		return r;
8754 	}
8755 
8756 	return r;
8757 
8758 }
8759 
8760 /**
8761  * cik_suspend - suspend the asic
8762  *
8763  * @rdev: radeon_device pointer
8764  *
8765  * Bring the chip into a state suitable for suspend (CIK).
8766  * Called at suspend.
8767  * Returns 0 for success.
8768  */
8769 int cik_suspend(struct radeon_device *rdev)
8770 {
8771 	radeon_kfd_suspend(rdev);
8772 	radeon_pm_suspend(rdev);
8773 	radeon_audio_fini(rdev);
8774 	radeon_vm_manager_fini(rdev);
8775 	cik_cp_enable(rdev, false);
8776 	cik_sdma_enable(rdev, false);
8777 	uvd_v1_0_fini(rdev);
8778 	radeon_uvd_suspend(rdev);
8779 	radeon_vce_suspend(rdev);
8780 	cik_fini_pg(rdev);
8781 	cik_fini_cg(rdev);
8782 	cik_irq_suspend(rdev);
8783 	radeon_wb_disable(rdev);
8784 	cik_pcie_gart_disable(rdev);
8785 	return 0;
8786 }
8787 
8788 /* Plan is to move initialization into this function and use
8789  * helper functions so that radeon_device_init does pretty much
8790  * nothing more than call asic-specific functions. This
8791  * should also allow us to remove a bunch of callback functions
8792  * like vram_info.
8793  */
8794 /**
8795  * cik_init - asic specific driver and hw init
8796  *
8797  * @rdev: radeon_device pointer
8798  *
8799  * Setup asic specific driver variables and program the hw
8800  * to a functional state (CIK).
8801  * Called at driver startup.
8802  * Returns 0 for success, errors for failure.
8803  */
8804 int cik_init(struct radeon_device *rdev)
8805 {
8806 	struct radeon_ring *ring;
8807 	int r;
8808 
8809 	/* Read BIOS */
8810 	if (!radeon_get_bios(rdev)) {
8811 		if (ASIC_IS_AVIVO(rdev))
8812 			return -EINVAL;
8813 	}
8814 	/* Must be an ATOMBIOS */
8815 	if (!rdev->is_atom_bios) {
8816 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8817 		return -EINVAL;
8818 	}
8819 	r = radeon_atombios_init(rdev);
8820 	if (r)
8821 		return r;
8822 
8823 	/* Post card if necessary */
8824 	if (!radeon_card_posted(rdev)) {
8825 		if (!rdev->bios) {
8826 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8827 			return -EINVAL;
8828 		}
8829 		DRM_INFO("GPU not posted. posting now...\n");
8830 		atom_asic_init(rdev->mode_info.atom_context);
8831 	}
8832 	/* init golden registers */
8833 	cik_init_golden_registers(rdev);
8834 	/* Initialize scratch registers */
8835 	cik_scratch_init(rdev);
8836 	/* Initialize surface registers */
8837 	radeon_surface_init(rdev);
8838 	/* Initialize clocks */
8839 	radeon_get_clock_info(rdev->ddev);
8840 
8841 	/* Fence driver */
8842 	r = radeon_fence_driver_init(rdev);
8843 	if (r)
8844 		return r;
8845 
8846 	/* initialize memory controller */
8847 	r = cik_mc_init(rdev);
8848 	if (r)
8849 		return r;
8850 	/* Memory manager */
8851 	r = radeon_bo_init(rdev);
8852 	if (r)
8853 		return r;
8854 
8855 	if (rdev->flags & RADEON_IS_IGP) {
8856 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8857 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8858 			r = cik_init_microcode(rdev);
8859 			if (r) {
8860 				DRM_ERROR("Failed to load firmware!\n");
8861 				return r;
8862 			}
8863 		}
8864 	} else {
8865 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8866 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8867 		    !rdev->mc_fw) {
8868 			r = cik_init_microcode(rdev);
8869 			if (r) {
8870 				DRM_ERROR("Failed to load firmware!\n");
8871 				return r;
8872 			}
8873 		}
8874 	}
8875 
8876 	/* Initialize power management */
8877 	radeon_pm_init(rdev);
8878 
8879 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8880 	ring->ring_obj = NULL;
8881 	r600_ring_init(rdev, ring, 1024 * 1024);
8882 
8883 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8884 	ring->ring_obj = NULL;
8885 	r600_ring_init(rdev, ring, 1024 * 1024);
8886 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8887 	if (r)
8888 		return r;
8889 
8890 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8891 	ring->ring_obj = NULL;
8892 	r600_ring_init(rdev, ring, 1024 * 1024);
8893 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8894 	if (r)
8895 		return r;
8896 
8897 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8898 	ring->ring_obj = NULL;
8899 	r600_ring_init(rdev, ring, 256 * 1024);
8900 
8901 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8902 	ring->ring_obj = NULL;
8903 	r600_ring_init(rdev, ring, 256 * 1024);
8904 
8905 	r = radeon_uvd_init(rdev);
8906 	if (!r) {
8907 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8908 		ring->ring_obj = NULL;
8909 		r600_ring_init(rdev, ring, 4096);
8910 	}
8911 
8912 	r = radeon_vce_init(rdev);
8913 	if (!r) {
8914 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8915 		ring->ring_obj = NULL;
8916 		r600_ring_init(rdev, ring, 4096);
8917 
8918 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8919 		ring->ring_obj = NULL;
8920 		r600_ring_init(rdev, ring, 4096);
8921 	}
8922 
8923 	rdev->ih.ring_obj = NULL;
8924 	r600_ih_ring_init(rdev, 64 * 1024);
8925 
8926 	r = r600_pcie_gart_init(rdev);
8927 	if (r)
8928 		return r;
8929 
8930 	rdev->accel_working = true;
8931 	r = cik_startup(rdev);
8932 	if (r) {
8933 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8934 		cik_cp_fini(rdev);
8935 		cik_sdma_fini(rdev);
8936 		cik_irq_fini(rdev);
8937 		sumo_rlc_fini(rdev);
8938 		cik_mec_fini(rdev);
8939 		radeon_wb_fini(rdev);
8940 		radeon_ib_pool_fini(rdev);
8941 		radeon_vm_manager_fini(rdev);
8942 		radeon_irq_kms_fini(rdev);
8943 		cik_pcie_gart_fini(rdev);
8944 		rdev->accel_working = false;
8945 	}
8946 
8947 	/* Don't start up if the MC ucode is missing.
8948 	 * The default clocks and voltages before the MC ucode
8949 	 * is loaded are not sufficient for advanced operations.
8950 	 */
8951 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8952 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8953 		return -EINVAL;
8954 	}
8955 
8956 	return 0;
8957 }
8958 
8959 /**
8960  * cik_fini - asic specific driver and hw fini
8961  *
8962  * @rdev: radeon_device pointer
8963  *
8964  * Tear down the asic specific driver variables and program the hw
8965  * to an idle state (CIK).
8966  * Called at driver unload.
8967  */
8968 void cik_fini(struct radeon_device *rdev)
8969 {
8970 	radeon_pm_fini(rdev);
8971 	cik_cp_fini(rdev);
8972 	cik_sdma_fini(rdev);
8973 	cik_fini_pg(rdev);
8974 	cik_fini_cg(rdev);
8975 	cik_irq_fini(rdev);
8976 	sumo_rlc_fini(rdev);
8977 	cik_mec_fini(rdev);
8978 	radeon_wb_fini(rdev);
8979 	radeon_vm_manager_fini(rdev);
8980 	radeon_ib_pool_fini(rdev);
8981 	radeon_irq_kms_fini(rdev);
8982 	uvd_v1_0_fini(rdev);
8983 	radeon_uvd_fini(rdev);
8984 	radeon_vce_fini(rdev);
8985 	cik_pcie_gart_fini(rdev);
8986 	r600_vram_scratch_fini(rdev);
8987 	radeon_gem_fini(rdev);
8988 	radeon_fence_driver_fini(rdev);
8989 	radeon_bo_fini(rdev);
8990 	radeon_atombios_fini(rdev);
8991 	kfree(rdev->bios);
8992 	rdev->bios = NULL;
8993 }
8994 
8995 void dce8_program_fmt(struct drm_encoder *encoder)
8996 {
8997 	struct drm_device *dev = encoder->dev;
8998 	struct radeon_device *rdev = dev->dev_private;
8999 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9000 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9001 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9002 	int bpc = 0;
9003 	u32 tmp = 0;
9004 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9005 
9006 	if (connector) {
9007 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9008 		bpc = radeon_get_monitor_bpc(connector);
9009 		dither = radeon_connector->dither;
9010 	}
9011 
9012 	/* LVDS/eDP FMT is set up by atom */
9013 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9014 		return;
9015 
9016 	/* not needed for analog */
9017 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9018 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9019 		return;
9020 
9021 	if (bpc == 0)
9022 		return;
9023 
9024 	switch (bpc) {
9025 	case 6:
9026 		if (dither == RADEON_FMT_DITHER_ENABLE)
9027 			/* XXX sort out optimal dither settings */
9028 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9029 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9030 		else
9031 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9032 		break;
9033 	case 8:
9034 		if (dither == RADEON_FMT_DITHER_ENABLE)
9035 			/* XXX sort out optimal dither settings */
9036 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9037 				FMT_RGB_RANDOM_ENABLE |
9038 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9039 		else
9040 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9041 		break;
9042 	case 10:
9043 		if (dither == RADEON_FMT_DITHER_ENABLE)
9044 			/* XXX sort out optimal dither settings */
9045 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9046 				FMT_RGB_RANDOM_ENABLE |
9047 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9048 		else
9049 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9050 		break;
9051 	default:
9052 		/* not needed */
9053 		break;
9054 	}
9055 
9056 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9057 }
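
/* Editor's note: FMT_SPATIAL_DITHER_DEPTH()/FMT_TRUNCATE_DEPTH() take
 * 0, 1 or 2 for 6, 8 or 10 bpc respectively, matching the switch in
 * dce8_program_fmt() above.
 */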
9058 
9059 /* display watermark setup */
9060 /**
9061  * dce8_line_buffer_adjust - Set up the line buffer
9062  *
9063  * @rdev: radeon_device pointer
9064  * @radeon_crtc: the selected display controller
9065  * @mode: the current display mode on the selected display
9066  * controller
9067  *
 * Set up the line buffer allocation for
9069  * the selected display controller (CIK).
9070  * Returns the line buffer size in pixels.
9071  */
9072 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9073 				   struct radeon_crtc *radeon_crtc,
9074 				   struct drm_display_mode *mode)
9075 {
9076 	u32 tmp, buffer_alloc, i;
9077 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9078 	/*
9079 	 * Line Buffer Setup
9080 	 * There are 6 line buffers, one for each display controller.
9081 	 * There are 3 partitions per LB. Select the number of partitions
9082 	 * to enable based on the display width.  For display widths larger
9083 	 * than 4096, you need to use 2 display controllers and combine
9084 	 * them using the stereo blender.
9085 	 */
9086 	if (radeon_crtc->base.enabled && mode) {
9087 		if (mode->crtc_hdisplay < 1920) {
9088 			tmp = 1;
9089 			buffer_alloc = 2;
9090 		} else if (mode->crtc_hdisplay < 2560) {
9091 			tmp = 2;
9092 			buffer_alloc = 2;
9093 		} else if (mode->crtc_hdisplay < 4096) {
9094 			tmp = 0;
9095 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9096 		} else {
9097 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9098 			tmp = 0;
9099 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9100 		}
9101 	} else {
9102 		tmp = 1;
9103 		buffer_alloc = 0;
9104 	}
9105 
9106 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9107 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9108 
9109 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9110 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9111 	for (i = 0; i < rdev->usec_timeout; i++) {
9112 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9113 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9114 			break;
9115 		udelay(1);
9116 	}
9117 
9118 	if (radeon_crtc->base.enabled && mode) {
9119 		switch (tmp) {
9120 		case 0:
9121 		default:
9122 			return 4096 * 2;
9123 		case 1:
9124 			return 1920 * 2;
9125 		case 2:
9126 			return 2560 * 2;
9127 		}
9128 	}
9129 
9130 	/* controller not enabled, so no lb used */
9131 	return 0;
9132 }
9133 
9134 /**
9135  * cik_get_number_of_dram_channels - get the number of dram channels
9136  *
9137  * @rdev: radeon_device pointer
9138  *
9139  * Look up the number of video ram channels (CIK).
9140  * Used for display watermark bandwidth calculations
9141  * Returns the number of dram channels
9142  */
9143 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9144 {
9145 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9146 
9147 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9148 	case 0:
9149 	default:
9150 		return 1;
9151 	case 1:
9152 		return 2;
9153 	case 2:
9154 		return 4;
9155 	case 3:
9156 		return 8;
9157 	case 4:
9158 		return 3;
9159 	case 5:
9160 		return 6;
9161 	case 6:
9162 		return 10;
9163 	case 7:
9164 		return 12;
9165 	case 8:
9166 		return 16;
9167 	}
9168 }
9169 
9170 struct dce8_wm_params {
9171 	u32 dram_channels; /* number of dram channels */
9172 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9173 	u32 sclk;          /* engine clock in kHz */
9174 	u32 disp_clk;      /* display clock in kHz */
9175 	u32 src_width;     /* viewport width */
9176 	u32 active_time;   /* active display time in ns */
9177 	u32 blank_time;    /* blank time in ns */
9178 	bool interlaced;    /* mode is interlaced */
9179 	fixed20_12 vsc;    /* vertical scale ratio */
9180 	u32 num_heads;     /* number of active crtcs */
9181 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9182 	u32 lb_size;       /* line buffer allocated to pipe */
9183 	u32 vtaps;         /* vertical scaler taps */
9184 };
9185 
9186 /**
9187  * dce8_dram_bandwidth - get the dram bandwidth
9188  *
9189  * @wm: watermark calculation data
9190  *
9191  * Calculate the raw dram bandwidth (CIK).
9192  * Used for display watermark bandwidth calculations
9193  * Returns the dram bandwidth in MBytes/s
9194  */
9195 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9196 {
9197 	/* Calculate raw DRAM Bandwidth */
9198 	fixed20_12 dram_efficiency; /* 0.7 */
9199 	fixed20_12 yclk, dram_channels, bandwidth;
9200 	fixed20_12 a;
9201 
9202 	a.full = dfixed_const(1000);
9203 	yclk.full = dfixed_const(wm->yclk);
9204 	yclk.full = dfixed_div(yclk, a);
9205 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9206 	a.full = dfixed_const(10);
9207 	dram_efficiency.full = dfixed_const(7);
9208 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9209 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9210 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9211 
9212 	return dfixed_trunc(bandwidth);
9213 }
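
/* Editor's note: a worked example of the fixed-point math above, with
 * assumed inputs: yclk = 1000000 (kHz) and dram_channels = 2 give
 * (2 * 4) * (1000000 / 1000) * 0.7 = 5600 MBytes/s.
 */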
9214 
9215 /**
9216  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9217  *
9218  * @wm: watermark calculation data
9219  *
9220  * Calculate the dram bandwidth used for display (CIK).
9221  * Used for display watermark bandwidth calculations
9222  * Returns the dram bandwidth for display in MBytes/s
9223  */
9224 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9225 {
9226 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9227 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9228 	fixed20_12 yclk, dram_channels, bandwidth;
9229 	fixed20_12 a;
9230 
9231 	a.full = dfixed_const(1000);
9232 	yclk.full = dfixed_const(wm->yclk);
9233 	yclk.full = dfixed_div(yclk, a);
9234 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9235 	a.full = dfixed_const(10);
9236 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9237 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9238 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9239 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9240 
9241 	return dfixed_trunc(bandwidth);
9242 }
9243 
9244 /**
9245  * dce8_data_return_bandwidth - get the data return bandwidth
9246  *
9247  * @wm: watermark calculation data
9248  *
9249  * Calculate the data return bandwidth used for display (CIK).
9250  * Used for display watermark bandwidth calculations
9251  * Returns the data return bandwidth in MBytes/s
9252  */
9253 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9254 {
9255 	/* Calculate the display Data return Bandwidth */
9256 	fixed20_12 return_efficiency; /* 0.8 */
9257 	fixed20_12 sclk, bandwidth;
9258 	fixed20_12 a;
9259 
9260 	a.full = dfixed_const(1000);
9261 	sclk.full = dfixed_const(wm->sclk);
9262 	sclk.full = dfixed_div(sclk, a);
9263 	a.full = dfixed_const(10);
9264 	return_efficiency.full = dfixed_const(8);
9265 	return_efficiency.full = dfixed_div(return_efficiency, a);
9266 	a.full = dfixed_const(32);
9267 	bandwidth.full = dfixed_mul(a, sclk);
9268 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9269 
9270 	return dfixed_trunc(bandwidth);
9271 }
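
/* Editor's note: illustrative numbers for the formula above:
 * sclk = 800000 (kHz) gives 32 * (800000 / 1000) * 0.8 = 20480 MBytes/s.
 */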
9272 
9273 /**
9274  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9275  *
9276  * @wm: watermark calculation data
9277  *
9278  * Calculate the dmif bandwidth used for display (CIK).
9279  * Used for display watermark bandwidth calculations
9280  * Returns the dmif bandwidth in MBytes/s
9281  */
9282 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9283 {
9284 	/* Calculate the DMIF Request Bandwidth */
9285 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9286 	fixed20_12 disp_clk, bandwidth;
9287 	fixed20_12 a, b;
9288 
9289 	a.full = dfixed_const(1000);
9290 	disp_clk.full = dfixed_const(wm->disp_clk);
9291 	disp_clk.full = dfixed_div(disp_clk, a);
9292 	a.full = dfixed_const(32);
9293 	b.full = dfixed_mul(a, disp_clk);
9294 
9295 	a.full = dfixed_const(10);
9296 	disp_clk_request_efficiency.full = dfixed_const(8);
9297 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9298 
9299 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9300 
9301 	return dfixed_trunc(bandwidth);
9302 }
9303 
9304 /**
9305  * dce8_available_bandwidth - get the min available bandwidth
9306  *
9307  * @wm: watermark calculation data
9308  *
9309  * Calculate the min available bandwidth used for display (CIK).
9310  * Used for display watermark bandwidth calculations
9311  * Returns the min available bandwidth in MBytes/s
9312  */
9313 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9314 {
9315 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9316 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9317 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9318 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9319 
9320 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9321 }
9322 
9323 /**
9324  * dce8_average_bandwidth - get the average available bandwidth
9325  *
9326  * @wm: watermark calculation data
9327  *
9328  * Calculate the average available bandwidth used for display (CIK).
9329  * Used for display watermark bandwidth calculations
9330  * Returns the average available bandwidth in MBytes/s
9331  */
9332 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9333 {
9334 	/* Calculate the display mode Average Bandwidth
9335 	 * DisplayMode should contain the source and destination dimensions,
9336 	 * timing, etc.
9337 	 */
9338 	fixed20_12 bpp;
9339 	fixed20_12 line_time;
9340 	fixed20_12 src_width;
9341 	fixed20_12 bandwidth;
9342 	fixed20_12 a;
9343 
9344 	a.full = dfixed_const(1000);
9345 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9346 	line_time.full = dfixed_div(line_time, a);
9347 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9348 	src_width.full = dfixed_const(wm->src_width);
9349 	bandwidth.full = dfixed_mul(src_width, bpp);
9350 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9351 	bandwidth.full = dfixed_div(bandwidth, line_time);
9352 
9353 	return dfixed_trunc(bandwidth);
9354 }
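
/* Editor's note: with assumed values src_width = 1920, 4 bytes per
 * pixel, vsc = 1.0 and a 12800 ns line time, the formula above gives
 * 1920 * 4 / 12.8 us = 600 MBytes/s.
 */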
9355 
9356 /**
9357  * dce8_latency_watermark - get the latency watermark
9358  *
9359  * @wm: watermark calculation data
9360  *
9361  * Calculate the latency watermark (CIK).
9362  * Used for display watermark bandwidth calculations
9363  * Returns the latency watermark in ns
9364  */
9365 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9366 {
9367 	/* First calculate the latency in ns */
9368 	u32 mc_latency = 2000; /* 2000 ns. */
9369 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9370 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9371 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9372 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9373 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9374 		(wm->num_heads * cursor_line_pair_return_time);
9375 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9376 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9377 	u32 tmp, dmif_size = 12288;
9378 	fixed20_12 a, b, c;
9379 
9380 	if (wm->num_heads == 0)
9381 		return 0;
9382 
9383 	a.full = dfixed_const(2);
9384 	b.full = dfixed_const(1);
9385 	if ((wm->vsc.full > a.full) ||
9386 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9387 	    (wm->vtaps >= 5) ||
9388 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9389 		max_src_lines_per_dst_line = 4;
9390 	else
9391 		max_src_lines_per_dst_line = 2;
9392 
9393 	a.full = dfixed_const(available_bandwidth);
9394 	b.full = dfixed_const(wm->num_heads);
9395 	a.full = dfixed_div(a, b);
9396 
9397 	b.full = dfixed_const(mc_latency + 512);
9398 	c.full = dfixed_const(wm->disp_clk);
9399 	b.full = dfixed_div(b, c);
9400 
9401 	c.full = dfixed_const(dmif_size);
9402 	b.full = dfixed_div(c, b);
9403 
9404 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9405 
9406 	b.full = dfixed_const(1000);
9407 	c.full = dfixed_const(wm->disp_clk);
9408 	b.full = dfixed_div(c, b);
9409 	c.full = dfixed_const(wm->bytes_per_pixel);
9410 	b.full = dfixed_mul(b, c);
9411 
9412 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9413 
9414 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9415 	b.full = dfixed_const(1000);
9416 	c.full = dfixed_const(lb_fill_bw);
9417 	b.full = dfixed_div(c, b);
9418 	a.full = dfixed_div(a, b);
9419 	line_fill_time = dfixed_trunc(a);
9420 
9421 	if (line_fill_time < wm->active_time)
9422 		return latency;
9423 	else
9424 		return latency + (line_fill_time - wm->active_time);
9425 
9426 }
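
/* Editor's note, paraphrasing the code above: the watermark is the sum
 * of mc_latency, dc pipe latency and the time other heads spend
 * returning their data, extended by any shortfall of line-buffer fill
 * time versus active display time.
 */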
9427 
9428 /**
9429  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9430  * average and available dram bandwidth
9431  *
9432  * @wm: watermark calculation data
9433  *
9434  * Check if the display average bandwidth fits in the display
9435  * dram bandwidth (CIK).
9436  * Used for display watermark bandwidth calculations
9437  * Returns true if the display fits, false if not.
9438  */
9439 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9440 {
9441 	if (dce8_average_bandwidth(wm) <=
9442 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9443 		return true;
9444 	else
9445 		return false;
9446 }
9447 
9448 /**
9449  * dce8_average_bandwidth_vs_available_bandwidth - check
9450  * average and available bandwidth
9451  *
9452  * @wm: watermark calculation data
9453  *
9454  * Check if the display average bandwidth fits in the display
9455  * available bandwidth (CIK).
9456  * Used for display watermark bandwidth calculations
9457  * Returns true if the display fits, false if not.
9458  */
9459 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9460 {
9461 	if (dce8_average_bandwidth(wm) <=
9462 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9463 		return true;
9464 	else
9465 		return false;
9466 }
9467 
9468 /**
9469  * dce8_check_latency_hiding - check latency hiding
9470  *
9471  * @wm: watermark calculation data
9472  *
9473  * Check latency hiding (CIK).
9474  * Used for display watermark bandwidth calculations
9475  * Returns true if the display fits, false if not.
9476  */
9477 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9478 {
9479 	u32 lb_partitions = wm->lb_size / wm->src_width;
9480 	u32 line_time = wm->active_time + wm->blank_time;
9481 	u32 latency_tolerant_lines;
9482 	u32 latency_hiding;
9483 	fixed20_12 a;
9484 
9485 	a.full = dfixed_const(1);
9486 	if (wm->vsc.full > a.full)
9487 		latency_tolerant_lines = 1;
9488 	else {
9489 		if (lb_partitions <= (wm->vtaps + 1))
9490 			latency_tolerant_lines = 1;
9491 		else
9492 			latency_tolerant_lines = 2;
9493 	}
9494 
9495 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9496 
9497 	if (dce8_latency_watermark(wm) <= latency_hiding)
9498 		return true;
9499 	else
9500 		return false;
9501 }
9502 
9503 /**
9504  * dce8_program_watermarks - program display watermarks
9505  *
9506  * @rdev: radeon_device pointer
9507  * @radeon_crtc: the selected display controller
9508  * @lb_size: line buffer size
9509  * @num_heads: number of display controllers in use
9510  *
9511  * Calculate and program the display watermarks for the
9512  * selected display controller (CIK).
9513  */
9514 static void dce8_program_watermarks(struct radeon_device *rdev,
9515 				    struct radeon_crtc *radeon_crtc,
9516 				    u32 lb_size, u32 num_heads)
9517 {
9518 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9519 	struct dce8_wm_params wm_low, wm_high;
9520 	u32 pixel_period;
9521 	u32 line_time = 0;
9522 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9523 	u32 tmp, wm_mask;
9524 
9525 	if (radeon_crtc->base.enabled && num_heads && mode) {
9526 		pixel_period = 1000000 / (u32)mode->clock;
9527 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9528 
9529 		/* watermark for high clocks */
9530 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9531 		    rdev->pm.dpm_enabled) {
9532 			wm_high.yclk =
9533 				radeon_dpm_get_mclk(rdev, false) * 10;
9534 			wm_high.sclk =
9535 				radeon_dpm_get_sclk(rdev, false) * 10;
9536 		} else {
9537 			wm_high.yclk = rdev->pm.current_mclk * 10;
9538 			wm_high.sclk = rdev->pm.current_sclk * 10;
9539 		}
9540 
9541 		wm_high.disp_clk = mode->clock;
9542 		wm_high.src_width = mode->crtc_hdisplay;
9543 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9544 		wm_high.blank_time = line_time - wm_high.active_time;
9545 		wm_high.interlaced = false;
9546 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9547 			wm_high.interlaced = true;
9548 		wm_high.vsc = radeon_crtc->vsc;
9549 		wm_high.vtaps = 1;
9550 		if (radeon_crtc->rmx_type != RMX_OFF)
9551 			wm_high.vtaps = 2;
9552 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9553 		wm_high.lb_size = lb_size;
9554 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9555 		wm_high.num_heads = num_heads;
9556 
9557 		/* set for high clocks */
9558 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9559 
9560 		/* possibly force display priority to high */
9561 		/* should really do this at mode validation time... */
9562 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9563 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9564 		    !dce8_check_latency_hiding(&wm_high) ||
9565 		    (rdev->disp_priority == 2)) {
9566 			DRM_DEBUG_KMS("force priority to high\n");
9567 		}
9568 
9569 		/* watermark for low clocks */
9570 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9571 		    rdev->pm.dpm_enabled) {
9572 			wm_low.yclk =
9573 				radeon_dpm_get_mclk(rdev, true) * 10;
9574 			wm_low.sclk =
9575 				radeon_dpm_get_sclk(rdev, true) * 10;
9576 		} else {
9577 			wm_low.yclk = rdev->pm.current_mclk * 10;
9578 			wm_low.sclk = rdev->pm.current_sclk * 10;
9579 		}
9580 
9581 		wm_low.disp_clk = mode->clock;
9582 		wm_low.src_width = mode->crtc_hdisplay;
9583 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9584 		wm_low.blank_time = line_time - wm_low.active_time;
9585 		wm_low.interlaced = false;
9586 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9587 			wm_low.interlaced = true;
9588 		wm_low.vsc = radeon_crtc->vsc;
9589 		wm_low.vtaps = 1;
9590 		if (radeon_crtc->rmx_type != RMX_OFF)
9591 			wm_low.vtaps = 2;
9592 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9593 		wm_low.lb_size = lb_size;
9594 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9595 		wm_low.num_heads = num_heads;
9596 
9597 		/* set for low clocks */
9598 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9599 
9600 		/* possibly force display priority to high */
9601 		/* should really do this at mode validation time... */
9602 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9603 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9604 		    !dce8_check_latency_hiding(&wm_low) ||
9605 		    (rdev->disp_priority == 2)) {
9606 			DRM_DEBUG_KMS("force priority to high\n");
9607 		}
9608 	}
9609 
9610 	/* select wm A */
9611 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9612 	tmp = wm_mask;
9613 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9614 	tmp |= LATENCY_WATERMARK_MASK(1);
9615 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9616 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9617 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9618 		LATENCY_HIGH_WATERMARK(line_time)));
9619 	/* select wm B */
9620 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9621 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9622 	tmp |= LATENCY_WATERMARK_MASK(2);
9623 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9624 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9625 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9626 		LATENCY_HIGH_WATERMARK(line_time)));
9627 	/* restore original selection */
9628 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9629 
9630 	/* save values for DPM */
9631 	radeon_crtc->line_time = line_time;
9632 	radeon_crtc->wm_high = latency_watermark_a;
9633 	radeon_crtc->wm_low = latency_watermark_b;
9634 }
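
/* Editor's note: watermarks A and B are written through the same
 * DPG_PIPE_LATENCY_CONTROL register; LATENCY_WATERMARK_MASK in
 * DPG_WATERMARK_MASK_CONTROL selects which set a write lands in, and
 * the original selection is restored afterwards.
 */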
9635 
9636 /**
9637  * dce8_bandwidth_update - program display watermarks
9638  *
9639  * @rdev: radeon_device pointer
9640  *
9641  * Calculate and program the display watermarks and line
9642  * buffer allocation (CIK).
9643  */
9644 void dce8_bandwidth_update(struct radeon_device *rdev)
9645 {
9646 	struct drm_display_mode *mode = NULL;
9647 	u32 num_heads = 0, lb_size;
9648 	int i;
9649 
9650 	if (!rdev->mode_info.mode_config_initialized)
9651 		return;
9652 
9653 	radeon_update_display_priority(rdev);
9654 
9655 	for (i = 0; i < rdev->num_crtc; i++) {
9656 		if (rdev->mode_info.crtcs[i]->base.enabled)
9657 			num_heads++;
9658 	}
9659 	for (i = 0; i < rdev->num_crtc; i++) {
9660 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9661 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9662 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9663 	}
9664 }
9665 
9666 /**
9667  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9668  *
9669  * @rdev: radeon_device pointer
9670  *
 * Fetches a GPU clock counter snapshot (CIK).
9672  * Returns the 64 bit clock counter snapshot.
9673  */
9674 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9675 {
9676 	uint64_t clock;
9677 
9678 	mutex_lock(&rdev->gpu_clock_mutex);
9679 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9680 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9681 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9682 	mutex_unlock(&rdev->gpu_clock_mutex);
9683 	return clock;
9684 }
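
/* Editor's note: the write to RLC_CAPTURE_GPU_CLOCK_COUNT latches the
 * 64-bit counter into the LSB/MSB registers, so the mutex keeps the
 * capture and the two reads atomic with respect to other callers.
 */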
9685 
9686 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9687                               u32 cntl_reg, u32 status_reg)
9688 {
9689 	int r, i;
9690 	struct atom_clock_dividers dividers;
9691 	uint32_t tmp;
9692 
9693 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9694 					   clock, false, &dividers);
9695 	if (r)
9696 		return r;
9697 
9698 	tmp = RREG32_SMC(cntl_reg);
9699 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9700 	tmp |= dividers.post_divider;
9701 	WREG32_SMC(cntl_reg, tmp);
9702 
9703 	for (i = 0; i < 100; i++) {
9704 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9705 			break;
9706 		mdelay(10);
9707 	}
9708 	if (i == 100)
9709 		return -ETIMEDOUT;
9710 
9711 	return 0;
9712 }
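
/* Editor's note: the poll above waits up to 100 * 10 ms = 1 s for
 * DCLK_STATUS to signal that the new divider has taken effect before
 * returning -ETIMEDOUT.
 */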
9713 
9714 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9715 {
9716 	int r = 0;
9717 
9718 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9719 	if (r)
9720 		return r;
9721 
9722 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9723 	return r;
9724 }
9725 
9726 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9727 {
9728 	int r, i;
9729 	struct atom_clock_dividers dividers;
9730 	u32 tmp;
9731 
9732 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9733 					   ecclk, false, &dividers);
9734 	if (r)
9735 		return r;
9736 
9737 	for (i = 0; i < 100; i++) {
9738 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9739 			break;
9740 		mdelay(10);
9741 	}
9742 	if (i == 100)
9743 		return -ETIMEDOUT;
9744 
9745 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9746 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9747 	tmp |= dividers.post_divider;
9748 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9749 
9750 	for (i = 0; i < 100; i++) {
9751 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9752 			break;
9753 		mdelay(10);
9754 	}
9755 	if (i == 100)
9756 		return -ETIMEDOUT;
9757 
9758 	return 0;
9759 }
9760 
9761 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9762 {
9763 	struct pci_dev *root = rdev->pdev->bus->self;
9764 	int bridge_pos, gpu_pos;
9765 	u32 speed_cntl, mask, current_data_rate;
9766 	int ret, i;
9767 	u16 tmp16;
9768 
9769 	if (pci_is_root_bus(rdev->pdev->bus))
9770 		return;
9771 
9772 	if (radeon_pcie_gen2 == 0)
9773 		return;
9774 
9775 	if (rdev->flags & RADEON_IS_IGP)
9776 		return;
9777 
9778 	if (!(rdev->flags & RADEON_IS_PCIE))
9779 		return;
9780 
9781 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9782 	if (ret != 0)
9783 		return;
9784 
9785 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9786 		return;
9787 
9788 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9789 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9790 		LC_CURRENT_DATA_RATE_SHIFT;
9791 	if (mask & DRM_PCIE_SPEED_80) {
9792 		if (current_data_rate == 2) {
9793 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9794 			return;
9795 		}
9796 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9797 	} else if (mask & DRM_PCIE_SPEED_50) {
9798 		if (current_data_rate == 1) {
9799 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9800 			return;
9801 		}
9802 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9803 	}
9804 
9805 	bridge_pos = pci_pcie_cap(root);
9806 	if (!bridge_pos)
9807 		return;
9808 
9809 	gpu_pos = pci_pcie_cap(rdev->pdev);
9810 	if (!gpu_pos)
9811 		return;
9812 
9813 	if (mask & DRM_PCIE_SPEED_80) {
9814 		/* re-try equalization if gen3 is not already enabled */
9815 		if (current_data_rate != 2) {
9816 			u16 bridge_cfg, gpu_cfg;
9817 			u16 bridge_cfg2, gpu_cfg2;
9818 			u32 max_lw, current_lw, tmp;
9819 
9820 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9821 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9822 
9823 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9824 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9825 
9826 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9827 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9828 
9829 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9830 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9831 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9832 
9833 			if (current_lw < max_lw) {
9834 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9835 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9836 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9837 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9838 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9839 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9840 				}
9841 			}
9842 
9843 			for (i = 0; i < 10; i++) {
9844 				/* check status */
9845 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9846 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9847 					break;
9848 
9849 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9850 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9851 
9852 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9853 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9854 
9855 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9856 				tmp |= LC_SET_QUIESCE;
9857 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9858 
9859 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9860 				tmp |= LC_REDO_EQ;
9861 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9862 
9863 				mdelay(100);
9864 
9865 				/* linkctl */
9866 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9867 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9868 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9869 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9870 
9871 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9872 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9873 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9874 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9875 
				/* linkctl2: restore the compliance test bits
				 * (bit 4 and bits 11:9) saved before
				 * equalization, on both ends of the link
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

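	/* Target Link Speed is the low 4 bits of Link Control 2:
	 * 1 = 2.5 GT/s (gen1), 2 = 5.0 GT/s (gen2), 3 = 8.0 GT/s (gen3)
	 */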
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

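	/* poll until the controller clears LC_INITIATE_LINK_SPEED_CHANGE,
	 * indicating the speed change has been kicked off
	 */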
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

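/**
 * cik_program_aspm - program ASPM settings (CIK).
 *
 * @rdev: radeon_device pointer
 *
 * Configure PCIE ASPM (L0s/L1) and, where the platform supports it,
 * PLL powerdown in L1 and CLKREQ#-based reference clock gating.
 */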
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

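	/* override the advertised N_FTS (the number of fast training
	 * sequences the link partner must send when exiting L0s)
	 */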
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

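		/* with L1 enabled, the PHY PLLs can also be powered down
		 * while the link is in L1 for additional savings
		 */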
		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

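			/* CLKREQ#-based gating is only usable if the
			 * upstream bridge advertises clock power management
			 * (PCI_EXP_LNKCAP_CLKPM) in its link capabilities
			 */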
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

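			/* when CLKREQ# is usable, allow power-down in L1/L23
			 * and switch the thermal monitor, deep sleep, and
			 * MPLL bypass clocks away from the BIF reference
			 * clock so it can be gated
			 */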
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

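	/* if the received N_FTS reads back at its maximum and the link is
	 * lane-reversed in both directions, clear the L0s inactivity
	 * timeout again, effectively disabling L0s entry
	 */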
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}