1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/slab.h>
26#include <linux/module.h>
27#include "drmP.h"
28#include "radeon.h"
29#include "radeon_asic.h"
30#include "radeon_audio.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34#include "radeon_ucode.h"
35#include "clearstate_ci.h"
36#include "radeon_kfd.h"
37
38MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49MODULE_FIRMWARE("radeon/bonaire_me.bin");
50MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68MODULE_FIRMWARE("radeon/hawaii_me.bin");
69MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84MODULE_FIRMWARE("radeon/kaveri_me.bin");
85MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92MODULE_FIRMWARE("radeon/KABINI_me.bin");
93MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99MODULE_FIRMWARE("radeon/kabini_me.bin");
100MODULE_FIRMWARE("radeon/kabini_ce.bin");
101MODULE_FIRMWARE("radeon/kabini_mec.bin");
102MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113MODULE_FIRMWARE("radeon/mullins_me.bin");
114MODULE_FIRMWARE("radeon/mullins_ce.bin");
115MODULE_FIRMWARE("radeon/mullins_mec.bin");
116MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120extern void r600_ih_ring_fini(struct radeon_device *rdev);
121extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124extern void sumo_rlc_fini(struct radeon_device *rdev);
125extern int sumo_rlc_init(struct radeon_device *rdev);
126extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127extern void si_rlc_reset(struct radeon_device *rdev);
128extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130extern int cik_sdma_resume(struct radeon_device *rdev);
131extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132extern void cik_sdma_fini(struct radeon_device *rdev);
133extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134static void cik_rlc_stop(struct radeon_device *rdev);
135static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136static void cik_program_aspm(struct radeon_device *rdev);
137static void cik_init_pg(struct radeon_device *rdev);
138static void cik_init_cg(struct radeon_device *rdev);
139static void cik_fini_pg(struct radeon_device *rdev);
140static void cik_fini_cg(struct radeon_device *rdev);
141static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142					  bool enable);
143
144/**
145 * cik_get_allowed_info_register - fetch the register for the info ioctl
146 *
147 * @rdev: radeon_device pointer
148 * @reg: register offset in bytes
149 * @val: register value
150 *
151 * Returns 0 for success or -EINVAL for an invalid register
152 *
153 */
154int cik_get_allowed_info_register(struct radeon_device *rdev,
155				  u32 reg, u32 *val)
156{
157	switch (reg) {
158	case GRBM_STATUS:
159	case GRBM_STATUS2:
160	case GRBM_STATUS_SE0:
161	case GRBM_STATUS_SE1:
162	case GRBM_STATUS_SE2:
163	case GRBM_STATUS_SE3:
164	case SRBM_STATUS:
165	case SRBM_STATUS2:
166	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168	case UVD_STATUS:
169	/* TODO VCE */
170		*val = RREG32(reg);
171		return 0;
172	default:
173		return -EINVAL;
174	}
175}
176
177/* get temperature in millidegrees */
178int ci_get_temp(struct radeon_device *rdev)
179{
180	u32 temp;
181	int actual_temp = 0;
182
183	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
184		CTF_TEMP_SHIFT;
185
186	if (temp & 0x200)
187		actual_temp = 255;
188	else
189		actual_temp = temp & 0x1ff;
190
191	actual_temp = actual_temp * 1000;
192
193	return actual_temp;
194}
195
196/* get temperature in millidegrees */
197int kv_get_temp(struct radeon_device *rdev)
198{
199	u32 temp;
200	int actual_temp = 0;
201
202	temp = RREG32_SMC(0xC0300E0C);
203
204	if (temp)
205		actual_temp = (temp / 8) - 49;
206	else
207		actual_temp = 0;
208
209	actual_temp = actual_temp * 1000;
210
211	return actual_temp;
212}
213
214/*
215 * Indirect registers accessor
216 */
217u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
218{
219	unsigned long flags;
220	u32 r;
221
222	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
223	WREG32(PCIE_INDEX, reg);
224	(void)RREG32(PCIE_INDEX);
225	r = RREG32(PCIE_DATA);
226	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
227	return r;
228}
229
230void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
231{
232	unsigned long flags;
233
234	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
235	WREG32(PCIE_INDEX, reg);
236	(void)RREG32(PCIE_INDEX);
237	WREG32(PCIE_DATA, v);
238	(void)RREG32(PCIE_DATA);
239	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
240}
241
242static const u32 spectre_rlc_save_restore_register_list[] =
243{
244	(0x0e00 << 16) | (0xc12c >> 2),
245	0x00000000,
246	(0x0e00 << 16) | (0xc140 >> 2),
247	0x00000000,
248	(0x0e00 << 16) | (0xc150 >> 2),
249	0x00000000,
250	(0x0e00 << 16) | (0xc15c >> 2),
251	0x00000000,
252	(0x0e00 << 16) | (0xc168 >> 2),
253	0x00000000,
254	(0x0e00 << 16) | (0xc170 >> 2),
255	0x00000000,
256	(0x0e00 << 16) | (0xc178 >> 2),
257	0x00000000,
258	(0x0e00 << 16) | (0xc204 >> 2),
259	0x00000000,
260	(0x0e00 << 16) | (0xc2b4 >> 2),
261	0x00000000,
262	(0x0e00 << 16) | (0xc2b8 >> 2),
263	0x00000000,
264	(0x0e00 << 16) | (0xc2bc >> 2),
265	0x00000000,
266	(0x0e00 << 16) | (0xc2c0 >> 2),
267	0x00000000,
268	(0x0e00 << 16) | (0x8228 >> 2),
269	0x00000000,
270	(0x0e00 << 16) | (0x829c >> 2),
271	0x00000000,
272	(0x0e00 << 16) | (0x869c >> 2),
273	0x00000000,
274	(0x0600 << 16) | (0x98f4 >> 2),
275	0x00000000,
276	(0x0e00 << 16) | (0x98f8 >> 2),
277	0x00000000,
278	(0x0e00 << 16) | (0x9900 >> 2),
279	0x00000000,
280	(0x0e00 << 16) | (0xc260 >> 2),
281	0x00000000,
282	(0x0e00 << 16) | (0x90e8 >> 2),
283	0x00000000,
284	(0x0e00 << 16) | (0x3c000 >> 2),
285	0x00000000,
286	(0x0e00 << 16) | (0x3c00c >> 2),
287	0x00000000,
288	(0x0e00 << 16) | (0x8c1c >> 2),
289	0x00000000,
290	(0x0e00 << 16) | (0x9700 >> 2),
291	0x00000000,
292	(0x0e00 << 16) | (0xcd20 >> 2),
293	0x00000000,
294	(0x4e00 << 16) | (0xcd20 >> 2),
295	0x00000000,
296	(0x5e00 << 16) | (0xcd20 >> 2),
297	0x00000000,
298	(0x6e00 << 16) | (0xcd20 >> 2),
299	0x00000000,
300	(0x7e00 << 16) | (0xcd20 >> 2),
301	0x00000000,
302	(0x8e00 << 16) | (0xcd20 >> 2),
303	0x00000000,
304	(0x9e00 << 16) | (0xcd20 >> 2),
305	0x00000000,
306	(0xae00 << 16) | (0xcd20 >> 2),
307	0x00000000,
308	(0xbe00 << 16) | (0xcd20 >> 2),
309	0x00000000,
310	(0x0e00 << 16) | (0x89bc >> 2),
311	0x00000000,
312	(0x0e00 << 16) | (0x8900 >> 2),
313	0x00000000,
314	0x3,
315	(0x0e00 << 16) | (0xc130 >> 2),
316	0x00000000,
317	(0x0e00 << 16) | (0xc134 >> 2),
318	0x00000000,
319	(0x0e00 << 16) | (0xc1fc >> 2),
320	0x00000000,
321	(0x0e00 << 16) | (0xc208 >> 2),
322	0x00000000,
323	(0x0e00 << 16) | (0xc264 >> 2),
324	0x00000000,
325	(0x0e00 << 16) | (0xc268 >> 2),
326	0x00000000,
327	(0x0e00 << 16) | (0xc26c >> 2),
328	0x00000000,
329	(0x0e00 << 16) | (0xc270 >> 2),
330	0x00000000,
331	(0x0e00 << 16) | (0xc274 >> 2),
332	0x00000000,
333	(0x0e00 << 16) | (0xc278 >> 2),
334	0x00000000,
335	(0x0e00 << 16) | (0xc27c >> 2),
336	0x00000000,
337	(0x0e00 << 16) | (0xc280 >> 2),
338	0x00000000,
339	(0x0e00 << 16) | (0xc284 >> 2),
340	0x00000000,
341	(0x0e00 << 16) | (0xc288 >> 2),
342	0x00000000,
343	(0x0e00 << 16) | (0xc28c >> 2),
344	0x00000000,
345	(0x0e00 << 16) | (0xc290 >> 2),
346	0x00000000,
347	(0x0e00 << 16) | (0xc294 >> 2),
348	0x00000000,
349	(0x0e00 << 16) | (0xc298 >> 2),
350	0x00000000,
351	(0x0e00 << 16) | (0xc29c >> 2),
352	0x00000000,
353	(0x0e00 << 16) | (0xc2a0 >> 2),
354	0x00000000,
355	(0x0e00 << 16) | (0xc2a4 >> 2),
356	0x00000000,
357	(0x0e00 << 16) | (0xc2a8 >> 2),
358	0x00000000,
359	(0x0e00 << 16) | (0xc2ac  >> 2),
360	0x00000000,
361	(0x0e00 << 16) | (0xc2b0 >> 2),
362	0x00000000,
363	(0x0e00 << 16) | (0x301d0 >> 2),
364	0x00000000,
365	(0x0e00 << 16) | (0x30238 >> 2),
366	0x00000000,
367	(0x0e00 << 16) | (0x30250 >> 2),
368	0x00000000,
369	(0x0e00 << 16) | (0x30254 >> 2),
370	0x00000000,
371	(0x0e00 << 16) | (0x30258 >> 2),
372	0x00000000,
373	(0x0e00 << 16) | (0x3025c >> 2),
374	0x00000000,
375	(0x4e00 << 16) | (0xc900 >> 2),
376	0x00000000,
377	(0x5e00 << 16) | (0xc900 >> 2),
378	0x00000000,
379	(0x6e00 << 16) | (0xc900 >> 2),
380	0x00000000,
381	(0x7e00 << 16) | (0xc900 >> 2),
382	0x00000000,
383	(0x8e00 << 16) | (0xc900 >> 2),
384	0x00000000,
385	(0x9e00 << 16) | (0xc900 >> 2),
386	0x00000000,
387	(0xae00 << 16) | (0xc900 >> 2),
388	0x00000000,
389	(0xbe00 << 16) | (0xc900 >> 2),
390	0x00000000,
391	(0x4e00 << 16) | (0xc904 >> 2),
392	0x00000000,
393	(0x5e00 << 16) | (0xc904 >> 2),
394	0x00000000,
395	(0x6e00 << 16) | (0xc904 >> 2),
396	0x00000000,
397	(0x7e00 << 16) | (0xc904 >> 2),
398	0x00000000,
399	(0x8e00 << 16) | (0xc904 >> 2),
400	0x00000000,
401	(0x9e00 << 16) | (0xc904 >> 2),
402	0x00000000,
403	(0xae00 << 16) | (0xc904 >> 2),
404	0x00000000,
405	(0xbe00 << 16) | (0xc904 >> 2),
406	0x00000000,
407	(0x4e00 << 16) | (0xc908 >> 2),
408	0x00000000,
409	(0x5e00 << 16) | (0xc908 >> 2),
410	0x00000000,
411	(0x6e00 << 16) | (0xc908 >> 2),
412	0x00000000,
413	(0x7e00 << 16) | (0xc908 >> 2),
414	0x00000000,
415	(0x8e00 << 16) | (0xc908 >> 2),
416	0x00000000,
417	(0x9e00 << 16) | (0xc908 >> 2),
418	0x00000000,
419	(0xae00 << 16) | (0xc908 >> 2),
420	0x00000000,
421	(0xbe00 << 16) | (0xc908 >> 2),
422	0x00000000,
423	(0x4e00 << 16) | (0xc90c >> 2),
424	0x00000000,
425	(0x5e00 << 16) | (0xc90c >> 2),
426	0x00000000,
427	(0x6e00 << 16) | (0xc90c >> 2),
428	0x00000000,
429	(0x7e00 << 16) | (0xc90c >> 2),
430	0x00000000,
431	(0x8e00 << 16) | (0xc90c >> 2),
432	0x00000000,
433	(0x9e00 << 16) | (0xc90c >> 2),
434	0x00000000,
435	(0xae00 << 16) | (0xc90c >> 2),
436	0x00000000,
437	(0xbe00 << 16) | (0xc90c >> 2),
438	0x00000000,
439	(0x4e00 << 16) | (0xc910 >> 2),
440	0x00000000,
441	(0x5e00 << 16) | (0xc910 >> 2),
442	0x00000000,
443	(0x6e00 << 16) | (0xc910 >> 2),
444	0x00000000,
445	(0x7e00 << 16) | (0xc910 >> 2),
446	0x00000000,
447	(0x8e00 << 16) | (0xc910 >> 2),
448	0x00000000,
449	(0x9e00 << 16) | (0xc910 >> 2),
450	0x00000000,
451	(0xae00 << 16) | (0xc910 >> 2),
452	0x00000000,
453	(0xbe00 << 16) | (0xc910 >> 2),
454	0x00000000,
455	(0x0e00 << 16) | (0xc99c >> 2),
456	0x00000000,
457	(0x0e00 << 16) | (0x9834 >> 2),
458	0x00000000,
459	(0x0000 << 16) | (0x30f00 >> 2),
460	0x00000000,
461	(0x0001 << 16) | (0x30f00 >> 2),
462	0x00000000,
463	(0x0000 << 16) | (0x30f04 >> 2),
464	0x00000000,
465	(0x0001 << 16) | (0x30f04 >> 2),
466	0x00000000,
467	(0x0000 << 16) | (0x30f08 >> 2),
468	0x00000000,
469	(0x0001 << 16) | (0x30f08 >> 2),
470	0x00000000,
471	(0x0000 << 16) | (0x30f0c >> 2),
472	0x00000000,
473	(0x0001 << 16) | (0x30f0c >> 2),
474	0x00000000,
475	(0x0600 << 16) | (0x9b7c >> 2),
476	0x00000000,
477	(0x0e00 << 16) | (0x8a14 >> 2),
478	0x00000000,
479	(0x0e00 << 16) | (0x8a18 >> 2),
480	0x00000000,
481	(0x0600 << 16) | (0x30a00 >> 2),
482	0x00000000,
483	(0x0e00 << 16) | (0x8bf0 >> 2),
484	0x00000000,
485	(0x0e00 << 16) | (0x8bcc >> 2),
486	0x00000000,
487	(0x0e00 << 16) | (0x8b24 >> 2),
488	0x00000000,
489	(0x0e00 << 16) | (0x30a04 >> 2),
490	0x00000000,
491	(0x0600 << 16) | (0x30a10 >> 2),
492	0x00000000,
493	(0x0600 << 16) | (0x30a14 >> 2),
494	0x00000000,
495	(0x0600 << 16) | (0x30a18 >> 2),
496	0x00000000,
497	(0x0600 << 16) | (0x30a2c >> 2),
498	0x00000000,
499	(0x0e00 << 16) | (0xc700 >> 2),
500	0x00000000,
501	(0x0e00 << 16) | (0xc704 >> 2),
502	0x00000000,
503	(0x0e00 << 16) | (0xc708 >> 2),
504	0x00000000,
505	(0x0e00 << 16) | (0xc768 >> 2),
506	0x00000000,
507	(0x0400 << 16) | (0xc770 >> 2),
508	0x00000000,
509	(0x0400 << 16) | (0xc774 >> 2),
510	0x00000000,
511	(0x0400 << 16) | (0xc778 >> 2),
512	0x00000000,
513	(0x0400 << 16) | (0xc77c >> 2),
514	0x00000000,
515	(0x0400 << 16) | (0xc780 >> 2),
516	0x00000000,
517	(0x0400 << 16) | (0xc784 >> 2),
518	0x00000000,
519	(0x0400 << 16) | (0xc788 >> 2),
520	0x00000000,
521	(0x0400 << 16) | (0xc78c >> 2),
522	0x00000000,
523	(0x0400 << 16) | (0xc798 >> 2),
524	0x00000000,
525	(0x0400 << 16) | (0xc79c >> 2),
526	0x00000000,
527	(0x0400 << 16) | (0xc7a0 >> 2),
528	0x00000000,
529	(0x0400 << 16) | (0xc7a4 >> 2),
530	0x00000000,
531	(0x0400 << 16) | (0xc7a8 >> 2),
532	0x00000000,
533	(0x0400 << 16) | (0xc7ac >> 2),
534	0x00000000,
535	(0x0400 << 16) | (0xc7b0 >> 2),
536	0x00000000,
537	(0x0400 << 16) | (0xc7b4 >> 2),
538	0x00000000,
539	(0x0e00 << 16) | (0x9100 >> 2),
540	0x00000000,
541	(0x0e00 << 16) | (0x3c010 >> 2),
542	0x00000000,
543	(0x0e00 << 16) | (0x92a8 >> 2),
544	0x00000000,
545	(0x0e00 << 16) | (0x92ac >> 2),
546	0x00000000,
547	(0x0e00 << 16) | (0x92b4 >> 2),
548	0x00000000,
549	(0x0e00 << 16) | (0x92b8 >> 2),
550	0x00000000,
551	(0x0e00 << 16) | (0x92bc >> 2),
552	0x00000000,
553	(0x0e00 << 16) | (0x92c0 >> 2),
554	0x00000000,
555	(0x0e00 << 16) | (0x92c4 >> 2),
556	0x00000000,
557	(0x0e00 << 16) | (0x92c8 >> 2),
558	0x00000000,
559	(0x0e00 << 16) | (0x92cc >> 2),
560	0x00000000,
561	(0x0e00 << 16) | (0x92d0 >> 2),
562	0x00000000,
563	(0x0e00 << 16) | (0x8c00 >> 2),
564	0x00000000,
565	(0x0e00 << 16) | (0x8c04 >> 2),
566	0x00000000,
567	(0x0e00 << 16) | (0x8c20 >> 2),
568	0x00000000,
569	(0x0e00 << 16) | (0x8c38 >> 2),
570	0x00000000,
571	(0x0e00 << 16) | (0x8c3c >> 2),
572	0x00000000,
573	(0x0e00 << 16) | (0xae00 >> 2),
574	0x00000000,
575	(0x0e00 << 16) | (0x9604 >> 2),
576	0x00000000,
577	(0x0e00 << 16) | (0xac08 >> 2),
578	0x00000000,
579	(0x0e00 << 16) | (0xac0c >> 2),
580	0x00000000,
581	(0x0e00 << 16) | (0xac10 >> 2),
582	0x00000000,
583	(0x0e00 << 16) | (0xac14 >> 2),
584	0x00000000,
585	(0x0e00 << 16) | (0xac58 >> 2),
586	0x00000000,
587	(0x0e00 << 16) | (0xac68 >> 2),
588	0x00000000,
589	(0x0e00 << 16) | (0xac6c >> 2),
590	0x00000000,
591	(0x0e00 << 16) | (0xac70 >> 2),
592	0x00000000,
593	(0x0e00 << 16) | (0xac74 >> 2),
594	0x00000000,
595	(0x0e00 << 16) | (0xac78 >> 2),
596	0x00000000,
597	(0x0e00 << 16) | (0xac7c >> 2),
598	0x00000000,
599	(0x0e00 << 16) | (0xac80 >> 2),
600	0x00000000,
601	(0x0e00 << 16) | (0xac84 >> 2),
602	0x00000000,
603	(0x0e00 << 16) | (0xac88 >> 2),
604	0x00000000,
605	(0x0e00 << 16) | (0xac8c >> 2),
606	0x00000000,
607	(0x0e00 << 16) | (0x970c >> 2),
608	0x00000000,
609	(0x0e00 << 16) | (0x9714 >> 2),
610	0x00000000,
611	(0x0e00 << 16) | (0x9718 >> 2),
612	0x00000000,
613	(0x0e00 << 16) | (0x971c >> 2),
614	0x00000000,
615	(0x0e00 << 16) | (0x31068 >> 2),
616	0x00000000,
617	(0x4e00 << 16) | (0x31068 >> 2),
618	0x00000000,
619	(0x5e00 << 16) | (0x31068 >> 2),
620	0x00000000,
621	(0x6e00 << 16) | (0x31068 >> 2),
622	0x00000000,
623	(0x7e00 << 16) | (0x31068 >> 2),
624	0x00000000,
625	(0x8e00 << 16) | (0x31068 >> 2),
626	0x00000000,
627	(0x9e00 << 16) | (0x31068 >> 2),
628	0x00000000,
629	(0xae00 << 16) | (0x31068 >> 2),
630	0x00000000,
631	(0xbe00 << 16) | (0x31068 >> 2),
632	0x00000000,
633	(0x0e00 << 16) | (0xcd10 >> 2),
634	0x00000000,
635	(0x0e00 << 16) | (0xcd14 >> 2),
636	0x00000000,
637	(0x0e00 << 16) | (0x88b0 >> 2),
638	0x00000000,
639	(0x0e00 << 16) | (0x88b4 >> 2),
640	0x00000000,
641	(0x0e00 << 16) | (0x88b8 >> 2),
642	0x00000000,
643	(0x0e00 << 16) | (0x88bc >> 2),
644	0x00000000,
645	(0x0400 << 16) | (0x89c0 >> 2),
646	0x00000000,
647	(0x0e00 << 16) | (0x88c4 >> 2),
648	0x00000000,
649	(0x0e00 << 16) | (0x88c8 >> 2),
650	0x00000000,
651	(0x0e00 << 16) | (0x88d0 >> 2),
652	0x00000000,
653	(0x0e00 << 16) | (0x88d4 >> 2),
654	0x00000000,
655	(0x0e00 << 16) | (0x88d8 >> 2),
656	0x00000000,
657	(0x0e00 << 16) | (0x8980 >> 2),
658	0x00000000,
659	(0x0e00 << 16) | (0x30938 >> 2),
660	0x00000000,
661	(0x0e00 << 16) | (0x3093c >> 2),
662	0x00000000,
663	(0x0e00 << 16) | (0x30940 >> 2),
664	0x00000000,
665	(0x0e00 << 16) | (0x89a0 >> 2),
666	0x00000000,
667	(0x0e00 << 16) | (0x30900 >> 2),
668	0x00000000,
669	(0x0e00 << 16) | (0x30904 >> 2),
670	0x00000000,
671	(0x0e00 << 16) | (0x89b4 >> 2),
672	0x00000000,
673	(0x0e00 << 16) | (0x3c210 >> 2),
674	0x00000000,
675	(0x0e00 << 16) | (0x3c214 >> 2),
676	0x00000000,
677	(0x0e00 << 16) | (0x3c218 >> 2),
678	0x00000000,
679	(0x0e00 << 16) | (0x8904 >> 2),
680	0x00000000,
681	0x5,
682	(0x0e00 << 16) | (0x8c28 >> 2),
683	(0x0e00 << 16) | (0x8c2c >> 2),
684	(0x0e00 << 16) | (0x8c30 >> 2),
685	(0x0e00 << 16) | (0x8c34 >> 2),
686	(0x0e00 << 16) | (0x9600 >> 2),
687};
688
689static const u32 kalindi_rlc_save_restore_register_list[] =
690{
691	(0x0e00 << 16) | (0xc12c >> 2),
692	0x00000000,
693	(0x0e00 << 16) | (0xc140 >> 2),
694	0x00000000,
695	(0x0e00 << 16) | (0xc150 >> 2),
696	0x00000000,
697	(0x0e00 << 16) | (0xc15c >> 2),
698	0x00000000,
699	(0x0e00 << 16) | (0xc168 >> 2),
700	0x00000000,
701	(0x0e00 << 16) | (0xc170 >> 2),
702	0x00000000,
703	(0x0e00 << 16) | (0xc204 >> 2),
704	0x00000000,
705	(0x0e00 << 16) | (0xc2b4 >> 2),
706	0x00000000,
707	(0x0e00 << 16) | (0xc2b8 >> 2),
708	0x00000000,
709	(0x0e00 << 16) | (0xc2bc >> 2),
710	0x00000000,
711	(0x0e00 << 16) | (0xc2c0 >> 2),
712	0x00000000,
713	(0x0e00 << 16) | (0x8228 >> 2),
714	0x00000000,
715	(0x0e00 << 16) | (0x829c >> 2),
716	0x00000000,
717	(0x0e00 << 16) | (0x869c >> 2),
718	0x00000000,
719	(0x0600 << 16) | (0x98f4 >> 2),
720	0x00000000,
721	(0x0e00 << 16) | (0x98f8 >> 2),
722	0x00000000,
723	(0x0e00 << 16) | (0x9900 >> 2),
724	0x00000000,
725	(0x0e00 << 16) | (0xc260 >> 2),
726	0x00000000,
727	(0x0e00 << 16) | (0x90e8 >> 2),
728	0x00000000,
729	(0x0e00 << 16) | (0x3c000 >> 2),
730	0x00000000,
731	(0x0e00 << 16) | (0x3c00c >> 2),
732	0x00000000,
733	(0x0e00 << 16) | (0x8c1c >> 2),
734	0x00000000,
735	(0x0e00 << 16) | (0x9700 >> 2),
736	0x00000000,
737	(0x0e00 << 16) | (0xcd20 >> 2),
738	0x00000000,
739	(0x4e00 << 16) | (0xcd20 >> 2),
740	0x00000000,
741	(0x5e00 << 16) | (0xcd20 >> 2),
742	0x00000000,
743	(0x6e00 << 16) | (0xcd20 >> 2),
744	0x00000000,
745	(0x7e00 << 16) | (0xcd20 >> 2),
746	0x00000000,
747	(0x0e00 << 16) | (0x89bc >> 2),
748	0x00000000,
749	(0x0e00 << 16) | (0x8900 >> 2),
750	0x00000000,
751	0x3,
752	(0x0e00 << 16) | (0xc130 >> 2),
753	0x00000000,
754	(0x0e00 << 16) | (0xc134 >> 2),
755	0x00000000,
756	(0x0e00 << 16) | (0xc1fc >> 2),
757	0x00000000,
758	(0x0e00 << 16) | (0xc208 >> 2),
759	0x00000000,
760	(0x0e00 << 16) | (0xc264 >> 2),
761	0x00000000,
762	(0x0e00 << 16) | (0xc268 >> 2),
763	0x00000000,
764	(0x0e00 << 16) | (0xc26c >> 2),
765	0x00000000,
766	(0x0e00 << 16) | (0xc270 >> 2),
767	0x00000000,
768	(0x0e00 << 16) | (0xc274 >> 2),
769	0x00000000,
770	(0x0e00 << 16) | (0xc28c >> 2),
771	0x00000000,
772	(0x0e00 << 16) | (0xc290 >> 2),
773	0x00000000,
774	(0x0e00 << 16) | (0xc294 >> 2),
775	0x00000000,
776	(0x0e00 << 16) | (0xc298 >> 2),
777	0x00000000,
778	(0x0e00 << 16) | (0xc2a0 >> 2),
779	0x00000000,
780	(0x0e00 << 16) | (0xc2a4 >> 2),
781	0x00000000,
782	(0x0e00 << 16) | (0xc2a8 >> 2),
783	0x00000000,
784	(0x0e00 << 16) | (0xc2ac >> 2),
785	0x00000000,
786	(0x0e00 << 16) | (0x301d0 >> 2),
787	0x00000000,
788	(0x0e00 << 16) | (0x30238 >> 2),
789	0x00000000,
790	(0x0e00 << 16) | (0x30250 >> 2),
791	0x00000000,
792	(0x0e00 << 16) | (0x30254 >> 2),
793	0x00000000,
794	(0x0e00 << 16) | (0x30258 >> 2),
795	0x00000000,
796	(0x0e00 << 16) | (0x3025c >> 2),
797	0x00000000,
798	(0x4e00 << 16) | (0xc900 >> 2),
799	0x00000000,
800	(0x5e00 << 16) | (0xc900 >> 2),
801	0x00000000,
802	(0x6e00 << 16) | (0xc900 >> 2),
803	0x00000000,
804	(0x7e00 << 16) | (0xc900 >> 2),
805	0x00000000,
806	(0x4e00 << 16) | (0xc904 >> 2),
807	0x00000000,
808	(0x5e00 << 16) | (0xc904 >> 2),
809	0x00000000,
810	(0x6e00 << 16) | (0xc904 >> 2),
811	0x00000000,
812	(0x7e00 << 16) | (0xc904 >> 2),
813	0x00000000,
814	(0x4e00 << 16) | (0xc908 >> 2),
815	0x00000000,
816	(0x5e00 << 16) | (0xc908 >> 2),
817	0x00000000,
818	(0x6e00 << 16) | (0xc908 >> 2),
819	0x00000000,
820	(0x7e00 << 16) | (0xc908 >> 2),
821	0x00000000,
822	(0x4e00 << 16) | (0xc90c >> 2),
823	0x00000000,
824	(0x5e00 << 16) | (0xc90c >> 2),
825	0x00000000,
826	(0x6e00 << 16) | (0xc90c >> 2),
827	0x00000000,
828	(0x7e00 << 16) | (0xc90c >> 2),
829	0x00000000,
830	(0x4e00 << 16) | (0xc910 >> 2),
831	0x00000000,
832	(0x5e00 << 16) | (0xc910 >> 2),
833	0x00000000,
834	(0x6e00 << 16) | (0xc910 >> 2),
835	0x00000000,
836	(0x7e00 << 16) | (0xc910 >> 2),
837	0x00000000,
838	(0x0e00 << 16) | (0xc99c >> 2),
839	0x00000000,
840	(0x0e00 << 16) | (0x9834 >> 2),
841	0x00000000,
842	(0x0000 << 16) | (0x30f00 >> 2),
843	0x00000000,
844	(0x0000 << 16) | (0x30f04 >> 2),
845	0x00000000,
846	(0x0000 << 16) | (0x30f08 >> 2),
847	0x00000000,
848	(0x0000 << 16) | (0x30f0c >> 2),
849	0x00000000,
850	(0x0600 << 16) | (0x9b7c >> 2),
851	0x00000000,
852	(0x0e00 << 16) | (0x8a14 >> 2),
853	0x00000000,
854	(0x0e00 << 16) | (0x8a18 >> 2),
855	0x00000000,
856	(0x0600 << 16) | (0x30a00 >> 2),
857	0x00000000,
858	(0x0e00 << 16) | (0x8bf0 >> 2),
859	0x00000000,
860	(0x0e00 << 16) | (0x8bcc >> 2),
861	0x00000000,
862	(0x0e00 << 16) | (0x8b24 >> 2),
863	0x00000000,
864	(0x0e00 << 16) | (0x30a04 >> 2),
865	0x00000000,
866	(0x0600 << 16) | (0x30a10 >> 2),
867	0x00000000,
868	(0x0600 << 16) | (0x30a14 >> 2),
869	0x00000000,
870	(0x0600 << 16) | (0x30a18 >> 2),
871	0x00000000,
872	(0x0600 << 16) | (0x30a2c >> 2),
873	0x00000000,
874	(0x0e00 << 16) | (0xc700 >> 2),
875	0x00000000,
876	(0x0e00 << 16) | (0xc704 >> 2),
877	0x00000000,
878	(0x0e00 << 16) | (0xc708 >> 2),
879	0x00000000,
880	(0x0e00 << 16) | (0xc768 >> 2),
881	0x00000000,
882	(0x0400 << 16) | (0xc770 >> 2),
883	0x00000000,
884	(0x0400 << 16) | (0xc774 >> 2),
885	0x00000000,
886	(0x0400 << 16) | (0xc798 >> 2),
887	0x00000000,
888	(0x0400 << 16) | (0xc79c >> 2),
889	0x00000000,
890	(0x0e00 << 16) | (0x9100 >> 2),
891	0x00000000,
892	(0x0e00 << 16) | (0x3c010 >> 2),
893	0x00000000,
894	(0x0e00 << 16) | (0x8c00 >> 2),
895	0x00000000,
896	(0x0e00 << 16) | (0x8c04 >> 2),
897	0x00000000,
898	(0x0e00 << 16) | (0x8c20 >> 2),
899	0x00000000,
900	(0x0e00 << 16) | (0x8c38 >> 2),
901	0x00000000,
902	(0x0e00 << 16) | (0x8c3c >> 2),
903	0x00000000,
904	(0x0e00 << 16) | (0xae00 >> 2),
905	0x00000000,
906	(0x0e00 << 16) | (0x9604 >> 2),
907	0x00000000,
908	(0x0e00 << 16) | (0xac08 >> 2),
909	0x00000000,
910	(0x0e00 << 16) | (0xac0c >> 2),
911	0x00000000,
912	(0x0e00 << 16) | (0xac10 >> 2),
913	0x00000000,
914	(0x0e00 << 16) | (0xac14 >> 2),
915	0x00000000,
916	(0x0e00 << 16) | (0xac58 >> 2),
917	0x00000000,
918	(0x0e00 << 16) | (0xac68 >> 2),
919	0x00000000,
920	(0x0e00 << 16) | (0xac6c >> 2),
921	0x00000000,
922	(0x0e00 << 16) | (0xac70 >> 2),
923	0x00000000,
924	(0x0e00 << 16) | (0xac74 >> 2),
925	0x00000000,
926	(0x0e00 << 16) | (0xac78 >> 2),
927	0x00000000,
928	(0x0e00 << 16) | (0xac7c >> 2),
929	0x00000000,
930	(0x0e00 << 16) | (0xac80 >> 2),
931	0x00000000,
932	(0x0e00 << 16) | (0xac84 >> 2),
933	0x00000000,
934	(0x0e00 << 16) | (0xac88 >> 2),
935	0x00000000,
936	(0x0e00 << 16) | (0xac8c >> 2),
937	0x00000000,
938	(0x0e00 << 16) | (0x970c >> 2),
939	0x00000000,
940	(0x0e00 << 16) | (0x9714 >> 2),
941	0x00000000,
942	(0x0e00 << 16) | (0x9718 >> 2),
943	0x00000000,
944	(0x0e00 << 16) | (0x971c >> 2),
945	0x00000000,
946	(0x0e00 << 16) | (0x31068 >> 2),
947	0x00000000,
948	(0x4e00 << 16) | (0x31068 >> 2),
949	0x00000000,
950	(0x5e00 << 16) | (0x31068 >> 2),
951	0x00000000,
952	(0x6e00 << 16) | (0x31068 >> 2),
953	0x00000000,
954	(0x7e00 << 16) | (0x31068 >> 2),
955	0x00000000,
956	(0x0e00 << 16) | (0xcd10 >> 2),
957	0x00000000,
958	(0x0e00 << 16) | (0xcd14 >> 2),
959	0x00000000,
960	(0x0e00 << 16) | (0x88b0 >> 2),
961	0x00000000,
962	(0x0e00 << 16) | (0x88b4 >> 2),
963	0x00000000,
964	(0x0e00 << 16) | (0x88b8 >> 2),
965	0x00000000,
966	(0x0e00 << 16) | (0x88bc >> 2),
967	0x00000000,
968	(0x0400 << 16) | (0x89c0 >> 2),
969	0x00000000,
970	(0x0e00 << 16) | (0x88c4 >> 2),
971	0x00000000,
972	(0x0e00 << 16) | (0x88c8 >> 2),
973	0x00000000,
974	(0x0e00 << 16) | (0x88d0 >> 2),
975	0x00000000,
976	(0x0e00 << 16) | (0x88d4 >> 2),
977	0x00000000,
978	(0x0e00 << 16) | (0x88d8 >> 2),
979	0x00000000,
980	(0x0e00 << 16) | (0x8980 >> 2),
981	0x00000000,
982	(0x0e00 << 16) | (0x30938 >> 2),
983	0x00000000,
984	(0x0e00 << 16) | (0x3093c >> 2),
985	0x00000000,
986	(0x0e00 << 16) | (0x30940 >> 2),
987	0x00000000,
988	(0x0e00 << 16) | (0x89a0 >> 2),
989	0x00000000,
990	(0x0e00 << 16) | (0x30900 >> 2),
991	0x00000000,
992	(0x0e00 << 16) | (0x30904 >> 2),
993	0x00000000,
994	(0x0e00 << 16) | (0x89b4 >> 2),
995	0x00000000,
996	(0x0e00 << 16) | (0x3e1fc >> 2),
997	0x00000000,
998	(0x0e00 << 16) | (0x3c210 >> 2),
999	0x00000000,
1000	(0x0e00 << 16) | (0x3c214 >> 2),
1001	0x00000000,
1002	(0x0e00 << 16) | (0x3c218 >> 2),
1003	0x00000000,
1004	(0x0e00 << 16) | (0x8904 >> 2),
1005	0x00000000,
1006	0x5,
1007	(0x0e00 << 16) | (0x8c28 >> 2),
1008	(0x0e00 << 16) | (0x8c2c >> 2),
1009	(0x0e00 << 16) | (0x8c30 >> 2),
1010	(0x0e00 << 16) | (0x8c34 >> 2),
1011	(0x0e00 << 16) | (0x9600 >> 2),
1012};
1013
1014static const u32 bonaire_golden_spm_registers[] =
1015{
1016	0x30800, 0xe0ffffff, 0xe0000000
1017};
1018
1019static const u32 bonaire_golden_common_registers[] =
1020{
1021	0xc770, 0xffffffff, 0x00000800,
1022	0xc774, 0xffffffff, 0x00000800,
1023	0xc798, 0xffffffff, 0x00007fbf,
1024	0xc79c, 0xffffffff, 0x00007faf
1025};
1026
1027static const u32 bonaire_golden_registers[] =
1028{
1029	0x3354, 0x00000333, 0x00000333,
1030	0x3350, 0x000c0fc0, 0x00040200,
1031	0x9a10, 0x00010000, 0x00058208,
1032	0x3c000, 0xffff1fff, 0x00140000,
1033	0x3c200, 0xfdfc0fff, 0x00000100,
1034	0x3c234, 0x40000000, 0x40000200,
1035	0x9830, 0xffffffff, 0x00000000,
1036	0x9834, 0xf00fffff, 0x00000400,
1037	0x9838, 0x0002021c, 0x00020200,
1038	0xc78, 0x00000080, 0x00000000,
1039	0x5bb0, 0x000000f0, 0x00000070,
1040	0x5bc0, 0xf0311fff, 0x80300000,
1041	0x98f8, 0x73773777, 0x12010001,
1042	0x350c, 0x00810000, 0x408af000,
1043	0x7030, 0x31000111, 0x00000011,
1044	0x2f48, 0x73773777, 0x12010001,
1045	0x220c, 0x00007fb6, 0x0021a1b1,
1046	0x2210, 0x00007fb6, 0x002021b1,
1047	0x2180, 0x00007fb6, 0x00002191,
1048	0x2218, 0x00007fb6, 0x002121b1,
1049	0x221c, 0x00007fb6, 0x002021b1,
1050	0x21dc, 0x00007fb6, 0x00002191,
1051	0x21e0, 0x00007fb6, 0x00002191,
1052	0x3628, 0x0000003f, 0x0000000a,
1053	0x362c, 0x0000003f, 0x0000000a,
1054	0x2ae4, 0x00073ffe, 0x000022a2,
1055	0x240c, 0x000007ff, 0x00000000,
1056	0x8a14, 0xf000003f, 0x00000007,
1057	0x8bf0, 0x00002001, 0x00000001,
1058	0x8b24, 0xffffffff, 0x00ffffff,
1059	0x30a04, 0x0000ff0f, 0x00000000,
1060	0x28a4c, 0x07ffffff, 0x06000000,
1061	0x4d8, 0x00000fff, 0x00000100,
1062	0x3e78, 0x00000001, 0x00000002,
1063	0x9100, 0x03000000, 0x0362c688,
1064	0x8c00, 0x000000ff, 0x00000001,
1065	0xe40, 0x00001fff, 0x00001fff,
1066	0x9060, 0x0000007f, 0x00000020,
1067	0x9508, 0x00010000, 0x00010000,
1068	0xac14, 0x000003ff, 0x000000f3,
1069	0xac0c, 0xffffffff, 0x00001032
1070};
1071
1072static const u32 bonaire_mgcg_cgcg_init[] =
1073{
1074	0xc420, 0xffffffff, 0xfffffffc,
1075	0x30800, 0xffffffff, 0xe0000000,
1076	0x3c2a0, 0xffffffff, 0x00000100,
1077	0x3c208, 0xffffffff, 0x00000100,
1078	0x3c2c0, 0xffffffff, 0xc0000100,
1079	0x3c2c8, 0xffffffff, 0xc0000100,
1080	0x3c2c4, 0xffffffff, 0xc0000100,
1081	0x55e4, 0xffffffff, 0x00600100,
1082	0x3c280, 0xffffffff, 0x00000100,
1083	0x3c214, 0xffffffff, 0x06000100,
1084	0x3c220, 0xffffffff, 0x00000100,
1085	0x3c218, 0xffffffff, 0x06000100,
1086	0x3c204, 0xffffffff, 0x00000100,
1087	0x3c2e0, 0xffffffff, 0x00000100,
1088	0x3c224, 0xffffffff, 0x00000100,
1089	0x3c200, 0xffffffff, 0x00000100,
1090	0x3c230, 0xffffffff, 0x00000100,
1091	0x3c234, 0xffffffff, 0x00000100,
1092	0x3c250, 0xffffffff, 0x00000100,
1093	0x3c254, 0xffffffff, 0x00000100,
1094	0x3c258, 0xffffffff, 0x00000100,
1095	0x3c25c, 0xffffffff, 0x00000100,
1096	0x3c260, 0xffffffff, 0x00000100,
1097	0x3c27c, 0xffffffff, 0x00000100,
1098	0x3c278, 0xffffffff, 0x00000100,
1099	0x3c210, 0xffffffff, 0x06000100,
1100	0x3c290, 0xffffffff, 0x00000100,
1101	0x3c274, 0xffffffff, 0x00000100,
1102	0x3c2b4, 0xffffffff, 0x00000100,
1103	0x3c2b0, 0xffffffff, 0x00000100,
1104	0x3c270, 0xffffffff, 0x00000100,
1105	0x30800, 0xffffffff, 0xe0000000,
1106	0x3c020, 0xffffffff, 0x00010000,
1107	0x3c024, 0xffffffff, 0x00030002,
1108	0x3c028, 0xffffffff, 0x00040007,
1109	0x3c02c, 0xffffffff, 0x00060005,
1110	0x3c030, 0xffffffff, 0x00090008,
1111	0x3c034, 0xffffffff, 0x00010000,
1112	0x3c038, 0xffffffff, 0x00030002,
1113	0x3c03c, 0xffffffff, 0x00040007,
1114	0x3c040, 0xffffffff, 0x00060005,
1115	0x3c044, 0xffffffff, 0x00090008,
1116	0x3c048, 0xffffffff, 0x00010000,
1117	0x3c04c, 0xffffffff, 0x00030002,
1118	0x3c050, 0xffffffff, 0x00040007,
1119	0x3c054, 0xffffffff, 0x00060005,
1120	0x3c058, 0xffffffff, 0x00090008,
1121	0x3c05c, 0xffffffff, 0x00010000,
1122	0x3c060, 0xffffffff, 0x00030002,
1123	0x3c064, 0xffffffff, 0x00040007,
1124	0x3c068, 0xffffffff, 0x00060005,
1125	0x3c06c, 0xffffffff, 0x00090008,
1126	0x3c070, 0xffffffff, 0x00010000,
1127	0x3c074, 0xffffffff, 0x00030002,
1128	0x3c078, 0xffffffff, 0x00040007,
1129	0x3c07c, 0xffffffff, 0x00060005,
1130	0x3c080, 0xffffffff, 0x00090008,
1131	0x3c084, 0xffffffff, 0x00010000,
1132	0x3c088, 0xffffffff, 0x00030002,
1133	0x3c08c, 0xffffffff, 0x00040007,
1134	0x3c090, 0xffffffff, 0x00060005,
1135	0x3c094, 0xffffffff, 0x00090008,
1136	0x3c098, 0xffffffff, 0x00010000,
1137	0x3c09c, 0xffffffff, 0x00030002,
1138	0x3c0a0, 0xffffffff, 0x00040007,
1139	0x3c0a4, 0xffffffff, 0x00060005,
1140	0x3c0a8, 0xffffffff, 0x00090008,
1141	0x3c000, 0xffffffff, 0x96e00200,
1142	0x8708, 0xffffffff, 0x00900100,
1143	0xc424, 0xffffffff, 0x0020003f,
1144	0x38, 0xffffffff, 0x0140001c,
1145	0x3c, 0x000f0000, 0x000f0000,
1146	0x220, 0xffffffff, 0xC060000C,
1147	0x224, 0xc0000fff, 0x00000100,
1148	0xf90, 0xffffffff, 0x00000100,
1149	0xf98, 0x00000101, 0x00000000,
1150	0x20a8, 0xffffffff, 0x00000104,
1151	0x55e4, 0xff000fff, 0x00000100,
1152	0x30cc, 0xc0000fff, 0x00000104,
1153	0xc1e4, 0x00000001, 0x00000001,
1154	0xd00c, 0xff000ff0, 0x00000100,
1155	0xd80c, 0xff000ff0, 0x00000100
1156};
1157
1158static const u32 spectre_golden_spm_registers[] =
1159{
1160	0x30800, 0xe0ffffff, 0xe0000000
1161};
1162
1163static const u32 spectre_golden_common_registers[] =
1164{
1165	0xc770, 0xffffffff, 0x00000800,
1166	0xc774, 0xffffffff, 0x00000800,
1167	0xc798, 0xffffffff, 0x00007fbf,
1168	0xc79c, 0xffffffff, 0x00007faf
1169};
1170
1171static const u32 spectre_golden_registers[] =
1172{
1173	0x3c000, 0xffff1fff, 0x96940200,
1174	0x3c00c, 0xffff0001, 0xff000000,
1175	0x3c200, 0xfffc0fff, 0x00000100,
1176	0x6ed8, 0x00010101, 0x00010000,
1177	0x9834, 0xf00fffff, 0x00000400,
1178	0x9838, 0xfffffffc, 0x00020200,
1179	0x5bb0, 0x000000f0, 0x00000070,
1180	0x5bc0, 0xf0311fff, 0x80300000,
1181	0x98f8, 0x73773777, 0x12010001,
1182	0x9b7c, 0x00ff0000, 0x00fc0000,
1183	0x2f48, 0x73773777, 0x12010001,
1184	0x8a14, 0xf000003f, 0x00000007,
1185	0x8b24, 0xffffffff, 0x00ffffff,
1186	0x28350, 0x3f3f3fff, 0x00000082,
1187	0x28354, 0x0000003f, 0x00000000,
1188	0x3e78, 0x00000001, 0x00000002,
1189	0x913c, 0xffff03df, 0x00000004,
1190	0xc768, 0x00000008, 0x00000008,
1191	0x8c00, 0x000008ff, 0x00000800,
1192	0x9508, 0x00010000, 0x00010000,
1193	0xac0c, 0xffffffff, 0x54763210,
1194	0x214f8, 0x01ff01ff, 0x00000002,
1195	0x21498, 0x007ff800, 0x00200000,
1196	0x2015c, 0xffffffff, 0x00000f40,
1197	0x30934, 0xffffffff, 0x00000001
1198};
1199
1200static const u32 spectre_mgcg_cgcg_init[] =
1201{
1202	0xc420, 0xffffffff, 0xfffffffc,
1203	0x30800, 0xffffffff, 0xe0000000,
1204	0x3c2a0, 0xffffffff, 0x00000100,
1205	0x3c208, 0xffffffff, 0x00000100,
1206	0x3c2c0, 0xffffffff, 0x00000100,
1207	0x3c2c8, 0xffffffff, 0x00000100,
1208	0x3c2c4, 0xffffffff, 0x00000100,
1209	0x55e4, 0xffffffff, 0x00600100,
1210	0x3c280, 0xffffffff, 0x00000100,
1211	0x3c214, 0xffffffff, 0x06000100,
1212	0x3c220, 0xffffffff, 0x00000100,
1213	0x3c218, 0xffffffff, 0x06000100,
1214	0x3c204, 0xffffffff, 0x00000100,
1215	0x3c2e0, 0xffffffff, 0x00000100,
1216	0x3c224, 0xffffffff, 0x00000100,
1217	0x3c200, 0xffffffff, 0x00000100,
1218	0x3c230, 0xffffffff, 0x00000100,
1219	0x3c234, 0xffffffff, 0x00000100,
1220	0x3c250, 0xffffffff, 0x00000100,
1221	0x3c254, 0xffffffff, 0x00000100,
1222	0x3c258, 0xffffffff, 0x00000100,
1223	0x3c25c, 0xffffffff, 0x00000100,
1224	0x3c260, 0xffffffff, 0x00000100,
1225	0x3c27c, 0xffffffff, 0x00000100,
1226	0x3c278, 0xffffffff, 0x00000100,
1227	0x3c210, 0xffffffff, 0x06000100,
1228	0x3c290, 0xffffffff, 0x00000100,
1229	0x3c274, 0xffffffff, 0x00000100,
1230	0x3c2b4, 0xffffffff, 0x00000100,
1231	0x3c2b0, 0xffffffff, 0x00000100,
1232	0x3c270, 0xffffffff, 0x00000100,
1233	0x30800, 0xffffffff, 0xe0000000,
1234	0x3c020, 0xffffffff, 0x00010000,
1235	0x3c024, 0xffffffff, 0x00030002,
1236	0x3c028, 0xffffffff, 0x00040007,
1237	0x3c02c, 0xffffffff, 0x00060005,
1238	0x3c030, 0xffffffff, 0x00090008,
1239	0x3c034, 0xffffffff, 0x00010000,
1240	0x3c038, 0xffffffff, 0x00030002,
1241	0x3c03c, 0xffffffff, 0x00040007,
1242	0x3c040, 0xffffffff, 0x00060005,
1243	0x3c044, 0xffffffff, 0x00090008,
1244	0x3c048, 0xffffffff, 0x00010000,
1245	0x3c04c, 0xffffffff, 0x00030002,
1246	0x3c050, 0xffffffff, 0x00040007,
1247	0x3c054, 0xffffffff, 0x00060005,
1248	0x3c058, 0xffffffff, 0x00090008,
1249	0x3c05c, 0xffffffff, 0x00010000,
1250	0x3c060, 0xffffffff, 0x00030002,
1251	0x3c064, 0xffffffff, 0x00040007,
1252	0x3c068, 0xffffffff, 0x00060005,
1253	0x3c06c, 0xffffffff, 0x00090008,
1254	0x3c070, 0xffffffff, 0x00010000,
1255	0x3c074, 0xffffffff, 0x00030002,
1256	0x3c078, 0xffffffff, 0x00040007,
1257	0x3c07c, 0xffffffff, 0x00060005,
1258	0x3c080, 0xffffffff, 0x00090008,
1259	0x3c084, 0xffffffff, 0x00010000,
1260	0x3c088, 0xffffffff, 0x00030002,
1261	0x3c08c, 0xffffffff, 0x00040007,
1262	0x3c090, 0xffffffff, 0x00060005,
1263	0x3c094, 0xffffffff, 0x00090008,
1264	0x3c098, 0xffffffff, 0x00010000,
1265	0x3c09c, 0xffffffff, 0x00030002,
1266	0x3c0a0, 0xffffffff, 0x00040007,
1267	0x3c0a4, 0xffffffff, 0x00060005,
1268	0x3c0a8, 0xffffffff, 0x00090008,
1269	0x3c0ac, 0xffffffff, 0x00010000,
1270	0x3c0b0, 0xffffffff, 0x00030002,
1271	0x3c0b4, 0xffffffff, 0x00040007,
1272	0x3c0b8, 0xffffffff, 0x00060005,
1273	0x3c0bc, 0xffffffff, 0x00090008,
1274	0x3c000, 0xffffffff, 0x96e00200,
1275	0x8708, 0xffffffff, 0x00900100,
1276	0xc424, 0xffffffff, 0x0020003f,
1277	0x38, 0xffffffff, 0x0140001c,
1278	0x3c, 0x000f0000, 0x000f0000,
1279	0x220, 0xffffffff, 0xC060000C,
1280	0x224, 0xc0000fff, 0x00000100,
1281	0xf90, 0xffffffff, 0x00000100,
1282	0xf98, 0x00000101, 0x00000000,
1283	0x20a8, 0xffffffff, 0x00000104,
1284	0x55e4, 0xff000fff, 0x00000100,
1285	0x30cc, 0xc0000fff, 0x00000104,
1286	0xc1e4, 0x00000001, 0x00000001,
1287	0xd00c, 0xff000ff0, 0x00000100,
1288	0xd80c, 0xff000ff0, 0x00000100
1289};
1290
1291static const u32 kalindi_golden_spm_registers[] =
1292{
1293	0x30800, 0xe0ffffff, 0xe0000000
1294};
1295
1296static const u32 kalindi_golden_common_registers[] =
1297{
1298	0xc770, 0xffffffff, 0x00000800,
1299	0xc774, 0xffffffff, 0x00000800,
1300	0xc798, 0xffffffff, 0x00007fbf,
1301	0xc79c, 0xffffffff, 0x00007faf
1302};
1303
1304static const u32 kalindi_golden_registers[] =
1305{
1306	0x3c000, 0xffffdfff, 0x6e944040,
1307	0x55e4, 0xff607fff, 0xfc000100,
1308	0x3c220, 0xff000fff, 0x00000100,
1309	0x3c224, 0xff000fff, 0x00000100,
1310	0x3c200, 0xfffc0fff, 0x00000100,
1311	0x6ed8, 0x00010101, 0x00010000,
1312	0x9830, 0xffffffff, 0x00000000,
1313	0x9834, 0xf00fffff, 0x00000400,
1314	0x5bb0, 0x000000f0, 0x00000070,
1315	0x5bc0, 0xf0311fff, 0x80300000,
1316	0x98f8, 0x73773777, 0x12010001,
1317	0x98fc, 0xffffffff, 0x00000010,
1318	0x9b7c, 0x00ff0000, 0x00fc0000,
1319	0x8030, 0x00001f0f, 0x0000100a,
1320	0x2f48, 0x73773777, 0x12010001,
1321	0x2408, 0x000fffff, 0x000c007f,
1322	0x8a14, 0xf000003f, 0x00000007,
1323	0x8b24, 0x3fff3fff, 0x00ffcfff,
1324	0x30a04, 0x0000ff0f, 0x00000000,
1325	0x28a4c, 0x07ffffff, 0x06000000,
1326	0x4d8, 0x00000fff, 0x00000100,
1327	0x3e78, 0x00000001, 0x00000002,
1328	0xc768, 0x00000008, 0x00000008,
1329	0x8c00, 0x000000ff, 0x00000003,
1330	0x214f8, 0x01ff01ff, 0x00000002,
1331	0x21498, 0x007ff800, 0x00200000,
1332	0x2015c, 0xffffffff, 0x00000f40,
1333	0x88c4, 0x001f3ae3, 0x00000082,
1334	0x88d4, 0x0000001f, 0x00000010,
1335	0x30934, 0xffffffff, 0x00000000
1336};
1337
1338static const u32 kalindi_mgcg_cgcg_init[] =
1339{
1340	0xc420, 0xffffffff, 0xfffffffc,
1341	0x30800, 0xffffffff, 0xe0000000,
1342	0x3c2a0, 0xffffffff, 0x00000100,
1343	0x3c208, 0xffffffff, 0x00000100,
1344	0x3c2c0, 0xffffffff, 0x00000100,
1345	0x3c2c8, 0xffffffff, 0x00000100,
1346	0x3c2c4, 0xffffffff, 0x00000100,
1347	0x55e4, 0xffffffff, 0x00600100,
1348	0x3c280, 0xffffffff, 0x00000100,
1349	0x3c214, 0xffffffff, 0x06000100,
1350	0x3c220, 0xffffffff, 0x00000100,
1351	0x3c218, 0xffffffff, 0x06000100,
1352	0x3c204, 0xffffffff, 0x00000100,
1353	0x3c2e0, 0xffffffff, 0x00000100,
1354	0x3c224, 0xffffffff, 0x00000100,
1355	0x3c200, 0xffffffff, 0x00000100,
1356	0x3c230, 0xffffffff, 0x00000100,
1357	0x3c234, 0xffffffff, 0x00000100,
1358	0x3c250, 0xffffffff, 0x00000100,
1359	0x3c254, 0xffffffff, 0x00000100,
1360	0x3c258, 0xffffffff, 0x00000100,
1361	0x3c25c, 0xffffffff, 0x00000100,
1362	0x3c260, 0xffffffff, 0x00000100,
1363	0x3c27c, 0xffffffff, 0x00000100,
1364	0x3c278, 0xffffffff, 0x00000100,
1365	0x3c210, 0xffffffff, 0x06000100,
1366	0x3c290, 0xffffffff, 0x00000100,
1367	0x3c274, 0xffffffff, 0x00000100,
1368	0x3c2b4, 0xffffffff, 0x00000100,
1369	0x3c2b0, 0xffffffff, 0x00000100,
1370	0x3c270, 0xffffffff, 0x00000100,
1371	0x30800, 0xffffffff, 0xe0000000,
1372	0x3c020, 0xffffffff, 0x00010000,
1373	0x3c024, 0xffffffff, 0x00030002,
1374	0x3c028, 0xffffffff, 0x00040007,
1375	0x3c02c, 0xffffffff, 0x00060005,
1376	0x3c030, 0xffffffff, 0x00090008,
1377	0x3c034, 0xffffffff, 0x00010000,
1378	0x3c038, 0xffffffff, 0x00030002,
1379	0x3c03c, 0xffffffff, 0x00040007,
1380	0x3c040, 0xffffffff, 0x00060005,
1381	0x3c044, 0xffffffff, 0x00090008,
1382	0x3c000, 0xffffffff, 0x96e00200,
1383	0x8708, 0xffffffff, 0x00900100,
1384	0xc424, 0xffffffff, 0x0020003f,
1385	0x38, 0xffffffff, 0x0140001c,
1386	0x3c, 0x000f0000, 0x000f0000,
1387	0x220, 0xffffffff, 0xC060000C,
1388	0x224, 0xc0000fff, 0x00000100,
1389	0x20a8, 0xffffffff, 0x00000104,
1390	0x55e4, 0xff000fff, 0x00000100,
1391	0x30cc, 0xc0000fff, 0x00000104,
1392	0xc1e4, 0x00000001, 0x00000001,
1393	0xd00c, 0xff000ff0, 0x00000100,
1394	0xd80c, 0xff000ff0, 0x00000100
1395};
1396
1397static const u32 hawaii_golden_spm_registers[] =
1398{
1399	0x30800, 0xe0ffffff, 0xe0000000
1400};
1401
1402static const u32 hawaii_golden_common_registers[] =
1403{
1404	0x30800, 0xffffffff, 0xe0000000,
1405	0x28350, 0xffffffff, 0x3a00161a,
1406	0x28354, 0xffffffff, 0x0000002e,
1407	0x9a10, 0xffffffff, 0x00018208,
1408	0x98f8, 0xffffffff, 0x12011003
1409};
1410
1411static const u32 hawaii_golden_registers[] =
1412{
1413	0x3354, 0x00000333, 0x00000333,
1414	0x9a10, 0x00010000, 0x00058208,
1415	0x9830, 0xffffffff, 0x00000000,
1416	0x9834, 0xf00fffff, 0x00000400,
1417	0x9838, 0x0002021c, 0x00020200,
1418	0xc78, 0x00000080, 0x00000000,
1419	0x5bb0, 0x000000f0, 0x00000070,
1420	0x5bc0, 0xf0311fff, 0x80300000,
1421	0x350c, 0x00810000, 0x408af000,
1422	0x7030, 0x31000111, 0x00000011,
1423	0x2f48, 0x73773777, 0x12010001,
1424	0x2120, 0x0000007f, 0x0000001b,
1425	0x21dc, 0x00007fb6, 0x00002191,
1426	0x3628, 0x0000003f, 0x0000000a,
1427	0x362c, 0x0000003f, 0x0000000a,
1428	0x2ae4, 0x00073ffe, 0x000022a2,
1429	0x240c, 0x000007ff, 0x00000000,
1430	0x8bf0, 0x00002001, 0x00000001,
1431	0x8b24, 0xffffffff, 0x00ffffff,
1432	0x30a04, 0x0000ff0f, 0x00000000,
1433	0x28a4c, 0x07ffffff, 0x06000000,
1434	0x3e78, 0x00000001, 0x00000002,
1435	0xc768, 0x00000008, 0x00000008,
1436	0xc770, 0x00000f00, 0x00000800,
1437	0xc774, 0x00000f00, 0x00000800,
1438	0xc798, 0x00ffffff, 0x00ff7fbf,
1439	0xc79c, 0x00ffffff, 0x00ff7faf,
1440	0x8c00, 0x000000ff, 0x00000800,
1441	0xe40, 0x00001fff, 0x00001fff,
1442	0x9060, 0x0000007f, 0x00000020,
1443	0x9508, 0x00010000, 0x00010000,
1444	0xae00, 0x00100000, 0x000ff07c,
1445	0xac14, 0x000003ff, 0x0000000f,
1446	0xac10, 0xffffffff, 0x7564fdec,
1447	0xac0c, 0xffffffff, 0x3120b9a8,
1448	0xac08, 0x20000000, 0x0f9c0000
1449};
1450
1451static const u32 hawaii_mgcg_cgcg_init[] =
1452{
1453	0xc420, 0xffffffff, 0xfffffffd,
1454	0x30800, 0xffffffff, 0xe0000000,
1455	0x3c2a0, 0xffffffff, 0x00000100,
1456	0x3c208, 0xffffffff, 0x00000100,
1457	0x3c2c0, 0xffffffff, 0x00000100,
1458	0x3c2c8, 0xffffffff, 0x00000100,
1459	0x3c2c4, 0xffffffff, 0x00000100,
1460	0x55e4, 0xffffffff, 0x00200100,
1461	0x3c280, 0xffffffff, 0x00000100,
1462	0x3c214, 0xffffffff, 0x06000100,
1463	0x3c220, 0xffffffff, 0x00000100,
1464	0x3c218, 0xffffffff, 0x06000100,
1465	0x3c204, 0xffffffff, 0x00000100,
1466	0x3c2e0, 0xffffffff, 0x00000100,
1467	0x3c224, 0xffffffff, 0x00000100,
1468	0x3c200, 0xffffffff, 0x00000100,
1469	0x3c230, 0xffffffff, 0x00000100,
1470	0x3c234, 0xffffffff, 0x00000100,
1471	0x3c250, 0xffffffff, 0x00000100,
1472	0x3c254, 0xffffffff, 0x00000100,
1473	0x3c258, 0xffffffff, 0x00000100,
1474	0x3c25c, 0xffffffff, 0x00000100,
1475	0x3c260, 0xffffffff, 0x00000100,
1476	0x3c27c, 0xffffffff, 0x00000100,
1477	0x3c278, 0xffffffff, 0x00000100,
1478	0x3c210, 0xffffffff, 0x06000100,
1479	0x3c290, 0xffffffff, 0x00000100,
1480	0x3c274, 0xffffffff, 0x00000100,
1481	0x3c2b4, 0xffffffff, 0x00000100,
1482	0x3c2b0, 0xffffffff, 0x00000100,
1483	0x3c270, 0xffffffff, 0x00000100,
1484	0x30800, 0xffffffff, 0xe0000000,
1485	0x3c020, 0xffffffff, 0x00010000,
1486	0x3c024, 0xffffffff, 0x00030002,
1487	0x3c028, 0xffffffff, 0x00040007,
1488	0x3c02c, 0xffffffff, 0x00060005,
1489	0x3c030, 0xffffffff, 0x00090008,
1490	0x3c034, 0xffffffff, 0x00010000,
1491	0x3c038, 0xffffffff, 0x00030002,
1492	0x3c03c, 0xffffffff, 0x00040007,
1493	0x3c040, 0xffffffff, 0x00060005,
1494	0x3c044, 0xffffffff, 0x00090008,
1495	0x3c048, 0xffffffff, 0x00010000,
1496	0x3c04c, 0xffffffff, 0x00030002,
1497	0x3c050, 0xffffffff, 0x00040007,
1498	0x3c054, 0xffffffff, 0x00060005,
1499	0x3c058, 0xffffffff, 0x00090008,
1500	0x3c05c, 0xffffffff, 0x00010000,
1501	0x3c060, 0xffffffff, 0x00030002,
1502	0x3c064, 0xffffffff, 0x00040007,
1503	0x3c068, 0xffffffff, 0x00060005,
1504	0x3c06c, 0xffffffff, 0x00090008,
1505	0x3c070, 0xffffffff, 0x00010000,
1506	0x3c074, 0xffffffff, 0x00030002,
1507	0x3c078, 0xffffffff, 0x00040007,
1508	0x3c07c, 0xffffffff, 0x00060005,
1509	0x3c080, 0xffffffff, 0x00090008,
1510	0x3c084, 0xffffffff, 0x00010000,
1511	0x3c088, 0xffffffff, 0x00030002,
1512	0x3c08c, 0xffffffff, 0x00040007,
1513	0x3c090, 0xffffffff, 0x00060005,
1514	0x3c094, 0xffffffff, 0x00090008,
1515	0x3c098, 0xffffffff, 0x00010000,
1516	0x3c09c, 0xffffffff, 0x00030002,
1517	0x3c0a0, 0xffffffff, 0x00040007,
1518	0x3c0a4, 0xffffffff, 0x00060005,
1519	0x3c0a8, 0xffffffff, 0x00090008,
1520	0x3c0ac, 0xffffffff, 0x00010000,
1521	0x3c0b0, 0xffffffff, 0x00030002,
1522	0x3c0b4, 0xffffffff, 0x00040007,
1523	0x3c0b8, 0xffffffff, 0x00060005,
1524	0x3c0bc, 0xffffffff, 0x00090008,
1525	0x3c0c0, 0xffffffff, 0x00010000,
1526	0x3c0c4, 0xffffffff, 0x00030002,
1527	0x3c0c8, 0xffffffff, 0x00040007,
1528	0x3c0cc, 0xffffffff, 0x00060005,
1529	0x3c0d0, 0xffffffff, 0x00090008,
1530	0x3c0d4, 0xffffffff, 0x00010000,
1531	0x3c0d8, 0xffffffff, 0x00030002,
1532	0x3c0dc, 0xffffffff, 0x00040007,
1533	0x3c0e0, 0xffffffff, 0x00060005,
1534	0x3c0e4, 0xffffffff, 0x00090008,
1535	0x3c0e8, 0xffffffff, 0x00010000,
1536	0x3c0ec, 0xffffffff, 0x00030002,
1537	0x3c0f0, 0xffffffff, 0x00040007,
1538	0x3c0f4, 0xffffffff, 0x00060005,
1539	0x3c0f8, 0xffffffff, 0x00090008,
1540	0xc318, 0xffffffff, 0x00020200,
1541	0x3350, 0xffffffff, 0x00000200,
1542	0x15c0, 0xffffffff, 0x00000400,
1543	0x55e8, 0xffffffff, 0x00000000,
1544	0x2f50, 0xffffffff, 0x00000902,
1545	0x3c000, 0xffffffff, 0x96940200,
1546	0x8708, 0xffffffff, 0x00900100,
1547	0xc424, 0xffffffff, 0x0020003f,
1548	0x38, 0xffffffff, 0x0140001c,
1549	0x3c, 0x000f0000, 0x000f0000,
1550	0x220, 0xffffffff, 0xc060000c,
1551	0x224, 0xc0000fff, 0x00000100,
1552	0xf90, 0xffffffff, 0x00000100,
1553	0xf98, 0x00000101, 0x00000000,
1554	0x20a8, 0xffffffff, 0x00000104,
1555	0x55e4, 0xff000fff, 0x00000100,
1556	0x30cc, 0xc0000fff, 0x00000104,
1557	0xc1e4, 0x00000001, 0x00000001,
1558	0xd00c, 0xff000ff0, 0x00000100,
1559	0xd80c, 0xff000ff0, 0x00000100
1560};
1561
1562static const u32 godavari_golden_registers[] =
1563{
1564	0x55e4, 0xff607fff, 0xfc000100,
1565	0x6ed8, 0x00010101, 0x00010000,
1566	0x9830, 0xffffffff, 0x00000000,
1567	0x98302, 0xf00fffff, 0x00000400,
1568	0x6130, 0xffffffff, 0x00010000,
1569	0x5bb0, 0x000000f0, 0x00000070,
1570	0x5bc0, 0xf0311fff, 0x80300000,
1571	0x98f8, 0x73773777, 0x12010001,
1572	0x98fc, 0xffffffff, 0x00000010,
1573	0x8030, 0x00001f0f, 0x0000100a,
1574	0x2f48, 0x73773777, 0x12010001,
1575	0x2408, 0x000fffff, 0x000c007f,
1576	0x8a14, 0xf000003f, 0x00000007,
1577	0x8b24, 0xffffffff, 0x00ff0fff,
1578	0x30a04, 0x0000ff0f, 0x00000000,
1579	0x28a4c, 0x07ffffff, 0x06000000,
1580	0x4d8, 0x00000fff, 0x00000100,
1581	0xd014, 0x00010000, 0x00810001,
1582	0xd814, 0x00010000, 0x00810001,
1583	0x3e78, 0x00000001, 0x00000002,
1584	0xc768, 0x00000008, 0x00000008,
1585	0xc770, 0x00000f00, 0x00000800,
1586	0xc774, 0x00000f00, 0x00000800,
1587	0xc798, 0x00ffffff, 0x00ff7fbf,
1588	0xc79c, 0x00ffffff, 0x00ff7faf,
1589	0x8c00, 0x000000ff, 0x00000001,
1590	0x214f8, 0x01ff01ff, 0x00000002,
1591	0x21498, 0x007ff800, 0x00200000,
1592	0x2015c, 0xffffffff, 0x00000f40,
1593	0x88c4, 0x001f3ae3, 0x00000082,
1594	0x88d4, 0x0000001f, 0x00000010,
1595	0x30934, 0xffffffff, 0x00000000
1596};
1597
1598
1599static void cik_init_golden_registers(struct radeon_device *rdev)
1600{
1601	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1602	mutex_lock(&rdev->grbm_idx_mutex);
1603	switch (rdev->family) {
1604	case CHIP_BONAIRE:
1605		radeon_program_register_sequence(rdev,
1606						 bonaire_mgcg_cgcg_init,
1607						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1608		radeon_program_register_sequence(rdev,
1609						 bonaire_golden_registers,
1610						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1611		radeon_program_register_sequence(rdev,
1612						 bonaire_golden_common_registers,
1613						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1614		radeon_program_register_sequence(rdev,
1615						 bonaire_golden_spm_registers,
1616						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1617		break;
1618	case CHIP_KABINI:
1619		radeon_program_register_sequence(rdev,
1620						 kalindi_mgcg_cgcg_init,
1621						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1622		radeon_program_register_sequence(rdev,
1623						 kalindi_golden_registers,
1624						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1625		radeon_program_register_sequence(rdev,
1626						 kalindi_golden_common_registers,
1627						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1628		radeon_program_register_sequence(rdev,
1629						 kalindi_golden_spm_registers,
1630						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1631		break;
1632	case CHIP_MULLINS:
1633		radeon_program_register_sequence(rdev,
1634						 kalindi_mgcg_cgcg_init,
1635						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1636		radeon_program_register_sequence(rdev,
1637						 godavari_golden_registers,
1638						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1639		radeon_program_register_sequence(rdev,
1640						 kalindi_golden_common_registers,
1641						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1642		radeon_program_register_sequence(rdev,
1643						 kalindi_golden_spm_registers,
1644						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1645		break;
1646	case CHIP_KAVERI:
1647		radeon_program_register_sequence(rdev,
1648						 spectre_mgcg_cgcg_init,
1649						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1650		radeon_program_register_sequence(rdev,
1651						 spectre_golden_registers,
1652						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1653		radeon_program_register_sequence(rdev,
1654						 spectre_golden_common_registers,
1655						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1656		radeon_program_register_sequence(rdev,
1657						 spectre_golden_spm_registers,
1658						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1659		break;
1660	case CHIP_HAWAII:
1661		radeon_program_register_sequence(rdev,
1662						 hawaii_mgcg_cgcg_init,
1663						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1664		radeon_program_register_sequence(rdev,
1665						 hawaii_golden_registers,
1666						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1667		radeon_program_register_sequence(rdev,
1668						 hawaii_golden_common_registers,
1669						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1670		radeon_program_register_sequence(rdev,
1671						 hawaii_golden_spm_registers,
1672						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1673		break;
1674	default:
1675		break;
1676	}
1677	mutex_unlock(&rdev->grbm_idx_mutex);
1678}
1679
1680/**
1681 * cik_get_xclk - get the xclk
1682 *
1683 * @rdev: radeon_device pointer
1684 *
1685 * Returns the reference clock used by the gfx engine
1686 * (CIK).
1687 */
1688u32 cik_get_xclk(struct radeon_device *rdev)
1689{
1690        u32 reference_clock = rdev->clock.spll.reference_freq;
1691
1692	if (rdev->flags & RADEON_IS_IGP) {
1693		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1694			return reference_clock / 2;
1695	} else {
1696		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1697			return reference_clock / 4;
1698	}
1699	return reference_clock;
1700}
1701
1702/**
1703 * cik_mm_rdoorbell - read a doorbell dword
1704 *
1705 * @rdev: radeon_device pointer
1706 * @index: doorbell index
1707 *
1708 * Returns the value in the doorbell aperture at the
1709 * requested doorbell index (CIK).
1710 */
1711u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1712{
1713	if (index < rdev->doorbell.num_doorbells) {
1714		return readl(rdev->doorbell.ptr + index);
1715	} else {
1716		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1717		return 0;
1718	}
1719}
1720
1721/**
1722 * cik_mm_wdoorbell - write a doorbell dword
1723 *
1724 * @rdev: radeon_device pointer
1725 * @index: doorbell index
1726 * @v: value to write
1727 *
1728 * Writes @v to the doorbell aperture at the
1729 * requested doorbell index (CIK).
1730 */
1731void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1732{
1733	if (index < rdev->doorbell.num_doorbells) {
1734		writel(v, rdev->doorbell.ptr + index);
1735	} else {
1736		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1737	}
1738}
1739
1740#define BONAIRE_IO_MC_REGS_SIZE 36
1741
1742static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1743{
1744	{0x00000070, 0x04400000},
1745	{0x00000071, 0x80c01803},
1746	{0x00000072, 0x00004004},
1747	{0x00000073, 0x00000100},
1748	{0x00000074, 0x00ff0000},
1749	{0x00000075, 0x34000000},
1750	{0x00000076, 0x08000014},
1751	{0x00000077, 0x00cc08ec},
1752	{0x00000078, 0x00000400},
1753	{0x00000079, 0x00000000},
1754	{0x0000007a, 0x04090000},
1755	{0x0000007c, 0x00000000},
1756	{0x0000007e, 0x4408a8e8},
1757	{0x0000007f, 0x00000304},
1758	{0x00000080, 0x00000000},
1759	{0x00000082, 0x00000001},
1760	{0x00000083, 0x00000002},
1761	{0x00000084, 0xf3e4f400},
1762	{0x00000085, 0x052024e3},
1763	{0x00000087, 0x00000000},
1764	{0x00000088, 0x01000000},
1765	{0x0000008a, 0x1c0a0000},
1766	{0x0000008b, 0xff010000},
1767	{0x0000008d, 0xffffefff},
1768	{0x0000008e, 0xfff3efff},
1769	{0x0000008f, 0xfff3efbf},
1770	{0x00000092, 0xf7ffffff},
1771	{0x00000093, 0xffffff7f},
1772	{0x00000095, 0x00101101},
1773	{0x00000096, 0x00000fff},
1774	{0x00000097, 0x00116fff},
1775	{0x00000098, 0x60010000},
1776	{0x00000099, 0x10010000},
1777	{0x0000009a, 0x00006000},
1778	{0x0000009b, 0x00001000},
1779	{0x0000009f, 0x00b48000}
1780};
1781
1782#define HAWAII_IO_MC_REGS_SIZE 22
1783
1784static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1785{
1786	{0x0000007d, 0x40000000},
1787	{0x0000007e, 0x40180304},
1788	{0x0000007f, 0x0000ff00},
1789	{0x00000081, 0x00000000},
1790	{0x00000083, 0x00000800},
1791	{0x00000086, 0x00000000},
1792	{0x00000087, 0x00000100},
1793	{0x00000088, 0x00020100},
1794	{0x00000089, 0x00000000},
1795	{0x0000008b, 0x00040000},
1796	{0x0000008c, 0x00000100},
1797	{0x0000008e, 0xff010000},
1798	{0x00000090, 0xffffefff},
1799	{0x00000091, 0xfff3efff},
1800	{0x00000092, 0xfff3efbf},
1801	{0x00000093, 0xf7ffffff},
1802	{0x00000094, 0xffffff7f},
1803	{0x00000095, 0x00000fff},
1804	{0x00000096, 0x00116fff},
1805	{0x00000097, 0x60010000},
1806	{0x00000098, 0x10010000},
1807	{0x0000009f, 0x00c79000}
1808};
1809
1810
1811/**
1812 * cik_srbm_select - select specific register instances
1813 *
1814 * @rdev: radeon_device pointer
1815 * @me: selected ME (micro engine)
1816 * @pipe: pipe
1817 * @queue: queue
1818 * @vmid: VMID
1819 *
1820 * Switches the currently active registers instances.  Some
1821 * registers are instanced per VMID, others are instanced per
1822 * me/pipe/queue combination.
1823 */
1824static void cik_srbm_select(struct radeon_device *rdev,
1825			    u32 me, u32 pipe, u32 queue, u32 vmid)
1826{
1827	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1828			     MEID(me & 0x3) |
1829			     VMID(vmid & 0xf) |
1830			     QUEUEID(queue & 0x7));
1831	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1832}
1833
1834/* ucode loading */
1835/**
1836 * ci_mc_load_microcode - load MC ucode into the hw
1837 *
1838 * @rdev: radeon_device pointer
1839 *
1840 * Load the GDDR MC ucode into the hw (CIK).
1841 * Returns 0 on success, error on failure.
1842 */
1843int ci_mc_load_microcode(struct radeon_device *rdev)
1844{
1845	const __be32 *fw_data = NULL;
1846	const __le32 *new_fw_data = NULL;
1847	u32 running, blackout = 0, tmp;
1848	u32 *io_mc_regs = NULL;
1849	const __le32 *new_io_mc_regs = NULL;
1850	int i, regs_size, ucode_size;
1851
1852	if (!rdev->mc_fw)
1853		return -EINVAL;
1854
1855	if (rdev->new_fw) {
1856		const struct mc_firmware_header_v1_0 *hdr =
1857			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1858
1859		radeon_ucode_print_mc_hdr(&hdr->header);
1860
1861		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1862		new_io_mc_regs = (const __le32 *)
1863			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1864		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1865		new_fw_data = (const __le32 *)
1866			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1867	} else {
1868		ucode_size = rdev->mc_fw->size / 4;
1869
1870		switch (rdev->family) {
1871		case CHIP_BONAIRE:
1872			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1873			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1874			break;
1875		case CHIP_HAWAII:
1876			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1877			regs_size = HAWAII_IO_MC_REGS_SIZE;
1878			break;
1879		default:
1880			return -EINVAL;
1881		}
1882		fw_data = (const __be32 *)rdev->mc_fw->data;
1883	}
1884
1885	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1886
1887	if (running == 0) {
1888		if (running) {
1889			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1890			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1891		}
1892
1893		/* reset the engine and set to writable */
1894		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1895		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1896
1897		/* load mc io regs */
1898		for (i = 0; i < regs_size; i++) {
1899			if (rdev->new_fw) {
1900				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1901				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1902			} else {
1903				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1904				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1905			}
1906		}
1907
1908		tmp = RREG32(MC_SEQ_MISC0);
1909		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1910			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1911			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1912			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1913			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1914		}
1915
1916		/* load the MC ucode */
1917		for (i = 0; i < ucode_size; i++) {
1918			if (rdev->new_fw)
1919				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1920			else
1921				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1922		}
1923
1924		/* put the engine back into the active state */
1925		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1926		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1927		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1928
1929		/* wait for training to complete */
1930		for (i = 0; i < rdev->usec_timeout; i++) {
1931			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1932				break;
1933			udelay(1);
1934		}
1935		for (i = 0; i < rdev->usec_timeout; i++) {
1936			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1937				break;
1938			udelay(1);
1939		}
1940
1941		if (running)
1942			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1943	}
1944
1945	return 0;
1946}
1947
1948/**
1949 * cik_init_microcode - load ucode images from disk
1950 *
1951 * @rdev: radeon_device pointer
1952 *
1953 * Use the firmware interface to load the ucode images into
1954 * the driver (not loaded into hw).
1955 * Returns 0 on success, error on failure.
1956 */
1957static int cik_init_microcode(struct radeon_device *rdev)
1958{
1959	const char *chip_name;
1960	const char *new_chip_name;
1961	size_t pfp_req_size, me_req_size, ce_req_size,
1962		mec_req_size, rlc_req_size, mc_req_size = 0,
1963		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1964	char fw_name[30];
1965	int new_fw = 0;
1966	int err;
1967	int num_fw;
1968
1969	DRM_DEBUG("\n");
1970
1971	switch (rdev->family) {
1972	case CHIP_BONAIRE:
1973		chip_name = "BONAIRE";
1974		new_chip_name = "bonaire";
1975		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1976		me_req_size = CIK_ME_UCODE_SIZE * 4;
1977		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1978		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1979		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1980		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1981		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1982		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1983		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1984		num_fw = 8;
1985		break;
1986	case CHIP_HAWAII:
1987		chip_name = "HAWAII";
1988		new_chip_name = "hawaii";
1989		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1995		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1996		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1998		num_fw = 8;
1999		break;
2000	case CHIP_KAVERI:
2001		chip_name = "KAVERI";
2002		new_chip_name = "kaveri";
2003		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004		me_req_size = CIK_ME_UCODE_SIZE * 4;
2005		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2008		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009		num_fw = 7;
2010		break;
2011	case CHIP_KABINI:
2012		chip_name = "KABINI";
2013		new_chip_name = "kabini";
2014		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2019		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2020		num_fw = 6;
2021		break;
2022	case CHIP_MULLINS:
2023		chip_name = "MULLINS";
2024		new_chip_name = "mullins";
2025		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2026		me_req_size = CIK_ME_UCODE_SIZE * 4;
2027		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2028		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2029		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2030		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031		num_fw = 6;
2032		break;
2033	default: BUG();
2034	}
2035
2036	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2037
2038	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2039	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2040	if (err) {
2041		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2042		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2043		if (err)
2044			goto out;
2045		if (rdev->pfp_fw->size != pfp_req_size) {
2046			printk(KERN_ERR
2047			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2048			       rdev->pfp_fw->size, fw_name);
2049			err = -EINVAL;
2050			goto out;
2051		}
2052	} else {
2053		err = radeon_ucode_validate(rdev->pfp_fw);
2054		if (err) {
2055			printk(KERN_ERR
2056			       "cik_fw: validation failed for firmware \"%s\"\n",
2057			       fw_name);
2058			goto out;
2059		} else {
2060			new_fw++;
2061		}
2062	}
2063
2064	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2065	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2066	if (err) {
2067		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2068		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2069		if (err)
2070			goto out;
2071		if (rdev->me_fw->size != me_req_size) {
2072			printk(KERN_ERR
2073			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074			       rdev->me_fw->size, fw_name);
2075			err = -EINVAL;
2076		}
2077	} else {
2078		err = radeon_ucode_validate(rdev->me_fw);
2079		if (err) {
2080			printk(KERN_ERR
2081			       "cik_fw: validation failed for firmware \"%s\"\n",
2082			       fw_name);
2083			goto out;
2084		} else {
2085			new_fw++;
2086		}
2087	}
2088
2089	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2090	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2091	if (err) {
2092		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2093		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2094		if (err)
2095			goto out;
2096		if (rdev->ce_fw->size != ce_req_size) {
2097			printk(KERN_ERR
2098			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099			       rdev->ce_fw->size, fw_name);
2100			err = -EINVAL;
2101		}
2102	} else {
2103		err = radeon_ucode_validate(rdev->ce_fw);
2104		if (err) {
2105			printk(KERN_ERR
2106			       "cik_fw: validation failed for firmware \"%s\"\n",
2107			       fw_name);
2108			goto out;
2109		} else {
2110			new_fw++;
2111		}
2112	}
2113
2114	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2115	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2116	if (err) {
2117		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2118		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2119		if (err)
2120			goto out;
2121		if (rdev->mec_fw->size != mec_req_size) {
2122			printk(KERN_ERR
2123			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124			       rdev->mec_fw->size, fw_name);
2125			err = -EINVAL;
2126		}
2127	} else {
2128		err = radeon_ucode_validate(rdev->mec_fw);
2129		if (err) {
2130			printk(KERN_ERR
2131			       "cik_fw: validation failed for firmware \"%s\"\n",
2132			       fw_name);
2133			goto out;
2134		} else {
2135			new_fw++;
2136		}
2137	}
2138
2139	if (rdev->family == CHIP_KAVERI) {
2140		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2141		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2142		if (err) {
2143			goto out;
2144		} else {
2145			err = radeon_ucode_validate(rdev->mec2_fw);
2146			if (err) {
2147				goto out;
2148			} else {
2149				new_fw++;
2150			}
2151		}
2152	}
2153
2154	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2155	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2156	if (err) {
2157		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2158		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2159		if (err)
2160			goto out;
2161		if (rdev->rlc_fw->size != rlc_req_size) {
2162			printk(KERN_ERR
2163			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2164			       rdev->rlc_fw->size, fw_name);
2165			err = -EINVAL;
2166		}
2167	} else {
2168		err = radeon_ucode_validate(rdev->rlc_fw);
2169		if (err) {
2170			printk(KERN_ERR
2171			       "cik_fw: validation failed for firmware \"%s\"\n",
2172			       fw_name);
2173			goto out;
2174		} else {
2175			new_fw++;
2176		}
2177	}
2178
2179	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2180	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2181	if (err) {
2182		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2183		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2184		if (err)
2185			goto out;
2186		if (rdev->sdma_fw->size != sdma_req_size) {
2187			printk(KERN_ERR
2188			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2189			       rdev->sdma_fw->size, fw_name);
2190			err = -EINVAL;
2191		}
2192	} else {
2193		err = radeon_ucode_validate(rdev->sdma_fw);
2194		if (err) {
2195			printk(KERN_ERR
2196			       "cik_fw: validation failed for firmware \"%s\"\n",
2197			       fw_name);
2198			goto out;
2199		} else {
2200			new_fw++;
2201		}
2202	}
2203
2204	/* No SMC, MC ucode on APUs */
2205	if (!(rdev->flags & RADEON_IS_IGP)) {
2206		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2207		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2208		if (err) {
2209			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2210			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2211			if (err) {
2212				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2213				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2214				if (err)
2215					goto out;
2216			}
2217			if ((rdev->mc_fw->size != mc_req_size) &&
2218			    (rdev->mc_fw->size != mc2_req_size)){
2219				printk(KERN_ERR
2220				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2221				       rdev->mc_fw->size, fw_name);
2222				err = -EINVAL;
2223			}
2224			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2225		} else {
2226			err = radeon_ucode_validate(rdev->mc_fw);
2227			if (err) {
2228				printk(KERN_ERR
2229				       "cik_fw: validation failed for firmware \"%s\"\n",
2230				       fw_name);
2231				goto out;
2232			} else {
2233				new_fw++;
2234			}
2235		}
2236
2237		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2238		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2239		if (err) {
2240			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2241			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242			if (err) {
2243				printk(KERN_ERR
2244				       "smc: error loading firmware \"%s\"\n",
2245				       fw_name);
2246				release_firmware(rdev->smc_fw);
2247				rdev->smc_fw = NULL;
2248				err = 0;
2249			} else if (rdev->smc_fw->size != smc_req_size) {
2250				printk(KERN_ERR
2251				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2252				       rdev->smc_fw->size, fw_name);
2253				err = -EINVAL;
2254			}
2255		} else {
2256			err = radeon_ucode_validate(rdev->smc_fw);
2257			if (err) {
2258				printk(KERN_ERR
2259				       "cik_fw: validation failed for firmware \"%s\"\n",
2260				       fw_name);
2261				goto out;
2262			} else {
2263				new_fw++;
2264			}
2265		}
2266	}
2267
2268	if (new_fw == 0) {
2269		rdev->new_fw = false;
2270	} else if (new_fw < num_fw) {
2271		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2272		err = -EINVAL;
2273	} else {
2274		rdev->new_fw = true;
2275	}
2276
2277out:
2278	if (err) {
2279		if (err != -EINVAL)
2280			printk(KERN_ERR
2281			       "cik_cp: Failed to load firmware \"%s\"\n",
2282			       fw_name);
2283		release_firmware(rdev->pfp_fw);
2284		rdev->pfp_fw = NULL;
2285		release_firmware(rdev->me_fw);
2286		rdev->me_fw = NULL;
2287		release_firmware(rdev->ce_fw);
2288		rdev->ce_fw = NULL;
2289		release_firmware(rdev->mec_fw);
2290		rdev->mec_fw = NULL;
2291		release_firmware(rdev->mec2_fw);
2292		rdev->mec2_fw = NULL;
2293		release_firmware(rdev->rlc_fw);
2294		rdev->rlc_fw = NULL;
2295		release_firmware(rdev->sdma_fw);
2296		rdev->sdma_fw = NULL;
2297		release_firmware(rdev->mc_fw);
2298		rdev->mc_fw = NULL;
2299		release_firmware(rdev->smc_fw);
2300		rdev->smc_fw = NULL;
2301	}
2302	return err;
2303}
2304
2305/*
2306 * Core functions
2307 */
2308/**
2309 * cik_tiling_mode_table_init - init the hw tiling table
2310 *
2311 * @rdev: radeon_device pointer
2312 *
2313 * Starting with SI, the tiling setup is done globally in a
2314 * set of 32 tiling modes.  Rather than selecting each set of
2315 * parameters per surface as on older asics, we just select
2316 * which index in the tiling table we want to use, and the
2317 * surface uses those parameters (CIK).
2318 */
2319static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2320{
2321	const u32 num_tile_mode_states = 32;
2322	const u32 num_secondary_tile_mode_states = 16;
2323	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2324	u32 num_pipe_configs;
2325	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2326		rdev->config.cik.max_shader_engines;
2327
2328	switch (rdev->config.cik.mem_row_size_in_kb) {
2329	case 1:
2330		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2331		break;
2332	case 2:
2333	default:
2334		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2335		break;
2336	case 4:
2337		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2338		break;
2339	}
2340
2341	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2342	if (num_pipe_configs > 8)
2343		num_pipe_configs = 16;
2344
2345	if (num_pipe_configs == 16) {
2346		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2347			switch (reg_offset) {
2348			case 0:
2349				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2353				break;
2354			case 1:
2355				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2359				break;
2360			case 2:
2361				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2365				break;
2366			case 3:
2367				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2368						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2371				break;
2372			case 4:
2373				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376						 TILE_SPLIT(split_equal_to_row_size));
2377				break;
2378			case 5:
2379				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382				break;
2383			case 6:
2384				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2388				break;
2389			case 7:
2390				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2392						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393						 TILE_SPLIT(split_equal_to_row_size));
2394				break;
2395			case 8:
2396				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2397						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2398				break;
2399			case 9:
2400				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2401						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2403				break;
2404			case 10:
2405				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409				break;
2410			case 11:
2411				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2414						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415				break;
2416			case 12:
2417				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421				break;
2422			case 13:
2423				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2426				break;
2427			case 14:
2428				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432				break;
2433			case 16:
2434				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438				break;
2439			case 17:
2440				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444				break;
2445			case 27:
2446				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449				break;
2450			case 28:
2451				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455				break;
2456			case 29:
2457				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2458						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2460						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461				break;
2462			case 30:
2463				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467				break;
2468			default:
2469				gb_tile_moden = 0;
2470				break;
2471			}
2472			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2473			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474		}
2475		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2476			switch (reg_offset) {
2477			case 0:
2478				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481						 NUM_BANKS(ADDR_SURF_16_BANK));
2482				break;
2483			case 1:
2484				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487						 NUM_BANKS(ADDR_SURF_16_BANK));
2488				break;
2489			case 2:
2490				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493						 NUM_BANKS(ADDR_SURF_16_BANK));
2494				break;
2495			case 3:
2496				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499						 NUM_BANKS(ADDR_SURF_16_BANK));
2500				break;
2501			case 4:
2502				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505						 NUM_BANKS(ADDR_SURF_8_BANK));
2506				break;
2507			case 5:
2508				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511						 NUM_BANKS(ADDR_SURF_4_BANK));
2512				break;
2513			case 6:
2514				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517						 NUM_BANKS(ADDR_SURF_2_BANK));
2518				break;
2519			case 8:
2520				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523						 NUM_BANKS(ADDR_SURF_16_BANK));
2524				break;
2525			case 9:
2526				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529						 NUM_BANKS(ADDR_SURF_16_BANK));
2530				break;
2531			case 10:
2532				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535						 NUM_BANKS(ADDR_SURF_16_BANK));
2536				break;
2537			case 11:
2538				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541						 NUM_BANKS(ADDR_SURF_8_BANK));
2542				break;
2543			case 12:
2544				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547						 NUM_BANKS(ADDR_SURF_4_BANK));
2548				break;
2549			case 13:
2550				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553						 NUM_BANKS(ADDR_SURF_2_BANK));
2554				break;
2555			case 14:
2556				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559						 NUM_BANKS(ADDR_SURF_2_BANK));
2560				break;
2561			default:
2562				gb_tile_moden = 0;
2563				break;
2564			}
2565			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2566			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2567		}
2568	} else if (num_pipe_configs == 8) {
2569		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2570			switch (reg_offset) {
2571			case 0:
2572				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576				break;
2577			case 1:
2578				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2582				break;
2583			case 2:
2584				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588				break;
2589			case 3:
2590				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2594				break;
2595			case 4:
2596				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2598						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599						 TILE_SPLIT(split_equal_to_row_size));
2600				break;
2601			case 5:
2602				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605				break;
2606			case 6:
2607				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2611				break;
2612			case 7:
2613				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2615						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616						 TILE_SPLIT(split_equal_to_row_size));
2617				break;
2618			case 8:
2619				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2620						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2621				break;
2622			case 9:
2623				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2626				break;
2627			case 10:
2628				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632				break;
2633			case 11:
2634				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2638				break;
2639			case 12:
2640				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2641						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644				break;
2645			case 13:
2646				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649				break;
2650			case 14:
2651				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655				break;
2656			case 16:
2657				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661				break;
2662			case 17:
2663				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667				break;
2668			case 27:
2669				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2672				break;
2673			case 28:
2674				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678				break;
2679			case 29:
2680				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2682						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684				break;
2685			case 30:
2686				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2687						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690				break;
2691			default:
2692				gb_tile_moden = 0;
2693				break;
2694			}
2695			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2696			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2697		}
2698		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699			switch (reg_offset) {
2700			case 0:
2701				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704						 NUM_BANKS(ADDR_SURF_16_BANK));
2705				break;
2706			case 1:
2707				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710						 NUM_BANKS(ADDR_SURF_16_BANK));
2711				break;
2712			case 2:
2713				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716						 NUM_BANKS(ADDR_SURF_16_BANK));
2717				break;
2718			case 3:
2719				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722						 NUM_BANKS(ADDR_SURF_16_BANK));
2723				break;
2724			case 4:
2725				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728						 NUM_BANKS(ADDR_SURF_8_BANK));
2729				break;
2730			case 5:
2731				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734						 NUM_BANKS(ADDR_SURF_4_BANK));
2735				break;
2736			case 6:
2737				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740						 NUM_BANKS(ADDR_SURF_2_BANK));
2741				break;
2742			case 8:
2743				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746						 NUM_BANKS(ADDR_SURF_16_BANK));
2747				break;
2748			case 9:
2749				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752						 NUM_BANKS(ADDR_SURF_16_BANK));
2753				break;
2754			case 10:
2755				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758						 NUM_BANKS(ADDR_SURF_16_BANK));
2759				break;
2760			case 11:
2761				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764						 NUM_BANKS(ADDR_SURF_16_BANK));
2765				break;
2766			case 12:
2767				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770						 NUM_BANKS(ADDR_SURF_8_BANK));
2771				break;
2772			case 13:
2773				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776						 NUM_BANKS(ADDR_SURF_4_BANK));
2777				break;
2778			case 14:
2779				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782						 NUM_BANKS(ADDR_SURF_2_BANK));
2783				break;
2784			default:
2785				gb_tile_moden = 0;
2786				break;
2787			}
2788			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790		}
2791	} else if (num_pipe_configs == 4) {
2792		if (num_rbs == 4) {
2793			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2794				switch (reg_offset) {
2795				case 0:
2796					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800					break;
2801				case 1:
2802					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2806					break;
2807				case 2:
2808					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812					break;
2813				case 3:
2814					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2818					break;
2819				case 4:
2820					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2822							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823							 TILE_SPLIT(split_equal_to_row_size));
2824					break;
2825				case 5:
2826					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829					break;
2830				case 6:
2831					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2835					break;
2836				case 7:
2837					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2838							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2839							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840							 TILE_SPLIT(split_equal_to_row_size));
2841					break;
2842				case 8:
2843					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2844							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2845					break;
2846				case 9:
2847					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2850					break;
2851				case 10:
2852					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856					break;
2857				case 11:
2858					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2860							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2861							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862					break;
2863				case 12:
2864					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868					break;
2869				case 13:
2870					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2871							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2873					break;
2874				case 14:
2875					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879					break;
2880				case 16:
2881					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2882							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885					break;
2886				case 17:
2887					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2888							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891					break;
2892				case 27:
2893					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2896					break;
2897				case 28:
2898					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902					break;
2903				case 29:
2904					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908					break;
2909				case 30:
2910					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2911							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914					break;
2915				default:
2916					gb_tile_moden = 0;
2917					break;
2918				}
2919				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2920				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2921			}
2922		} else if (num_rbs < 4) {
2923			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2924				switch (reg_offset) {
2925				case 0:
2926					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2930					break;
2931				case 1:
2932					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2936					break;
2937				case 2:
2938					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2940							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2942					break;
2943				case 3:
2944					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2946							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2947							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2948					break;
2949				case 4:
2950					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2952							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2953							 TILE_SPLIT(split_equal_to_row_size));
2954					break;
2955				case 5:
2956					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959					break;
2960				case 6:
2961					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2963							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2965					break;
2966				case 7:
2967					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2969							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2970							 TILE_SPLIT(split_equal_to_row_size));
2971					break;
2972				case 8:
2973					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2974						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2975					break;
2976				case 9:
2977					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2980					break;
2981				case 10:
2982					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2984							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986					break;
2987				case 11:
2988					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2989							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2990							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992					break;
2993				case 12:
2994					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998					break;
2999				case 13:
3000					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3003					break;
3004				case 14:
3005					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009					break;
3010				case 16:
3011					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3013							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3014							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015					break;
3016				case 17:
3017					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3018							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3020							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021					break;
3022				case 27:
3023					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3026					break;
3027				case 28:
3028					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032					break;
3033				case 29:
3034					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3035							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3036							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3037							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038					break;
3039				case 30:
3040					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3041							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3043							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3044					break;
3045				default:
3046					gb_tile_moden = 0;
3047					break;
3048				}
3049				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3050				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3051			}
3052		}
3053		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3054			switch (reg_offset) {
3055			case 0:
3056				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059						 NUM_BANKS(ADDR_SURF_16_BANK));
3060				break;
3061			case 1:
3062				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065						 NUM_BANKS(ADDR_SURF_16_BANK));
3066				break;
3067			case 2:
3068				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071						 NUM_BANKS(ADDR_SURF_16_BANK));
3072				break;
3073			case 3:
3074				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077						 NUM_BANKS(ADDR_SURF_16_BANK));
3078				break;
3079			case 4:
3080				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083						 NUM_BANKS(ADDR_SURF_16_BANK));
3084				break;
3085			case 5:
3086				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089						 NUM_BANKS(ADDR_SURF_8_BANK));
3090				break;
3091			case 6:
3092				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095						 NUM_BANKS(ADDR_SURF_4_BANK));
3096				break;
3097			case 8:
3098				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3099						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3100						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101						 NUM_BANKS(ADDR_SURF_16_BANK));
3102				break;
3103			case 9:
3104				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3105						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3106						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107						 NUM_BANKS(ADDR_SURF_16_BANK));
3108				break;
3109			case 10:
3110				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3112						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3113						 NUM_BANKS(ADDR_SURF_16_BANK));
3114				break;
3115			case 11:
3116				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3118						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119						 NUM_BANKS(ADDR_SURF_16_BANK));
3120				break;
3121			case 12:
3122				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125						 NUM_BANKS(ADDR_SURF_16_BANK));
3126				break;
3127			case 13:
3128				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131						 NUM_BANKS(ADDR_SURF_8_BANK));
3132				break;
3133			case 14:
3134				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137						 NUM_BANKS(ADDR_SURF_4_BANK));
3138				break;
3139			default:
3140				gb_tile_moden = 0;
3141				break;
3142			}
3143			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3144			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3145		}
3146	} else if (num_pipe_configs == 2) {
3147		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3148			switch (reg_offset) {
3149			case 0:
3150				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152						 PIPE_CONFIG(ADDR_SURF_P2) |
3153						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3154				break;
3155			case 1:
3156				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3157						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158						 PIPE_CONFIG(ADDR_SURF_P2) |
3159						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3160				break;
3161			case 2:
3162				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3164						 PIPE_CONFIG(ADDR_SURF_P2) |
3165						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3166				break;
3167			case 3:
3168				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3170						 PIPE_CONFIG(ADDR_SURF_P2) |
3171						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3172				break;
3173			case 4:
3174				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3176						 PIPE_CONFIG(ADDR_SURF_P2) |
3177						 TILE_SPLIT(split_equal_to_row_size));
3178				break;
3179			case 5:
3180				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181						 PIPE_CONFIG(ADDR_SURF_P2) |
3182						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183				break;
3184			case 6:
3185				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3187						 PIPE_CONFIG(ADDR_SURF_P2) |
3188						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3189				break;
3190			case 7:
3191				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3192						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3193						 PIPE_CONFIG(ADDR_SURF_P2) |
3194						 TILE_SPLIT(split_equal_to_row_size));
3195				break;
3196			case 8:
3197				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198						PIPE_CONFIG(ADDR_SURF_P2);
3199				break;
3200			case 9:
3201				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203						 PIPE_CONFIG(ADDR_SURF_P2));
3204				break;
3205			case 10:
3206				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3208						 PIPE_CONFIG(ADDR_SURF_P2) |
3209						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210				break;
3211			case 11:
3212				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3214						 PIPE_CONFIG(ADDR_SURF_P2) |
3215						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216				break;
3217			case 12:
3218				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3220						 PIPE_CONFIG(ADDR_SURF_P2) |
3221						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222				break;
3223			case 13:
3224				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225						 PIPE_CONFIG(ADDR_SURF_P2) |
3226						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3227				break;
3228			case 14:
3229				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231						 PIPE_CONFIG(ADDR_SURF_P2) |
3232						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233				break;
3234			case 16:
3235				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237						 PIPE_CONFIG(ADDR_SURF_P2) |
3238						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239				break;
3240			case 17:
3241				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3242						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243						 PIPE_CONFIG(ADDR_SURF_P2) |
3244						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245				break;
3246			case 27:
3247				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3249						 PIPE_CONFIG(ADDR_SURF_P2));
3250				break;
3251			case 28:
3252				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254						 PIPE_CONFIG(ADDR_SURF_P2) |
3255						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256				break;
3257			case 29:
3258				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260						 PIPE_CONFIG(ADDR_SURF_P2) |
3261						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262				break;
3263			case 30:
3264				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3265						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266						 PIPE_CONFIG(ADDR_SURF_P2) |
3267						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268				break;
3269			default:
3270				gb_tile_moden = 0;
3271				break;
3272			}
3273			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3274			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3275		}
3276		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3277			switch (reg_offset) {
3278			case 0:
3279				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282						 NUM_BANKS(ADDR_SURF_16_BANK));
3283				break;
3284			case 1:
3285				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288						 NUM_BANKS(ADDR_SURF_16_BANK));
3289				break;
3290			case 2:
3291				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294						 NUM_BANKS(ADDR_SURF_16_BANK));
3295				break;
3296			case 3:
3297				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300						 NUM_BANKS(ADDR_SURF_16_BANK));
3301				break;
3302			case 4:
3303				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306						 NUM_BANKS(ADDR_SURF_16_BANK));
3307				break;
3308			case 5:
3309				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312						 NUM_BANKS(ADDR_SURF_16_BANK));
3313				break;
3314			case 6:
3315				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3317						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3318						 NUM_BANKS(ADDR_SURF_8_BANK));
3319				break;
3320			case 8:
3321				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3323						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324						 NUM_BANKS(ADDR_SURF_16_BANK));
3325				break;
3326			case 9:
3327				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3328						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330						 NUM_BANKS(ADDR_SURF_16_BANK));
3331				break;
3332			case 10:
3333				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3334						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3335						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336						 NUM_BANKS(ADDR_SURF_16_BANK));
3337				break;
3338			case 11:
3339				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3340						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3341						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342						 NUM_BANKS(ADDR_SURF_16_BANK));
3343				break;
3344			case 12:
3345				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3347						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348						 NUM_BANKS(ADDR_SURF_16_BANK));
3349				break;
3350			case 13:
3351				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354						 NUM_BANKS(ADDR_SURF_16_BANK));
3355				break;
3356			case 14:
3357				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360						 NUM_BANKS(ADDR_SURF_8_BANK));
3361				break;
3362			default:
3363				gb_tile_moden = 0;
3364				break;
3365			}
3366			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3367			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3368		}
3369	} else
3370		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3371}
3372
3373/**
3374 * cik_select_se_sh - select which SE, SH to address
3375 *
3376 * @rdev: radeon_device pointer
3377 * @se_num: shader engine to address
3378 * @sh_num: sh block to address
3379 *
3380 * Select which SE, SH combinations to address. Certain
3381 * registers are instanced per SE or SH.  0xffffffff means
3382 * broadcast to all SEs or SHs (CIK).
3383 */
3384static void cik_select_se_sh(struct radeon_device *rdev,
3385			     u32 se_num, u32 sh_num)
3386{
3387	u32 data = INSTANCE_BROADCAST_WRITES;
3388
3389	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3390		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3391	else if (se_num == 0xffffffff)
3392		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3393	else if (sh_num == 0xffffffff)
3394		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3395	else
3396		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3397	WREG32(GRBM_GFX_INDEX, data);
3398}
3399
3400/**
3401 * cik_create_bitmask - create a bitmask
3402 *
3403 * @bit_width: length of the mask
3404 *
3405 * create a variable length bit mask (CIK).
3406 * Returns the bitmask.
3407 */
3408static u32 cik_create_bitmask(u32 bit_width)
3409{
3410	u32 i, mask = 0;
3411
3412	for (i = 0; i < bit_width; i++) {
3413		mask <<= 1;
3414		mask |= 1;
3415	}
3416	return mask;
3417}
3418
3419/**
3420 * cik_get_rb_disabled - computes the mask of disabled RBs
3421 *
3422 * @rdev: radeon_device pointer
3423 * @max_rb_num: max RBs (render backends) for the asic
3424 * @se_num: number of SEs (shader engines) for the asic
3425 * @sh_per_se: number of SH blocks per SE for the asic
3426 *
3427 * Calculates the bitmask of disabled RBs (CIK).
3428 * Returns the disabled RB bitmask.
3429 */
3430static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3431			      u32 max_rb_num_per_se,
3432			      u32 sh_per_se)
3433{
3434	u32 data, mask;
3435
3436	data = RREG32(CC_RB_BACKEND_DISABLE);
3437	if (data & 1)
3438		data &= BACKEND_DISABLE_MASK;
3439	else
3440		data = 0;
3441	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3442
3443	data >>= BACKEND_DISABLE_SHIFT;
3444
3445	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3446
3447	return data & mask;
3448}
3449
3450/**
3451 * cik_setup_rb - setup the RBs on the asic
3452 *
3453 * @rdev: radeon_device pointer
3454 * @se_num: number of SEs (shader engines) for the asic
3455 * @sh_per_se: number of SH blocks per SE for the asic
3456 * @max_rb_num: max RBs (render backends) for the asic
3457 *
3458 * Configures per-SE/SH RB registers (CIK).
3459 */
3460static void cik_setup_rb(struct radeon_device *rdev,
3461			 u32 se_num, u32 sh_per_se,
3462			 u32 max_rb_num_per_se)
3463{
3464	int i, j;
3465	u32 data, mask;
3466	u32 disabled_rbs = 0;
3467	u32 enabled_rbs = 0;
3468
3469	mutex_lock(&rdev->grbm_idx_mutex);
3470	for (i = 0; i < se_num; i++) {
3471		for (j = 0; j < sh_per_se; j++) {
3472			cik_select_se_sh(rdev, i, j);
3473			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3474			if (rdev->family == CHIP_HAWAII)
3475				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3476			else
3477				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3478		}
3479	}
3480	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3481	mutex_unlock(&rdev->grbm_idx_mutex);
3482
3483	mask = 1;
3484	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3485		if (!(disabled_rbs & mask))
3486			enabled_rbs |= mask;
3487		mask <<= 1;
3488	}
3489
3490	rdev->config.cik.backend_enable_mask = enabled_rbs;
3491
3492	mutex_lock(&rdev->grbm_idx_mutex);
3493	for (i = 0; i < se_num; i++) {
3494		cik_select_se_sh(rdev, i, 0xffffffff);
3495		data = 0;
3496		for (j = 0; j < sh_per_se; j++) {
3497			switch (enabled_rbs & 3) {
3498			case 0:
3499				if (j == 0)
3500					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3501				else
3502					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3503				break;
3504			case 1:
3505				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3506				break;
3507			case 2:
3508				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3509				break;
3510			case 3:
3511			default:
3512				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3513				break;
3514			}
3515			enabled_rbs >>= 2;
3516		}
3517		WREG32(PA_SC_RASTER_CONFIG, data);
3518	}
3519	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3520	mutex_unlock(&rdev->grbm_idx_mutex);
3521}
3522
3523/**
3524 * cik_gpu_init - setup the 3D engine
3525 *
3526 * @rdev: radeon_device pointer
3527 *
3528 * Configures the 3D engine and tiling configuration
3529 * registers so that the 3D engine is usable.
3530 */
3531static void cik_gpu_init(struct radeon_device *rdev)
3532{
3533	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3534	u32 mc_shared_chmap, mc_arb_ramcfg;
3535	u32 hdp_host_path_cntl;
3536	u32 tmp;
3537	int i, j;
3538
3539	switch (rdev->family) {
3540	case CHIP_BONAIRE:
3541		rdev->config.cik.max_shader_engines = 2;
3542		rdev->config.cik.max_tile_pipes = 4;
3543		rdev->config.cik.max_cu_per_sh = 7;
3544		rdev->config.cik.max_sh_per_se = 1;
3545		rdev->config.cik.max_backends_per_se = 2;
3546		rdev->config.cik.max_texture_channel_caches = 4;
3547		rdev->config.cik.max_gprs = 256;
3548		rdev->config.cik.max_gs_threads = 32;
3549		rdev->config.cik.max_hw_contexts = 8;
3550
3551		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3552		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3553		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3554		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3555		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3556		break;
3557	case CHIP_HAWAII:
3558		rdev->config.cik.max_shader_engines = 4;
3559		rdev->config.cik.max_tile_pipes = 16;
3560		rdev->config.cik.max_cu_per_sh = 11;
3561		rdev->config.cik.max_sh_per_se = 1;
3562		rdev->config.cik.max_backends_per_se = 4;
3563		rdev->config.cik.max_texture_channel_caches = 16;
3564		rdev->config.cik.max_gprs = 256;
3565		rdev->config.cik.max_gs_threads = 32;
3566		rdev->config.cik.max_hw_contexts = 8;
3567
3568		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3569		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3570		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3571		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3572		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3573		break;
3574	case CHIP_KAVERI:
3575		rdev->config.cik.max_shader_engines = 1;
3576		rdev->config.cik.max_tile_pipes = 4;
3577		if ((rdev->pdev->device == 0x1304) ||
3578		    (rdev->pdev->device == 0x1305) ||
3579		    (rdev->pdev->device == 0x130C) ||
3580		    (rdev->pdev->device == 0x130F) ||
3581		    (rdev->pdev->device == 0x1310) ||
3582		    (rdev->pdev->device == 0x1311) ||
3583		    (rdev->pdev->device == 0x131C)) {
3584			rdev->config.cik.max_cu_per_sh = 8;
3585			rdev->config.cik.max_backends_per_se = 2;
3586		} else if ((rdev->pdev->device == 0x1309) ||
3587			   (rdev->pdev->device == 0x130A) ||
3588			   (rdev->pdev->device == 0x130D) ||
3589			   (rdev->pdev->device == 0x1313) ||
3590			   (rdev->pdev->device == 0x131D)) {
3591			rdev->config.cik.max_cu_per_sh = 6;
3592			rdev->config.cik.max_backends_per_se = 2;
3593		} else if ((rdev->pdev->device == 0x1306) ||
3594			   (rdev->pdev->device == 0x1307) ||
3595			   (rdev->pdev->device == 0x130B) ||
3596			   (rdev->pdev->device == 0x130E) ||
3597			   (rdev->pdev->device == 0x1315) ||
3598			   (rdev->pdev->device == 0x1318) ||
3599			   (rdev->pdev->device == 0x131B)) {
3600			rdev->config.cik.max_cu_per_sh = 4;
3601			rdev->config.cik.max_backends_per_se = 1;
3602		} else {
3603			rdev->config.cik.max_cu_per_sh = 3;
3604			rdev->config.cik.max_backends_per_se = 1;
3605		}
3606		rdev->config.cik.max_sh_per_se = 1;
3607		rdev->config.cik.max_texture_channel_caches = 4;
3608		rdev->config.cik.max_gprs = 256;
3609		rdev->config.cik.max_gs_threads = 16;
3610		rdev->config.cik.max_hw_contexts = 8;
3611
3612		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3613		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3614		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3615		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3616		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3617		break;
3618	case CHIP_KABINI:
3619	case CHIP_MULLINS:
3620	default:
3621		rdev->config.cik.max_shader_engines = 1;
3622		rdev->config.cik.max_tile_pipes = 2;
3623		rdev->config.cik.max_cu_per_sh = 2;
3624		rdev->config.cik.max_sh_per_se = 1;
3625		rdev->config.cik.max_backends_per_se = 1;
3626		rdev->config.cik.max_texture_channel_caches = 2;
3627		rdev->config.cik.max_gprs = 256;
3628		rdev->config.cik.max_gs_threads = 16;
3629		rdev->config.cik.max_hw_contexts = 8;
3630
3631		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3632		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3633		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3634		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3635		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3636		break;
3637	}
3638
3639	/* Initialize HDP */
3640	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3641		WREG32((0x2c14 + j), 0x00000000);
3642		WREG32((0x2c18 + j), 0x00000000);
3643		WREG32((0x2c1c + j), 0x00000000);
3644		WREG32((0x2c20 + j), 0x00000000);
3645		WREG32((0x2c24 + j), 0x00000000);
3646	}
3647
3648	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3649	WREG32(SRBM_INT_CNTL, 0x1);
3650	WREG32(SRBM_INT_ACK, 0x1);
3651
3652	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3653
3654	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3655	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3656
3657	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3658	rdev->config.cik.mem_max_burst_length_bytes = 256;
3659	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3660	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3661	if (rdev->config.cik.mem_row_size_in_kb > 4)
3662		rdev->config.cik.mem_row_size_in_kb = 4;
3663	/* XXX use MC settings? */
3664	rdev->config.cik.shader_engine_tile_size = 32;
3665	rdev->config.cik.num_gpus = 1;
3666	rdev->config.cik.multi_gpu_tile_size = 64;
3667
3668	/* fix up row size */
3669	gb_addr_config &= ~ROW_SIZE_MASK;
3670	switch (rdev->config.cik.mem_row_size_in_kb) {
3671	case 1:
3672	default:
3673		gb_addr_config |= ROW_SIZE(0);
3674		break;
3675	case 2:
3676		gb_addr_config |= ROW_SIZE(1);
3677		break;
3678	case 4:
3679		gb_addr_config |= ROW_SIZE(2);
3680		break;
3681	}
3682
3683	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3684	 * not have bank info, so create a custom tiling dword.
3685	 * bits 3:0   num_pipes
3686	 * bits 7:4   num_banks
3687	 * bits 11:8  group_size
3688	 * bits 15:12 row_size
3689	 */
3690	rdev->config.cik.tile_config = 0;
3691	switch (rdev->config.cik.num_tile_pipes) {
3692	case 1:
3693		rdev->config.cik.tile_config |= (0 << 0);
3694		break;
3695	case 2:
3696		rdev->config.cik.tile_config |= (1 << 0);
3697		break;
3698	case 4:
3699		rdev->config.cik.tile_config |= (2 << 0);
3700		break;
3701	case 8:
3702	default:
3703		/* XXX what about 12? */
3704		rdev->config.cik.tile_config |= (3 << 0);
3705		break;
3706	}
3707	rdev->config.cik.tile_config |=
3708		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3709	rdev->config.cik.tile_config |=
3710		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3711	rdev->config.cik.tile_config |=
3712		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3713
3714	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3715	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3716	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3717	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3718	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3719	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3720	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3721	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3722
3723	cik_tiling_mode_table_init(rdev);
3724
3725	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3726		     rdev->config.cik.max_sh_per_se,
3727		     rdev->config.cik.max_backends_per_se);
3728
3729	rdev->config.cik.active_cus = 0;
3730	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3731		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3732			rdev->config.cik.active_cus +=
3733				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3734		}
3735	}
3736
3737	/* set HW defaults for 3D engine */
3738	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3739
3740	mutex_lock(&rdev->grbm_idx_mutex);
3741	/*
3742	 * making sure that the following register writes will be broadcasted
3743	 * to all the shaders
3744	 */
3745	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3746	WREG32(SX_DEBUG_1, 0x20);
3747
3748	WREG32(TA_CNTL_AUX, 0x00010000);
3749
3750	tmp = RREG32(SPI_CONFIG_CNTL);
3751	tmp |= 0x03000000;
3752	WREG32(SPI_CONFIG_CNTL, tmp);
3753
3754	WREG32(SQ_CONFIG, 1);
3755
3756	WREG32(DB_DEBUG, 0);
3757
3758	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3759	tmp |= 0x00000400;
3760	WREG32(DB_DEBUG2, tmp);
3761
3762	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3763	tmp |= 0x00020200;
3764	WREG32(DB_DEBUG3, tmp);
3765
3766	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3767	tmp |= 0x00018208;
3768	WREG32(CB_HW_CONTROL, tmp);
3769
3770	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3771
3772	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3773				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3774				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3775				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3776
3777	WREG32(VGT_NUM_INSTANCES, 1);
3778
3779	WREG32(CP_PERFMON_CNTL, 0);
3780
3781	WREG32(SQ_CONFIG, 0);
3782
3783	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3784					  FORCE_EOV_MAX_REZ_CNT(255)));
3785
3786	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3787	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3788
3789	WREG32(VGT_GS_VERTEX_REUSE, 16);
3790	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3791
3792	tmp = RREG32(HDP_MISC_CNTL);
3793	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3794	WREG32(HDP_MISC_CNTL, tmp);
3795
3796	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3797	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3798
3799	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3800	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3801	mutex_unlock(&rdev->grbm_idx_mutex);
3802
3803	udelay(50);
3804}
3805
3806/*
3807 * GPU scratch registers helpers function.
3808 */
3809/**
3810 * cik_scratch_init - setup driver info for CP scratch regs
3811 *
3812 * @rdev: radeon_device pointer
3813 *
3814 * Set up the number and offset of the CP scratch registers.
3815 * NOTE: use of CP scratch registers is a legacy inferface and
3816 * is not used by default on newer asics (r6xx+).  On newer asics,
3817 * memory buffers are used for fences rather than scratch regs.
3818 */
3819static void cik_scratch_init(struct radeon_device *rdev)
3820{
3821	int i;
3822
3823	rdev->scratch.num_reg = 7;
3824	rdev->scratch.reg_base = SCRATCH_REG0;
3825	for (i = 0; i < rdev->scratch.num_reg; i++) {
3826		rdev->scratch.free[i] = true;
3827		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3828	}
3829}
3830
3831/**
3832 * cik_ring_test - basic gfx ring test
3833 *
3834 * @rdev: radeon_device pointer
3835 * @ring: radeon_ring structure holding ring information
3836 *
3837 * Allocate a scratch register and write to it using the gfx ring (CIK).
3838 * Provides a basic gfx ring test to verify that the ring is working.
3839 * Used by cik_cp_gfx_resume();
3840 * Returns 0 on success, error on failure.
3841 */
3842int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3843{
3844	uint32_t scratch;
3845	uint32_t tmp = 0;
3846	unsigned i;
3847	int r;
3848
3849	r = radeon_scratch_get(rdev, &scratch);
3850	if (r) {
3851		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3852		return r;
3853	}
3854	WREG32(scratch, 0xCAFEDEAD);
3855	r = radeon_ring_lock(rdev, ring, 3);
3856	if (r) {
3857		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3858		radeon_scratch_free(rdev, scratch);
3859		return r;
3860	}
3861	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3862	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3863	radeon_ring_write(ring, 0xDEADBEEF);
3864	radeon_ring_unlock_commit(rdev, ring, false);
3865
3866	for (i = 0; i < rdev->usec_timeout; i++) {
3867		tmp = RREG32(scratch);
3868		if (tmp == 0xDEADBEEF)
3869			break;
3870		DRM_UDELAY(1);
3871	}
3872	if (i < rdev->usec_timeout) {
3873		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3874	} else {
3875		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3876			  ring->idx, scratch, tmp);
3877		r = -EINVAL;
3878	}
3879	radeon_scratch_free(rdev, scratch);
3880	return r;
3881}
3882
3883/**
3884 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3885 *
3886 * @rdev: radeon_device pointer
3887 * @ridx: radeon ring index
3888 *
3889 * Emits an hdp flush on the cp.
3890 */
3891static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3892				       int ridx)
3893{
3894	struct radeon_ring *ring = &rdev->ring[ridx];
3895	u32 ref_and_mask;
3896
3897	switch (ring->idx) {
3898	case CAYMAN_RING_TYPE_CP1_INDEX:
3899	case CAYMAN_RING_TYPE_CP2_INDEX:
3900	default:
3901		switch (ring->me) {
3902		case 0:
3903			ref_and_mask = CP2 << ring->pipe;
3904			break;
3905		case 1:
3906			ref_and_mask = CP6 << ring->pipe;
3907			break;
3908		default:
3909			return;
3910		}
3911		break;
3912	case RADEON_RING_TYPE_GFX_INDEX:
3913		ref_and_mask = CP0;
3914		break;
3915	}
3916
3917	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3918	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3919				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3920				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3921	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3922	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3923	radeon_ring_write(ring, ref_and_mask);
3924	radeon_ring_write(ring, ref_and_mask);
3925	radeon_ring_write(ring, 0x20); /* poll interval */
3926}
3927
3928/**
3929 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3930 *
3931 * @rdev: radeon_device pointer
3932 * @fence: radeon fence object
3933 *
3934 * Emits a fence sequnce number on the gfx ring and flushes
3935 * GPU caches.
3936 */
3937void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3938			     struct radeon_fence *fence)
3939{
3940	struct radeon_ring *ring = &rdev->ring[fence->ring];
3941	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3942
3943	/* Workaround for cache flush problems. First send a dummy EOP
3944	 * event down the pipe with seq one below.
3945	 */
3946	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3947	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3948				 EOP_TC_ACTION_EN |
3949				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3950				 EVENT_INDEX(5)));
3951	radeon_ring_write(ring, addr & 0xfffffffc);
3952	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3953				DATA_SEL(1) | INT_SEL(0));
3954	radeon_ring_write(ring, fence->seq - 1);
3955	radeon_ring_write(ring, 0);
3956
3957	/* Then send the real EOP event down the pipe. */
3958	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3959	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3960				 EOP_TC_ACTION_EN |
3961				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3962				 EVENT_INDEX(5)));
3963	radeon_ring_write(ring, addr & 0xfffffffc);
3964	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3965	radeon_ring_write(ring, fence->seq);
3966	radeon_ring_write(ring, 0);
3967}
3968
3969/**
3970 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3971 *
3972 * @rdev: radeon_device pointer
3973 * @fence: radeon fence object
3974 *
3975 * Emits a fence sequnce number on the compute ring and flushes
3976 * GPU caches.
3977 */
3978void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3979				 struct radeon_fence *fence)
3980{
3981	struct radeon_ring *ring = &rdev->ring[fence->ring];
3982	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3983
3984	/* RELEASE_MEM - flush caches, send int */
3985	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3986	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3987				 EOP_TC_ACTION_EN |
3988				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3989				 EVENT_INDEX(5)));
3990	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3991	radeon_ring_write(ring, addr & 0xfffffffc);
3992	radeon_ring_write(ring, upper_32_bits(addr));
3993	radeon_ring_write(ring, fence->seq);
3994	radeon_ring_write(ring, 0);
3995}
3996
3997/**
3998 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3999 *
4000 * @rdev: radeon_device pointer
4001 * @ring: radeon ring buffer object
4002 * @semaphore: radeon semaphore object
4003 * @emit_wait: Is this a sempahore wait?
4004 *
4005 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4006 * from running ahead of semaphore waits.
4007 */
4008bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4009			     struct radeon_ring *ring,
4010			     struct radeon_semaphore *semaphore,
4011			     bool emit_wait)
4012{
4013	uint64_t addr = semaphore->gpu_addr;
4014	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4015
4016	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4017	radeon_ring_write(ring, lower_32_bits(addr));
4018	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4019
4020	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4021		/* Prevent the PFP from running ahead of the semaphore wait */
4022		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4023		radeon_ring_write(ring, 0x0);
4024	}
4025
4026	return true;
4027}
4028
4029/**
4030 * cik_copy_cpdma - copy pages using the CP DMA engine
4031 *
4032 * @rdev: radeon_device pointer
4033 * @src_offset: src GPU address
4034 * @dst_offset: dst GPU address
4035 * @num_gpu_pages: number of GPU pages to xfer
4036 * @resv: reservation object to sync to
4037 *
4038 * Copy GPU paging using the CP DMA engine (CIK+).
4039 * Used by the radeon ttm implementation to move pages if
4040 * registered as the asic copy callback.
4041 */
4042struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4043				    uint64_t src_offset, uint64_t dst_offset,
4044				    unsigned num_gpu_pages,
4045				    struct reservation_object *resv)
4046{
4047	struct radeon_fence *fence;
4048	struct radeon_sync sync;
4049	int ring_index = rdev->asic->copy.blit_ring_index;
4050	struct radeon_ring *ring = &rdev->ring[ring_index];
4051	u32 size_in_bytes, cur_size_in_bytes, control;
4052	int i, num_loops;
4053	int r = 0;
4054
4055	radeon_sync_create(&sync);
4056
4057	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4058	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4059	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4060	if (r) {
4061		DRM_ERROR("radeon: moving bo (%d).\n", r);
4062		radeon_sync_free(rdev, &sync, NULL);
4063		return ERR_PTR(r);
4064	}
4065
4066	radeon_sync_resv(rdev, &sync, resv, false);
4067	radeon_sync_rings(rdev, &sync, ring->idx);
4068
4069	for (i = 0; i < num_loops; i++) {
4070		cur_size_in_bytes = size_in_bytes;
4071		if (cur_size_in_bytes > 0x1fffff)
4072			cur_size_in_bytes = 0x1fffff;
4073		size_in_bytes -= cur_size_in_bytes;
4074		control = 0;
4075		if (size_in_bytes == 0)
4076			control |= PACKET3_DMA_DATA_CP_SYNC;
4077		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4078		radeon_ring_write(ring, control);
4079		radeon_ring_write(ring, lower_32_bits(src_offset));
4080		radeon_ring_write(ring, upper_32_bits(src_offset));
4081		radeon_ring_write(ring, lower_32_bits(dst_offset));
4082		radeon_ring_write(ring, upper_32_bits(dst_offset));
4083		radeon_ring_write(ring, cur_size_in_bytes);
4084		src_offset += cur_size_in_bytes;
4085		dst_offset += cur_size_in_bytes;
4086	}
4087
4088	r = radeon_fence_emit(rdev, &fence, ring->idx);
4089	if (r) {
4090		radeon_ring_unlock_undo(rdev, ring);
4091		radeon_sync_free(rdev, &sync, NULL);
4092		return ERR_PTR(r);
4093	}
4094
4095	radeon_ring_unlock_commit(rdev, ring, false);
4096	radeon_sync_free(rdev, &sync, fence);
4097
4098	return fence;
4099}
4100
4101/*
4102 * IB stuff
4103 */
4104/**
4105 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4106 *
4107 * @rdev: radeon_device pointer
4108 * @ib: radeon indirect buffer object
4109 *
4110 * Emits an DE (drawing engine) or CE (constant engine) IB
4111 * on the gfx ring.  IBs are usually generated by userspace
4112 * acceleration drivers and submitted to the kernel for
4113 * sheduling on the ring.  This function schedules the IB
4114 * on the gfx ring for execution by the GPU.
4115 */
4116void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4117{
4118	struct radeon_ring *ring = &rdev->ring[ib->ring];
4119	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4120	u32 header, control = INDIRECT_BUFFER_VALID;
4121
4122	if (ib->is_const_ib) {
4123		/* set switch buffer packet before const IB */
4124		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4125		radeon_ring_write(ring, 0);
4126
4127		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4128	} else {
4129		u32 next_rptr;
4130		if (ring->rptr_save_reg) {
4131			next_rptr = ring->wptr + 3 + 4;
4132			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4133			radeon_ring_write(ring, ((ring->rptr_save_reg -
4134						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4135			radeon_ring_write(ring, next_rptr);
4136		} else if (rdev->wb.enabled) {
4137			next_rptr = ring->wptr + 5 + 4;
4138			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4139			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4140			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4141			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4142			radeon_ring_write(ring, next_rptr);
4143		}
4144
4145		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4146	}
4147
4148	control |= ib->length_dw | (vm_id << 24);
4149
4150	radeon_ring_write(ring, header);
4151	radeon_ring_write(ring,
4152#ifdef __BIG_ENDIAN
4153			  (2 << 0) |
4154#endif
4155			  (ib->gpu_addr & 0xFFFFFFFC));
4156	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4157	radeon_ring_write(ring, control);
4158}
4159
4160/**
4161 * cik_ib_test - basic gfx ring IB test
4162 *
4163 * @rdev: radeon_device pointer
4164 * @ring: radeon_ring structure holding ring information
4165 *
4166 * Allocate an IB and execute it on the gfx ring (CIK).
4167 * Provides a basic gfx ring test to verify that IBs are working.
4168 * Returns 0 on success, error on failure.
4169 */
4170int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4171{
4172	struct radeon_ib ib;
4173	uint32_t scratch;
4174	uint32_t tmp = 0;
4175	unsigned i;
4176	int r;
4177
4178	r = radeon_scratch_get(rdev, &scratch);
4179	if (r) {
4180		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4181		return r;
4182	}
4183	WREG32(scratch, 0xCAFEDEAD);
4184	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4185	if (r) {
4186		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4187		radeon_scratch_free(rdev, scratch);
4188		return r;
4189	}
4190	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4191	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4192	ib.ptr[2] = 0xDEADBEEF;
4193	ib.length_dw = 3;
4194	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4195	if (r) {
4196		radeon_scratch_free(rdev, scratch);
4197		radeon_ib_free(rdev, &ib);
4198		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4199		return r;
4200	}
4201	r = radeon_fence_wait(ib.fence, false);
4202	if (r) {
4203		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4204		radeon_scratch_free(rdev, scratch);
4205		radeon_ib_free(rdev, &ib);
4206		return r;
4207	}
4208	for (i = 0; i < rdev->usec_timeout; i++) {
4209		tmp = RREG32(scratch);
4210		if (tmp == 0xDEADBEEF)
4211			break;
4212		DRM_UDELAY(1);
4213	}
4214	if (i < rdev->usec_timeout) {
4215		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4216	} else {
4217		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4218			  scratch, tmp);
4219		r = -EINVAL;
4220	}
4221	radeon_scratch_free(rdev, scratch);
4222	radeon_ib_free(rdev, &ib);
4223	return r;
4224}
4225
4226/*
4227 * CP.
4228 * On CIK, gfx and compute now have independant command processors.
4229 *
4230 * GFX
4231 * Gfx consists of a single ring and can process both gfx jobs and
4232 * compute jobs.  The gfx CP consists of three microengines (ME):
4233 * PFP - Pre-Fetch Parser
4234 * ME - Micro Engine
4235 * CE - Constant Engine
4236 * The PFP and ME make up what is considered the Drawing Engine (DE).
4237 * The CE is an asynchronous engine used for updating buffer desciptors
4238 * used by the DE so that they can be loaded into cache in parallel
4239 * while the DE is processing state update packets.
4240 *
4241 * Compute
4242 * The compute CP consists of two microengines (ME):
4243 * MEC1 - Compute MicroEngine 1
4244 * MEC2 - Compute MicroEngine 2
4245 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4246 * The queues are exposed to userspace and are programmed directly
4247 * by the compute runtime.
4248 */
4249/**
4250 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4251 *
4252 * @rdev: radeon_device pointer
4253 * @enable: enable or disable the MEs
4254 *
4255 * Halts or unhalts the gfx MEs.
4256 */
4257static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4258{
4259	if (enable)
4260		WREG32(CP_ME_CNTL, 0);
4261	else {
4262		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4263			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4264		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4265		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4266	}
4267	udelay(50);
4268}
4269
4270/**
4271 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4272 *
4273 * @rdev: radeon_device pointer
4274 *
4275 * Loads the gfx PFP, ME, and CE ucode.
4276 * Returns 0 for success, -EINVAL if the ucode is not available.
4277 */
4278static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4279{
4280	int i;
4281
4282	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4283		return -EINVAL;
4284
4285	cik_cp_gfx_enable(rdev, false);
4286
4287	if (rdev->new_fw) {
4288		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4289			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4290		const struct gfx_firmware_header_v1_0 *ce_hdr =
4291			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4292		const struct gfx_firmware_header_v1_0 *me_hdr =
4293			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4294		const __le32 *fw_data;
4295		u32 fw_size;
4296
4297		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4298		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4299		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4300
4301		/* PFP */
4302		fw_data = (const __le32 *)
4303			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4304		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4305		WREG32(CP_PFP_UCODE_ADDR, 0);
4306		for (i = 0; i < fw_size; i++)
4307			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4308		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4309
4310		/* CE */
4311		fw_data = (const __le32 *)
4312			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4313		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4314		WREG32(CP_CE_UCODE_ADDR, 0);
4315		for (i = 0; i < fw_size; i++)
4316			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4317		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4318
4319		/* ME */
4320		fw_data = (const __be32 *)
4321			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4322		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4323		WREG32(CP_ME_RAM_WADDR, 0);
4324		for (i = 0; i < fw_size; i++)
4325			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4326		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4327		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4328	} else {
4329		const __be32 *fw_data;
4330
4331		/* PFP */
4332		fw_data = (const __be32 *)rdev->pfp_fw->data;
4333		WREG32(CP_PFP_UCODE_ADDR, 0);
4334		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4335			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4336		WREG32(CP_PFP_UCODE_ADDR, 0);
4337
4338		/* CE */
4339		fw_data = (const __be32 *)rdev->ce_fw->data;
4340		WREG32(CP_CE_UCODE_ADDR, 0);
4341		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4342			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4343		WREG32(CP_CE_UCODE_ADDR, 0);
4344
4345		/* ME */
4346		fw_data = (const __be32 *)rdev->me_fw->data;
4347		WREG32(CP_ME_RAM_WADDR, 0);
4348		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4349			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4350		WREG32(CP_ME_RAM_WADDR, 0);
4351	}
4352
4353	return 0;
4354}
4355
4356/**
4357 * cik_cp_gfx_start - start the gfx ring
4358 *
4359 * @rdev: radeon_device pointer
4360 *
4361 * Enables the ring and loads the clear state context and other
4362 * packets required to init the ring.
4363 * Returns 0 for success, error for failure.
4364 */
4365static int cik_cp_gfx_start(struct radeon_device *rdev)
4366{
4367	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4368	int r, i;
4369
4370	/* init the CP */
4371	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4372	WREG32(CP_ENDIAN_SWAP, 0);
4373	WREG32(CP_DEVICE_ID, 1);
4374
4375	cik_cp_gfx_enable(rdev, true);
4376
4377	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4378	if (r) {
4379		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4380		return r;
4381	}
4382
4383	/* init the CE partitions.  CE only used for gfx on CIK */
4384	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4385	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4386	radeon_ring_write(ring, 0x8000);
4387	radeon_ring_write(ring, 0x8000);
4388
4389	/* setup clear context state */
4390	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4391	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4392
4393	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4394	radeon_ring_write(ring, 0x80000000);
4395	radeon_ring_write(ring, 0x80000000);
4396
4397	for (i = 0; i < cik_default_size; i++)
4398		radeon_ring_write(ring, cik_default_state[i]);
4399
4400	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4401	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4402
4403	/* set clear context state */
4404	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4405	radeon_ring_write(ring, 0);
4406
4407	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4408	radeon_ring_write(ring, 0x00000316);
4409	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4410	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4411
4412	radeon_ring_unlock_commit(rdev, ring, false);
4413
4414	return 0;
4415}
4416
4417/**
4418 * cik_cp_gfx_fini - stop the gfx ring
4419 *
4420 * @rdev: radeon_device pointer
4421 *
4422 * Stop the gfx ring and tear down the driver ring
4423 * info.
4424 */
4425static void cik_cp_gfx_fini(struct radeon_device *rdev)
4426{
4427	cik_cp_gfx_enable(rdev, false);
4428	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4429}
4430
4431/**
4432 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4433 *
4434 * @rdev: radeon_device pointer
4435 *
4436 * Program the location and size of the gfx ring buffer
4437 * and test it to make sure it's working.
4438 * Returns 0 for success, error for failure.
4439 */
4440static int cik_cp_gfx_resume(struct radeon_device *rdev)
4441{
4442	struct radeon_ring *ring;
4443	u32 tmp;
4444	u32 rb_bufsz;
4445	u64 rb_addr;
4446	int r;
4447
4448	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4449	if (rdev->family != CHIP_HAWAII)
4450		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4451
4452	/* Set the write pointer delay */
4453	WREG32(CP_RB_WPTR_DELAY, 0);
4454
4455	/* set the RB to use vmid 0 */
4456	WREG32(CP_RB_VMID, 0);
4457
4458	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4459
4460	/* ring 0 - compute and gfx */
4461	/* Set ring buffer size */
4462	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4463	rb_bufsz = order_base_2(ring->ring_size / 8);
4464	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4465#ifdef __BIG_ENDIAN
4466	tmp |= BUF_SWAP_32BIT;
4467#endif
4468	WREG32(CP_RB0_CNTL, tmp);
4469
4470	/* Initialize the ring buffer's read and write pointers */
4471	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4472	ring->wptr = 0;
4473	WREG32(CP_RB0_WPTR, ring->wptr);
4474
4475	/* set the wb address wether it's enabled or not */
4476	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4477	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4478
4479	/* scratch register shadowing is no longer supported */
4480	WREG32(SCRATCH_UMSK, 0);
4481
4482	if (!rdev->wb.enabled)
4483		tmp |= RB_NO_UPDATE;
4484
4485	mdelay(1);
4486	WREG32(CP_RB0_CNTL, tmp);
4487
4488	rb_addr = ring->gpu_addr >> 8;
4489	WREG32(CP_RB0_BASE, rb_addr);
4490	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4491
4492	/* start the ring */
4493	cik_cp_gfx_start(rdev);
4494	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4495	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4496	if (r) {
4497		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4498		return r;
4499	}
4500
4501	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4502		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4503
4504	return 0;
4505}
4506
4507u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4508		     struct radeon_ring *ring)
4509{
4510	u32 rptr;
4511
4512	if (rdev->wb.enabled)
4513		rptr = rdev->wb.wb[ring->rptr_offs/4];
4514	else
4515		rptr = RREG32(CP_RB0_RPTR);
4516
4517	return rptr;
4518}
4519
4520u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4521		     struct radeon_ring *ring)
4522{
4523	u32 wptr;
4524
4525	wptr = RREG32(CP_RB0_WPTR);
4526
4527	return wptr;
4528}
4529
4530void cik_gfx_set_wptr(struct radeon_device *rdev,
4531		      struct radeon_ring *ring)
4532{
4533	WREG32(CP_RB0_WPTR, ring->wptr);
4534	(void)RREG32(CP_RB0_WPTR);
4535}
4536
4537u32 cik_compute_get_rptr(struct radeon_device *rdev,
4538			 struct radeon_ring *ring)
4539{
4540	u32 rptr;
4541
4542	if (rdev->wb.enabled) {
4543		rptr = rdev->wb.wb[ring->rptr_offs/4];
4544	} else {
4545		mutex_lock(&rdev->srbm_mutex);
4546		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4547		rptr = RREG32(CP_HQD_PQ_RPTR);
4548		cik_srbm_select(rdev, 0, 0, 0, 0);
4549		mutex_unlock(&rdev->srbm_mutex);
4550	}
4551
4552	return rptr;
4553}
4554
4555u32 cik_compute_get_wptr(struct radeon_device *rdev,
4556			 struct radeon_ring *ring)
4557{
4558	u32 wptr;
4559
4560	if (rdev->wb.enabled) {
4561		/* XXX check if swapping is necessary on BE */
4562		wptr = rdev->wb.wb[ring->wptr_offs/4];
4563	} else {
4564		mutex_lock(&rdev->srbm_mutex);
4565		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4566		wptr = RREG32(CP_HQD_PQ_WPTR);
4567		cik_srbm_select(rdev, 0, 0, 0, 0);
4568		mutex_unlock(&rdev->srbm_mutex);
4569	}
4570
4571	return wptr;
4572}
4573
4574void cik_compute_set_wptr(struct radeon_device *rdev,
4575			  struct radeon_ring *ring)
4576{
4577	/* XXX check if swapping is necessary on BE */
4578	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4579	WDOORBELL32(ring->doorbell_index, ring->wptr);
4580}
4581
4582static void cik_compute_stop(struct radeon_device *rdev,
4583			     struct radeon_ring *ring)
4584{
4585	u32 j, tmp;
4586
4587	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4588	/* Disable wptr polling. */
4589	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4590	tmp &= ~WPTR_POLL_EN;
4591	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4592	/* Disable HQD. */
4593	if (RREG32(CP_HQD_ACTIVE) & 1) {
4594		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4595		for (j = 0; j < rdev->usec_timeout; j++) {
4596			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4597				break;
4598			udelay(1);
4599		}
4600		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4601		WREG32(CP_HQD_PQ_RPTR, 0);
4602		WREG32(CP_HQD_PQ_WPTR, 0);
4603	}
4604	cik_srbm_select(rdev, 0, 0, 0, 0);
4605}
4606
4607/**
4608 * cik_cp_compute_enable - enable/disable the compute CP MEs
4609 *
4610 * @rdev: radeon_device pointer
4611 * @enable: enable or disable the MEs
4612 *
4613 * Halts or unhalts the compute MEs.
4614 */
4615static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4616{
4617	if (enable)
4618		WREG32(CP_MEC_CNTL, 0);
4619	else {
4620		/*
4621		 * To make hibernation reliable we need to clear compute ring
4622		 * configuration before halting the compute ring.
4623		 */
4624		mutex_lock(&rdev->srbm_mutex);
4625		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4626		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4627		mutex_unlock(&rdev->srbm_mutex);
4628
4629		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4630		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4631		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4632	}
4633	udelay(50);
4634}
4635
4636/**
4637 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4638 *
4639 * @rdev: radeon_device pointer
4640 *
4641 * Loads the compute MEC1&2 ucode.
4642 * Returns 0 for success, -EINVAL if the ucode is not available.
4643 */
4644static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4645{
4646	int i;
4647
4648	if (!rdev->mec_fw)
4649		return -EINVAL;
4650
4651	cik_cp_compute_enable(rdev, false);
4652
4653	if (rdev->new_fw) {
4654		const struct gfx_firmware_header_v1_0 *mec_hdr =
4655			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4656		const __le32 *fw_data;
4657		u32 fw_size;
4658
4659		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4660
4661		/* MEC1 */
4662		fw_data = (const __le32 *)
4663			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4664		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4665		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4666		for (i = 0; i < fw_size; i++)
4667			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4668		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4669
4670		/* MEC2 */
4671		if (rdev->family == CHIP_KAVERI) {
4672			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4673				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4674
4675			fw_data = (const __le32 *)
4676				(rdev->mec2_fw->data +
4677				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4678			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4679			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4680			for (i = 0; i < fw_size; i++)
4681				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4682			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4683		}
4684	} else {
4685		const __be32 *fw_data;
4686
4687		/* MEC1 */
4688		fw_data = (const __be32 *)rdev->mec_fw->data;
4689		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4690		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4691			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4692		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4693
4694		if (rdev->family == CHIP_KAVERI) {
4695			/* MEC2 */
4696			fw_data = (const __be32 *)rdev->mec_fw->data;
4697			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4698			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4699				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4700			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4701		}
4702	}
4703
4704	return 0;
4705}
4706
4707/**
4708 * cik_cp_compute_start - start the compute queues
4709 *
4710 * @rdev: radeon_device pointer
4711 *
4712 * Enable the compute queues.
4713 * Returns 0 for success, error for failure.
4714 */
4715static int cik_cp_compute_start(struct radeon_device *rdev)
4716{
4717	cik_cp_compute_enable(rdev, true);
4718
4719	return 0;
4720}
4721
4722/**
4723 * cik_cp_compute_fini - stop the compute queues
4724 *
4725 * @rdev: radeon_device pointer
4726 *
4727 * Stop the compute queues and tear down the driver queue
4728 * info.
4729 */
4730static void cik_cp_compute_fini(struct radeon_device *rdev)
4731{
4732	int i, idx, r;
4733
4734	cik_cp_compute_enable(rdev, false);
4735
4736	for (i = 0; i < 2; i++) {
4737		if (i == 0)
4738			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4739		else
4740			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4741
4742		if (rdev->ring[idx].mqd_obj) {
4743			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4744			if (unlikely(r != 0))
4745				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4746
4747			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4748			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4751			rdev->ring[idx].mqd_obj = NULL;
4752		}
4753	}
4754}
4755
4756static void cik_mec_fini(struct radeon_device *rdev)
4757{
4758	int r;
4759
4760	if (rdev->mec.hpd_eop_obj) {
4761		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4762		if (unlikely(r != 0))
4763			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4764		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4765		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4766
4767		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4768		rdev->mec.hpd_eop_obj = NULL;
4769	}
4770}
4771
4772#define MEC_HPD_SIZE 2048
4773
4774static int cik_mec_init(struct radeon_device *rdev)
4775{
4776	int r;
4777	u32 *hpd;
4778
4779	/*
4780	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4781	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4782	 * Nonetheless, we assign only 1 pipe because all other pipes will
4783	 * be handled by KFD
4784	 */
4785	rdev->mec.num_mec = 1;
4786	rdev->mec.num_pipe = 1;
4787	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4788
4789	if (rdev->mec.hpd_eop_obj == NULL) {
4790		r = radeon_bo_create(rdev,
4791				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4792				     PAGE_SIZE, true,
4793				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4794				     &rdev->mec.hpd_eop_obj);
4795		if (r) {
4796			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4797			return r;
4798		}
4799	}
4800
4801	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4802	if (unlikely(r != 0)) {
4803		cik_mec_fini(rdev);
4804		return r;
4805	}
4806	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4807			  &rdev->mec.hpd_eop_gpu_addr);
4808	if (r) {
4809		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4810		cik_mec_fini(rdev);
4811		return r;
4812	}
4813	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4814	if (r) {
4815		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4816		cik_mec_fini(rdev);
4817		return r;
4818	}
4819
4820	/* clear memory.  Not sure if this is required or not */
4821	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4822
4823	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4824	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4825
4826	return 0;
4827}
4828
4829struct hqd_registers
4830{
4831	u32 cp_mqd_base_addr;
4832	u32 cp_mqd_base_addr_hi;
4833	u32 cp_hqd_active;
4834	u32 cp_hqd_vmid;
4835	u32 cp_hqd_persistent_state;
4836	u32 cp_hqd_pipe_priority;
4837	u32 cp_hqd_queue_priority;
4838	u32 cp_hqd_quantum;
4839	u32 cp_hqd_pq_base;
4840	u32 cp_hqd_pq_base_hi;
4841	u32 cp_hqd_pq_rptr;
4842	u32 cp_hqd_pq_rptr_report_addr;
4843	u32 cp_hqd_pq_rptr_report_addr_hi;
4844	u32 cp_hqd_pq_wptr_poll_addr;
4845	u32 cp_hqd_pq_wptr_poll_addr_hi;
4846	u32 cp_hqd_pq_doorbell_control;
4847	u32 cp_hqd_pq_wptr;
4848	u32 cp_hqd_pq_control;
4849	u32 cp_hqd_ib_base_addr;
4850	u32 cp_hqd_ib_base_addr_hi;
4851	u32 cp_hqd_ib_rptr;
4852	u32 cp_hqd_ib_control;
4853	u32 cp_hqd_iq_timer;
4854	u32 cp_hqd_iq_rptr;
4855	u32 cp_hqd_dequeue_request;
4856	u32 cp_hqd_dma_offload;
4857	u32 cp_hqd_sema_cmd;
4858	u32 cp_hqd_msg_type;
4859	u32 cp_hqd_atomic0_preop_lo;
4860	u32 cp_hqd_atomic0_preop_hi;
4861	u32 cp_hqd_atomic1_preop_lo;
4862	u32 cp_hqd_atomic1_preop_hi;
4863	u32 cp_hqd_hq_scheduler0;
4864	u32 cp_hqd_hq_scheduler1;
4865	u32 cp_mqd_control;
4866};
4867
4868struct bonaire_mqd
4869{
4870	u32 header;
4871	u32 dispatch_initiator;
4872	u32 dimensions[3];
4873	u32 start_idx[3];
4874	u32 num_threads[3];
4875	u32 pipeline_stat_enable;
4876	u32 perf_counter_enable;
4877	u32 pgm[2];
4878	u32 tba[2];
4879	u32 tma[2];
4880	u32 pgm_rsrc[2];
4881	u32 vmid;
4882	u32 resource_limits;
4883	u32 static_thread_mgmt01[2];
4884	u32 tmp_ring_size;
4885	u32 static_thread_mgmt23[2];
4886	u32 restart[3];
4887	u32 thread_trace_enable;
4888	u32 reserved1;
4889	u32 user_data[16];
4890	u32 vgtcs_invoke_count[2];
4891	struct hqd_registers queue_state;
4892	u32 dequeue_cntr;
4893	u32 interrupt_queue[64];
4894};
4895
4896/**
4897 * cik_cp_compute_resume - setup the compute queue registers
4898 *
4899 * @rdev: radeon_device pointer
4900 *
4901 * Program the compute queues and test them to make sure they
4902 * are working.
4903 * Returns 0 for success, error for failure.
4904 */
4905static int cik_cp_compute_resume(struct radeon_device *rdev)
4906{
4907	int r, i, j, idx;
4908	u32 tmp;
4909	bool use_doorbell = true;
4910	u64 hqd_gpu_addr;
4911	u64 mqd_gpu_addr;
4912	u64 eop_gpu_addr;
4913	u64 wb_gpu_addr;
4914	u32 *buf;
4915	struct bonaire_mqd *mqd;
4916
4917	r = cik_cp_compute_start(rdev);
4918	if (r)
4919		return r;
4920
4921	/* fix up chicken bits */
4922	tmp = RREG32(CP_CPF_DEBUG);
4923	tmp |= (1 << 23);
4924	WREG32(CP_CPF_DEBUG, tmp);
4925
4926	/* init the pipes */
4927	mutex_lock(&rdev->srbm_mutex);
4928
4929	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4930
4931	cik_srbm_select(rdev, 0, 0, 0, 0);
4932
4933	/* write the EOP addr */
4934	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4935	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4936
4937	/* set the VMID assigned */
4938	WREG32(CP_HPD_EOP_VMID, 0);
4939
4940	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4941	tmp = RREG32(CP_HPD_EOP_CONTROL);
4942	tmp &= ~EOP_SIZE_MASK;
4943	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4944	WREG32(CP_HPD_EOP_CONTROL, tmp);
4945
4946	mutex_unlock(&rdev->srbm_mutex);
4947
4948	/* init the queues.  Just two for now. */
4949	for (i = 0; i < 2; i++) {
4950		if (i == 0)
4951			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4952		else
4953			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4954
4955		if (rdev->ring[idx].mqd_obj == NULL) {
4956			r = radeon_bo_create(rdev,
4957					     sizeof(struct bonaire_mqd),
4958					     PAGE_SIZE, true,
4959					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4960					     NULL, &rdev->ring[idx].mqd_obj);
4961			if (r) {
4962				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4963				return r;
4964			}
4965		}
4966
4967		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4968		if (unlikely(r != 0)) {
4969			cik_cp_compute_fini(rdev);
4970			return r;
4971		}
4972		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4973				  &mqd_gpu_addr);
4974		if (r) {
4975			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4976			cik_cp_compute_fini(rdev);
4977			return r;
4978		}
4979		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4980		if (r) {
4981			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4982			cik_cp_compute_fini(rdev);
4983			return r;
4984		}
4985
4986		/* init the mqd struct */
4987		memset(buf, 0, sizeof(struct bonaire_mqd));
4988
4989		mqd = (struct bonaire_mqd *)buf;
4990		mqd->header = 0xC0310800;
4991		mqd->static_thread_mgmt01[0] = 0xffffffff;
4992		mqd->static_thread_mgmt01[1] = 0xffffffff;
4993		mqd->static_thread_mgmt23[0] = 0xffffffff;
4994		mqd->static_thread_mgmt23[1] = 0xffffffff;
4995
4996		mutex_lock(&rdev->srbm_mutex);
4997		cik_srbm_select(rdev, rdev->ring[idx].me,
4998				rdev->ring[idx].pipe,
4999				rdev->ring[idx].queue, 0);
5000
5001		/* disable wptr polling */
5002		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5003		tmp &= ~WPTR_POLL_EN;
5004		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5005
5006		/* enable doorbell? */
5007		mqd->queue_state.cp_hqd_pq_doorbell_control =
5008			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5009		if (use_doorbell)
5010			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5011		else
5012			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5013		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5014		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5015
5016		/* disable the queue if it's active */
5017		mqd->queue_state.cp_hqd_dequeue_request = 0;
5018		mqd->queue_state.cp_hqd_pq_rptr = 0;
5019		mqd->queue_state.cp_hqd_pq_wptr= 0;
5020		if (RREG32(CP_HQD_ACTIVE) & 1) {
5021			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5022			for (j = 0; j < rdev->usec_timeout; j++) {
5023				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5024					break;
5025				udelay(1);
5026			}
5027			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5028			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5029			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5030		}
5031
5032		/* set the pointer to the MQD */
5033		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5034		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5035		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5036		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5037		/* set MQD vmid to 0 */
5038		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5039		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5040		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5041
5042		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
5043		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5044		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5045		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5046		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5047		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5048
5049		/* set up the HQD, this is similar to CP_RB0_CNTL */
5050		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5051		mqd->queue_state.cp_hqd_pq_control &=
5052			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5053
5054		mqd->queue_state.cp_hqd_pq_control |=
5055			order_base_2(rdev->ring[idx].ring_size / 8);
5056		mqd->queue_state.cp_hqd_pq_control |=
5057			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5058#ifdef __BIG_ENDIAN
5059		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5060#endif
5061		mqd->queue_state.cp_hqd_pq_control &=
5062			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5063		mqd->queue_state.cp_hqd_pq_control |=
5064			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5065		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5066
5067		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5068		if (i == 0)
5069			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5070		else
5071			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5072		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5073		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5074		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5075		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5076		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5077
5078		/* set the wb address wether it's enabled or not */
5079		if (i == 0)
5080			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5081		else
5082			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5083		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5084		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5085			upper_32_bits(wb_gpu_addr) & 0xffff;
5086		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5087		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5088		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5089		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5090
5091		/* enable the doorbell if requested */
5092		if (use_doorbell) {
5093			mqd->queue_state.cp_hqd_pq_doorbell_control =
5094				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5095			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5096			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5097				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5098			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5099			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5100				~(DOORBELL_SOURCE | DOORBELL_HIT);
5101
5102		} else {
5103			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5104		}
5105		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5106		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5107
5108		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5109		rdev->ring[idx].wptr = 0;
5110		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5111		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5112		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5113
5114		/* set the vmid for the queue */
5115		mqd->queue_state.cp_hqd_vmid = 0;
5116		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5117
5118		/* activate the queue */
5119		mqd->queue_state.cp_hqd_active = 1;
5120		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5121
5122		cik_srbm_select(rdev, 0, 0, 0, 0);
5123		mutex_unlock(&rdev->srbm_mutex);
5124
5125		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5126		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5127
5128		rdev->ring[idx].ready = true;
5129		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5130		if (r)
5131			rdev->ring[idx].ready = false;
5132	}
5133
5134	return 0;
5135}
5136
5137static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5138{
5139	cik_cp_gfx_enable(rdev, enable);
5140	cik_cp_compute_enable(rdev, enable);
5141}
5142
5143static int cik_cp_load_microcode(struct radeon_device *rdev)
5144{
5145	int r;
5146
5147	r = cik_cp_gfx_load_microcode(rdev);
5148	if (r)
5149		return r;
5150	r = cik_cp_compute_load_microcode(rdev);
5151	if (r)
5152		return r;
5153
5154	return 0;
5155}
5156
5157static void cik_cp_fini(struct radeon_device *rdev)
5158{
5159	cik_cp_gfx_fini(rdev);
5160	cik_cp_compute_fini(rdev);
5161}
5162
5163static int cik_cp_resume(struct radeon_device *rdev)
5164{
5165	int r;
5166
5167	cik_enable_gui_idle_interrupt(rdev, false);
5168
5169	r = cik_cp_load_microcode(rdev);
5170	if (r)
5171		return r;
5172
5173	r = cik_cp_gfx_resume(rdev);
5174	if (r)
5175		return r;
5176	r = cik_cp_compute_resume(rdev);
5177	if (r)
5178		return r;
5179
5180	cik_enable_gui_idle_interrupt(rdev, true);
5181
5182	return 0;
5183}
5184
5185static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5186{
5187	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5188		RREG32(GRBM_STATUS));
5189	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5190		RREG32(GRBM_STATUS2));
5191	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5192		RREG32(GRBM_STATUS_SE0));
5193	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5194		RREG32(GRBM_STATUS_SE1));
5195	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5196		RREG32(GRBM_STATUS_SE2));
5197	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5198		RREG32(GRBM_STATUS_SE3));
5199	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5200		RREG32(SRBM_STATUS));
5201	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5202		RREG32(SRBM_STATUS2));
5203	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5204		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5205	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5206		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5207	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5208	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5209		 RREG32(CP_STALLED_STAT1));
5210	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5211		 RREG32(CP_STALLED_STAT2));
5212	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5213		 RREG32(CP_STALLED_STAT3));
5214	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5215		 RREG32(CP_CPF_BUSY_STAT));
5216	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5217		 RREG32(CP_CPF_STALLED_STAT1));
5218	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5219	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5220	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5221		 RREG32(CP_CPC_STALLED_STAT1));
5222	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5223}
5224
5225/**
5226 * cik_gpu_check_soft_reset - check which blocks are busy
5227 *
5228 * @rdev: radeon_device pointer
5229 *
5230 * Check which blocks are busy and return the relevant reset
5231 * mask to be used by cik_gpu_soft_reset().
5232 * Returns a mask of the blocks to be reset.
5233 */
5234u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5235{
5236	u32 reset_mask = 0;
5237	u32 tmp;
5238
5239	/* GRBM_STATUS */
5240	tmp = RREG32(GRBM_STATUS);
5241	if (tmp & (PA_BUSY | SC_BUSY |
5242		   BCI_BUSY | SX_BUSY |
5243		   TA_BUSY | VGT_BUSY |
5244		   DB_BUSY | CB_BUSY |
5245		   GDS_BUSY | SPI_BUSY |
5246		   IA_BUSY | IA_BUSY_NO_DMA))
5247		reset_mask |= RADEON_RESET_GFX;
5248
5249	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5250		reset_mask |= RADEON_RESET_CP;
5251
5252	/* GRBM_STATUS2 */
5253	tmp = RREG32(GRBM_STATUS2);
5254	if (tmp & RLC_BUSY)
5255		reset_mask |= RADEON_RESET_RLC;
5256
5257	/* SDMA0_STATUS_REG */
5258	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5259	if (!(tmp & SDMA_IDLE))
5260		reset_mask |= RADEON_RESET_DMA;
5261
5262	/* SDMA1_STATUS_REG */
5263	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5264	if (!(tmp & SDMA_IDLE))
5265		reset_mask |= RADEON_RESET_DMA1;
5266
5267	/* SRBM_STATUS2 */
5268	tmp = RREG32(SRBM_STATUS2);
5269	if (tmp & SDMA_BUSY)
5270		reset_mask |= RADEON_RESET_DMA;
5271
5272	if (tmp & SDMA1_BUSY)
5273		reset_mask |= RADEON_RESET_DMA1;
5274
5275	/* SRBM_STATUS */
5276	tmp = RREG32(SRBM_STATUS);
5277
5278	if (tmp & IH_BUSY)
5279		reset_mask |= RADEON_RESET_IH;
5280
5281	if (tmp & SEM_BUSY)
5282		reset_mask |= RADEON_RESET_SEM;
5283
5284	if (tmp & GRBM_RQ_PENDING)
5285		reset_mask |= RADEON_RESET_GRBM;
5286
5287	if (tmp & VMC_BUSY)
5288		reset_mask |= RADEON_RESET_VMC;
5289
5290	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5291		   MCC_BUSY | MCD_BUSY))
5292		reset_mask |= RADEON_RESET_MC;
5293
5294	if (evergreen_is_display_hung(rdev))
5295		reset_mask |= RADEON_RESET_DISPLAY;
5296
5297	/* Skip MC reset as it's mostly likely not hung, just busy */
5298	if (reset_mask & RADEON_RESET_MC) {
5299		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5300		reset_mask &= ~RADEON_RESET_MC;
5301	}
5302
5303	return reset_mask;
5304}
5305
5306/**
5307 * cik_gpu_soft_reset - soft reset GPU
5308 *
5309 * @rdev: radeon_device pointer
5310 * @reset_mask: mask of which blocks to reset
5311 *
5312 * Soft reset the blocks specified in @reset_mask.
5313 */
5314static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5315{
5316	struct evergreen_mc_save save;
5317	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5318	u32 tmp;
5319
5320	if (reset_mask == 0)
5321		return;
5322
5323	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5324
5325	cik_print_gpu_status_regs(rdev);
5326	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5327		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5328	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5329		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5330
5331	/* disable CG/PG */
5332	cik_fini_pg(rdev);
5333	cik_fini_cg(rdev);
5334
5335	/* stop the rlc */
5336	cik_rlc_stop(rdev);
5337
5338	/* Disable GFX parsing/prefetching */
5339	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5340
5341	/* Disable MEC parsing/prefetching */
5342	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5343
5344	if (reset_mask & RADEON_RESET_DMA) {
5345		/* sdma0 */
5346		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5347		tmp |= SDMA_HALT;
5348		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5349	}
5350	if (reset_mask & RADEON_RESET_DMA1) {
5351		/* sdma1 */
5352		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5353		tmp |= SDMA_HALT;
5354		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5355	}
5356
5357	evergreen_mc_stop(rdev, &save);
5358	if (evergreen_mc_wait_for_idle(rdev)) {
5359		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5360	}
5361
5362	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5363		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5364
5365	if (reset_mask & RADEON_RESET_CP) {
5366		grbm_soft_reset |= SOFT_RESET_CP;
5367
5368		srbm_soft_reset |= SOFT_RESET_GRBM;
5369	}
5370
5371	if (reset_mask & RADEON_RESET_DMA)
5372		srbm_soft_reset |= SOFT_RESET_SDMA;
5373
5374	if (reset_mask & RADEON_RESET_DMA1)
5375		srbm_soft_reset |= SOFT_RESET_SDMA1;
5376
5377	if (reset_mask & RADEON_RESET_DISPLAY)
5378		srbm_soft_reset |= SOFT_RESET_DC;
5379
5380	if (reset_mask & RADEON_RESET_RLC)
5381		grbm_soft_reset |= SOFT_RESET_RLC;
5382
5383	if (reset_mask & RADEON_RESET_SEM)
5384		srbm_soft_reset |= SOFT_RESET_SEM;
5385
5386	if (reset_mask & RADEON_RESET_IH)
5387		srbm_soft_reset |= SOFT_RESET_IH;
5388
5389	if (reset_mask & RADEON_RESET_GRBM)
5390		srbm_soft_reset |= SOFT_RESET_GRBM;
5391
5392	if (reset_mask & RADEON_RESET_VMC)
5393		srbm_soft_reset |= SOFT_RESET_VMC;
5394
5395	if (!(rdev->flags & RADEON_IS_IGP)) {
5396		if (reset_mask & RADEON_RESET_MC)
5397			srbm_soft_reset |= SOFT_RESET_MC;
5398	}
5399
5400	if (grbm_soft_reset) {
5401		tmp = RREG32(GRBM_SOFT_RESET);
5402		tmp |= grbm_soft_reset;
5403		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5404		WREG32(GRBM_SOFT_RESET, tmp);
5405		tmp = RREG32(GRBM_SOFT_RESET);
5406
5407		udelay(50);
5408
5409		tmp &= ~grbm_soft_reset;
5410		WREG32(GRBM_SOFT_RESET, tmp);
5411		tmp = RREG32(GRBM_SOFT_RESET);
5412	}
5413
5414	if (srbm_soft_reset) {
5415		tmp = RREG32(SRBM_SOFT_RESET);
5416		tmp |= srbm_soft_reset;
5417		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5418		WREG32(SRBM_SOFT_RESET, tmp);
5419		tmp = RREG32(SRBM_SOFT_RESET);
5420
5421		udelay(50);
5422
5423		tmp &= ~srbm_soft_reset;
5424		WREG32(SRBM_SOFT_RESET, tmp);
5425		tmp = RREG32(SRBM_SOFT_RESET);
5426	}
5427
5428	/* Wait a little for things to settle down */
5429	udelay(50);
5430
5431	evergreen_mc_resume(rdev, &save);
5432	udelay(50);
5433
5434	cik_print_gpu_status_regs(rdev);
5435}
5436
5437struct kv_reset_save_regs {
5438	u32 gmcon_reng_execute;
5439	u32 gmcon_misc;
5440	u32 gmcon_misc3;
5441};
5442
5443static void kv_save_regs_for_reset(struct radeon_device *rdev,
5444				   struct kv_reset_save_regs *save)
5445{
5446	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5447	save->gmcon_misc = RREG32(GMCON_MISC);
5448	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5449
5450	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5451	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5452						STCTRL_STUTTER_EN));
5453}
5454
5455static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5456				      struct kv_reset_save_regs *save)
5457{
5458	int i;
5459
5460	WREG32(GMCON_PGFSM_WRITE, 0);
5461	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5462
5463	for (i = 0; i < 5; i++)
5464		WREG32(GMCON_PGFSM_WRITE, 0);
5465
5466	WREG32(GMCON_PGFSM_WRITE, 0);
5467	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5468
5469	for (i = 0; i < 5; i++)
5470		WREG32(GMCON_PGFSM_WRITE, 0);
5471
5472	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5473	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5474
5475	for (i = 0; i < 5; i++)
5476		WREG32(GMCON_PGFSM_WRITE, 0);
5477
5478	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5479	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5480
5481	for (i = 0; i < 5; i++)
5482		WREG32(GMCON_PGFSM_WRITE, 0);
5483
5484	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5485	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5486
5487	for (i = 0; i < 5; i++)
5488		WREG32(GMCON_PGFSM_WRITE, 0);
5489
5490	WREG32(GMCON_PGFSM_WRITE, 0);
5491	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5492
5493	for (i = 0; i < 5; i++)
5494		WREG32(GMCON_PGFSM_WRITE, 0);
5495
5496	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5497	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5498
5499	for (i = 0; i < 5; i++)
5500		WREG32(GMCON_PGFSM_WRITE, 0);
5501
5502	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5503	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5504
5505	for (i = 0; i < 5; i++)
5506		WREG32(GMCON_PGFSM_WRITE, 0);
5507
5508	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5509	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5510
5511	for (i = 0; i < 5; i++)
5512		WREG32(GMCON_PGFSM_WRITE, 0);
5513
5514	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5515	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5516
5517	for (i = 0; i < 5; i++)
5518		WREG32(GMCON_PGFSM_WRITE, 0);
5519
5520	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5521	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5522
5523	WREG32(GMCON_MISC3, save->gmcon_misc3);
5524	WREG32(GMCON_MISC, save->gmcon_misc);
5525	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5526}
5527
5528static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5529{
5530	struct evergreen_mc_save save;
5531	struct kv_reset_save_regs kv_save = { 0 };
5532	u32 tmp, i;
5533
5534	dev_info(rdev->dev, "GPU pci config reset\n");
5535
5536	/* disable dpm? */
5537
5538	/* disable cg/pg */
5539	cik_fini_pg(rdev);
5540	cik_fini_cg(rdev);
5541
5542	/* Disable GFX parsing/prefetching */
5543	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5544
5545	/* Disable MEC parsing/prefetching */
5546	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5547
5548	/* sdma0 */
5549	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5550	tmp |= SDMA_HALT;
5551	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5552	/* sdma1 */
5553	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5554	tmp |= SDMA_HALT;
5555	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5556	/* XXX other engines? */
5557
5558	/* halt the rlc, disable cp internal ints */
5559	cik_rlc_stop(rdev);
5560
5561	udelay(50);
5562
5563	/* disable mem access */
5564	evergreen_mc_stop(rdev, &save);
5565	if (evergreen_mc_wait_for_idle(rdev)) {
5566		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5567	}
5568
5569	if (rdev->flags & RADEON_IS_IGP)
5570		kv_save_regs_for_reset(rdev, &kv_save);
5571
5572	/* disable BM */
5573	pci_clear_master(rdev->pdev);
5574	/* reset */
5575	radeon_pci_config_reset(rdev);
5576
5577	udelay(100);
5578
5579	/* wait for asic to come out of reset */
5580	for (i = 0; i < rdev->usec_timeout; i++) {
5581		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5582			break;
5583		udelay(1);
5584	}
5585
5586	/* does asic init need to be run first??? */
5587	if (rdev->flags & RADEON_IS_IGP)
5588		kv_restore_regs_for_reset(rdev, &kv_save);
5589}
5590
5591/**
5592 * cik_asic_reset - soft reset GPU
5593 *
5594 * @rdev: radeon_device pointer
5595 *
5596 * Look up which blocks are hung and attempt
5597 * to reset them.
5598 * Returns 0 for success.
5599 */
5600int cik_asic_reset(struct radeon_device *rdev)
5601{
5602	u32 reset_mask;
5603
5604	reset_mask = cik_gpu_check_soft_reset(rdev);
5605
5606	if (reset_mask)
5607		r600_set_bios_scratch_engine_hung(rdev, true);
5608
5609	/* try soft reset */
5610	cik_gpu_soft_reset(rdev, reset_mask);
5611
5612	reset_mask = cik_gpu_check_soft_reset(rdev);
5613
5614	/* try pci config reset */
5615	if (reset_mask && radeon_hard_reset)
5616		cik_gpu_pci_config_reset(rdev);
5617
5618	reset_mask = cik_gpu_check_soft_reset(rdev);
5619
5620	if (!reset_mask)
5621		r600_set_bios_scratch_engine_hung(rdev, false);
5622
5623	return 0;
5624}
5625
5626/**
5627 * cik_gfx_is_lockup - check if the 3D engine is locked up
5628 *
5629 * @rdev: radeon_device pointer
5630 * @ring: radeon_ring structure holding ring information
5631 *
5632 * Check if the 3D engine is locked up (CIK).
5633 * Returns true if the engine is locked, false if not.
5634 */
5635bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5636{
5637	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5638
5639	if (!(reset_mask & (RADEON_RESET_GFX |
5640			    RADEON_RESET_COMPUTE |
5641			    RADEON_RESET_CP))) {
5642		radeon_ring_lockup_update(rdev, ring);
5643		return false;
5644	}
5645	return radeon_ring_test_lockup(rdev, ring);
5646}
5647
5648/* MC */
5649/**
5650 * cik_mc_program - program the GPU memory controller
5651 *
5652 * @rdev: radeon_device pointer
5653 *
5654 * Set the location of vram, gart, and AGP in the GPU's
5655 * physical address space (CIK).
5656 */
5657static void cik_mc_program(struct radeon_device *rdev)
5658{
5659	struct evergreen_mc_save save;
5660	u32 tmp;
5661	int i, j;
5662
5663	/* Initialize HDP */
5664	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5665		WREG32((0x2c14 + j), 0x00000000);
5666		WREG32((0x2c18 + j), 0x00000000);
5667		WREG32((0x2c1c + j), 0x00000000);
5668		WREG32((0x2c20 + j), 0x00000000);
5669		WREG32((0x2c24 + j), 0x00000000);
5670	}
5671	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5672
5673	evergreen_mc_stop(rdev, &save);
5674	if (radeon_mc_wait_for_idle(rdev)) {
5675		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5676	}
5677	/* Lockout access through VGA aperture*/
5678	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5679	/* Update configuration */
5680	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5681	       rdev->mc.vram_start >> 12);
5682	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5683	       rdev->mc.vram_end >> 12);
5684	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5685	       rdev->vram_scratch.gpu_addr >> 12);
5686	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5687	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5688	WREG32(MC_VM_FB_LOCATION, tmp);
5689	/* XXX double check these! */
5690	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5691	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5692	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5693	WREG32(MC_VM_AGP_BASE, 0);
5694	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5695	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5696	if (radeon_mc_wait_for_idle(rdev)) {
5697		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5698	}
5699	evergreen_mc_resume(rdev, &save);
5700	/* we need to own VRAM, so turn off the VGA renderer here
5701	 * to stop it overwriting our objects */
5702	rv515_vga_render_disable(rdev);
5703}
5704
5705/**
5706 * cik_mc_init - initialize the memory controller driver params
5707 *
5708 * @rdev: radeon_device pointer
5709 *
5710 * Look up the amount of vram, vram width, and decide how to place
5711 * vram and gart within the GPU's physical address space (CIK).
5712 * Returns 0 for success.
5713 */
5714static int cik_mc_init(struct radeon_device *rdev)
5715{
5716	u32 tmp;
5717	int chansize, numchan;
5718
5719	/* Get VRAM informations */
5720	rdev->mc.vram_is_ddr = true;
5721	tmp = RREG32(MC_ARB_RAMCFG);
5722	if (tmp & CHANSIZE_MASK) {
5723		chansize = 64;
5724	} else {
5725		chansize = 32;
5726	}
5727	tmp = RREG32(MC_SHARED_CHMAP);
5728	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5729	case 0:
5730	default:
5731		numchan = 1;
5732		break;
5733	case 1:
5734		numchan = 2;
5735		break;
5736	case 2:
5737		numchan = 4;
5738		break;
5739	case 3:
5740		numchan = 8;
5741		break;
5742	case 4:
5743		numchan = 3;
5744		break;
5745	case 5:
5746		numchan = 6;
5747		break;
5748	case 6:
5749		numchan = 10;
5750		break;
5751	case 7:
5752		numchan = 12;
5753		break;
5754	case 8:
5755		numchan = 16;
5756		break;
5757	}
5758	rdev->mc.vram_width = numchan * chansize;
5759	/* Could aper size report 0 ? */
5760	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5761	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5762	/* size in MB on si */
5763	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5764	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5765	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5766	si_vram_gtt_location(rdev, &rdev->mc);
5767	radeon_update_bandwidth_info(rdev);
5768
5769	return 0;
5770}
5771
5772/*
5773 * GART
5774 * VMID 0 is the physical GPU addresses as used by the kernel.
5775 * VMIDs 1-15 are used for userspace clients and are handled
5776 * by the radeon vm/hsa code.
5777 */
5778/**
5779 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5780 *
5781 * @rdev: radeon_device pointer
5782 *
5783 * Flush the TLB for the VMID 0 page table (CIK).
5784 */
5785void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5786{
5787	/* flush hdp cache */
5788	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5789
5790	/* bits 0-15 are the VM contexts0-15 */
5791	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5792}
5793
5794static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5795{
5796	int i;
5797	uint32_t sh_mem_bases, sh_mem_config;
5798
5799	sh_mem_bases = 0x6000 | 0x6000 << 16;
5800	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5801	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5802
5803	mutex_lock(&rdev->srbm_mutex);
5804	for (i = 8; i < 16; i++) {
5805		cik_srbm_select(rdev, 0, 0, 0, i);
5806		/* CP and shaders */
5807		WREG32(SH_MEM_CONFIG, sh_mem_config);
5808		WREG32(SH_MEM_APE1_BASE, 1);
5809		WREG32(SH_MEM_APE1_LIMIT, 0);
5810		WREG32(SH_MEM_BASES, sh_mem_bases);
5811	}
5812	cik_srbm_select(rdev, 0, 0, 0, 0);
5813	mutex_unlock(&rdev->srbm_mutex);
5814}
5815
5816/**
5817 * cik_pcie_gart_enable - gart enable
5818 *
5819 * @rdev: radeon_device pointer
5820 *
5821 * This sets up the TLBs, programs the page tables for VMID0,
5822 * sets up the hw for VMIDs 1-15 which are allocated on
5823 * demand, and sets up the global locations for the LDS, GDS,
5824 * and GPUVM for FSA64 clients (CIK).
5825 * Returns 0 for success, errors for failure.
5826 */
5827static int cik_pcie_gart_enable(struct radeon_device *rdev)
5828{
5829	int r, i;
5830
5831	if (rdev->gart.robj == NULL) {
5832		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5833		return -EINVAL;
5834	}
5835	r = radeon_gart_table_vram_pin(rdev);
5836	if (r)
5837		return r;
5838	/* Setup TLB control */
5839	WREG32(MC_VM_MX_L1_TLB_CNTL,
5840	       (0xA << 7) |
5841	       ENABLE_L1_TLB |
5842	       ENABLE_L1_FRAGMENT_PROCESSING |
5843	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5844	       ENABLE_ADVANCED_DRIVER_MODEL |
5845	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5846	/* Setup L2 cache */
5847	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5848	       ENABLE_L2_FRAGMENT_PROCESSING |
5849	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5850	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5851	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5852	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5853	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5854	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5855	       BANK_SELECT(4) |
5856	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5857	/* setup context0 */
5858	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5859	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5860	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5861	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5862			(u32)(rdev->dummy_page.addr >> 12));
5863	WREG32(VM_CONTEXT0_CNTL2, 0);
5864	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5865				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5866
5867	WREG32(0x15D4, 0);
5868	WREG32(0x15D8, 0);
5869	WREG32(0x15DC, 0);
5870
5871	/* restore context1-15 */
5872	/* set vm size, must be a multiple of 4 */
5873	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5874	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5875	for (i = 1; i < 16; i++) {
5876		if (i < 8)
5877			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5878			       rdev->vm_manager.saved_table_addr[i]);
5879		else
5880			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5881			       rdev->vm_manager.saved_table_addr[i]);
5882	}
5883
5884	/* enable context1-15 */
5885	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5886	       (u32)(rdev->dummy_page.addr >> 12));
5887	WREG32(VM_CONTEXT1_CNTL2, 4);
5888	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5889				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5890				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5891				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5892				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5893				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5894				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5895				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5896				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5897				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5898				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5899				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5900				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5901				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5902
5903	if (rdev->family == CHIP_KAVERI) {
5904		u32 tmp = RREG32(CHUB_CONTROL);
5905		tmp &= ~BYPASS_VM;
5906		WREG32(CHUB_CONTROL, tmp);
5907	}
5908
5909	/* XXX SH_MEM regs */
5910	/* where to put LDS, scratch, GPUVM in FSA64 space */
5911	mutex_lock(&rdev->srbm_mutex);
5912	for (i = 0; i < 16; i++) {
5913		cik_srbm_select(rdev, 0, 0, 0, i);
5914		/* CP and shaders */
5915		WREG32(SH_MEM_CONFIG, 0);
5916		WREG32(SH_MEM_APE1_BASE, 1);
5917		WREG32(SH_MEM_APE1_LIMIT, 0);
5918		WREG32(SH_MEM_BASES, 0);
5919		/* SDMA GFX */
5920		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5921		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5922		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5923		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5924		/* XXX SDMA RLC - todo */
5925	}
5926	cik_srbm_select(rdev, 0, 0, 0, 0);
5927	mutex_unlock(&rdev->srbm_mutex);
5928
5929	cik_pcie_init_compute_vmid(rdev);
5930
5931	cik_pcie_gart_tlb_flush(rdev);
5932	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5933		 (unsigned)(rdev->mc.gtt_size >> 20),
5934		 (unsigned long long)rdev->gart.table_addr);
5935	rdev->gart.ready = true;
5936	return 0;
5937}
5938
5939/**
5940 * cik_pcie_gart_disable - gart disable
5941 *
5942 * @rdev: radeon_device pointer
5943 *
5944 * This disables all VM page table (CIK).
5945 */
5946static void cik_pcie_gart_disable(struct radeon_device *rdev)
5947{
5948	unsigned i;
5949
5950	for (i = 1; i < 16; ++i) {
5951		uint32_t reg;
5952		if (i < 8)
5953			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5954		else
5955			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5956		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5957	}
5958
5959	/* Disable all tables */
5960	WREG32(VM_CONTEXT0_CNTL, 0);
5961	WREG32(VM_CONTEXT1_CNTL, 0);
5962	/* Setup TLB control */
5963	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5964	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5965	/* Setup L2 cache */
5966	WREG32(VM_L2_CNTL,
5967	       ENABLE_L2_FRAGMENT_PROCESSING |
5968	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5969	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5970	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5971	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5972	WREG32(VM_L2_CNTL2, 0);
5973	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5974	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5975	radeon_gart_table_vram_unpin(rdev);
5976}
5977
5978/**
5979 * cik_pcie_gart_fini - vm fini callback
5980 *
5981 * @rdev: radeon_device pointer
5982 *
5983 * Tears down the driver GART/VM setup (CIK).
5984 */
5985static void cik_pcie_gart_fini(struct radeon_device *rdev)
5986{
5987	cik_pcie_gart_disable(rdev);
5988	radeon_gart_table_vram_free(rdev);
5989	radeon_gart_fini(rdev);
5990}
5991
5992/* vm parser */
5993/**
5994 * cik_ib_parse - vm ib_parse callback
5995 *
5996 * @rdev: radeon_device pointer
5997 * @ib: indirect buffer pointer
5998 *
5999 * CIK uses hw IB checking so this is a nop (CIK).
6000 */
6001int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6002{
6003	return 0;
6004}
6005
6006/*
6007 * vm
6008 * VMID 0 is the physical GPU addresses as used by the kernel.
6009 * VMIDs 1-15 are used for userspace clients and are handled
6010 * by the radeon vm/hsa code.
6011 */
6012/**
6013 * cik_vm_init - cik vm init callback
6014 *
6015 * @rdev: radeon_device pointer
6016 *
6017 * Inits cik specific vm parameters (number of VMs, base of vram for
6018 * VMIDs 1-15) (CIK).
6019 * Returns 0 for success.
6020 */
6021int cik_vm_init(struct radeon_device *rdev)
6022{
6023	/*
6024	 * number of VMs
6025	 * VMID 0 is reserved for System
6026	 * radeon graphics/compute will use VMIDs 1-7
6027	 * amdkfd will use VMIDs 8-15
6028	 */
6029	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6030	/* base offset of vram pages */
6031	if (rdev->flags & RADEON_IS_IGP) {
6032		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6033		tmp <<= 22;
6034		rdev->vm_manager.vram_base_offset = tmp;
6035	} else
6036		rdev->vm_manager.vram_base_offset = 0;
6037
6038	return 0;
6039}
6040
6041/**
6042 * cik_vm_fini - cik vm fini callback
6043 *
6044 * @rdev: radeon_device pointer
6045 *
6046 * Tear down any asic specific VM setup (CIK).
6047 */
6048void cik_vm_fini(struct radeon_device *rdev)
6049{
6050}
6051
6052/**
6053 * cik_vm_decode_fault - print human readable fault info
6054 *
6055 * @rdev: radeon_device pointer
6056 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6057 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6058 *
6059 * Print human readable fault information (CIK).
6060 */
6061static void cik_vm_decode_fault(struct radeon_device *rdev,
6062				u32 status, u32 addr, u32 mc_client)
6063{
6064	u32 mc_id;
6065	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6066	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6067	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6068		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6069
6070	if (rdev->family == CHIP_HAWAII)
6071		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6072	else
6073		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6074
6075	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6076	       protections, vmid, addr,
6077	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6078	       block, mc_client, mc_id);
6079}
6080
6081/**
6082 * cik_vm_flush - cik vm flush using the CP
6083 *
6084 * @rdev: radeon_device pointer
6085 *
6086 * Update the page table base and flush the VM TLB
6087 * using the CP (CIK).
6088 */
6089void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6090		  unsigned vm_id, uint64_t pd_addr)
6091{
6092	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6093
6094	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6095	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6096				 WRITE_DATA_DST_SEL(0)));
6097	if (vm_id < 8) {
6098		radeon_ring_write(ring,
6099				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6100	} else {
6101		radeon_ring_write(ring,
6102				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6103	}
6104	radeon_ring_write(ring, 0);
6105	radeon_ring_write(ring, pd_addr >> 12);
6106
6107	/* update SH_MEM_* regs */
6108	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6109	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6110				 WRITE_DATA_DST_SEL(0)));
6111	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6112	radeon_ring_write(ring, 0);
6113	radeon_ring_write(ring, VMID(vm_id));
6114
6115	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6116	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6117				 WRITE_DATA_DST_SEL(0)));
6118	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6119	radeon_ring_write(ring, 0);
6120
6121	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6122	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6123	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6124	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6125
6126	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6127	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6128				 WRITE_DATA_DST_SEL(0)));
6129	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6130	radeon_ring_write(ring, 0);
6131	radeon_ring_write(ring, VMID(0));
6132
6133	/* HDP flush */
6134	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6135
6136	/* bits 0-15 are the VM contexts0-15 */
6137	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6138	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6139				 WRITE_DATA_DST_SEL(0)));
6140	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6141	radeon_ring_write(ring, 0);
6142	radeon_ring_write(ring, 1 << vm_id);
6143
6144	/* wait for the invalidate to complete */
6145	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6146	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6147				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6148				 WAIT_REG_MEM_ENGINE(0))); /* me */
6149	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6150	radeon_ring_write(ring, 0);
6151	radeon_ring_write(ring, 0); /* ref */
6152	radeon_ring_write(ring, 0); /* mask */
6153	radeon_ring_write(ring, 0x20); /* poll interval */
6154
6155	/* compute doesn't have PFP */
6156	if (usepfp) {
6157		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6158		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6159		radeon_ring_write(ring, 0x0);
6160	}
6161}
6162
6163/*
6164 * RLC
6165 * The RLC is a multi-purpose microengine that handles a
6166 * variety of functions, the most important of which is
6167 * the interrupt controller.
6168 */
6169static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6170					  bool enable)
6171{
6172	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6173
6174	if (enable)
6175		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6176	else
6177		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6178	WREG32(CP_INT_CNTL_RING0, tmp);
6179}
6180
6181static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6182{
6183	u32 tmp;
6184
6185	tmp = RREG32(RLC_LB_CNTL);
6186	if (enable)
6187		tmp |= LOAD_BALANCE_ENABLE;
6188	else
6189		tmp &= ~LOAD_BALANCE_ENABLE;
6190	WREG32(RLC_LB_CNTL, tmp);
6191}
6192
6193static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6194{
6195	u32 i, j, k;
6196	u32 mask;
6197
6198	mutex_lock(&rdev->grbm_idx_mutex);
6199	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6200		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6201			cik_select_se_sh(rdev, i, j);
6202			for (k = 0; k < rdev->usec_timeout; k++) {
6203				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6204					break;
6205				udelay(1);
6206			}
6207		}
6208	}
6209	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6210	mutex_unlock(&rdev->grbm_idx_mutex);
6211
6212	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6213	for (k = 0; k < rdev->usec_timeout; k++) {
6214		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6215			break;
6216		udelay(1);
6217	}
6218}
6219
6220static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6221{
6222	u32 tmp;
6223
6224	tmp = RREG32(RLC_CNTL);
6225	if (tmp != rlc)
6226		WREG32(RLC_CNTL, rlc);
6227}
6228
6229static u32 cik_halt_rlc(struct radeon_device *rdev)
6230{
6231	u32 data, orig;
6232
6233	orig = data = RREG32(RLC_CNTL);
6234
6235	if (data & RLC_ENABLE) {
6236		u32 i;
6237
6238		data &= ~RLC_ENABLE;
6239		WREG32(RLC_CNTL, data);
6240
6241		for (i = 0; i < rdev->usec_timeout; i++) {
6242			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6243				break;
6244			udelay(1);
6245		}
6246
6247		cik_wait_for_rlc_serdes(rdev);
6248	}
6249
6250	return orig;
6251}
6252
6253void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6254{
6255	u32 tmp, i, mask;
6256
6257	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6258	WREG32(RLC_GPR_REG2, tmp);
6259
6260	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6261	for (i = 0; i < rdev->usec_timeout; i++) {
6262		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6263			break;
6264		udelay(1);
6265	}
6266
6267	for (i = 0; i < rdev->usec_timeout; i++) {
6268		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6269			break;
6270		udelay(1);
6271	}
6272}
6273
6274void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6275{
6276	u32 tmp;
6277
6278	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6279	WREG32(RLC_GPR_REG2, tmp);
6280}
6281
6282/**
6283 * cik_rlc_stop - stop the RLC ME
6284 *
6285 * @rdev: radeon_device pointer
6286 *
6287 * Halt the RLC ME (MicroEngine) (CIK).
6288 */
6289static void cik_rlc_stop(struct radeon_device *rdev)
6290{
6291	WREG32(RLC_CNTL, 0);
6292
6293	cik_enable_gui_idle_interrupt(rdev, false);
6294
6295	cik_wait_for_rlc_serdes(rdev);
6296}
6297
6298/**
6299 * cik_rlc_start - start the RLC ME
6300 *
6301 * @rdev: radeon_device pointer
6302 *
6303 * Unhalt the RLC ME (MicroEngine) (CIK).
6304 */
6305static void cik_rlc_start(struct radeon_device *rdev)
6306{
6307	WREG32(RLC_CNTL, RLC_ENABLE);
6308
6309	cik_enable_gui_idle_interrupt(rdev, true);
6310
6311	udelay(50);
6312}
6313
6314/**
6315 * cik_rlc_resume - setup the RLC hw
6316 *
6317 * @rdev: radeon_device pointer
6318 *
6319 * Initialize the RLC registers, load the ucode,
6320 * and start the RLC (CIK).
6321 * Returns 0 for success, -EINVAL if the ucode is not available.
6322 */
6323static int cik_rlc_resume(struct radeon_device *rdev)
6324{
6325	u32 i, size, tmp;
6326
6327	if (!rdev->rlc_fw)
6328		return -EINVAL;
6329
6330	cik_rlc_stop(rdev);
6331
6332	/* disable CG */
6333	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6334	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6335
6336	si_rlc_reset(rdev);
6337
6338	cik_init_pg(rdev);
6339
6340	cik_init_cg(rdev);
6341
6342	WREG32(RLC_LB_CNTR_INIT, 0);
6343	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6344
6345	mutex_lock(&rdev->grbm_idx_mutex);
6346	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6347	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6348	WREG32(RLC_LB_PARAMS, 0x00600408);
6349	WREG32(RLC_LB_CNTL, 0x80000004);
6350	mutex_unlock(&rdev->grbm_idx_mutex);
6351
6352	WREG32(RLC_MC_CNTL, 0);
6353	WREG32(RLC_UCODE_CNTL, 0);
6354
6355	if (rdev->new_fw) {
6356		const struct rlc_firmware_header_v1_0 *hdr =
6357			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6358		const __le32 *fw_data = (const __le32 *)
6359			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6360
6361		radeon_ucode_print_rlc_hdr(&hdr->header);
6362
6363		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6364		WREG32(RLC_GPM_UCODE_ADDR, 0);
6365		for (i = 0; i < size; i++)
6366			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6367		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6368	} else {
6369		const __be32 *fw_data;
6370
6371		switch (rdev->family) {
6372		case CHIP_BONAIRE:
6373		case CHIP_HAWAII:
6374		default:
6375			size = BONAIRE_RLC_UCODE_SIZE;
6376			break;
6377		case CHIP_KAVERI:
6378			size = KV_RLC_UCODE_SIZE;
6379			break;
6380		case CHIP_KABINI:
6381			size = KB_RLC_UCODE_SIZE;
6382			break;
6383		case CHIP_MULLINS:
6384			size = ML_RLC_UCODE_SIZE;
6385			break;
6386		}
6387
6388		fw_data = (const __be32 *)rdev->rlc_fw->data;
6389		WREG32(RLC_GPM_UCODE_ADDR, 0);
6390		for (i = 0; i < size; i++)
6391			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6392		WREG32(RLC_GPM_UCODE_ADDR, 0);
6393	}
6394
6395	/* XXX - find out what chips support lbpw */
6396	cik_enable_lbpw(rdev, false);
6397
6398	if (rdev->family == CHIP_BONAIRE)
6399		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6400
6401	cik_rlc_start(rdev);
6402
6403	return 0;
6404}
6405
6406static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6407{
6408	u32 data, orig, tmp, tmp2;
6409
6410	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6411
6412	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6413		cik_enable_gui_idle_interrupt(rdev, true);
6414
6415		tmp = cik_halt_rlc(rdev);
6416
6417		mutex_lock(&rdev->grbm_idx_mutex);
6418		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6419		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6420		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6421		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6422		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6423		mutex_unlock(&rdev->grbm_idx_mutex);
6424
6425		cik_update_rlc(rdev, tmp);
6426
6427		data |= CGCG_EN | CGLS_EN;
6428	} else {
6429		cik_enable_gui_idle_interrupt(rdev, false);
6430
6431		RREG32(CB_CGTT_SCLK_CTRL);
6432		RREG32(CB_CGTT_SCLK_CTRL);
6433		RREG32(CB_CGTT_SCLK_CTRL);
6434		RREG32(CB_CGTT_SCLK_CTRL);
6435
6436		data &= ~(CGCG_EN | CGLS_EN);
6437	}
6438
6439	if (orig != data)
6440		WREG32(RLC_CGCG_CGLS_CTRL, data);
6441
6442}
6443
6444static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6445{
6446	u32 data, orig, tmp = 0;
6447
6448	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6449		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6450			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6451				orig = data = RREG32(CP_MEM_SLP_CNTL);
6452				data |= CP_MEM_LS_EN;
6453				if (orig != data)
6454					WREG32(CP_MEM_SLP_CNTL, data);
6455			}
6456		}
6457
6458		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6459		data |= 0x00000001;
6460		data &= 0xfffffffd;
6461		if (orig != data)
6462			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6463
6464		tmp = cik_halt_rlc(rdev);
6465
6466		mutex_lock(&rdev->grbm_idx_mutex);
6467		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6468		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6469		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6470		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6471		WREG32(RLC_SERDES_WR_CTRL, data);
6472		mutex_unlock(&rdev->grbm_idx_mutex);
6473
6474		cik_update_rlc(rdev, tmp);
6475
6476		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6477			orig = data = RREG32(CGTS_SM_CTRL_REG);
6478			data &= ~SM_MODE_MASK;
6479			data |= SM_MODE(0x2);
6480			data |= SM_MODE_ENABLE;
6481			data &= ~CGTS_OVERRIDE;
6482			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6483			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6484				data &= ~CGTS_LS_OVERRIDE;
6485			data &= ~ON_MONITOR_ADD_MASK;
6486			data |= ON_MONITOR_ADD_EN;
6487			data |= ON_MONITOR_ADD(0x96);
6488			if (orig != data)
6489				WREG32(CGTS_SM_CTRL_REG, data);
6490		}
6491	} else {
6492		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6493		data |= 0x00000003;
6494		if (orig != data)
6495			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6496
6497		data = RREG32(RLC_MEM_SLP_CNTL);
6498		if (data & RLC_MEM_LS_EN) {
6499			data &= ~RLC_MEM_LS_EN;
6500			WREG32(RLC_MEM_SLP_CNTL, data);
6501		}
6502
6503		data = RREG32(CP_MEM_SLP_CNTL);
6504		if (data & CP_MEM_LS_EN) {
6505			data &= ~CP_MEM_LS_EN;
6506			WREG32(CP_MEM_SLP_CNTL, data);
6507		}
6508
6509		orig = data = RREG32(CGTS_SM_CTRL_REG);
6510		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6511		if (orig != data)
6512			WREG32(CGTS_SM_CTRL_REG, data);
6513
6514		tmp = cik_halt_rlc(rdev);
6515
6516		mutex_lock(&rdev->grbm_idx_mutex);
6517		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6518		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6519		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6520		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6521		WREG32(RLC_SERDES_WR_CTRL, data);
6522		mutex_unlock(&rdev->grbm_idx_mutex);
6523
6524		cik_update_rlc(rdev, tmp);
6525	}
6526}
6527
6528static const u32 mc_cg_registers[] =
6529{
6530	MC_HUB_MISC_HUB_CG,
6531	MC_HUB_MISC_SIP_CG,
6532	MC_HUB_MISC_VM_CG,
6533	MC_XPB_CLK_GAT,
6534	ATC_MISC_CG,
6535	MC_CITF_MISC_WR_CG,
6536	MC_CITF_MISC_RD_CG,
6537	MC_CITF_MISC_VM_CG,
6538	VM_L2_CG,
6539};
6540
6541static void cik_enable_mc_ls(struct radeon_device *rdev,
6542			     bool enable)
6543{
6544	int i;
6545	u32 orig, data;
6546
6547	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6548		orig = data = RREG32(mc_cg_registers[i]);
6549		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6550			data |= MC_LS_ENABLE;
6551		else
6552			data &= ~MC_LS_ENABLE;
6553		if (data != orig)
6554			WREG32(mc_cg_registers[i], data);
6555	}
6556}
6557
6558static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6559			       bool enable)
6560{
6561	int i;
6562	u32 orig, data;
6563
6564	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6565		orig = data = RREG32(mc_cg_registers[i]);
6566		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6567			data |= MC_CG_ENABLE;
6568		else
6569			data &= ~MC_CG_ENABLE;
6570		if (data != orig)
6571			WREG32(mc_cg_registers[i], data);
6572	}
6573}
6574
6575static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6576				 bool enable)
6577{
6578	u32 orig, data;
6579
6580	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6581		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6582		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6583	} else {
6584		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6585		data |= 0xff000000;
6586		if (data != orig)
6587			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6588
6589		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6590		data |= 0xff000000;
6591		if (data != orig)
6592			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6593	}
6594}
6595
6596static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6597				 bool enable)
6598{
6599	u32 orig, data;
6600
6601	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6602		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6603		data |= 0x100;
6604		if (orig != data)
6605			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6606
6607		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6608		data |= 0x100;
6609		if (orig != data)
6610			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6611	} else {
6612		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6613		data &= ~0x100;
6614		if (orig != data)
6615			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6616
6617		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6618		data &= ~0x100;
6619		if (orig != data)
6620			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6621	}
6622}
6623
6624static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6625				bool enable)
6626{
6627	u32 orig, data;
6628
6629	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6630		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6631		data = 0xfff;
6632		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6633
6634		orig = data = RREG32(UVD_CGC_CTRL);
6635		data |= DCM;
6636		if (orig != data)
6637			WREG32(UVD_CGC_CTRL, data);
6638	} else {
6639		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6640		data &= ~0xfff;
6641		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6642
6643		orig = data = RREG32(UVD_CGC_CTRL);
6644		data &= ~DCM;
6645		if (orig != data)
6646			WREG32(UVD_CGC_CTRL, data);
6647	}
6648}
6649
6650static void cik_enable_bif_mgls(struct radeon_device *rdev,
6651			       bool enable)
6652{
6653	u32 orig, data;
6654
6655	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6656
6657	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6658		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6659			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6660	else
6661		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6662			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6663
6664	if (orig != data)
6665		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6666}
6667
6668static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6669				bool enable)
6670{
6671	u32 orig, data;
6672
6673	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6674
6675	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6676		data &= ~CLOCK_GATING_DIS;
6677	else
6678		data |= CLOCK_GATING_DIS;
6679
6680	if (orig != data)
6681		WREG32(HDP_HOST_PATH_CNTL, data);
6682}
6683
6684static void cik_enable_hdp_ls(struct radeon_device *rdev,
6685			      bool enable)
6686{
6687	u32 orig, data;
6688
6689	orig = data = RREG32(HDP_MEM_POWER_LS);
6690
6691	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6692		data |= HDP_LS_ENABLE;
6693	else
6694		data &= ~HDP_LS_ENABLE;
6695
6696	if (orig != data)
6697		WREG32(HDP_MEM_POWER_LS, data);
6698}
6699
6700void cik_update_cg(struct radeon_device *rdev,
6701		   u32 block, bool enable)
6702{
6703
6704	if (block & RADEON_CG_BLOCK_GFX) {
6705		cik_enable_gui_idle_interrupt(rdev, false);
6706		/* order matters! */
6707		if (enable) {
6708			cik_enable_mgcg(rdev, true);
6709			cik_enable_cgcg(rdev, true);
6710		} else {
6711			cik_enable_cgcg(rdev, false);
6712			cik_enable_mgcg(rdev, false);
6713		}
6714		cik_enable_gui_idle_interrupt(rdev, true);
6715	}
6716
6717	if (block & RADEON_CG_BLOCK_MC) {
6718		if (!(rdev->flags & RADEON_IS_IGP)) {
6719			cik_enable_mc_mgcg(rdev, enable);
6720			cik_enable_mc_ls(rdev, enable);
6721		}
6722	}
6723
6724	if (block & RADEON_CG_BLOCK_SDMA) {
6725		cik_enable_sdma_mgcg(rdev, enable);
6726		cik_enable_sdma_mgls(rdev, enable);
6727	}
6728
6729	if (block & RADEON_CG_BLOCK_BIF) {
6730		cik_enable_bif_mgls(rdev, enable);
6731	}
6732
6733	if (block & RADEON_CG_BLOCK_UVD) {
6734		if (rdev->has_uvd)
6735			cik_enable_uvd_mgcg(rdev, enable);
6736	}
6737
6738	if (block & RADEON_CG_BLOCK_HDP) {
6739		cik_enable_hdp_mgcg(rdev, enable);
6740		cik_enable_hdp_ls(rdev, enable);
6741	}
6742
6743	if (block & RADEON_CG_BLOCK_VCE) {
6744		vce_v2_0_enable_mgcg(rdev, enable);
6745	}
6746}
6747
6748static void cik_init_cg(struct radeon_device *rdev)
6749{
6750
6751	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6752
6753	if (rdev->has_uvd)
6754		si_init_uvd_internal_cg(rdev);
6755
6756	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6757			     RADEON_CG_BLOCK_SDMA |
6758			     RADEON_CG_BLOCK_BIF |
6759			     RADEON_CG_BLOCK_UVD |
6760			     RADEON_CG_BLOCK_HDP), true);
6761}
6762
6763static void cik_fini_cg(struct radeon_device *rdev)
6764{
6765	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6766			     RADEON_CG_BLOCK_SDMA |
6767			     RADEON_CG_BLOCK_BIF |
6768			     RADEON_CG_BLOCK_UVD |
6769			     RADEON_CG_BLOCK_HDP), false);
6770
6771	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6772}
6773
6774static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6775					  bool enable)
6776{
6777	u32 data, orig;
6778
6779	orig = data = RREG32(RLC_PG_CNTL);
6780	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6781		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6782	else
6783		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6784	if (orig != data)
6785		WREG32(RLC_PG_CNTL, data);
6786}
6787
6788static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6789					  bool enable)
6790{
6791	u32 data, orig;
6792
6793	orig = data = RREG32(RLC_PG_CNTL);
6794	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6795		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6796	else
6797		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6798	if (orig != data)
6799		WREG32(RLC_PG_CNTL, data);
6800}
6801
6802static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6803{
6804	u32 data, orig;
6805
6806	orig = data = RREG32(RLC_PG_CNTL);
6807	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6808		data &= ~DISABLE_CP_PG;
6809	else
6810		data |= DISABLE_CP_PG;
6811	if (orig != data)
6812		WREG32(RLC_PG_CNTL, data);
6813}
6814
6815static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6816{
6817	u32 data, orig;
6818
6819	orig = data = RREG32(RLC_PG_CNTL);
6820	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6821		data &= ~DISABLE_GDS_PG;
6822	else
6823		data |= DISABLE_GDS_PG;
6824	if (orig != data)
6825		WREG32(RLC_PG_CNTL, data);
6826}
6827
6828#define CP_ME_TABLE_SIZE    96
6829#define CP_ME_TABLE_OFFSET  2048
6830#define CP_MEC_TABLE_OFFSET 4096
6831
6832void cik_init_cp_pg_table(struct radeon_device *rdev)
6833{
6834	volatile u32 *dst_ptr;
6835	int me, i, max_me = 4;
6836	u32 bo_offset = 0;
6837	u32 table_offset, table_size;
6838
6839	if (rdev->family == CHIP_KAVERI)
6840		max_me = 5;
6841
6842	if (rdev->rlc.cp_table_ptr == NULL)
6843		return;
6844
6845	/* write the cp table buffer */
6846	dst_ptr = rdev->rlc.cp_table_ptr;
6847	for (me = 0; me < max_me; me++) {
6848		if (rdev->new_fw) {
6849			const __le32 *fw_data;
6850			const struct gfx_firmware_header_v1_0 *hdr;
6851
6852			if (me == 0) {
6853				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6854				fw_data = (const __le32 *)
6855					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6856				table_offset = le32_to_cpu(hdr->jt_offset);
6857				table_size = le32_to_cpu(hdr->jt_size);
6858			} else if (me == 1) {
6859				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6860				fw_data = (const __le32 *)
6861					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6862				table_offset = le32_to_cpu(hdr->jt_offset);
6863				table_size = le32_to_cpu(hdr->jt_size);
6864			} else if (me == 2) {
6865				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6866				fw_data = (const __le32 *)
6867					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6868				table_offset = le32_to_cpu(hdr->jt_offset);
6869				table_size = le32_to_cpu(hdr->jt_size);
6870			} else if (me == 3) {
6871				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6872				fw_data = (const __le32 *)
6873					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6874				table_offset = le32_to_cpu(hdr->jt_offset);
6875				table_size = le32_to_cpu(hdr->jt_size);
6876			} else {
6877				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6878				fw_data = (const __le32 *)
6879					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6880				table_offset = le32_to_cpu(hdr->jt_offset);
6881				table_size = le32_to_cpu(hdr->jt_size);
6882			}
6883
6884			for (i = 0; i < table_size; i ++) {
6885				dst_ptr[bo_offset + i] =
6886					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6887			}
6888			bo_offset += table_size;
6889		} else {
6890			const __be32 *fw_data;
6891			table_size = CP_ME_TABLE_SIZE;
6892
6893			if (me == 0) {
6894				fw_data = (const __be32 *)rdev->ce_fw->data;
6895				table_offset = CP_ME_TABLE_OFFSET;
6896			} else if (me == 1) {
6897				fw_data = (const __be32 *)rdev->pfp_fw->data;
6898				table_offset = CP_ME_TABLE_OFFSET;
6899			} else if (me == 2) {
6900				fw_data = (const __be32 *)rdev->me_fw->data;
6901				table_offset = CP_ME_TABLE_OFFSET;
6902			} else {
6903				fw_data = (const __be32 *)rdev->mec_fw->data;
6904				table_offset = CP_MEC_TABLE_OFFSET;
6905			}
6906
6907			for (i = 0; i < table_size; i ++) {
6908				dst_ptr[bo_offset + i] =
6909					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6910			}
6911			bo_offset += table_size;
6912		}
6913	}
6914}
6915
6916static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6917				bool enable)
6918{
6919	u32 data, orig;
6920
6921	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6922		orig = data = RREG32(RLC_PG_CNTL);
6923		data |= GFX_PG_ENABLE;
6924		if (orig != data)
6925			WREG32(RLC_PG_CNTL, data);
6926
6927		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6928		data |= AUTO_PG_EN;
6929		if (orig != data)
6930			WREG32(RLC_AUTO_PG_CTRL, data);
6931	} else {
6932		orig = data = RREG32(RLC_PG_CNTL);
6933		data &= ~GFX_PG_ENABLE;
6934		if (orig != data)
6935			WREG32(RLC_PG_CNTL, data);
6936
6937		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6938		data &= ~AUTO_PG_EN;
6939		if (orig != data)
6940			WREG32(RLC_AUTO_PG_CTRL, data);
6941
6942		data = RREG32(DB_RENDER_CONTROL);
6943	}
6944}
6945
6946static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6947{
6948	u32 mask = 0, tmp, tmp1;
6949	int i;
6950
6951	mutex_lock(&rdev->grbm_idx_mutex);
6952	cik_select_se_sh(rdev, se, sh);
6953	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6954	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6955	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6956	mutex_unlock(&rdev->grbm_idx_mutex);
6957
6958	tmp &= 0xffff0000;
6959
6960	tmp |= tmp1;
6961	tmp >>= 16;
6962
6963	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6964		mask <<= 1;
6965		mask |= 1;
6966	}
6967
6968	return (~tmp) & mask;
6969}
6970
6971static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6972{
6973	u32 i, j, k, active_cu_number = 0;
6974	u32 mask, counter, cu_bitmap;
6975	u32 tmp = 0;
6976
6977	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6978		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6979			mask = 1;
6980			cu_bitmap = 0;
6981			counter = 0;
6982			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6983				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6984					if (counter < 2)
6985						cu_bitmap |= mask;
6986					counter ++;
6987				}
6988				mask <<= 1;
6989			}
6990
6991			active_cu_number += counter;
6992			tmp |= (cu_bitmap << (i * 16 + j * 8));
6993		}
6994	}
6995
6996	WREG32(RLC_PG_AO_CU_MASK, tmp);
6997
6998	tmp = RREG32(RLC_MAX_PG_CU);
6999	tmp &= ~MAX_PU_CU_MASK;
7000	tmp |= MAX_PU_CU(active_cu_number);
7001	WREG32(RLC_MAX_PG_CU, tmp);
7002}
7003
7004static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7005				       bool enable)
7006{
7007	u32 data, orig;
7008
7009	orig = data = RREG32(RLC_PG_CNTL);
7010	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7011		data |= STATIC_PER_CU_PG_ENABLE;
7012	else
7013		data &= ~STATIC_PER_CU_PG_ENABLE;
7014	if (orig != data)
7015		WREG32(RLC_PG_CNTL, data);
7016}
7017
7018static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7019					bool enable)
7020{
7021	u32 data, orig;
7022
7023	orig = data = RREG32(RLC_PG_CNTL);
7024	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7025		data |= DYN_PER_CU_PG_ENABLE;
7026	else
7027		data &= ~DYN_PER_CU_PG_ENABLE;
7028	if (orig != data)
7029		WREG32(RLC_PG_CNTL, data);
7030}
7031
7032#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7033#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7034
7035static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7036{
7037	u32 data, orig;
7038	u32 i;
7039
7040	if (rdev->rlc.cs_data) {
7041		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7042		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7043		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7044		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7045	} else {
7046		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7047		for (i = 0; i < 3; i++)
7048			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7049	}
7050	if (rdev->rlc.reg_list) {
7051		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7052		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7053			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7054	}
7055
7056	orig = data = RREG32(RLC_PG_CNTL);
7057	data |= GFX_PG_SRC;
7058	if (orig != data)
7059		WREG32(RLC_PG_CNTL, data);
7060
7061	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7062	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7063
7064	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7065	data &= ~IDLE_POLL_COUNT_MASK;
7066	data |= IDLE_POLL_COUNT(0x60);
7067	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7068
7069	data = 0x10101010;
7070	WREG32(RLC_PG_DELAY, data);
7071
7072	data = RREG32(RLC_PG_DELAY_2);
7073	data &= ~0xff;
7074	data |= 0x3;
7075	WREG32(RLC_PG_DELAY_2, data);
7076
7077	data = RREG32(RLC_AUTO_PG_CTRL);
7078	data &= ~GRBM_REG_SGIT_MASK;
7079	data |= GRBM_REG_SGIT(0x700);
7080	WREG32(RLC_AUTO_PG_CTRL, data);
7081
7082}
7083
7084static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7085{
7086	cik_enable_gfx_cgpg(rdev, enable);
7087	cik_enable_gfx_static_mgpg(rdev, enable);
7088	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7089}
7090
7091u32 cik_get_csb_size(struct radeon_device *rdev)
7092{
7093	u32 count = 0;
7094	const struct cs_section_def *sect = NULL;
7095	const struct cs_extent_def *ext = NULL;
7096
7097	if (rdev->rlc.cs_data == NULL)
7098		return 0;
7099
7100	/* begin clear state */
7101	count += 2;
7102	/* context control state */
7103	count += 3;
7104
7105	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7106		for (ext = sect->section; ext->extent != NULL; ++ext) {
7107			if (sect->id == SECT_CONTEXT)
7108				count += 2 + ext->reg_count;
7109			else
7110				return 0;
7111		}
7112	}
7113	/* pa_sc_raster_config/pa_sc_raster_config1 */
7114	count += 4;
7115	/* end clear state */
7116	count += 2;
7117	/* clear state */
7118	count += 2;
7119
7120	return count;
7121}
7122
7123void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7124{
7125	u32 count = 0, i;
7126	const struct cs_section_def *sect = NULL;
7127	const struct cs_extent_def *ext = NULL;
7128
7129	if (rdev->rlc.cs_data == NULL)
7130		return;
7131	if (buffer == NULL)
7132		return;
7133
7134	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7135	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7136
7137	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7138	buffer[count++] = cpu_to_le32(0x80000000);
7139	buffer[count++] = cpu_to_le32(0x80000000);
7140
7141	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7142		for (ext = sect->section; ext->extent != NULL; ++ext) {
7143			if (sect->id == SECT_CONTEXT) {
7144				buffer[count++] =
7145					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7146				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7147				for (i = 0; i < ext->reg_count; i++)
7148					buffer[count++] = cpu_to_le32(ext->extent[i]);
7149			} else {
7150				return;
7151			}
7152		}
7153	}
7154
7155	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7156	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7157	switch (rdev->family) {
7158	case CHIP_BONAIRE:
7159		buffer[count++] = cpu_to_le32(0x16000012);
7160		buffer[count++] = cpu_to_le32(0x00000000);
7161		break;
7162	case CHIP_KAVERI:
7163		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7164		buffer[count++] = cpu_to_le32(0x00000000);
7165		break;
7166	case CHIP_KABINI:
7167	case CHIP_MULLINS:
7168		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7169		buffer[count++] = cpu_to_le32(0x00000000);
7170		break;
7171	case CHIP_HAWAII:
7172		buffer[count++] = cpu_to_le32(0x3a00161a);
7173		buffer[count++] = cpu_to_le32(0x0000002e);
7174		break;
7175	default:
7176		buffer[count++] = cpu_to_le32(0x00000000);
7177		buffer[count++] = cpu_to_le32(0x00000000);
7178		break;
7179	}
7180
7181	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7182	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7183
7184	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7185	buffer[count++] = cpu_to_le32(0);
7186}
7187
7188static void cik_init_pg(struct radeon_device *rdev)
7189{
7190	if (rdev->pg_flags) {
7191		cik_enable_sck_slowdown_on_pu(rdev, true);
7192		cik_enable_sck_slowdown_on_pd(rdev, true);
7193		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7194			cik_init_gfx_cgpg(rdev);
7195			cik_enable_cp_pg(rdev, true);
7196			cik_enable_gds_pg(rdev, true);
7197		}
7198		cik_init_ao_cu_mask(rdev);
7199		cik_update_gfx_pg(rdev, true);
7200	}
7201}
7202
7203static void cik_fini_pg(struct radeon_device *rdev)
7204{
7205	if (rdev->pg_flags) {
7206		cik_update_gfx_pg(rdev, false);
7207		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7208			cik_enable_cp_pg(rdev, false);
7209			cik_enable_gds_pg(rdev, false);
7210		}
7211	}
7212}
7213
7214/*
7215 * Interrupts
7216 * Starting with r6xx, interrupts are handled via a ring buffer.
7217 * Ring buffers are areas of GPU accessible memory that the GPU
7218 * writes interrupt vectors into and the host reads vectors out of.
7219 * There is a rptr (read pointer) that determines where the
7220 * host is currently reading, and a wptr (write pointer)
7221 * which determines where the GPU has written.  When the
7222 * pointers are equal, the ring is idle.  When the GPU
7223 * writes vectors to the ring buffer, it increments the
7224 * wptr.  When there is an interrupt, the host then starts
7225 * fetching commands and processing them until the pointers are
7226 * equal again at which point it updates the rptr.
7227 */
7228
7229/**
7230 * cik_enable_interrupts - Enable the interrupt ring buffer
7231 *
7232 * @rdev: radeon_device pointer
7233 *
7234 * Enable the interrupt ring buffer (CIK).
7235 */
7236static void cik_enable_interrupts(struct radeon_device *rdev)
7237{
7238	u32 ih_cntl = RREG32(IH_CNTL);
7239	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7240
7241	ih_cntl |= ENABLE_INTR;
7242	ih_rb_cntl |= IH_RB_ENABLE;
7243	WREG32(IH_CNTL, ih_cntl);
7244	WREG32(IH_RB_CNTL, ih_rb_cntl);
7245	rdev->ih.enabled = true;
7246}
7247
7248/**
7249 * cik_disable_interrupts - Disable the interrupt ring buffer
7250 *
7251 * @rdev: radeon_device pointer
7252 *
7253 * Disable the interrupt ring buffer (CIK).
7254 */
7255static void cik_disable_interrupts(struct radeon_device *rdev)
7256{
7257	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7258	u32 ih_cntl = RREG32(IH_CNTL);
7259
7260	ih_rb_cntl &= ~IH_RB_ENABLE;
7261	ih_cntl &= ~ENABLE_INTR;
7262	WREG32(IH_RB_CNTL, ih_rb_cntl);
7263	WREG32(IH_CNTL, ih_cntl);
7264	/* set rptr, wptr to 0 */
7265	WREG32(IH_RB_RPTR, 0);
7266	WREG32(IH_RB_WPTR, 0);
7267	rdev->ih.enabled = false;
7268	rdev->ih.rptr = 0;
7269}
7270
7271/**
7272 * cik_disable_interrupt_state - Disable all interrupt sources
7273 *
7274 * @rdev: radeon_device pointer
7275 *
7276 * Clear all interrupt enable bits used by the driver (CIK).
7277 */
7278static void cik_disable_interrupt_state(struct radeon_device *rdev)
7279{
7280	u32 tmp;
7281
7282	/* gfx ring */
7283	tmp = RREG32(CP_INT_CNTL_RING0) &
7284		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7285	WREG32(CP_INT_CNTL_RING0, tmp);
7286	/* sdma */
7287	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7288	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7289	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7290	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7291	/* compute queues */
7292	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7293	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7294	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7295	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7296	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7297	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7298	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7299	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7300	/* grbm */
7301	WREG32(GRBM_INT_CNTL, 0);
7302	/* SRBM */
7303	WREG32(SRBM_INT_CNTL, 0);
7304	/* vline/vblank, etc. */
7305	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7306	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7307	if (rdev->num_crtc >= 4) {
7308		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7309		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7310	}
7311	if (rdev->num_crtc >= 6) {
7312		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7313		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7314	}
7315	/* pflip */
7316	if (rdev->num_crtc >= 2) {
7317		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7318		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7319	}
7320	if (rdev->num_crtc >= 4) {
7321		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7322		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7323	}
7324	if (rdev->num_crtc >= 6) {
7325		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7326		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7327	}
7328
7329	/* dac hotplug */
7330	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7331
7332	/* digital hotplug */
7333	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7334	WREG32(DC_HPD1_INT_CONTROL, tmp);
7335	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7336	WREG32(DC_HPD2_INT_CONTROL, tmp);
7337	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7338	WREG32(DC_HPD3_INT_CONTROL, tmp);
7339	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7340	WREG32(DC_HPD4_INT_CONTROL, tmp);
7341	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7342	WREG32(DC_HPD5_INT_CONTROL, tmp);
7343	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7344	WREG32(DC_HPD6_INT_CONTROL, tmp);
7345
7346}
7347
7348/**
7349 * cik_irq_init - init and enable the interrupt ring
7350 *
7351 * @rdev: radeon_device pointer
7352 *
7353 * Allocate a ring buffer for the interrupt controller,
7354 * enable the RLC, disable interrupts, enable the IH
7355 * ring buffer and enable it (CIK).
7356 * Called at device load and reume.
7357 * Returns 0 for success, errors for failure.
7358 */
7359static int cik_irq_init(struct radeon_device *rdev)
7360{
7361	int ret = 0;
7362	int rb_bufsz;
7363	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7364
7365	/* allocate ring */
7366	ret = r600_ih_ring_alloc(rdev);
7367	if (ret)
7368		return ret;
7369
7370	/* disable irqs */
7371	cik_disable_interrupts(rdev);
7372
7373	/* init rlc */
7374	ret = cik_rlc_resume(rdev);
7375	if (ret) {
7376		r600_ih_ring_fini(rdev);
7377		return ret;
7378	}
7379
7380	/* setup interrupt control */
7381	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7382	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7383	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7384	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7385	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7386	 */
7387	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7388	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7389	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7390	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7391
7392	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7393	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7394
7395	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7396		      IH_WPTR_OVERFLOW_CLEAR |
7397		      (rb_bufsz << 1));
7398
7399	if (rdev->wb.enabled)
7400		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7401
7402	/* set the writeback address whether it's enabled or not */
7403	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7404	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7405
7406	WREG32(IH_RB_CNTL, ih_rb_cntl);
7407
7408	/* set rptr, wptr to 0 */
7409	WREG32(IH_RB_RPTR, 0);
7410	WREG32(IH_RB_WPTR, 0);
7411
7412	/* Default settings for IH_CNTL (disabled at first) */
7413	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7414	/* RPTR_REARM only works if msi's are enabled */
7415	if (rdev->msi_enabled)
7416		ih_cntl |= RPTR_REARM;
7417	WREG32(IH_CNTL, ih_cntl);
7418
7419	/* force the active interrupt state to all disabled */
7420	cik_disable_interrupt_state(rdev);
7421
7422	pci_set_master(rdev->pdev);
7423
7424	/* enable irqs */
7425	cik_enable_interrupts(rdev);
7426
7427	return ret;
7428}
7429
7430/**
7431 * cik_irq_set - enable/disable interrupt sources
7432 *
7433 * @rdev: radeon_device pointer
7434 *
7435 * Enable interrupt sources on the GPU (vblanks, hpd,
7436 * etc.) (CIK).
7437 * Returns 0 for success, errors for failure.
7438 */
7439int cik_irq_set(struct radeon_device *rdev)
7440{
7441	u32 cp_int_cntl;
7442	u32 cp_m1p0;
7443	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7444	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7445	u32 grbm_int_cntl = 0;
7446	u32 dma_cntl, dma_cntl1;
7447
7448	if (!rdev->irq.installed) {
7449		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7450		return -EINVAL;
7451	}
7452	/* don't enable anything if the ih is disabled */
7453	if (!rdev->ih.enabled) {
7454		cik_disable_interrupts(rdev);
7455		/* force the active interrupt state to all disabled */
7456		cik_disable_interrupt_state(rdev);
7457		return 0;
7458	}
7459
7460	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7461		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7462	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7463
7464	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7465	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7466	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7467	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7468	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7469	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7470
7471	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7472	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7473
7474	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7475
7476	/* enable CP interrupts on all rings */
7477	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7478		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7479		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7480	}
7481	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7482		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7483		DRM_DEBUG("si_irq_set: sw int cp1\n");
7484		if (ring->me == 1) {
7485			switch (ring->pipe) {
7486			case 0:
7487				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7488				break;
7489			default:
7490				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7491				break;
7492			}
7493		} else {
7494			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7495		}
7496	}
7497	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7498		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7499		DRM_DEBUG("si_irq_set: sw int cp2\n");
7500		if (ring->me == 1) {
7501			switch (ring->pipe) {
7502			case 0:
7503				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7504				break;
7505			default:
7506				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7507				break;
7508			}
7509		} else {
7510			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7511		}
7512	}
7513
7514	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7515		DRM_DEBUG("cik_irq_set: sw int dma\n");
7516		dma_cntl |= TRAP_ENABLE;
7517	}
7518
7519	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7520		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7521		dma_cntl1 |= TRAP_ENABLE;
7522	}
7523
7524	if (rdev->irq.crtc_vblank_int[0] ||
7525	    atomic_read(&rdev->irq.pflip[0])) {
7526		DRM_DEBUG("cik_irq_set: vblank 0\n");
7527		crtc1 |= VBLANK_INTERRUPT_MASK;
7528	}
7529	if (rdev->irq.crtc_vblank_int[1] ||
7530	    atomic_read(&rdev->irq.pflip[1])) {
7531		DRM_DEBUG("cik_irq_set: vblank 1\n");
7532		crtc2 |= VBLANK_INTERRUPT_MASK;
7533	}
7534	if (rdev->irq.crtc_vblank_int[2] ||
7535	    atomic_read(&rdev->irq.pflip[2])) {
7536		DRM_DEBUG("cik_irq_set: vblank 2\n");
7537		crtc3 |= VBLANK_INTERRUPT_MASK;
7538	}
7539	if (rdev->irq.crtc_vblank_int[3] ||
7540	    atomic_read(&rdev->irq.pflip[3])) {
7541		DRM_DEBUG("cik_irq_set: vblank 3\n");
7542		crtc4 |= VBLANK_INTERRUPT_MASK;
7543	}
7544	if (rdev->irq.crtc_vblank_int[4] ||
7545	    atomic_read(&rdev->irq.pflip[4])) {
7546		DRM_DEBUG("cik_irq_set: vblank 4\n");
7547		crtc5 |= VBLANK_INTERRUPT_MASK;
7548	}
7549	if (rdev->irq.crtc_vblank_int[5] ||
7550	    atomic_read(&rdev->irq.pflip[5])) {
7551		DRM_DEBUG("cik_irq_set: vblank 5\n");
7552		crtc6 |= VBLANK_INTERRUPT_MASK;
7553	}
7554	if (rdev->irq.hpd[0]) {
7555		DRM_DEBUG("cik_irq_set: hpd 1\n");
7556		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7557	}
7558	if (rdev->irq.hpd[1]) {
7559		DRM_DEBUG("cik_irq_set: hpd 2\n");
7560		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7561	}
7562	if (rdev->irq.hpd[2]) {
7563		DRM_DEBUG("cik_irq_set: hpd 3\n");
7564		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7565	}
7566	if (rdev->irq.hpd[3]) {
7567		DRM_DEBUG("cik_irq_set: hpd 4\n");
7568		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7569	}
7570	if (rdev->irq.hpd[4]) {
7571		DRM_DEBUG("cik_irq_set: hpd 5\n");
7572		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7573	}
7574	if (rdev->irq.hpd[5]) {
7575		DRM_DEBUG("cik_irq_set: hpd 6\n");
7576		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7577	}
7578
7579	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7580
7581	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7582	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7583
7584	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7585
7586	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7587
7588	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7589	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7590	if (rdev->num_crtc >= 4) {
7591		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7592		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7593	}
7594	if (rdev->num_crtc >= 6) {
7595		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7596		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7597	}
7598
7599	if (rdev->num_crtc >= 2) {
7600		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7601		       GRPH_PFLIP_INT_MASK);
7602		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7603		       GRPH_PFLIP_INT_MASK);
7604	}
7605	if (rdev->num_crtc >= 4) {
7606		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7607		       GRPH_PFLIP_INT_MASK);
7608		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7609		       GRPH_PFLIP_INT_MASK);
7610	}
7611	if (rdev->num_crtc >= 6) {
7612		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7613		       GRPH_PFLIP_INT_MASK);
7614		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7615		       GRPH_PFLIP_INT_MASK);
7616	}
7617
7618	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7619	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7620	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7621	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7622	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7623	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7624
7625	/* posting read */
7626	RREG32(SRBM_STATUS);
7627
7628	return 0;
7629}
7630
7631/**
7632 * cik_irq_ack - ack interrupt sources
7633 *
7634 * @rdev: radeon_device pointer
7635 *
7636 * Ack interrupt sources on the GPU (vblanks, hpd,
7637 * etc.) (CIK).  Certain interrupts sources are sw
7638 * generated and do not require an explicit ack.
7639 */
7640static inline void cik_irq_ack(struct radeon_device *rdev)
7641{
7642	u32 tmp;
7643
7644	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7645	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7646	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7647	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7648	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7649	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7650	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7651
7652	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7653		EVERGREEN_CRTC0_REGISTER_OFFSET);
7654	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7655		EVERGREEN_CRTC1_REGISTER_OFFSET);
7656	if (rdev->num_crtc >= 4) {
7657		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7658			EVERGREEN_CRTC2_REGISTER_OFFSET);
7659		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7660			EVERGREEN_CRTC3_REGISTER_OFFSET);
7661	}
7662	if (rdev->num_crtc >= 6) {
7663		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7664			EVERGREEN_CRTC4_REGISTER_OFFSET);
7665		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7666			EVERGREEN_CRTC5_REGISTER_OFFSET);
7667	}
7668
7669	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7670		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7671		       GRPH_PFLIP_INT_CLEAR);
7672	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7673		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7674		       GRPH_PFLIP_INT_CLEAR);
7675	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7676		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7677	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7678		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7679	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7680		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7681	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7682		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7683
7684	if (rdev->num_crtc >= 4) {
7685		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7686			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7687			       GRPH_PFLIP_INT_CLEAR);
7688		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7689			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7690			       GRPH_PFLIP_INT_CLEAR);
7691		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7692			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7693		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7694			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7695		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7696			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7697		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7698			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7699	}
7700
7701	if (rdev->num_crtc >= 6) {
7702		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7703			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7704			       GRPH_PFLIP_INT_CLEAR);
7705		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7706			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7707			       GRPH_PFLIP_INT_CLEAR);
7708		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7709			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7710		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7711			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7712		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7713			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7714		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7715			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7716	}
7717
7718	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7719		tmp = RREG32(DC_HPD1_INT_CONTROL);
7720		tmp |= DC_HPDx_INT_ACK;
7721		WREG32(DC_HPD1_INT_CONTROL, tmp);
7722	}
7723	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7724		tmp = RREG32(DC_HPD2_INT_CONTROL);
7725		tmp |= DC_HPDx_INT_ACK;
7726		WREG32(DC_HPD2_INT_CONTROL, tmp);
7727	}
7728	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7729		tmp = RREG32(DC_HPD3_INT_CONTROL);
7730		tmp |= DC_HPDx_INT_ACK;
7731		WREG32(DC_HPD3_INT_CONTROL, tmp);
7732	}
7733	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7734		tmp = RREG32(DC_HPD4_INT_CONTROL);
7735		tmp |= DC_HPDx_INT_ACK;
7736		WREG32(DC_HPD4_INT_CONTROL, tmp);
7737	}
7738	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7739		tmp = RREG32(DC_HPD5_INT_CONTROL);
7740		tmp |= DC_HPDx_INT_ACK;
7741		WREG32(DC_HPD5_INT_CONTROL, tmp);
7742	}
7743	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7744		tmp = RREG32(DC_HPD5_INT_CONTROL);
7745		tmp |= DC_HPDx_INT_ACK;
7746		WREG32(DC_HPD6_INT_CONTROL, tmp);
7747	}
7748	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7749		tmp = RREG32(DC_HPD1_INT_CONTROL);
7750		tmp |= DC_HPDx_RX_INT_ACK;
7751		WREG32(DC_HPD1_INT_CONTROL, tmp);
7752	}
7753	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7754		tmp = RREG32(DC_HPD2_INT_CONTROL);
7755		tmp |= DC_HPDx_RX_INT_ACK;
7756		WREG32(DC_HPD2_INT_CONTROL, tmp);
7757	}
7758	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7759		tmp = RREG32(DC_HPD3_INT_CONTROL);
7760		tmp |= DC_HPDx_RX_INT_ACK;
7761		WREG32(DC_HPD3_INT_CONTROL, tmp);
7762	}
7763	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7764		tmp = RREG32(DC_HPD4_INT_CONTROL);
7765		tmp |= DC_HPDx_RX_INT_ACK;
7766		WREG32(DC_HPD4_INT_CONTROL, tmp);
7767	}
7768	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7769		tmp = RREG32(DC_HPD5_INT_CONTROL);
7770		tmp |= DC_HPDx_RX_INT_ACK;
7771		WREG32(DC_HPD5_INT_CONTROL, tmp);
7772	}
7773	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7774		tmp = RREG32(DC_HPD5_INT_CONTROL);
7775		tmp |= DC_HPDx_RX_INT_ACK;
7776		WREG32(DC_HPD6_INT_CONTROL, tmp);
7777	}
7778}
7779
7780/**
7781 * cik_irq_disable - disable interrupts
7782 *
7783 * @rdev: radeon_device pointer
7784 *
7785 * Disable interrupts on the hw (CIK).
7786 */
7787static void cik_irq_disable(struct radeon_device *rdev)
7788{
7789	cik_disable_interrupts(rdev);
7790	/* Wait and acknowledge irq */
7791	mdelay(1);
7792	cik_irq_ack(rdev);
7793	cik_disable_interrupt_state(rdev);
7794}
7795
7796/**
7797 * cik_irq_disable - disable interrupts for suspend
7798 *
7799 * @rdev: radeon_device pointer
7800 *
7801 * Disable interrupts and stop the RLC (CIK).
7802 * Used for suspend.
7803 */
7804static void cik_irq_suspend(struct radeon_device *rdev)
7805{
7806	cik_irq_disable(rdev);
7807	cik_rlc_stop(rdev);
7808}
7809
7810/**
7811 * cik_irq_fini - tear down interrupt support
7812 *
7813 * @rdev: radeon_device pointer
7814 *
7815 * Disable interrupts on the hw and free the IH ring
7816 * buffer (CIK).
7817 * Used for driver unload.
7818 */
7819static void cik_irq_fini(struct radeon_device *rdev)
7820{
7821	cik_irq_suspend(rdev);
7822	r600_ih_ring_fini(rdev);
7823}
7824
7825/**
7826 * cik_get_ih_wptr - get the IH ring buffer wptr
7827 *
7828 * @rdev: radeon_device pointer
7829 *
7830 * Get the IH ring buffer wptr from either the register
7831 * or the writeback memory buffer (CIK).  Also check for
7832 * ring buffer overflow and deal with it.
7833 * Used by cik_irq_process().
7834 * Returns the value of the wptr.
7835 */
7836static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7837{
7838	u32 wptr, tmp;
7839
7840	if (rdev->wb.enabled)
7841		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7842	else
7843		wptr = RREG32(IH_RB_WPTR);
7844
7845	if (wptr & RB_OVERFLOW) {
7846		wptr &= ~RB_OVERFLOW;
7847		/* When a ring buffer overflow happen start parsing interrupt
7848		 * from the last not overwritten vector (wptr + 16). Hopefully
7849		 * this should allow us to catchup.
7850		 */
7851		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7852			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7853		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7854		tmp = RREG32(IH_RB_CNTL);
7855		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7856		WREG32(IH_RB_CNTL, tmp);
7857	}
7858	return (wptr & rdev->ih.ptr_mask);
7859}
7860
7861/*        CIK IV Ring
7862 * Each IV ring entry is 128 bits:
7863 * [7:0]    - interrupt source id
7864 * [31:8]   - reserved
7865 * [59:32]  - interrupt source data
7866 * [63:60]  - reserved
7867 * [71:64]  - RINGID
7868 *            CP:
7869 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7870 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7871 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7872 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7873 *            PIPE_ID - ME0 0=3D
7874 *                    - ME1&2 compute dispatcher (4 pipes each)
7875 *            SDMA:
7876 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7877 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7878 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7879 * [79:72]  - VMID
7880 * [95:80]  - PASID
7881 * [127:96] - reserved
7882 */
7883/**
7884 * cik_irq_process - interrupt handler
7885 *
7886 * @rdev: radeon_device pointer
7887 *
7888 * Interrupt hander (CIK).  Walk the IH ring,
7889 * ack interrupts and schedule work to handle
7890 * interrupt events.
7891 * Returns irq process return code.
7892 */
7893int cik_irq_process(struct radeon_device *rdev)
7894{
7895	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7896	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7897	u32 wptr;
7898	u32 rptr;
7899	u32 src_id, src_data, ring_id;
7900	u8 me_id, pipe_id, queue_id;
7901	u32 ring_index;
7902	bool queue_hotplug = false;
7903	bool queue_dp = false;
7904	bool queue_reset = false;
7905	u32 addr, status, mc_client;
7906	bool queue_thermal = false;
7907
7908	if (!rdev->ih.enabled || rdev->shutdown)
7909		return IRQ_NONE;
7910
7911	wptr = cik_get_ih_wptr(rdev);
7912
7913restart_ih:
7914	/* is somebody else already processing irqs? */
7915	if (atomic_xchg(&rdev->ih.lock, 1))
7916		return IRQ_NONE;
7917
7918	rptr = rdev->ih.rptr;
7919	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7920
7921	/* Order reading of wptr vs. reading of IH ring data */
7922	rmb();
7923
7924	/* display interrupts */
7925	cik_irq_ack(rdev);
7926
7927	while (rptr != wptr) {
7928		/* wptr/rptr are in bytes! */
7929		ring_index = rptr / 4;
7930
7931		radeon_kfd_interrupt(rdev,
7932				(const void *) &rdev->ih.ring[ring_index]);
7933
7934		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7935		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7936		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7937
7938		switch (src_id) {
7939		case 1: /* D1 vblank/vline */
7940			switch (src_data) {
7941			case 0: /* D1 vblank */
7942				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7943					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7944
7945				if (rdev->irq.crtc_vblank_int[0]) {
7946					drm_handle_vblank(rdev->ddev, 0);
7947					rdev->pm.vblank_sync = true;
7948					wake_up(&rdev->irq.vblank_queue);
7949				}
7950				if (atomic_read(&rdev->irq.pflip[0]))
7951					radeon_crtc_handle_vblank(rdev, 0);
7952				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7953				DRM_DEBUG("IH: D1 vblank\n");
7954
7955				break;
7956			case 1: /* D1 vline */
7957				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7958					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7959
7960				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7961				DRM_DEBUG("IH: D1 vline\n");
7962
7963				break;
7964			default:
7965				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7966				break;
7967			}
7968			break;
7969		case 2: /* D2 vblank/vline */
7970			switch (src_data) {
7971			case 0: /* D2 vblank */
7972				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7973					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7974
7975				if (rdev->irq.crtc_vblank_int[1]) {
7976					drm_handle_vblank(rdev->ddev, 1);
7977					rdev->pm.vblank_sync = true;
7978					wake_up(&rdev->irq.vblank_queue);
7979				}
7980				if (atomic_read(&rdev->irq.pflip[1]))
7981					radeon_crtc_handle_vblank(rdev, 1);
7982				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7983				DRM_DEBUG("IH: D2 vblank\n");
7984
7985				break;
7986			case 1: /* D2 vline */
7987				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7988					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7989
7990				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7991				DRM_DEBUG("IH: D2 vline\n");
7992
7993				break;
7994			default:
7995				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7996				break;
7997			}
7998			break;
7999		case 3: /* D3 vblank/vline */
8000			switch (src_data) {
8001			case 0: /* D3 vblank */
8002				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8003					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8004
8005				if (rdev->irq.crtc_vblank_int[2]) {
8006					drm_handle_vblank(rdev->ddev, 2);
8007					rdev->pm.vblank_sync = true;
8008					wake_up(&rdev->irq.vblank_queue);
8009				}
8010				if (atomic_read(&rdev->irq.pflip[2]))
8011					radeon_crtc_handle_vblank(rdev, 2);
8012				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8013				DRM_DEBUG("IH: D3 vblank\n");
8014
8015				break;
8016			case 1: /* D3 vline */
8017				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8018					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8019
8020				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8021				DRM_DEBUG("IH: D3 vline\n");
8022
8023				break;
8024			default:
8025				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8026				break;
8027			}
8028			break;
8029		case 4: /* D4 vblank/vline */
8030			switch (src_data) {
8031			case 0: /* D4 vblank */
8032				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8033					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8034
8035				if (rdev->irq.crtc_vblank_int[3]) {
8036					drm_handle_vblank(rdev->ddev, 3);
8037					rdev->pm.vblank_sync = true;
8038					wake_up(&rdev->irq.vblank_queue);
8039				}
8040				if (atomic_read(&rdev->irq.pflip[3]))
8041					radeon_crtc_handle_vblank(rdev, 3);
8042				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8043				DRM_DEBUG("IH: D4 vblank\n");
8044
8045				break;
8046			case 1: /* D4 vline */
8047				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8048					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8049
8050				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8051				DRM_DEBUG("IH: D4 vline\n");
8052
8053				break;
8054			default:
8055				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8056				break;
8057			}
8058			break;
8059		case 5: /* D5 vblank/vline */
8060			switch (src_data) {
8061			case 0: /* D5 vblank */
8062				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8063					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8064
8065				if (rdev->irq.crtc_vblank_int[4]) {
8066					drm_handle_vblank(rdev->ddev, 4);
8067					rdev->pm.vblank_sync = true;
8068					wake_up(&rdev->irq.vblank_queue);
8069				}
8070				if (atomic_read(&rdev->irq.pflip[4]))
8071					radeon_crtc_handle_vblank(rdev, 4);
8072				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8073				DRM_DEBUG("IH: D5 vblank\n");
8074
8075				break;
8076			case 1: /* D5 vline */
8077				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8078					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8079
8080				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8081				DRM_DEBUG("IH: D5 vline\n");
8082
8083				break;
8084			default:
8085				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8086				break;
8087			}
8088			break;
8089		case 6: /* D6 vblank/vline */
8090			switch (src_data) {
8091			case 0: /* D6 vblank */
8092				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8093					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8094
8095				if (rdev->irq.crtc_vblank_int[5]) {
8096					drm_handle_vblank(rdev->ddev, 5);
8097					rdev->pm.vblank_sync = true;
8098					wake_up(&rdev->irq.vblank_queue);
8099				}
8100				if (atomic_read(&rdev->irq.pflip[5]))
8101					radeon_crtc_handle_vblank(rdev, 5);
8102				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8103				DRM_DEBUG("IH: D6 vblank\n");
8104
8105				break;
8106			case 1: /* D6 vline */
8107				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8108					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8109
8110				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8111				DRM_DEBUG("IH: D6 vline\n");
8112
8113				break;
8114			default:
8115				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8116				break;
8117			}
8118			break;
8119		case 8: /* D1 page flip */
8120		case 10: /* D2 page flip */
8121		case 12: /* D3 page flip */
8122		case 14: /* D4 page flip */
8123		case 16: /* D5 page flip */
8124		case 18: /* D6 page flip */
8125			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8126			if (radeon_use_pflipirq > 0)
8127				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8128			break;
8129		case 42: /* HPD hotplug */
8130			switch (src_data) {
8131			case 0:
8132				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8133					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8134
8135				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8136				queue_hotplug = true;
8137				DRM_DEBUG("IH: HPD1\n");
8138
8139				break;
8140			case 1:
8141				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8142					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8143
8144				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8145				queue_hotplug = true;
8146				DRM_DEBUG("IH: HPD2\n");
8147
8148				break;
8149			case 2:
8150				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8151					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8152
8153				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8154				queue_hotplug = true;
8155				DRM_DEBUG("IH: HPD3\n");
8156
8157				break;
8158			case 3:
8159				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8160					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8161
8162				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8163				queue_hotplug = true;
8164				DRM_DEBUG("IH: HPD4\n");
8165
8166				break;
8167			case 4:
8168				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8169					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8170
8171				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8172				queue_hotplug = true;
8173				DRM_DEBUG("IH: HPD5\n");
8174
8175				break;
8176			case 5:
8177				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8178					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8179
8180				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8181				queue_hotplug = true;
8182				DRM_DEBUG("IH: HPD6\n");
8183
8184				break;
8185			case 6:
8186				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8187					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8188
8189				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8190				queue_dp = true;
8191				DRM_DEBUG("IH: HPD_RX 1\n");
8192
8193				break;
8194			case 7:
8195				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8196					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8197
8198				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8199				queue_dp = true;
8200				DRM_DEBUG("IH: HPD_RX 2\n");
8201
8202				break;
8203			case 8:
8204				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8205					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8206
8207				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8208				queue_dp = true;
8209				DRM_DEBUG("IH: HPD_RX 3\n");
8210
8211				break;
8212			case 9:
8213				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8214					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8215
8216				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8217				queue_dp = true;
8218				DRM_DEBUG("IH: HPD_RX 4\n");
8219
8220				break;
8221			case 10:
8222				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8223					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8224
8225				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8226				queue_dp = true;
8227				DRM_DEBUG("IH: HPD_RX 5\n");
8228
8229				break;
8230			case 11:
8231				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8232					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8233
8234				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8235				queue_dp = true;
8236				DRM_DEBUG("IH: HPD_RX 6\n");
8237
8238				break;
8239			default:
8240				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8241				break;
8242			}
8243			break;
8244		case 96:
8245			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8246			WREG32(SRBM_INT_ACK, 0x1);
8247			break;
8248		case 124: /* UVD */
8249			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8250			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8251			break;
8252		case 146:
8253		case 147:
8254			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8255			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8256			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8257			/* reset addr and status */
8258			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8259			if (addr == 0x0 && status == 0x0)
8260				break;
8261			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8262			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8263				addr);
8264			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8265				status);
8266			cik_vm_decode_fault(rdev, status, addr, mc_client);
8267			break;
8268		case 167: /* VCE */
8269			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8270			switch (src_data) {
8271			case 0:
8272				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8273				break;
8274			case 1:
8275				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8276				break;
8277			default:
8278				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8279				break;
8280			}
8281			break;
8282		case 176: /* GFX RB CP_INT */
8283		case 177: /* GFX IB CP_INT */
8284			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8285			break;
8286		case 181: /* CP EOP event */
8287			DRM_DEBUG("IH: CP EOP\n");
8288			/* XXX check the bitfield order! */
8289			me_id = (ring_id & 0x60) >> 5;
8290			pipe_id = (ring_id & 0x18) >> 3;
8291			queue_id = (ring_id & 0x7) >> 0;
8292			switch (me_id) {
8293			case 0:
8294				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8295				break;
8296			case 1:
8297			case 2:
8298				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8299					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8300				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8301					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8302				break;
8303			}
8304			break;
8305		case 184: /* CP Privileged reg access */
8306			DRM_ERROR("Illegal register access in command stream\n");
8307			/* XXX check the bitfield order! */
8308			me_id = (ring_id & 0x60) >> 5;
8309			pipe_id = (ring_id & 0x18) >> 3;
8310			queue_id = (ring_id & 0x7) >> 0;
8311			switch (me_id) {
8312			case 0:
8313				/* This results in a full GPU reset, but all we need to do is soft
8314				 * reset the CP for gfx
8315				 */
8316				queue_reset = true;
8317				break;
8318			case 1:
8319				/* XXX compute */
8320				queue_reset = true;
8321				break;
8322			case 2:
8323				/* XXX compute */
8324				queue_reset = true;
8325				break;
8326			}
8327			break;
8328		case 185: /* CP Privileged inst */
8329			DRM_ERROR("Illegal instruction in command stream\n");
8330			/* XXX check the bitfield order! */
8331			me_id = (ring_id & 0x60) >> 5;
8332			pipe_id = (ring_id & 0x18) >> 3;
8333			queue_id = (ring_id & 0x7) >> 0;
8334			switch (me_id) {
8335			case 0:
8336				/* This results in a full GPU reset, but all we need to do is soft
8337				 * reset the CP for gfx
8338				 */
8339				queue_reset = true;
8340				break;
8341			case 1:
8342				/* XXX compute */
8343				queue_reset = true;
8344				break;
8345			case 2:
8346				/* XXX compute */
8347				queue_reset = true;
8348				break;
8349			}
8350			break;
8351		case 224: /* SDMA trap event */
8352			/* XXX check the bitfield order! */
8353			me_id = (ring_id & 0x3) >> 0;
8354			queue_id = (ring_id & 0xc) >> 2;
8355			DRM_DEBUG("IH: SDMA trap\n");
8356			switch (me_id) {
8357			case 0:
8358				switch (queue_id) {
8359				case 0:
8360					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8361					break;
8362				case 1:
8363					/* XXX compute */
8364					break;
8365				case 2:
8366					/* XXX compute */
8367					break;
8368				}
8369				break;
8370			case 1:
8371				switch (queue_id) {
8372				case 0:
8373					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8374					break;
8375				case 1:
8376					/* XXX compute */
8377					break;
8378				case 2:
8379					/* XXX compute */
8380					break;
8381				}
8382				break;
8383			}
8384			break;
8385		case 230: /* thermal low to high */
8386			DRM_DEBUG("IH: thermal low to high\n");
8387			rdev->pm.dpm.thermal.high_to_low = false;
8388			queue_thermal = true;
8389			break;
8390		case 231: /* thermal high to low */
8391			DRM_DEBUG("IH: thermal high to low\n");
8392			rdev->pm.dpm.thermal.high_to_low = true;
8393			queue_thermal = true;
8394			break;
8395		case 233: /* GUI IDLE */
8396			DRM_DEBUG("IH: GUI idle\n");
8397			break;
8398		case 241: /* SDMA Privileged inst */
8399		case 247: /* SDMA Privileged inst */
8400			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8401			/* XXX check the bitfield order! */
8402			me_id = (ring_id & 0x3) >> 0;
8403			queue_id = (ring_id & 0xc) >> 2;
8404			switch (me_id) {
8405			case 0:
8406				switch (queue_id) {
8407				case 0:
8408					queue_reset = true;
8409					break;
8410				case 1:
8411					/* XXX compute */
8412					queue_reset = true;
8413					break;
8414				case 2:
8415					/* XXX compute */
8416					queue_reset = true;
8417					break;
8418				}
8419				break;
8420			case 1:
8421				switch (queue_id) {
8422				case 0:
8423					queue_reset = true;
8424					break;
8425				case 1:
8426					/* XXX compute */
8427					queue_reset = true;
8428					break;
8429				case 2:
8430					/* XXX compute */
8431					queue_reset = true;
8432					break;
8433				}
8434				break;
8435			}
8436			break;
8437		default:
8438			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8439			break;
8440		}
8441
8442		/* wptr/rptr are in bytes! */
8443		rptr += 16;
8444		rptr &= rdev->ih.ptr_mask;
8445		WREG32(IH_RB_RPTR, rptr);
8446	}
8447	if (queue_dp)
8448		schedule_work(&rdev->dp_work);
8449	if (queue_hotplug)
8450		schedule_work(&rdev->hotplug_work);
8451	if (queue_reset) {
8452		rdev->needs_reset = true;
8453		wake_up_all(&rdev->fence_queue);
8454	}
8455	if (queue_thermal)
8456		schedule_work(&rdev->pm.dpm.thermal.work);
8457	rdev->ih.rptr = rptr;
8458	atomic_set(&rdev->ih.lock, 0);
8459
8460	/* make sure wptr hasn't changed while processing */
8461	wptr = cik_get_ih_wptr(rdev);
8462	if (wptr != rptr)
8463		goto restart_ih;
8464
8465	return IRQ_HANDLED;
8466}
8467
8468/*
8469 * startup/shutdown callbacks
8470 */
8471/**
8472 * cik_startup - program the asic to a functional state
8473 *
8474 * @rdev: radeon_device pointer
8475 *
8476 * Programs the asic to a functional state (CIK).
8477 * Called by cik_init() and cik_resume().
8478 * Returns 0 for success, error for failure.
8479 */
8480static int cik_startup(struct radeon_device *rdev)
8481{
8482	struct radeon_ring *ring;
8483	u32 nop;
8484	int r;
8485
8486	/* enable pcie gen2/3 link */
8487	cik_pcie_gen3_enable(rdev);
8488	/* enable aspm */
8489	cik_program_aspm(rdev);
8490
8491	/* scratch needs to be initialized before MC */
8492	r = r600_vram_scratch_init(rdev);
8493	if (r)
8494		return r;
8495
8496	cik_mc_program(rdev);
8497
8498	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8499		r = ci_mc_load_microcode(rdev);
8500		if (r) {
8501			DRM_ERROR("Failed to load MC firmware!\n");
8502			return r;
8503		}
8504	}
8505
8506	r = cik_pcie_gart_enable(rdev);
8507	if (r)
8508		return r;
8509	cik_gpu_init(rdev);
8510
8511	/* allocate rlc buffers */
8512	if (rdev->flags & RADEON_IS_IGP) {
8513		if (rdev->family == CHIP_KAVERI) {
8514			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8515			rdev->rlc.reg_list_size =
8516				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8517		} else {
8518			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8519			rdev->rlc.reg_list_size =
8520				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8521		}
8522	}
8523	rdev->rlc.cs_data = ci_cs_data;
8524	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8525	r = sumo_rlc_init(rdev);
8526	if (r) {
8527		DRM_ERROR("Failed to init rlc BOs!\n");
8528		return r;
8529	}
8530
8531	/* allocate wb buffer */
8532	r = radeon_wb_init(rdev);
8533	if (r)
8534		return r;
8535
8536	/* allocate mec buffers */
8537	r = cik_mec_init(rdev);
8538	if (r) {
8539		DRM_ERROR("Failed to init MEC BOs!\n");
8540		return r;
8541	}
8542
8543	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8544	if (r) {
8545		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8546		return r;
8547	}
8548
8549	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8550	if (r) {
8551		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8552		return r;
8553	}
8554
8555	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8556	if (r) {
8557		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8558		return r;
8559	}
8560
8561	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8562	if (r) {
8563		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8564		return r;
8565	}
8566
8567	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8568	if (r) {
8569		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8570		return r;
8571	}
8572
8573	r = radeon_uvd_resume(rdev);
8574	if (!r) {
8575		r = uvd_v4_2_resume(rdev);
8576		if (!r) {
8577			r = radeon_fence_driver_start_ring(rdev,
8578							   R600_RING_TYPE_UVD_INDEX);
8579			if (r)
8580				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8581		}
8582	}
8583	if (r)
8584		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8585
8586	r = radeon_vce_resume(rdev);
8587	if (!r) {
8588		r = vce_v2_0_resume(rdev);
8589		if (!r)
8590			r = radeon_fence_driver_start_ring(rdev,
8591							   TN_RING_TYPE_VCE1_INDEX);
8592		if (!r)
8593			r = radeon_fence_driver_start_ring(rdev,
8594							   TN_RING_TYPE_VCE2_INDEX);
8595	}
8596	if (r) {
8597		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8598		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8599		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8600	}
8601
8602	/* Enable IRQ */
8603	if (!rdev->irq.installed) {
8604		r = radeon_irq_kms_init(rdev);
8605		if (r)
8606			return r;
8607	}
8608
8609	r = cik_irq_init(rdev);
8610	if (r) {
8611		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8612		radeon_irq_kms_fini(rdev);
8613		return r;
8614	}
8615	cik_irq_set(rdev);
8616
8617	if (rdev->family == CHIP_HAWAII) {
8618		if (rdev->new_fw)
8619			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8620		else
8621			nop = RADEON_CP_PACKET2;
8622	} else {
8623		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8624	}
8625
8626	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8627	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8628			     nop);
8629	if (r)
8630		return r;
8631
8632	/* set up the compute queues */
8633	/* type-2 packets are deprecated on MEC, use type-3 instead */
8634	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8635	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8636			     nop);
8637	if (r)
8638		return r;
8639	ring->me = 1; /* first MEC */
8640	ring->pipe = 0; /* first pipe */
8641	ring->queue = 0; /* first queue */
8642	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8643
8644	/* type-2 packets are deprecated on MEC, use type-3 instead */
8645	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8646	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8647			     nop);
8648	if (r)
8649		return r;
8650	/* dGPU only have 1 MEC */
8651	ring->me = 1; /* first MEC */
8652	ring->pipe = 0; /* first pipe */
8653	ring->queue = 1; /* second queue */
8654	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8655
8656	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8657	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8658			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8659	if (r)
8660		return r;
8661
8662	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8663	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8664			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8665	if (r)
8666		return r;
8667
8668	r = cik_cp_resume(rdev);
8669	if (r)
8670		return r;
8671
8672	r = cik_sdma_resume(rdev);
8673	if (r)
8674		return r;
8675
8676	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8677	if (ring->ring_size) {
8678		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8679				     RADEON_CP_PACKET2);
8680		if (!r)
8681			r = uvd_v1_0_init(rdev);
8682		if (r)
8683			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8684	}
8685
8686	r = -ENOENT;
8687
8688	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8689	if (ring->ring_size)
8690		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8691				     VCE_CMD_NO_OP);
8692
8693	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8694	if (ring->ring_size)
8695		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8696				     VCE_CMD_NO_OP);
8697
8698	if (!r)
8699		r = vce_v1_0_init(rdev);
8700	else if (r != -ENOENT)
8701		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8702
8703	r = radeon_ib_pool_init(rdev);
8704	if (r) {
8705		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8706		return r;
8707	}
8708
8709	r = radeon_vm_manager_init(rdev);
8710	if (r) {
8711		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8712		return r;
8713	}
8714
8715	r = radeon_audio_init(rdev);
8716	if (r)
8717		return r;
8718
8719	r = radeon_kfd_resume(rdev);
8720	if (r)
8721		return r;
8722
8723	return 0;
8724}
8725
8726/**
8727 * cik_resume - resume the asic to a functional state
8728 *
8729 * @rdev: radeon_device pointer
8730 *
8731 * Programs the asic to a functional state (CIK).
8732 * Called at resume.
8733 * Returns 0 for success, error for failure.
8734 */
8735int cik_resume(struct radeon_device *rdev)
8736{
8737	int r;
8738
8739	/* post card */
8740	atom_asic_init(rdev->mode_info.atom_context);
8741
8742	/* init golden registers */
8743	cik_init_golden_registers(rdev);
8744
8745	if (rdev->pm.pm_method == PM_METHOD_DPM)
8746		radeon_pm_resume(rdev);
8747
8748	rdev->accel_working = true;
8749	r = cik_startup(rdev);
8750	if (r) {
8751		DRM_ERROR("cik startup failed on resume\n");
8752		rdev->accel_working = false;
8753		return r;
8754	}
8755
8756	return r;
8757
8758}
8759
8760/**
8761 * cik_suspend - suspend the asic
8762 *
8763 * @rdev: radeon_device pointer
8764 *
8765 * Bring the chip into a state suitable for suspend (CIK).
8766 * Called at suspend.
8767 * Returns 0 for success.
8768 */
8769int cik_suspend(struct radeon_device *rdev)
8770{
8771	radeon_kfd_suspend(rdev);
8772	radeon_pm_suspend(rdev);
8773	radeon_audio_fini(rdev);
8774	radeon_vm_manager_fini(rdev);
8775	cik_cp_enable(rdev, false);
8776	cik_sdma_enable(rdev, false);
8777	uvd_v1_0_fini(rdev);
8778	radeon_uvd_suspend(rdev);
8779	radeon_vce_suspend(rdev);
8780	cik_fini_pg(rdev);
8781	cik_fini_cg(rdev);
8782	cik_irq_suspend(rdev);
8783	radeon_wb_disable(rdev);
8784	cik_pcie_gart_disable(rdev);
8785	return 0;
8786}
8787
8788/* Plan is to move initialization in that function and use
8789 * helper function so that radeon_device_init pretty much
8790 * do nothing more than calling asic specific function. This
8791 * should also allow to remove a bunch of callback function
8792 * like vram_info.
8793 */
8794/**
8795 * cik_init - asic specific driver and hw init
8796 *
8797 * @rdev: radeon_device pointer
8798 *
8799 * Setup asic specific driver variables and program the hw
8800 * to a functional state (CIK).
8801 * Called at driver startup.
8802 * Returns 0 for success, errors for failure.
8803 */
8804int cik_init(struct radeon_device *rdev)
8805{
8806	struct radeon_ring *ring;
8807	int r;
8808
8809	/* Read BIOS */
8810	if (!radeon_get_bios(rdev)) {
8811		if (ASIC_IS_AVIVO(rdev))
8812			return -EINVAL;
8813	}
8814	/* Must be an ATOMBIOS */
8815	if (!rdev->is_atom_bios) {
8816		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8817		return -EINVAL;
8818	}
8819	r = radeon_atombios_init(rdev);
8820	if (r)
8821		return r;
8822
8823	/* Post card if necessary */
8824	if (!radeon_card_posted(rdev)) {
8825		if (!rdev->bios) {
8826			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8827			return -EINVAL;
8828		}
8829		DRM_INFO("GPU not posted. posting now...\n");
8830		atom_asic_init(rdev->mode_info.atom_context);
8831	}
8832	/* init golden registers */
8833	cik_init_golden_registers(rdev);
8834	/* Initialize scratch registers */
8835	cik_scratch_init(rdev);
8836	/* Initialize surface registers */
8837	radeon_surface_init(rdev);
8838	/* Initialize clocks */
8839	radeon_get_clock_info(rdev->ddev);
8840
8841	/* Fence driver */
8842	r = radeon_fence_driver_init(rdev);
8843	if (r)
8844		return r;
8845
8846	/* initialize memory controller */
8847	r = cik_mc_init(rdev);
8848	if (r)
8849		return r;
8850	/* Memory manager */
8851	r = radeon_bo_init(rdev);
8852	if (r)
8853		return r;
8854
8855	if (rdev->flags & RADEON_IS_IGP) {
8856		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8857		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8858			r = cik_init_microcode(rdev);
8859			if (r) {
8860				DRM_ERROR("Failed to load firmware!\n");
8861				return r;
8862			}
8863		}
8864	} else {
8865		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8866		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8867		    !rdev->mc_fw) {
8868			r = cik_init_microcode(rdev);
8869			if (r) {
8870				DRM_ERROR("Failed to load firmware!\n");
8871				return r;
8872			}
8873		}
8874	}
8875
8876	/* Initialize power management */
8877	radeon_pm_init(rdev);
8878
8879	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8880	ring->ring_obj = NULL;
8881	r600_ring_init(rdev, ring, 1024 * 1024);
8882
8883	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8884	ring->ring_obj = NULL;
8885	r600_ring_init(rdev, ring, 1024 * 1024);
8886	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8887	if (r)
8888		return r;
8889
8890	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8891	ring->ring_obj = NULL;
8892	r600_ring_init(rdev, ring, 1024 * 1024);
8893	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8894	if (r)
8895		return r;
8896
8897	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8898	ring->ring_obj = NULL;
8899	r600_ring_init(rdev, ring, 256 * 1024);
8900
8901	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8902	ring->ring_obj = NULL;
8903	r600_ring_init(rdev, ring, 256 * 1024);
8904
8905	r = radeon_uvd_init(rdev);
8906	if (!r) {
8907		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8908		ring->ring_obj = NULL;
8909		r600_ring_init(rdev, ring, 4096);
8910	}
8911
8912	r = radeon_vce_init(rdev);
8913	if (!r) {
8914		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8915		ring->ring_obj = NULL;
8916		r600_ring_init(rdev, ring, 4096);
8917
8918		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8919		ring->ring_obj = NULL;
8920		r600_ring_init(rdev, ring, 4096);
8921	}
8922
8923	rdev->ih.ring_obj = NULL;
8924	r600_ih_ring_init(rdev, 64 * 1024);
8925
8926	r = r600_pcie_gart_init(rdev);
8927	if (r)
8928		return r;
8929
8930	rdev->accel_working = true;
8931	r = cik_startup(rdev);
8932	if (r) {
8933		dev_err(rdev->dev, "disabling GPU acceleration\n");
8934		cik_cp_fini(rdev);
8935		cik_sdma_fini(rdev);
8936		cik_irq_fini(rdev);
8937		sumo_rlc_fini(rdev);
8938		cik_mec_fini(rdev);
8939		radeon_wb_fini(rdev);
8940		radeon_ib_pool_fini(rdev);
8941		radeon_vm_manager_fini(rdev);
8942		radeon_irq_kms_fini(rdev);
8943		cik_pcie_gart_fini(rdev);
8944		rdev->accel_working = false;
8945	}
8946
8947	/* Don't start up if the MC ucode is missing.
8948	 * The default clocks and voltages before the MC ucode
8949	 * is loaded are not suffient for advanced operations.
8950	 */
8951	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8952		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8953		return -EINVAL;
8954	}
8955
8956	return 0;
8957}
8958
8959/**
8960 * cik_fini - asic specific driver and hw fini
8961 *
8962 * @rdev: radeon_device pointer
8963 *
8964 * Tear down the asic specific driver variables and program the hw
8965 * to an idle state (CIK).
8966 * Called at driver unload.
8967 */
8968void cik_fini(struct radeon_device *rdev)
8969{
8970	radeon_pm_fini(rdev);
8971	cik_cp_fini(rdev);
8972	cik_sdma_fini(rdev);
8973	cik_fini_pg(rdev);
8974	cik_fini_cg(rdev);
8975	cik_irq_fini(rdev);
8976	sumo_rlc_fini(rdev);
8977	cik_mec_fini(rdev);
8978	radeon_wb_fini(rdev);
8979	radeon_vm_manager_fini(rdev);
8980	radeon_ib_pool_fini(rdev);
8981	radeon_irq_kms_fini(rdev);
8982	uvd_v1_0_fini(rdev);
8983	radeon_uvd_fini(rdev);
8984	radeon_vce_fini(rdev);
8985	cik_pcie_gart_fini(rdev);
8986	r600_vram_scratch_fini(rdev);
8987	radeon_gem_fini(rdev);
8988	radeon_fence_driver_fini(rdev);
8989	radeon_bo_fini(rdev);
8990	radeon_atombios_fini(rdev);
8991	kfree(rdev->bios);
8992	rdev->bios = NULL;
8993}
8994
8995void dce8_program_fmt(struct drm_encoder *encoder)
8996{
8997	struct drm_device *dev = encoder->dev;
8998	struct radeon_device *rdev = dev->dev_private;
8999	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9000	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9001	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9002	int bpc = 0;
9003	u32 tmp = 0;
9004	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9005
9006	if (connector) {
9007		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9008		bpc = radeon_get_monitor_bpc(connector);
9009		dither = radeon_connector->dither;
9010	}
9011
9012	/* LVDS/eDP FMT is set up by atom */
9013	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9014		return;
9015
9016	/* not needed for analog */
9017	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9018	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9019		return;
9020
9021	if (bpc == 0)
9022		return;
9023
9024	switch (bpc) {
9025	case 6:
9026		if (dither == RADEON_FMT_DITHER_ENABLE)
9027			/* XXX sort out optimal dither settings */
9028			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9029				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9030		else
9031			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9032		break;
9033	case 8:
9034		if (dither == RADEON_FMT_DITHER_ENABLE)
9035			/* XXX sort out optimal dither settings */
9036			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9037				FMT_RGB_RANDOM_ENABLE |
9038				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9039		else
9040			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9041		break;
9042	case 10:
9043		if (dither == RADEON_FMT_DITHER_ENABLE)
9044			/* XXX sort out optimal dither settings */
9045			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9046				FMT_RGB_RANDOM_ENABLE |
9047				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9048		else
9049			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9050		break;
9051	default:
9052		/* not needed */
9053		break;
9054	}
9055
9056	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9057}
9058
9059/* display watermark setup */
9060/**
9061 * dce8_line_buffer_adjust - Set up the line buffer
9062 *
9063 * @rdev: radeon_device pointer
9064 * @radeon_crtc: the selected display controller
9065 * @mode: the current display mode on the selected display
9066 * controller
9067 *
9068 * Setup up the line buffer allocation for
9069 * the selected display controller (CIK).
9070 * Returns the line buffer size in pixels.
9071 */
9072static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9073				   struct radeon_crtc *radeon_crtc,
9074				   struct drm_display_mode *mode)
9075{
9076	u32 tmp, buffer_alloc, i;
9077	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9078	/*
9079	 * Line Buffer Setup
9080	 * There are 6 line buffers, one for each display controllers.
9081	 * There are 3 partitions per LB. Select the number of partitions
9082	 * to enable based on the display width.  For display widths larger
9083	 * than 4096, you need use to use 2 display controllers and combine
9084	 * them using the stereo blender.
9085	 */
9086	if (radeon_crtc->base.enabled && mode) {
9087		if (mode->crtc_hdisplay < 1920) {
9088			tmp = 1;
9089			buffer_alloc = 2;
9090		} else if (mode->crtc_hdisplay < 2560) {
9091			tmp = 2;
9092			buffer_alloc = 2;
9093		} else if (mode->crtc_hdisplay < 4096) {
9094			tmp = 0;
9095			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9096		} else {
9097			DRM_DEBUG_KMS("Mode too big for LB!\n");
9098			tmp = 0;
9099			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9100		}
9101	} else {
9102		tmp = 1;
9103		buffer_alloc = 0;
9104	}
9105
9106	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9107	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9108
9109	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9110	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9111	for (i = 0; i < rdev->usec_timeout; i++) {
9112		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9113		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9114			break;
9115		udelay(1);
9116	}
9117
9118	if (radeon_crtc->base.enabled && mode) {
9119		switch (tmp) {
9120		case 0:
9121		default:
9122			return 4096 * 2;
9123		case 1:
9124			return 1920 * 2;
9125		case 2:
9126			return 2560 * 2;
9127		}
9128	}
9129
9130	/* controller not enabled, so no lb used */
9131	return 0;
9132}
9133
9134/**
9135 * cik_get_number_of_dram_channels - get the number of dram channels
9136 *
9137 * @rdev: radeon_device pointer
9138 *
9139 * Look up the number of video ram channels (CIK).
9140 * Used for display watermark bandwidth calculations
9141 * Returns the number of dram channels
9142 */
9143static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9144{
9145	u32 tmp = RREG32(MC_SHARED_CHMAP);
9146
9147	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9148	case 0:
9149	default:
9150		return 1;
9151	case 1:
9152		return 2;
9153	case 2:
9154		return 4;
9155	case 3:
9156		return 8;
9157	case 4:
9158		return 3;
9159	case 5:
9160		return 6;
9161	case 6:
9162		return 10;
9163	case 7:
9164		return 12;
9165	case 8:
9166		return 16;
9167	}
9168}
9169
9170struct dce8_wm_params {
9171	u32 dram_channels; /* number of dram channels */
9172	u32 yclk;          /* bandwidth per dram data pin in kHz */
9173	u32 sclk;          /* engine clock in kHz */
9174	u32 disp_clk;      /* display clock in kHz */
9175	u32 src_width;     /* viewport width */
9176	u32 active_time;   /* active display time in ns */
9177	u32 blank_time;    /* blank time in ns */
9178	bool interlaced;    /* mode is interlaced */
9179	fixed20_12 vsc;    /* vertical scale ratio */
9180	u32 num_heads;     /* number of active crtcs */
9181	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9182	u32 lb_size;       /* line buffer allocated to pipe */
9183	u32 vtaps;         /* vertical scaler taps */
9184};
9185
9186/**
9187 * dce8_dram_bandwidth - get the dram bandwidth
9188 *
9189 * @wm: watermark calculation data
9190 *
9191 * Calculate the raw dram bandwidth (CIK).
9192 * Used for display watermark bandwidth calculations
9193 * Returns the dram bandwidth in MBytes/s
9194 */
9195static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9196{
9197	/* Calculate raw DRAM Bandwidth */
9198	fixed20_12 dram_efficiency; /* 0.7 */
9199	fixed20_12 yclk, dram_channels, bandwidth;
9200	fixed20_12 a;
9201
9202	a.full = dfixed_const(1000);
9203	yclk.full = dfixed_const(wm->yclk);
9204	yclk.full = dfixed_div(yclk, a);
9205	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9206	a.full = dfixed_const(10);
9207	dram_efficiency.full = dfixed_const(7);
9208	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9209	bandwidth.full = dfixed_mul(dram_channels, yclk);
9210	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9211
9212	return dfixed_trunc(bandwidth);
9213}
9214
9215/**
9216 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9217 *
9218 * @wm: watermark calculation data
9219 *
9220 * Calculate the dram bandwidth used for display (CIK).
9221 * Used for display watermark bandwidth calculations
9222 * Returns the dram bandwidth for display in MBytes/s
9223 */
9224static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9225{
9226	/* Calculate DRAM Bandwidth and the part allocated to display. */
9227	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9228	fixed20_12 yclk, dram_channels, bandwidth;
9229	fixed20_12 a;
9230
9231	a.full = dfixed_const(1000);
9232	yclk.full = dfixed_const(wm->yclk);
9233	yclk.full = dfixed_div(yclk, a);
9234	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9235	a.full = dfixed_const(10);
9236	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9237	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9238	bandwidth.full = dfixed_mul(dram_channels, yclk);
9239	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9240
9241	return dfixed_trunc(bandwidth);
9242}
9243
9244/**
9245 * dce8_data_return_bandwidth - get the data return bandwidth
9246 *
9247 * @wm: watermark calculation data
9248 *
9249 * Calculate the data return bandwidth used for display (CIK).
9250 * Used for display watermark bandwidth calculations
9251 * Returns the data return bandwidth in MBytes/s
9252 */
9253static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9254{
9255	/* Calculate the display Data return Bandwidth */
9256	fixed20_12 return_efficiency; /* 0.8 */
9257	fixed20_12 sclk, bandwidth;
9258	fixed20_12 a;
9259
9260	a.full = dfixed_const(1000);
9261	sclk.full = dfixed_const(wm->sclk);
9262	sclk.full = dfixed_div(sclk, a);
9263	a.full = dfixed_const(10);
9264	return_efficiency.full = dfixed_const(8);
9265	return_efficiency.full = dfixed_div(return_efficiency, a);
9266	a.full = dfixed_const(32);
9267	bandwidth.full = dfixed_mul(a, sclk);
9268	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9269
9270	return dfixed_trunc(bandwidth);
9271}
9272
9273/**
9274 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9275 *
9276 * @wm: watermark calculation data
9277 *
9278 * Calculate the dmif bandwidth used for display (CIK).
9279 * Used for display watermark bandwidth calculations
9280 * Returns the dmif bandwidth in MBytes/s
9281 */
9282static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9283{
9284	/* Calculate the DMIF Request Bandwidth */
9285	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9286	fixed20_12 disp_clk, bandwidth;
9287	fixed20_12 a, b;
9288
9289	a.full = dfixed_const(1000);
9290	disp_clk.full = dfixed_const(wm->disp_clk);
9291	disp_clk.full = dfixed_div(disp_clk, a);
9292	a.full = dfixed_const(32);
9293	b.full = dfixed_mul(a, disp_clk);
9294
9295	a.full = dfixed_const(10);
9296	disp_clk_request_efficiency.full = dfixed_const(8);
9297	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9298
9299	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9300
9301	return dfixed_trunc(bandwidth);
9302}
9303
9304/**
9305 * dce8_available_bandwidth - get the min available bandwidth
9306 *
9307 * @wm: watermark calculation data
9308 *
9309 * Calculate the min available bandwidth used for display (CIK).
9310 * Used for display watermark bandwidth calculations
9311 * Returns the min available bandwidth in MBytes/s
9312 */
9313static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9314{
9315	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9316	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9317	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9318	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9319
9320	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9321}
9322
9323/**
9324 * dce8_average_bandwidth - get the average available bandwidth
9325 *
9326 * @wm: watermark calculation data
9327 *
9328 * Calculate the average available bandwidth used for display (CIK).
9329 * Used for display watermark bandwidth calculations
9330 * Returns the average available bandwidth in MBytes/s
9331 */
9332static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9333{
9334	/* Calculate the display mode Average Bandwidth
9335	 * DisplayMode should contain the source and destination dimensions,
9336	 * timing, etc.
9337	 */
9338	fixed20_12 bpp;
9339	fixed20_12 line_time;
9340	fixed20_12 src_width;
9341	fixed20_12 bandwidth;
9342	fixed20_12 a;
9343
9344	a.full = dfixed_const(1000);
9345	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9346	line_time.full = dfixed_div(line_time, a);
9347	bpp.full = dfixed_const(wm->bytes_per_pixel);
9348	src_width.full = dfixed_const(wm->src_width);
9349	bandwidth.full = dfixed_mul(src_width, bpp);
9350	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9351	bandwidth.full = dfixed_div(bandwidth, line_time);
9352
9353	return dfixed_trunc(bandwidth);
9354}
9355
9356/**
9357 * dce8_latency_watermark - get the latency watermark
9358 *
9359 * @wm: watermark calculation data
9360 *
9361 * Calculate the latency watermark (CIK).
9362 * Used for display watermark bandwidth calculations
9363 * Returns the latency watermark in ns
9364 */
9365static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9366{
9367	/* First calculate the latency in ns */
9368	u32 mc_latency = 2000; /* 2000 ns. */
9369	u32 available_bandwidth = dce8_available_bandwidth(wm);
9370	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9371	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9372	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9373	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9374		(wm->num_heads * cursor_line_pair_return_time);
9375	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9376	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9377	u32 tmp, dmif_size = 12288;
9378	fixed20_12 a, b, c;
9379
9380	if (wm->num_heads == 0)
9381		return 0;
9382
9383	a.full = dfixed_const(2);
9384	b.full = dfixed_const(1);
9385	if ((wm->vsc.full > a.full) ||
9386	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9387	    (wm->vtaps >= 5) ||
9388	    ((wm->vsc.full >= a.full) && wm->interlaced))
9389		max_src_lines_per_dst_line = 4;
9390	else
9391		max_src_lines_per_dst_line = 2;
9392
9393	a.full = dfixed_const(available_bandwidth);
9394	b.full = dfixed_const(wm->num_heads);
9395	a.full = dfixed_div(a, b);
9396
9397	b.full = dfixed_const(mc_latency + 512);
9398	c.full = dfixed_const(wm->disp_clk);
9399	b.full = dfixed_div(b, c);
9400
9401	c.full = dfixed_const(dmif_size);
9402	b.full = dfixed_div(c, b);
9403
9404	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9405
9406	b.full = dfixed_const(1000);
9407	c.full = dfixed_const(wm->disp_clk);
9408	b.full = dfixed_div(c, b);
9409	c.full = dfixed_const(wm->bytes_per_pixel);
9410	b.full = dfixed_mul(b, c);
9411
9412	lb_fill_bw = min(tmp, dfixed_trunc(b));
9413
9414	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9415	b.full = dfixed_const(1000);
9416	c.full = dfixed_const(lb_fill_bw);
9417	b.full = dfixed_div(c, b);
9418	a.full = dfixed_div(a, b);
9419	line_fill_time = dfixed_trunc(a);
9420
9421	if (line_fill_time < wm->active_time)
9422		return latency;
9423	else
9424		return latency + (line_fill_time - wm->active_time);
9425
9426}
9427
9428/**
9429 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9430 * average and available dram bandwidth
9431 *
9432 * @wm: watermark calculation data
9433 *
9434 * Check if the display average bandwidth fits in the display
9435 * dram bandwidth (CIK).
9436 * Used for display watermark bandwidth calculations
9437 * Returns true if the display fits, false if not.
9438 */
9439static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9440{
9441	if (dce8_average_bandwidth(wm) <=
9442	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9443		return true;
9444	else
9445		return false;
9446}
9447
9448/**
9449 * dce8_average_bandwidth_vs_available_bandwidth - check
9450 * average and available bandwidth
9451 *
9452 * @wm: watermark calculation data
9453 *
9454 * Check if the display average bandwidth fits in the display
9455 * available bandwidth (CIK).
9456 * Used for display watermark bandwidth calculations
9457 * Returns true if the display fits, false if not.
9458 */
9459static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9460{
9461	if (dce8_average_bandwidth(wm) <=
9462	    (dce8_available_bandwidth(wm) / wm->num_heads))
9463		return true;
9464	else
9465		return false;
9466}
9467
9468/**
9469 * dce8_check_latency_hiding - check latency hiding
9470 *
9471 * @wm: watermark calculation data
9472 *
9473 * Check latency hiding (CIK).
9474 * Used for display watermark bandwidth calculations
9475 * Returns true if the display fits, false if not.
9476 */
9477static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9478{
9479	u32 lb_partitions = wm->lb_size / wm->src_width;
9480	u32 line_time = wm->active_time + wm->blank_time;
9481	u32 latency_tolerant_lines;
9482	u32 latency_hiding;
9483	fixed20_12 a;
9484
9485	a.full = dfixed_const(1);
9486	if (wm->vsc.full > a.full)
9487		latency_tolerant_lines = 1;
9488	else {
9489		if (lb_partitions <= (wm->vtaps + 1))
9490			latency_tolerant_lines = 1;
9491		else
9492			latency_tolerant_lines = 2;
9493	}
9494
9495	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9496
9497	if (dce8_latency_watermark(wm) <= latency_hiding)
9498		return true;
9499	else
9500		return false;
9501}
9502
9503/**
9504 * dce8_program_watermarks - program display watermarks
9505 *
9506 * @rdev: radeon_device pointer
9507 * @radeon_crtc: the selected display controller
9508 * @lb_size: line buffer size
9509 * @num_heads: number of display controllers in use
9510 *
9511 * Calculate and program the display watermarks for the
9512 * selected display controller (CIK).
9513 */
9514static void dce8_program_watermarks(struct radeon_device *rdev,
9515				    struct radeon_crtc *radeon_crtc,
9516				    u32 lb_size, u32 num_heads)
9517{
9518	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9519	struct dce8_wm_params wm_low, wm_high;
9520	u32 pixel_period;
9521	u32 line_time = 0;
9522	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9523	u32 tmp, wm_mask;
9524
9525	if (radeon_crtc->base.enabled && num_heads && mode) {
9526		pixel_period = 1000000 / (u32)mode->clock;
9527		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9528
9529		/* watermark for high clocks */
9530		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9531		    rdev->pm.dpm_enabled) {
9532			wm_high.yclk =
9533				radeon_dpm_get_mclk(rdev, false) * 10;
9534			wm_high.sclk =
9535				radeon_dpm_get_sclk(rdev, false) * 10;
9536		} else {
9537			wm_high.yclk = rdev->pm.current_mclk * 10;
9538			wm_high.sclk = rdev->pm.current_sclk * 10;
9539		}
9540
9541		wm_high.disp_clk = mode->clock;
9542		wm_high.src_width = mode->crtc_hdisplay;
9543		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9544		wm_high.blank_time = line_time - wm_high.active_time;
9545		wm_high.interlaced = false;
9546		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9547			wm_high.interlaced = true;
9548		wm_high.vsc = radeon_crtc->vsc;
9549		wm_high.vtaps = 1;
9550		if (radeon_crtc->rmx_type != RMX_OFF)
9551			wm_high.vtaps = 2;
9552		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9553		wm_high.lb_size = lb_size;
9554		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9555		wm_high.num_heads = num_heads;
9556
9557		/* set for high clocks */
9558		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9559
9560		/* possibly force display priority to high */
9561		/* should really do this at mode validation time... */
9562		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9563		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9564		    !dce8_check_latency_hiding(&wm_high) ||
9565		    (rdev->disp_priority == 2)) {
9566			DRM_DEBUG_KMS("force priority to high\n");
9567		}
9568
9569		/* watermark for low clocks */
9570		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9571		    rdev->pm.dpm_enabled) {
9572			wm_low.yclk =
9573				radeon_dpm_get_mclk(rdev, true) * 10;
9574			wm_low.sclk =
9575				radeon_dpm_get_sclk(rdev, true) * 10;
9576		} else {
9577			wm_low.yclk = rdev->pm.current_mclk * 10;
9578			wm_low.sclk = rdev->pm.current_sclk * 10;
9579		}
9580
9581		wm_low.disp_clk = mode->clock;
9582		wm_low.src_width = mode->crtc_hdisplay;
9583		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9584		wm_low.blank_time = line_time - wm_low.active_time;
9585		wm_low.interlaced = false;
9586		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9587			wm_low.interlaced = true;
9588		wm_low.vsc = radeon_crtc->vsc;
9589		wm_low.vtaps = 1;
9590		if (radeon_crtc->rmx_type != RMX_OFF)
9591			wm_low.vtaps = 2;
9592		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9593		wm_low.lb_size = lb_size;
9594		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9595		wm_low.num_heads = num_heads;
9596
9597		/* set for low clocks */
9598		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9599
9600		/* possibly force display priority to high */
9601		/* should really do this at mode validation time... */
9602		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9603		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9604		    !dce8_check_latency_hiding(&wm_low) ||
9605		    (rdev->disp_priority == 2)) {
9606			DRM_DEBUG_KMS("force priority to high\n");
9607		}
9608	}
9609
9610	/* select wm A */
9611	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9612	tmp = wm_mask;
9613	tmp &= ~LATENCY_WATERMARK_MASK(3);
9614	tmp |= LATENCY_WATERMARK_MASK(1);
9615	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9616	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9617	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9618		LATENCY_HIGH_WATERMARK(line_time)));
9619	/* select wm B */
9620	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9621	tmp &= ~LATENCY_WATERMARK_MASK(3);
9622	tmp |= LATENCY_WATERMARK_MASK(2);
9623	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9624	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9625	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9626		LATENCY_HIGH_WATERMARK(line_time)));
9627	/* restore original selection */
9628	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9629
9630	/* save values for DPM */
9631	radeon_crtc->line_time = line_time;
9632	radeon_crtc->wm_high = latency_watermark_a;
9633	radeon_crtc->wm_low = latency_watermark_b;
9634}
9635
9636/**
9637 * dce8_bandwidth_update - program display watermarks
9638 *
9639 * @rdev: radeon_device pointer
9640 *
9641 * Calculate and program the display watermarks and line
9642 * buffer allocation (CIK).
9643 */
9644void dce8_bandwidth_update(struct radeon_device *rdev)
9645{
9646	struct drm_display_mode *mode = NULL;
9647	u32 num_heads = 0, lb_size;
9648	int i;
9649
9650	if (!rdev->mode_info.mode_config_initialized)
9651		return;
9652
9653	radeon_update_display_priority(rdev);
9654
9655	for (i = 0; i < rdev->num_crtc; i++) {
9656		if (rdev->mode_info.crtcs[i]->base.enabled)
9657			num_heads++;
9658	}
9659	for (i = 0; i < rdev->num_crtc; i++) {
9660		mode = &rdev->mode_info.crtcs[i]->base.mode;
9661		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9662		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9663	}
9664}
9665
9666/**
9667 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9668 *
9669 * @rdev: radeon_device pointer
9670 *
9671 * Fetches a GPU clock counter snapshot (SI).
9672 * Returns the 64 bit clock counter snapshot.
9673 */
9674uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9675{
9676	uint64_t clock;
9677
9678	mutex_lock(&rdev->gpu_clock_mutex);
9679	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9680	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9681	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9682	mutex_unlock(&rdev->gpu_clock_mutex);
9683	return clock;
9684}
9685
9686static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9687                              u32 cntl_reg, u32 status_reg)
9688{
9689	int r, i;
9690	struct atom_clock_dividers dividers;
9691	uint32_t tmp;
9692
9693	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9694					   clock, false, &dividers);
9695	if (r)
9696		return r;
9697
9698	tmp = RREG32_SMC(cntl_reg);
9699	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9700	tmp |= dividers.post_divider;
9701	WREG32_SMC(cntl_reg, tmp);
9702
9703	for (i = 0; i < 100; i++) {
9704		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9705			break;
9706		mdelay(10);
9707	}
9708	if (i == 100)
9709		return -ETIMEDOUT;
9710
9711	return 0;
9712}
9713
9714int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9715{
9716	int r = 0;
9717
9718	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9719	if (r)
9720		return r;
9721
9722	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9723	return r;
9724}
9725
9726int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9727{
9728	int r, i;
9729	struct atom_clock_dividers dividers;
9730	u32 tmp;
9731
9732	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9733					   ecclk, false, &dividers);
9734	if (r)
9735		return r;
9736
9737	for (i = 0; i < 100; i++) {
9738		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9739			break;
9740		mdelay(10);
9741	}
9742	if (i == 100)
9743		return -ETIMEDOUT;
9744
9745	tmp = RREG32_SMC(CG_ECLK_CNTL);
9746	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9747	tmp |= dividers.post_divider;
9748	WREG32_SMC(CG_ECLK_CNTL, tmp);
9749
9750	for (i = 0; i < 100; i++) {
9751		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9752			break;
9753		mdelay(10);
9754	}
9755	if (i == 100)
9756		return -ETIMEDOUT;
9757
9758	return 0;
9759}
9760
9761static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9762{
9763	struct pci_dev *root = rdev->pdev->bus->self;
9764	int bridge_pos, gpu_pos;
9765	u32 speed_cntl, mask, current_data_rate;
9766	int ret, i;
9767	u16 tmp16;
9768
9769	if (pci_is_root_bus(rdev->pdev->bus))
9770		return;
9771
9772	if (radeon_pcie_gen2 == 0)
9773		return;
9774
9775	if (rdev->flags & RADEON_IS_IGP)
9776		return;
9777
9778	if (!(rdev->flags & RADEON_IS_PCIE))
9779		return;
9780
9781	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9782	if (ret != 0)
9783		return;
9784
9785	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9786		return;
9787
9788	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9789	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9790		LC_CURRENT_DATA_RATE_SHIFT;
9791	if (mask & DRM_PCIE_SPEED_80) {
9792		if (current_data_rate == 2) {
9793			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9794			return;
9795		}
9796		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9797	} else if (mask & DRM_PCIE_SPEED_50) {
9798		if (current_data_rate == 1) {
9799			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9800			return;
9801		}
9802		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9803	}
9804
9805	bridge_pos = pci_pcie_cap(root);
9806	if (!bridge_pos)
9807		return;
9808
9809	gpu_pos = pci_pcie_cap(rdev->pdev);
9810	if (!gpu_pos)
9811		return;
9812
9813	if (mask & DRM_PCIE_SPEED_80) {
9814		/* re-try equalization if gen3 is not already enabled */
9815		if (current_data_rate != 2) {
9816			u16 bridge_cfg, gpu_cfg;
9817			u16 bridge_cfg2, gpu_cfg2;
9818			u32 max_lw, current_lw, tmp;
9819
9820			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9821			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9822
9823			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9824			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9825
9826			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9827			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9828
9829			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9830			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9831			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9832
9833			if (current_lw < max_lw) {
9834				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9835				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9836					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9837					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9838					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9839					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9840				}
9841			}
9842
9843			for (i = 0; i < 10; i++) {
9844				/* check status */
9845				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9846				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9847					break;
9848
9849				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9850				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9851
9852				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9853				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9854
9855				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9856				tmp |= LC_SET_QUIESCE;
9857				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9858
9859				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9860				tmp |= LC_REDO_EQ;
9861				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9862
9863				mdelay(100);
9864
9865				/* linkctl */
9866				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9867				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9868				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9869				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9870
9871				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9872				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9873				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9874				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9875
9876				/* linkctl2 */
9877				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9878				tmp16 &= ~((1 << 4) | (7 << 9));
9879				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9880				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9881
9882				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9883				tmp16 &= ~((1 << 4) | (7 << 9));
9884				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9885				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9886
9887				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9888				tmp &= ~LC_SET_QUIESCE;
9889				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9890			}
9891		}
9892	}
9893
9894	/* set the link speed */
9895	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9896	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9897	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9898
9899	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9900	tmp16 &= ~0xf;
9901	if (mask & DRM_PCIE_SPEED_80)
9902		tmp16 |= 3; /* gen3 */
9903	else if (mask & DRM_PCIE_SPEED_50)
9904		tmp16 |= 2; /* gen2 */
9905	else
9906		tmp16 |= 1; /* gen1 */
9907	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9908
9909	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9910	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9911	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9912
9913	for (i = 0; i < rdev->usec_timeout; i++) {
9914		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9915		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9916			break;
9917		udelay(1);
9918	}
9919}
9920
9921static void cik_program_aspm(struct radeon_device *rdev)
9922{
9923	u32 data, orig;
9924	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9925	bool disable_clkreq = false;
9926
9927	if (radeon_aspm == 0)
9928		return;
9929
9930	/* XXX double check IGPs */
9931	if (rdev->flags & RADEON_IS_IGP)
9932		return;
9933
9934	if (!(rdev->flags & RADEON_IS_PCIE))
9935		return;
9936
9937	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9938	data &= ~LC_XMIT_N_FTS_MASK;
9939	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9940	if (orig != data)
9941		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9942
9943	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9944	data |= LC_GO_TO_RECOVERY;
9945	if (orig != data)
9946		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9947
9948	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9949	data |= P_IGNORE_EDB_ERR;
9950	if (orig != data)
9951		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9952
9953	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9954	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9955	data |= LC_PMI_TO_L1_DIS;
9956	if (!disable_l0s)
9957		data |= LC_L0S_INACTIVITY(7);
9958
9959	if (!disable_l1) {
9960		data |= LC_L1_INACTIVITY(7);
9961		data &= ~LC_PMI_TO_L1_DIS;
9962		if (orig != data)
9963			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9964
9965		if (!disable_plloff_in_l1) {
9966			bool clk_req_support;
9967
9968			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9969			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9970			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9971			if (orig != data)
9972				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9973
9974			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9975			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9976			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9977			if (orig != data)
9978				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9979
9980			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9981			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9982			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9983			if (orig != data)
9984				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9985
9986			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9987			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9988			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9989			if (orig != data)
9990				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9991
9992			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9993			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9994			data |= LC_DYN_LANES_PWR_STATE(3);
9995			if (orig != data)
9996				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9997
9998			if (!disable_clkreq &&
9999			    !pci_is_root_bus(rdev->pdev->bus)) {
10000				struct pci_dev *root = rdev->pdev->bus->self;
10001				u32 lnkcap;
10002
10003				clk_req_support = false;
10004				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10005				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10006					clk_req_support = true;
10007			} else {
10008				clk_req_support = false;
10009			}
10010
10011			if (clk_req_support) {
10012				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10013				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10014				if (orig != data)
10015					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10016
10017				orig = data = RREG32_SMC(THM_CLK_CNTL);
10018				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10019				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10020				if (orig != data)
10021					WREG32_SMC(THM_CLK_CNTL, data);
10022
10023				orig = data = RREG32_SMC(MISC_CLK_CTRL);
10024				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10025				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10026				if (orig != data)
10027					WREG32_SMC(MISC_CLK_CTRL, data);
10028
10029				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10030				data &= ~BCLK_AS_XCLK;
10031				if (orig != data)
10032					WREG32_SMC(CG_CLKPIN_CNTL, data);
10033
10034				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10035				data &= ~FORCE_BIF_REFCLK_EN;
10036				if (orig != data)
10037					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10038
10039				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10040				data &= ~MPLL_CLKOUT_SEL_MASK;
10041				data |= MPLL_CLKOUT_SEL(4);
10042				if (orig != data)
10043					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10044			}
10045		}
10046	} else {
10047		if (orig != data)
10048			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10049	}
10050
10051	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10052	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10053	if (orig != data)
10054		WREG32_PCIE_PORT(PCIE_CNTL2, data);
10055
10056	if (!disable_l0s) {
10057		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10058		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10059			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10060			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10061				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10062				data &= ~LC_L0S_INACTIVITY_MASK;
10063				if (orig != data)
10064					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10065			}
10066		}
10067	}
10068}
10069