/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24#include <linux/firmware.h>
25#include <linux/slab.h>
26#include <linux/module.h>
27#include <drm/drmP.h>
28#include "radeon.h"
29#include "radeon_asic.h"
30#include "radeon_audio.h"
31#include <drm/radeon_drm.h>
32#include "sid.h"
33#include "atom.h"
34#include "si_blit_shaders.h"
35#include "clearstate_si.h"
36#include "radeon_ucode.h"
37
38
39MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46
47MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48MODULE_FIRMWARE("radeon/tahiti_me.bin");
49MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53
54MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61
62MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68
69MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
70MODULE_FIRMWARE("radeon/VERDE_me.bin");
71MODULE_FIRMWARE("radeon/VERDE_ce.bin");
72MODULE_FIRMWARE("radeon/VERDE_mc.bin");
73MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
74MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
75MODULE_FIRMWARE("radeon/VERDE_smc.bin");
76
77MODULE_FIRMWARE("radeon/verde_pfp.bin");
78MODULE_FIRMWARE("radeon/verde_me.bin");
79MODULE_FIRMWARE("radeon/verde_ce.bin");
80MODULE_FIRMWARE("radeon/verde_mc.bin");
81MODULE_FIRMWARE("radeon/verde_rlc.bin");
82MODULE_FIRMWARE("radeon/verde_smc.bin");
83
84MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
85MODULE_FIRMWARE("radeon/OLAND_me.bin");
86MODULE_FIRMWARE("radeon/OLAND_ce.bin");
87MODULE_FIRMWARE("radeon/OLAND_mc.bin");
88MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
89MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
90MODULE_FIRMWARE("radeon/OLAND_smc.bin");
91
92MODULE_FIRMWARE("radeon/oland_pfp.bin");
93MODULE_FIRMWARE("radeon/oland_me.bin");
94MODULE_FIRMWARE("radeon/oland_ce.bin");
95MODULE_FIRMWARE("radeon/oland_mc.bin");
96MODULE_FIRMWARE("radeon/oland_rlc.bin");
97MODULE_FIRMWARE("radeon/oland_smc.bin");
98
99MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
100MODULE_FIRMWARE("radeon/HAINAN_me.bin");
101MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
102MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
103MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
104MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
105MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
106
107MODULE_FIRMWARE("radeon/hainan_pfp.bin");
108MODULE_FIRMWARE("radeon/hainan_me.bin");
109MODULE_FIRMWARE("radeon/hainan_ce.bin");
110MODULE_FIRMWARE("radeon/hainan_mc.bin");
111MODULE_FIRMWARE("radeon/hainan_rlc.bin");
112MODULE_FIRMWARE("radeon/hainan_smc.bin");
113
114static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
115static void si_pcie_gen3_enable(struct radeon_device *rdev);
116static void si_program_aspm(struct radeon_device *rdev);
117extern void sumo_rlc_fini(struct radeon_device *rdev);
118extern int sumo_rlc_init(struct radeon_device *rdev);
119extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120extern void r600_ih_ring_fini(struct radeon_device *rdev);
121extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
122extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
123extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
124extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
125extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
126extern bool evergreen_is_display_hung(struct radeon_device *rdev);
127static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
128					 bool enable);
129static void si_init_pg(struct radeon_device *rdev);
130static void si_init_cg(struct radeon_device *rdev);
131static void si_fini_pg(struct radeon_device *rdev);
132static void si_fini_cg(struct radeon_device *rdev);
133static void si_rlc_stop(struct radeon_device *rdev);
134
135static const u32 verde_rlc_save_restore_register_list[] =
136{
137	(0x8000 << 16) | (0x98f4 >> 2),
138	0x00000000,
139	(0x8040 << 16) | (0x98f4 >> 2),
140	0x00000000,
141	(0x8000 << 16) | (0xe80 >> 2),
142	0x00000000,
143	(0x8040 << 16) | (0xe80 >> 2),
144	0x00000000,
145	(0x8000 << 16) | (0x89bc >> 2),
146	0x00000000,
147	(0x8040 << 16) | (0x89bc >> 2),
148	0x00000000,
149	(0x8000 << 16) | (0x8c1c >> 2),
150	0x00000000,
151	(0x8040 << 16) | (0x8c1c >> 2),
152	0x00000000,
153	(0x9c00 << 16) | (0x98f0 >> 2),
154	0x00000000,
155	(0x9c00 << 16) | (0xe7c >> 2),
156	0x00000000,
157	(0x8000 << 16) | (0x9148 >> 2),
158	0x00000000,
159	(0x8040 << 16) | (0x9148 >> 2),
160	0x00000000,
161	(0x9c00 << 16) | (0x9150 >> 2),
162	0x00000000,
163	(0x9c00 << 16) | (0x897c >> 2),
164	0x00000000,
165	(0x9c00 << 16) | (0x8d8c >> 2),
166	0x00000000,
167	(0x9c00 << 16) | (0xac54 >> 2),
168	0X00000000,
169	0x3,
170	(0x9c00 << 16) | (0x98f8 >> 2),
171	0x00000000,
172	(0x9c00 << 16) | (0x9910 >> 2),
173	0x00000000,
174	(0x9c00 << 16) | (0x9914 >> 2),
175	0x00000000,
176	(0x9c00 << 16) | (0x9918 >> 2),
177	0x00000000,
178	(0x9c00 << 16) | (0x991c >> 2),
179	0x00000000,
180	(0x9c00 << 16) | (0x9920 >> 2),
181	0x00000000,
182	(0x9c00 << 16) | (0x9924 >> 2),
183	0x00000000,
184	(0x9c00 << 16) | (0x9928 >> 2),
185	0x00000000,
186	(0x9c00 << 16) | (0x992c >> 2),
187	0x00000000,
188	(0x9c00 << 16) | (0x9930 >> 2),
189	0x00000000,
190	(0x9c00 << 16) | (0x9934 >> 2),
191	0x00000000,
192	(0x9c00 << 16) | (0x9938 >> 2),
193	0x00000000,
194	(0x9c00 << 16) | (0x993c >> 2),
195	0x00000000,
196	(0x9c00 << 16) | (0x9940 >> 2),
197	0x00000000,
198	(0x9c00 << 16) | (0x9944 >> 2),
199	0x00000000,
200	(0x9c00 << 16) | (0x9948 >> 2),
201	0x00000000,
202	(0x9c00 << 16) | (0x994c >> 2),
203	0x00000000,
204	(0x9c00 << 16) | (0x9950 >> 2),
205	0x00000000,
206	(0x9c00 << 16) | (0x9954 >> 2),
207	0x00000000,
208	(0x9c00 << 16) | (0x9958 >> 2),
209	0x00000000,
210	(0x9c00 << 16) | (0x995c >> 2),
211	0x00000000,
212	(0x9c00 << 16) | (0x9960 >> 2),
213	0x00000000,
214	(0x9c00 << 16) | (0x9964 >> 2),
215	0x00000000,
216	(0x9c00 << 16) | (0x9968 >> 2),
217	0x00000000,
218	(0x9c00 << 16) | (0x996c >> 2),
219	0x00000000,
220	(0x9c00 << 16) | (0x9970 >> 2),
221	0x00000000,
222	(0x9c00 << 16) | (0x9974 >> 2),
223	0x00000000,
224	(0x9c00 << 16) | (0x9978 >> 2),
225	0x00000000,
226	(0x9c00 << 16) | (0x997c >> 2),
227	0x00000000,
228	(0x9c00 << 16) | (0x9980 >> 2),
229	0x00000000,
230	(0x9c00 << 16) | (0x9984 >> 2),
231	0x00000000,
232	(0x9c00 << 16) | (0x9988 >> 2),
233	0x00000000,
234	(0x9c00 << 16) | (0x998c >> 2),
235	0x00000000,
236	(0x9c00 << 16) | (0x8c00 >> 2),
237	0x00000000,
238	(0x9c00 << 16) | (0x8c14 >> 2),
239	0x00000000,
240	(0x9c00 << 16) | (0x8c04 >> 2),
241	0x00000000,
242	(0x9c00 << 16) | (0x8c08 >> 2),
243	0x00000000,
244	(0x8000 << 16) | (0x9b7c >> 2),
245	0x00000000,
246	(0x8040 << 16) | (0x9b7c >> 2),
247	0x00000000,
248	(0x8000 << 16) | (0xe84 >> 2),
249	0x00000000,
250	(0x8040 << 16) | (0xe84 >> 2),
251	0x00000000,
252	(0x8000 << 16) | (0x89c0 >> 2),
253	0x00000000,
254	(0x8040 << 16) | (0x89c0 >> 2),
255	0x00000000,
256	(0x8000 << 16) | (0x914c >> 2),
257	0x00000000,
258	(0x8040 << 16) | (0x914c >> 2),
259	0x00000000,
260	(0x8000 << 16) | (0x8c20 >> 2),
261	0x00000000,
262	(0x8040 << 16) | (0x8c20 >> 2),
263	0x00000000,
264	(0x8000 << 16) | (0x9354 >> 2),
265	0x00000000,
266	(0x8040 << 16) | (0x9354 >> 2),
267	0x00000000,
268	(0x9c00 << 16) | (0x9060 >> 2),
269	0x00000000,
270	(0x9c00 << 16) | (0x9364 >> 2),
271	0x00000000,
272	(0x9c00 << 16) | (0x9100 >> 2),
273	0x00000000,
274	(0x9c00 << 16) | (0x913c >> 2),
275	0x00000000,
276	(0x8000 << 16) | (0x90e0 >> 2),
277	0x00000000,
278	(0x8000 << 16) | (0x90e4 >> 2),
279	0x00000000,
280	(0x8000 << 16) | (0x90e8 >> 2),
281	0x00000000,
282	(0x8040 << 16) | (0x90e0 >> 2),
283	0x00000000,
284	(0x8040 << 16) | (0x90e4 >> 2),
285	0x00000000,
286	(0x8040 << 16) | (0x90e8 >> 2),
287	0x00000000,
288	(0x9c00 << 16) | (0x8bcc >> 2),
289	0x00000000,
290	(0x9c00 << 16) | (0x8b24 >> 2),
291	0x00000000,
292	(0x9c00 << 16) | (0x88c4 >> 2),
293	0x00000000,
294	(0x9c00 << 16) | (0x8e50 >> 2),
295	0x00000000,
296	(0x9c00 << 16) | (0x8c0c >> 2),
297	0x00000000,
298	(0x9c00 << 16) | (0x8e58 >> 2),
299	0x00000000,
300	(0x9c00 << 16) | (0x8e5c >> 2),
301	0x00000000,
302	(0x9c00 << 16) | (0x9508 >> 2),
303	0x00000000,
304	(0x9c00 << 16) | (0x950c >> 2),
305	0x00000000,
306	(0x9c00 << 16) | (0x9494 >> 2),
307	0x00000000,
308	(0x9c00 << 16) | (0xac0c >> 2),
309	0x00000000,
310	(0x9c00 << 16) | (0xac10 >> 2),
311	0x00000000,
312	(0x9c00 << 16) | (0xac14 >> 2),
313	0x00000000,
314	(0x9c00 << 16) | (0xae00 >> 2),
315	0x00000000,
316	(0x9c00 << 16) | (0xac08 >> 2),
317	0x00000000,
318	(0x9c00 << 16) | (0x88d4 >> 2),
319	0x00000000,
320	(0x9c00 << 16) | (0x88c8 >> 2),
321	0x00000000,
322	(0x9c00 << 16) | (0x88cc >> 2),
323	0x00000000,
324	(0x9c00 << 16) | (0x89b0 >> 2),
325	0x00000000,
326	(0x9c00 << 16) | (0x8b10 >> 2),
327	0x00000000,
328	(0x9c00 << 16) | (0x8a14 >> 2),
329	0x00000000,
330	(0x9c00 << 16) | (0x9830 >> 2),
331	0x00000000,
332	(0x9c00 << 16) | (0x9834 >> 2),
333	0x00000000,
334	(0x9c00 << 16) | (0x9838 >> 2),
335	0x00000000,
336	(0x9c00 << 16) | (0x9a10 >> 2),
337	0x00000000,
338	(0x8000 << 16) | (0x9870 >> 2),
339	0x00000000,
340	(0x8000 << 16) | (0x9874 >> 2),
341	0x00000000,
342	(0x8001 << 16) | (0x9870 >> 2),
343	0x00000000,
344	(0x8001 << 16) | (0x9874 >> 2),
345	0x00000000,
346	(0x8040 << 16) | (0x9870 >> 2),
347	0x00000000,
348	(0x8040 << 16) | (0x9874 >> 2),
349	0x00000000,
350	(0x8041 << 16) | (0x9870 >> 2),
351	0x00000000,
352	(0x8041 << 16) | (0x9874 >> 2),
353	0x00000000,
354	0x00000000
355};
356
357static const u32 tahiti_golden_rlc_registers[] =
358{
359	0xc424, 0xffffffff, 0x00601005,
360	0xc47c, 0xffffffff, 0x10104040,
361	0xc488, 0xffffffff, 0x0100000a,
362	0xc314, 0xffffffff, 0x00000800,
363	0xc30c, 0xffffffff, 0x800000f4,
364	0xf4a8, 0xffffffff, 0x00000000
365};
366
367static const u32 tahiti_golden_registers[] =
368{
369	0x9a10, 0x00010000, 0x00018208,
370	0x9830, 0xffffffff, 0x00000000,
371	0x9834, 0xf00fffff, 0x00000400,
372	0x9838, 0x0002021c, 0x00020200,
373	0xc78, 0x00000080, 0x00000000,
374	0xd030, 0x000300c0, 0x00800040,
375	0xd830, 0x000300c0, 0x00800040,
376	0x5bb0, 0x000000f0, 0x00000070,
377	0x5bc0, 0x00200000, 0x50100000,
378	0x7030, 0x31000311, 0x00000011,
379	0x277c, 0x00000003, 0x000007ff,
380	0x240c, 0x000007ff, 0x00000000,
381	0x8a14, 0xf000001f, 0x00000007,
382	0x8b24, 0xffffffff, 0x00ffffff,
383	0x8b10, 0x0000ff0f, 0x00000000,
384	0x28a4c, 0x07ffffff, 0x4e000000,
385	0x28350, 0x3f3f3fff, 0x2a00126a,
386	0x30, 0x000000ff, 0x0040,
387	0x34, 0x00000040, 0x00004040,
388	0x9100, 0x07ffffff, 0x03000000,
389	0x8e88, 0x01ff1f3f, 0x00000000,
390	0x8e84, 0x01ff1f3f, 0x00000000,
391	0x9060, 0x0000007f, 0x00000020,
392	0x9508, 0x00010000, 0x00010000,
393	0xac14, 0x00000200, 0x000002fb,
394	0xac10, 0xffffffff, 0x0000543b,
395	0xac0c, 0xffffffff, 0xa9210876,
396	0x88d0, 0xffffffff, 0x000fff40,
397	0x88d4, 0x0000001f, 0x00000010,
398	0x1410, 0x20000000, 0x20fffed8,
399	0x15c0, 0x000c0fc0, 0x000c0400
400};
401
402static const u32 tahiti_golden_registers2[] =
403{
404	0xc64, 0x00000001, 0x00000001
405};
406
407static const u32 pitcairn_golden_rlc_registers[] =
408{
409	0xc424, 0xffffffff, 0x00601004,
410	0xc47c, 0xffffffff, 0x10102020,
411	0xc488, 0xffffffff, 0x01000020,
412	0xc314, 0xffffffff, 0x00000800,
413	0xc30c, 0xffffffff, 0x800000a4
414};
415
416static const u32 pitcairn_golden_registers[] =
417{
418	0x9a10, 0x00010000, 0x00018208,
419	0x9830, 0xffffffff, 0x00000000,
420	0x9834, 0xf00fffff, 0x00000400,
421	0x9838, 0x0002021c, 0x00020200,
422	0xc78, 0x00000080, 0x00000000,
423	0xd030, 0x000300c0, 0x00800040,
424	0xd830, 0x000300c0, 0x00800040,
425	0x5bb0, 0x000000f0, 0x00000070,
426	0x5bc0, 0x00200000, 0x50100000,
427	0x7030, 0x31000311, 0x00000011,
428	0x2ae4, 0x00073ffe, 0x000022a2,
429	0x240c, 0x000007ff, 0x00000000,
430	0x8a14, 0xf000001f, 0x00000007,
431	0x8b24, 0xffffffff, 0x00ffffff,
432	0x8b10, 0x0000ff0f, 0x00000000,
433	0x28a4c, 0x07ffffff, 0x4e000000,
434	0x28350, 0x3f3f3fff, 0x2a00126a,
435	0x30, 0x000000ff, 0x0040,
436	0x34, 0x00000040, 0x00004040,
437	0x9100, 0x07ffffff, 0x03000000,
438	0x9060, 0x0000007f, 0x00000020,
439	0x9508, 0x00010000, 0x00010000,
440	0xac14, 0x000003ff, 0x000000f7,
441	0xac10, 0xffffffff, 0x00000000,
442	0xac0c, 0xffffffff, 0x32761054,
443	0x88d4, 0x0000001f, 0x00000010,
444	0x15c0, 0x000c0fc0, 0x000c0400
445};
446
447static const u32 verde_golden_rlc_registers[] =
448{
449	0xc424, 0xffffffff, 0x033f1005,
450	0xc47c, 0xffffffff, 0x10808020,
451	0xc488, 0xffffffff, 0x00800008,
452	0xc314, 0xffffffff, 0x00001000,
453	0xc30c, 0xffffffff, 0x80010014
454};
455
456static const u32 verde_golden_registers[] =
457{
458	0x9a10, 0x00010000, 0x00018208,
459	0x9830, 0xffffffff, 0x00000000,
460	0x9834, 0xf00fffff, 0x00000400,
461	0x9838, 0x0002021c, 0x00020200,
462	0xc78, 0x00000080, 0x00000000,
463	0xd030, 0x000300c0, 0x00800040,
464	0xd030, 0x000300c0, 0x00800040,
465	0xd830, 0x000300c0, 0x00800040,
466	0xd830, 0x000300c0, 0x00800040,
467	0x5bb0, 0x000000f0, 0x00000070,
468	0x5bc0, 0x00200000, 0x50100000,
469	0x7030, 0x31000311, 0x00000011,
470	0x2ae4, 0x00073ffe, 0x000022a2,
471	0x2ae4, 0x00073ffe, 0x000022a2,
472	0x2ae4, 0x00073ffe, 0x000022a2,
473	0x240c, 0x000007ff, 0x00000000,
474	0x240c, 0x000007ff, 0x00000000,
475	0x240c, 0x000007ff, 0x00000000,
476	0x8a14, 0xf000001f, 0x00000007,
477	0x8a14, 0xf000001f, 0x00000007,
478	0x8a14, 0xf000001f, 0x00000007,
479	0x8b24, 0xffffffff, 0x00ffffff,
480	0x8b10, 0x0000ff0f, 0x00000000,
481	0x28a4c, 0x07ffffff, 0x4e000000,
482	0x28350, 0x3f3f3fff, 0x0000124a,
483	0x28350, 0x3f3f3fff, 0x0000124a,
484	0x28350, 0x3f3f3fff, 0x0000124a,
485	0x30, 0x000000ff, 0x0040,
486	0x34, 0x00000040, 0x00004040,
487	0x9100, 0x07ffffff, 0x03000000,
488	0x9100, 0x07ffffff, 0x03000000,
489	0x8e88, 0x01ff1f3f, 0x00000000,
490	0x8e88, 0x01ff1f3f, 0x00000000,
491	0x8e88, 0x01ff1f3f, 0x00000000,
492	0x8e84, 0x01ff1f3f, 0x00000000,
493	0x8e84, 0x01ff1f3f, 0x00000000,
494	0x8e84, 0x01ff1f3f, 0x00000000,
495	0x9060, 0x0000007f, 0x00000020,
496	0x9508, 0x00010000, 0x00010000,
497	0xac14, 0x000003ff, 0x00000003,
498	0xac14, 0x000003ff, 0x00000003,
499	0xac14, 0x000003ff, 0x00000003,
500	0xac10, 0xffffffff, 0x00000000,
501	0xac10, 0xffffffff, 0x00000000,
502	0xac10, 0xffffffff, 0x00000000,
503	0xac0c, 0xffffffff, 0x00001032,
504	0xac0c, 0xffffffff, 0x00001032,
505	0xac0c, 0xffffffff, 0x00001032,
506	0x88d4, 0x0000001f, 0x00000010,
507	0x88d4, 0x0000001f, 0x00000010,
508	0x88d4, 0x0000001f, 0x00000010,
509	0x15c0, 0x000c0fc0, 0x000c0400
510};
511
512static const u32 oland_golden_rlc_registers[] =
513{
514	0xc424, 0xffffffff, 0x00601005,
515	0xc47c, 0xffffffff, 0x10104040,
516	0xc488, 0xffffffff, 0x0100000a,
517	0xc314, 0xffffffff, 0x00000800,
518	0xc30c, 0xffffffff, 0x800000f4
519};
520
521static const u32 oland_golden_registers[] =
522{
523	0x9a10, 0x00010000, 0x00018208,
524	0x9830, 0xffffffff, 0x00000000,
525	0x9834, 0xf00fffff, 0x00000400,
526	0x9838, 0x0002021c, 0x00020200,
527	0xc78, 0x00000080, 0x00000000,
528	0xd030, 0x000300c0, 0x00800040,
529	0xd830, 0x000300c0, 0x00800040,
530	0x5bb0, 0x000000f0, 0x00000070,
531	0x5bc0, 0x00200000, 0x50100000,
532	0x7030, 0x31000311, 0x00000011,
533	0x2ae4, 0x00073ffe, 0x000022a2,
534	0x240c, 0x000007ff, 0x00000000,
535	0x8a14, 0xf000001f, 0x00000007,
536	0x8b24, 0xffffffff, 0x00ffffff,
537	0x8b10, 0x0000ff0f, 0x00000000,
538	0x28a4c, 0x07ffffff, 0x4e000000,
539	0x28350, 0x3f3f3fff, 0x00000082,
540	0x30, 0x000000ff, 0x0040,
541	0x34, 0x00000040, 0x00004040,
542	0x9100, 0x07ffffff, 0x03000000,
543	0x9060, 0x0000007f, 0x00000020,
544	0x9508, 0x00010000, 0x00010000,
545	0xac14, 0x000003ff, 0x000000f3,
546	0xac10, 0xffffffff, 0x00000000,
547	0xac0c, 0xffffffff, 0x00003210,
548	0x88d4, 0x0000001f, 0x00000010,
549	0x15c0, 0x000c0fc0, 0x000c0400
550};
551
552static const u32 hainan_golden_registers[] =
553{
554	0x9a10, 0x00010000, 0x00018208,
555	0x9830, 0xffffffff, 0x00000000,
556	0x9834, 0xf00fffff, 0x00000400,
557	0x9838, 0x0002021c, 0x00020200,
558	0xd0c0, 0xff000fff, 0x00000100,
559	0xd030, 0x000300c0, 0x00800040,
560	0xd8c0, 0xff000fff, 0x00000100,
561	0xd830, 0x000300c0, 0x00800040,
562	0x2ae4, 0x00073ffe, 0x000022a2,
563	0x240c, 0x000007ff, 0x00000000,
564	0x8a14, 0xf000001f, 0x00000007,
565	0x8b24, 0xffffffff, 0x00ffffff,
566	0x8b10, 0x0000ff0f, 0x00000000,
567	0x28a4c, 0x07ffffff, 0x4e000000,
568	0x28350, 0x3f3f3fff, 0x00000000,
569	0x30, 0x000000ff, 0x0040,
570	0x34, 0x00000040, 0x00004040,
571	0x9100, 0x03e00000, 0x03600000,
572	0x9060, 0x0000007f, 0x00000020,
573	0x9508, 0x00010000, 0x00010000,
574	0xac14, 0x000003ff, 0x000000f1,
575	0xac10, 0xffffffff, 0x00000000,
576	0xac0c, 0xffffffff, 0x00003210,
577	0x88d4, 0x0000001f, 0x00000010,
578	0x15c0, 0x000c0fc0, 0x000c0400
579};
580
581static const u32 hainan_golden_registers2[] =
582{
583	0x98f8, 0xffffffff, 0x02010001
584};
585
586static const u32 tahiti_mgcg_cgcg_init[] =
587{
588	0xc400, 0xffffffff, 0xfffffffc,
589	0x802c, 0xffffffff, 0xe0000000,
590	0x9a60, 0xffffffff, 0x00000100,
591	0x92a4, 0xffffffff, 0x00000100,
592	0xc164, 0xffffffff, 0x00000100,
593	0x9774, 0xffffffff, 0x00000100,
594	0x8984, 0xffffffff, 0x06000100,
595	0x8a18, 0xffffffff, 0x00000100,
596	0x92a0, 0xffffffff, 0x00000100,
597	0xc380, 0xffffffff, 0x00000100,
598	0x8b28, 0xffffffff, 0x00000100,
599	0x9144, 0xffffffff, 0x00000100,
600	0x8d88, 0xffffffff, 0x00000100,
601	0x8d8c, 0xffffffff, 0x00000100,
602	0x9030, 0xffffffff, 0x00000100,
603	0x9034, 0xffffffff, 0x00000100,
604	0x9038, 0xffffffff, 0x00000100,
605	0x903c, 0xffffffff, 0x00000100,
606	0xad80, 0xffffffff, 0x00000100,
607	0xac54, 0xffffffff, 0x00000100,
608	0x897c, 0xffffffff, 0x06000100,
609	0x9868, 0xffffffff, 0x00000100,
610	0x9510, 0xffffffff, 0x00000100,
611	0xaf04, 0xffffffff, 0x00000100,
612	0xae04, 0xffffffff, 0x00000100,
613	0x949c, 0xffffffff, 0x00000100,
614	0x802c, 0xffffffff, 0xe0000000,
615	0x9160, 0xffffffff, 0x00010000,
616	0x9164, 0xffffffff, 0x00030002,
617	0x9168, 0xffffffff, 0x00040007,
618	0x916c, 0xffffffff, 0x00060005,
619	0x9170, 0xffffffff, 0x00090008,
620	0x9174, 0xffffffff, 0x00020001,
621	0x9178, 0xffffffff, 0x00040003,
622	0x917c, 0xffffffff, 0x00000007,
623	0x9180, 0xffffffff, 0x00060005,
624	0x9184, 0xffffffff, 0x00090008,
625	0x9188, 0xffffffff, 0x00030002,
626	0x918c, 0xffffffff, 0x00050004,
627	0x9190, 0xffffffff, 0x00000008,
628	0x9194, 0xffffffff, 0x00070006,
629	0x9198, 0xffffffff, 0x000a0009,
630	0x919c, 0xffffffff, 0x00040003,
631	0x91a0, 0xffffffff, 0x00060005,
632	0x91a4, 0xffffffff, 0x00000009,
633	0x91a8, 0xffffffff, 0x00080007,
634	0x91ac, 0xffffffff, 0x000b000a,
635	0x91b0, 0xffffffff, 0x00050004,
636	0x91b4, 0xffffffff, 0x00070006,
637	0x91b8, 0xffffffff, 0x0008000b,
638	0x91bc, 0xffffffff, 0x000a0009,
639	0x91c0, 0xffffffff, 0x000d000c,
640	0x91c4, 0xffffffff, 0x00060005,
641	0x91c8, 0xffffffff, 0x00080007,
642	0x91cc, 0xffffffff, 0x0000000b,
643	0x91d0, 0xffffffff, 0x000a0009,
644	0x91d4, 0xffffffff, 0x000d000c,
645	0x91d8, 0xffffffff, 0x00070006,
646	0x91dc, 0xffffffff, 0x00090008,
647	0x91e0, 0xffffffff, 0x0000000c,
648	0x91e4, 0xffffffff, 0x000b000a,
649	0x91e8, 0xffffffff, 0x000e000d,
650	0x91ec, 0xffffffff, 0x00080007,
651	0x91f0, 0xffffffff, 0x000a0009,
652	0x91f4, 0xffffffff, 0x0000000d,
653	0x91f8, 0xffffffff, 0x000c000b,
654	0x91fc, 0xffffffff, 0x000f000e,
655	0x9200, 0xffffffff, 0x00090008,
656	0x9204, 0xffffffff, 0x000b000a,
657	0x9208, 0xffffffff, 0x000c000f,
658	0x920c, 0xffffffff, 0x000e000d,
659	0x9210, 0xffffffff, 0x00110010,
660	0x9214, 0xffffffff, 0x000a0009,
661	0x9218, 0xffffffff, 0x000c000b,
662	0x921c, 0xffffffff, 0x0000000f,
663	0x9220, 0xffffffff, 0x000e000d,
664	0x9224, 0xffffffff, 0x00110010,
665	0x9228, 0xffffffff, 0x000b000a,
666	0x922c, 0xffffffff, 0x000d000c,
667	0x9230, 0xffffffff, 0x00000010,
668	0x9234, 0xffffffff, 0x000f000e,
669	0x9238, 0xffffffff, 0x00120011,
670	0x923c, 0xffffffff, 0x000c000b,
671	0x9240, 0xffffffff, 0x000e000d,
672	0x9244, 0xffffffff, 0x00000011,
673	0x9248, 0xffffffff, 0x0010000f,
674	0x924c, 0xffffffff, 0x00130012,
675	0x9250, 0xffffffff, 0x000d000c,
676	0x9254, 0xffffffff, 0x000f000e,
677	0x9258, 0xffffffff, 0x00100013,
678	0x925c, 0xffffffff, 0x00120011,
679	0x9260, 0xffffffff, 0x00150014,
680	0x9264, 0xffffffff, 0x000e000d,
681	0x9268, 0xffffffff, 0x0010000f,
682	0x926c, 0xffffffff, 0x00000013,
683	0x9270, 0xffffffff, 0x00120011,
684	0x9274, 0xffffffff, 0x00150014,
685	0x9278, 0xffffffff, 0x000f000e,
686	0x927c, 0xffffffff, 0x00110010,
687	0x9280, 0xffffffff, 0x00000014,
688	0x9284, 0xffffffff, 0x00130012,
689	0x9288, 0xffffffff, 0x00160015,
690	0x928c, 0xffffffff, 0x0010000f,
691	0x9290, 0xffffffff, 0x00120011,
692	0x9294, 0xffffffff, 0x00000015,
693	0x9298, 0xffffffff, 0x00140013,
694	0x929c, 0xffffffff, 0x00170016,
695	0x9150, 0xffffffff, 0x96940200,
696	0x8708, 0xffffffff, 0x00900100,
697	0xc478, 0xffffffff, 0x00000080,
698	0xc404, 0xffffffff, 0x0020003f,
699	0x30, 0xffffffff, 0x0000001c,
700	0x34, 0x000f0000, 0x000f0000,
701	0x160c, 0xffffffff, 0x00000100,
702	0x1024, 0xffffffff, 0x00000100,
703	0x102c, 0x00000101, 0x00000000,
704	0x20a8, 0xffffffff, 0x00000104,
705	0x264c, 0x000c0000, 0x000c0000,
706	0x2648, 0x000c0000, 0x000c0000,
707	0x55e4, 0xff000fff, 0x00000100,
708	0x55e8, 0x00000001, 0x00000001,
709	0x2f50, 0x00000001, 0x00000001,
710	0x30cc, 0xc0000fff, 0x00000104,
711	0xc1e4, 0x00000001, 0x00000001,
712	0xd0c0, 0xfffffff0, 0x00000100,
713	0xd8c0, 0xfffffff0, 0x00000100
714};
715
716static const u32 pitcairn_mgcg_cgcg_init[] =
717{
718	0xc400, 0xffffffff, 0xfffffffc,
719	0x802c, 0xffffffff, 0xe0000000,
720	0x9a60, 0xffffffff, 0x00000100,
721	0x92a4, 0xffffffff, 0x00000100,
722	0xc164, 0xffffffff, 0x00000100,
723	0x9774, 0xffffffff, 0x00000100,
724	0x8984, 0xffffffff, 0x06000100,
725	0x8a18, 0xffffffff, 0x00000100,
726	0x92a0, 0xffffffff, 0x00000100,
727	0xc380, 0xffffffff, 0x00000100,
728	0x8b28, 0xffffffff, 0x00000100,
729	0x9144, 0xffffffff, 0x00000100,
730	0x8d88, 0xffffffff, 0x00000100,
731	0x8d8c, 0xffffffff, 0x00000100,
732	0x9030, 0xffffffff, 0x00000100,
733	0x9034, 0xffffffff, 0x00000100,
734	0x9038, 0xffffffff, 0x00000100,
735	0x903c, 0xffffffff, 0x00000100,
736	0xad80, 0xffffffff, 0x00000100,
737	0xac54, 0xffffffff, 0x00000100,
738	0x897c, 0xffffffff, 0x06000100,
739	0x9868, 0xffffffff, 0x00000100,
740	0x9510, 0xffffffff, 0x00000100,
741	0xaf04, 0xffffffff, 0x00000100,
742	0xae04, 0xffffffff, 0x00000100,
743	0x949c, 0xffffffff, 0x00000100,
744	0x802c, 0xffffffff, 0xe0000000,
745	0x9160, 0xffffffff, 0x00010000,
746	0x9164, 0xffffffff, 0x00030002,
747	0x9168, 0xffffffff, 0x00040007,
748	0x916c, 0xffffffff, 0x00060005,
749	0x9170, 0xffffffff, 0x00090008,
750	0x9174, 0xffffffff, 0x00020001,
751	0x9178, 0xffffffff, 0x00040003,
752	0x917c, 0xffffffff, 0x00000007,
753	0x9180, 0xffffffff, 0x00060005,
754	0x9184, 0xffffffff, 0x00090008,
755	0x9188, 0xffffffff, 0x00030002,
756	0x918c, 0xffffffff, 0x00050004,
757	0x9190, 0xffffffff, 0x00000008,
758	0x9194, 0xffffffff, 0x00070006,
759	0x9198, 0xffffffff, 0x000a0009,
760	0x919c, 0xffffffff, 0x00040003,
761	0x91a0, 0xffffffff, 0x00060005,
762	0x91a4, 0xffffffff, 0x00000009,
763	0x91a8, 0xffffffff, 0x00080007,
764	0x91ac, 0xffffffff, 0x000b000a,
765	0x91b0, 0xffffffff, 0x00050004,
766	0x91b4, 0xffffffff, 0x00070006,
767	0x91b8, 0xffffffff, 0x0008000b,
768	0x91bc, 0xffffffff, 0x000a0009,
769	0x91c0, 0xffffffff, 0x000d000c,
770	0x9200, 0xffffffff, 0x00090008,
771	0x9204, 0xffffffff, 0x000b000a,
772	0x9208, 0xffffffff, 0x000c000f,
773	0x920c, 0xffffffff, 0x000e000d,
774	0x9210, 0xffffffff, 0x00110010,
775	0x9214, 0xffffffff, 0x000a0009,
776	0x9218, 0xffffffff, 0x000c000b,
777	0x921c, 0xffffffff, 0x0000000f,
778	0x9220, 0xffffffff, 0x000e000d,
779	0x9224, 0xffffffff, 0x00110010,
780	0x9228, 0xffffffff, 0x000b000a,
781	0x922c, 0xffffffff, 0x000d000c,
782	0x9230, 0xffffffff, 0x00000010,
783	0x9234, 0xffffffff, 0x000f000e,
784	0x9238, 0xffffffff, 0x00120011,
785	0x923c, 0xffffffff, 0x000c000b,
786	0x9240, 0xffffffff, 0x000e000d,
787	0x9244, 0xffffffff, 0x00000011,
788	0x9248, 0xffffffff, 0x0010000f,
789	0x924c, 0xffffffff, 0x00130012,
790	0x9250, 0xffffffff, 0x000d000c,
791	0x9254, 0xffffffff, 0x000f000e,
792	0x9258, 0xffffffff, 0x00100013,
793	0x925c, 0xffffffff, 0x00120011,
794	0x9260, 0xffffffff, 0x00150014,
795	0x9150, 0xffffffff, 0x96940200,
796	0x8708, 0xffffffff, 0x00900100,
797	0xc478, 0xffffffff, 0x00000080,
798	0xc404, 0xffffffff, 0x0020003f,
799	0x30, 0xffffffff, 0x0000001c,
800	0x34, 0x000f0000, 0x000f0000,
801	0x160c, 0xffffffff, 0x00000100,
802	0x1024, 0xffffffff, 0x00000100,
803	0x102c, 0x00000101, 0x00000000,
804	0x20a8, 0xffffffff, 0x00000104,
805	0x55e4, 0xff000fff, 0x00000100,
806	0x55e8, 0x00000001, 0x00000001,
807	0x2f50, 0x00000001, 0x00000001,
808	0x30cc, 0xc0000fff, 0x00000104,
809	0xc1e4, 0x00000001, 0x00000001,
810	0xd0c0, 0xfffffff0, 0x00000100,
811	0xd8c0, 0xfffffff0, 0x00000100
812};
813
814static const u32 verde_mgcg_cgcg_init[] =
815{
816	0xc400, 0xffffffff, 0xfffffffc,
817	0x802c, 0xffffffff, 0xe0000000,
818	0x9a60, 0xffffffff, 0x00000100,
819	0x92a4, 0xffffffff, 0x00000100,
820	0xc164, 0xffffffff, 0x00000100,
821	0x9774, 0xffffffff, 0x00000100,
822	0x8984, 0xffffffff, 0x06000100,
823	0x8a18, 0xffffffff, 0x00000100,
824	0x92a0, 0xffffffff, 0x00000100,
825	0xc380, 0xffffffff, 0x00000100,
826	0x8b28, 0xffffffff, 0x00000100,
827	0x9144, 0xffffffff, 0x00000100,
828	0x8d88, 0xffffffff, 0x00000100,
829	0x8d8c, 0xffffffff, 0x00000100,
830	0x9030, 0xffffffff, 0x00000100,
831	0x9034, 0xffffffff, 0x00000100,
832	0x9038, 0xffffffff, 0x00000100,
833	0x903c, 0xffffffff, 0x00000100,
834	0xad80, 0xffffffff, 0x00000100,
835	0xac54, 0xffffffff, 0x00000100,
836	0x897c, 0xffffffff, 0x06000100,
837	0x9868, 0xffffffff, 0x00000100,
838	0x9510, 0xffffffff, 0x00000100,
839	0xaf04, 0xffffffff, 0x00000100,
840	0xae04, 0xffffffff, 0x00000100,
841	0x949c, 0xffffffff, 0x00000100,
842	0x802c, 0xffffffff, 0xe0000000,
843	0x9160, 0xffffffff, 0x00010000,
844	0x9164, 0xffffffff, 0x00030002,
845	0x9168, 0xffffffff, 0x00040007,
846	0x916c, 0xffffffff, 0x00060005,
847	0x9170, 0xffffffff, 0x00090008,
848	0x9174, 0xffffffff, 0x00020001,
849	0x9178, 0xffffffff, 0x00040003,
850	0x917c, 0xffffffff, 0x00000007,
851	0x9180, 0xffffffff, 0x00060005,
852	0x9184, 0xffffffff, 0x00090008,
853	0x9188, 0xffffffff, 0x00030002,
854	0x918c, 0xffffffff, 0x00050004,
855	0x9190, 0xffffffff, 0x00000008,
856	0x9194, 0xffffffff, 0x00070006,
857	0x9198, 0xffffffff, 0x000a0009,
858	0x919c, 0xffffffff, 0x00040003,
859	0x91a0, 0xffffffff, 0x00060005,
860	0x91a4, 0xffffffff, 0x00000009,
861	0x91a8, 0xffffffff, 0x00080007,
862	0x91ac, 0xffffffff, 0x000b000a,
863	0x91b0, 0xffffffff, 0x00050004,
864	0x91b4, 0xffffffff, 0x00070006,
865	0x91b8, 0xffffffff, 0x0008000b,
866	0x91bc, 0xffffffff, 0x000a0009,
867	0x91c0, 0xffffffff, 0x000d000c,
868	0x9200, 0xffffffff, 0x00090008,
869	0x9204, 0xffffffff, 0x000b000a,
870	0x9208, 0xffffffff, 0x000c000f,
871	0x920c, 0xffffffff, 0x000e000d,
872	0x9210, 0xffffffff, 0x00110010,
873	0x9214, 0xffffffff, 0x000a0009,
874	0x9218, 0xffffffff, 0x000c000b,
875	0x921c, 0xffffffff, 0x0000000f,
876	0x9220, 0xffffffff, 0x000e000d,
877	0x9224, 0xffffffff, 0x00110010,
878	0x9228, 0xffffffff, 0x000b000a,
879	0x922c, 0xffffffff, 0x000d000c,
880	0x9230, 0xffffffff, 0x00000010,
881	0x9234, 0xffffffff, 0x000f000e,
882	0x9238, 0xffffffff, 0x00120011,
883	0x923c, 0xffffffff, 0x000c000b,
884	0x9240, 0xffffffff, 0x000e000d,
885	0x9244, 0xffffffff, 0x00000011,
886	0x9248, 0xffffffff, 0x0010000f,
887	0x924c, 0xffffffff, 0x00130012,
888	0x9250, 0xffffffff, 0x000d000c,
889	0x9254, 0xffffffff, 0x000f000e,
890	0x9258, 0xffffffff, 0x00100013,
891	0x925c, 0xffffffff, 0x00120011,
892	0x9260, 0xffffffff, 0x00150014,
893	0x9150, 0xffffffff, 0x96940200,
894	0x8708, 0xffffffff, 0x00900100,
895	0xc478, 0xffffffff, 0x00000080,
896	0xc404, 0xffffffff, 0x0020003f,
897	0x30, 0xffffffff, 0x0000001c,
898	0x34, 0x000f0000, 0x000f0000,
899	0x160c, 0xffffffff, 0x00000100,
900	0x1024, 0xffffffff, 0x00000100,
901	0x102c, 0x00000101, 0x00000000,
902	0x20a8, 0xffffffff, 0x00000104,
903	0x264c, 0x000c0000, 0x000c0000,
904	0x2648, 0x000c0000, 0x000c0000,
905	0x55e4, 0xff000fff, 0x00000100,
906	0x55e8, 0x00000001, 0x00000001,
907	0x2f50, 0x00000001, 0x00000001,
908	0x30cc, 0xc0000fff, 0x00000104,
909	0xc1e4, 0x00000001, 0x00000001,
910	0xd0c0, 0xfffffff0, 0x00000100,
911	0xd8c0, 0xfffffff0, 0x00000100
912};
913
/*
 * Oland register init sequence, programmed by si_init_golden_registers()
 * through radeon_program_register_sequence() after the Oland golden and
 * golden-rlc tables.
 *
 * The array is a flat list of u32 words laid out three per row.
 * NOTE(review): rows look like (register offset, mask, value) and the name
 * suggests clock-gating (MGCG/CGCG) setup - confirm against
 * radeon_program_register_sequence().  Entries are applied in array order
 * and some offsets (e.g. 0x802c) appear more than once, so do not sort or
 * de-duplicate this table.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
993
/*
 * Hainan register init sequence, programmed last for CHIP_HAINAN by
 * si_init_golden_registers() through radeon_program_register_sequence().
 *
 * The array is a flat list of u32 words laid out three per row.
 * NOTE(review): rows look like (register offset, mask, value) and the name
 * suggests clock-gating (MGCG/CGCG) setup - confirm against
 * radeon_program_register_sequence().  Entries are applied in array order
 * and some offsets (e.g. 0x802c) appear more than once, so do not sort or
 * de-duplicate this table.
 *
 * Unlike the Oland table, this one has no rows for offsets 0x102c, 0x55e4
 * and 0x55e8.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1070
1071static u32 verde_pg_init[] =
1072{
1073	0x353c, 0xffffffff, 0x40000,
1074	0x3538, 0xffffffff, 0x200010ff,
1075	0x353c, 0xffffffff, 0x0,
1076	0x353c, 0xffffffff, 0x0,
1077	0x353c, 0xffffffff, 0x0,
1078	0x353c, 0xffffffff, 0x0,
1079	0x353c, 0xffffffff, 0x0,
1080	0x353c, 0xffffffff, 0x7007,
1081	0x3538, 0xffffffff, 0x300010ff,
1082	0x353c, 0xffffffff, 0x0,
1083	0x353c, 0xffffffff, 0x0,
1084	0x353c, 0xffffffff, 0x0,
1085	0x353c, 0xffffffff, 0x0,
1086	0x353c, 0xffffffff, 0x0,
1087	0x353c, 0xffffffff, 0x400000,
1088	0x3538, 0xffffffff, 0x100010ff,
1089	0x353c, 0xffffffff, 0x0,
1090	0x353c, 0xffffffff, 0x0,
1091	0x353c, 0xffffffff, 0x0,
1092	0x353c, 0xffffffff, 0x0,
1093	0x353c, 0xffffffff, 0x0,
1094	0x353c, 0xffffffff, 0x120200,
1095	0x3538, 0xffffffff, 0x500010ff,
1096	0x353c, 0xffffffff, 0x0,
1097	0x353c, 0xffffffff, 0x0,
1098	0x353c, 0xffffffff, 0x0,
1099	0x353c, 0xffffffff, 0x0,
1100	0x353c, 0xffffffff, 0x0,
1101	0x353c, 0xffffffff, 0x1e1e16,
1102	0x3538, 0xffffffff, 0x600010ff,
1103	0x353c, 0xffffffff, 0x0,
1104	0x353c, 0xffffffff, 0x0,
1105	0x353c, 0xffffffff, 0x0,
1106	0x353c, 0xffffffff, 0x0,
1107	0x353c, 0xffffffff, 0x0,
1108	0x353c, 0xffffffff, 0x171f1e,
1109	0x3538, 0xffffffff, 0x700010ff,
1110	0x353c, 0xffffffff, 0x0,
1111	0x353c, 0xffffffff, 0x0,
1112	0x353c, 0xffffffff, 0x0,
1113	0x353c, 0xffffffff, 0x0,
1114	0x353c, 0xffffffff, 0x0,
1115	0x353c, 0xffffffff, 0x0,
1116	0x3538, 0xffffffff, 0x9ff,
1117	0x3500, 0xffffffff, 0x0,
1118	0x3504, 0xffffffff, 0x10000800,
1119	0x3504, 0xffffffff, 0xf,
1120	0x3504, 0xffffffff, 0xf,
1121	0x3500, 0xffffffff, 0x4,
1122	0x3504, 0xffffffff, 0x1000051e,
1123	0x3504, 0xffffffff, 0xffff,
1124	0x3504, 0xffffffff, 0xffff,
1125	0x3500, 0xffffffff, 0x8,
1126	0x3504, 0xffffffff, 0x80500,
1127	0x3500, 0xffffffff, 0x12,
1128	0x3504, 0xffffffff, 0x9050c,
1129	0x3500, 0xffffffff, 0x1d,
1130	0x3504, 0xffffffff, 0xb052c,
1131	0x3500, 0xffffffff, 0x2a,
1132	0x3504, 0xffffffff, 0x1053e,
1133	0x3500, 0xffffffff, 0x2d,
1134	0x3504, 0xffffffff, 0x10546,
1135	0x3500, 0xffffffff, 0x30,
1136	0x3504, 0xffffffff, 0xa054e,
1137	0x3500, 0xffffffff, 0x3c,
1138	0x3504, 0xffffffff, 0x1055f,
1139	0x3500, 0xffffffff, 0x3f,
1140	0x3504, 0xffffffff, 0x10567,
1141	0x3500, 0xffffffff, 0x42,
1142	0x3504, 0xffffffff, 0x1056f,
1143	0x3500, 0xffffffff, 0x45,
1144	0x3504, 0xffffffff, 0x10572,
1145	0x3500, 0xffffffff, 0x48,
1146	0x3504, 0xffffffff, 0x20575,
1147	0x3500, 0xffffffff, 0x4c,
1148	0x3504, 0xffffffff, 0x190801,
1149	0x3500, 0xffffffff, 0x67,
1150	0x3504, 0xffffffff, 0x1082a,
1151	0x3500, 0xffffffff, 0x6a,
1152	0x3504, 0xffffffff, 0x1b082d,
1153	0x3500, 0xffffffff, 0x87,
1154	0x3504, 0xffffffff, 0x310851,
1155	0x3500, 0xffffffff, 0xba,
1156	0x3504, 0xffffffff, 0x891,
1157	0x3500, 0xffffffff, 0xbc,
1158	0x3504, 0xffffffff, 0x893,
1159	0x3500, 0xffffffff, 0xbe,
1160	0x3504, 0xffffffff, 0x20895,
1161	0x3500, 0xffffffff, 0xc2,
1162	0x3504, 0xffffffff, 0x20899,
1163	0x3500, 0xffffffff, 0xc6,
1164	0x3504, 0xffffffff, 0x2089d,
1165	0x3500, 0xffffffff, 0xca,
1166	0x3504, 0xffffffff, 0x8a1,
1167	0x3500, 0xffffffff, 0xcc,
1168	0x3504, 0xffffffff, 0x8a3,
1169	0x3500, 0xffffffff, 0xce,
1170	0x3504, 0xffffffff, 0x308a5,
1171	0x3500, 0xffffffff, 0xd3,
1172	0x3504, 0xffffffff, 0x6d08cd,
1173	0x3500, 0xffffffff, 0x142,
1174	0x3504, 0xffffffff, 0x2000095a,
1175	0x3504, 0xffffffff, 0x1,
1176	0x3500, 0xffffffff, 0x144,
1177	0x3504, 0xffffffff, 0x301f095b,
1178	0x3500, 0xffffffff, 0x165,
1179	0x3504, 0xffffffff, 0xc094d,
1180	0x3500, 0xffffffff, 0x173,
1181	0x3504, 0xffffffff, 0xf096d,
1182	0x3500, 0xffffffff, 0x184,
1183	0x3504, 0xffffffff, 0x15097f,
1184	0x3500, 0xffffffff, 0x19b,
1185	0x3504, 0xffffffff, 0xc0998,
1186	0x3500, 0xffffffff, 0x1a9,
1187	0x3504, 0xffffffff, 0x409a7,
1188	0x3500, 0xffffffff, 0x1af,
1189	0x3504, 0xffffffff, 0xcdc,
1190	0x3500, 0xffffffff, 0x1b1,
1191	0x3504, 0xffffffff, 0x800,
1192	0x3508, 0xffffffff, 0x6c9b2000,
1193	0x3510, 0xfc00, 0x2000,
1194	0x3544, 0xffffffff, 0xfc0,
1195	0x28d4, 0x00000100, 0x100
1196};
1197
1198static void si_init_golden_registers(struct radeon_device *rdev)
1199{
1200	switch (rdev->family) {
1201	case CHIP_TAHITI:
1202		radeon_program_register_sequence(rdev,
1203						 tahiti_golden_registers,
1204						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1205		radeon_program_register_sequence(rdev,
1206						 tahiti_golden_rlc_registers,
1207						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1208		radeon_program_register_sequence(rdev,
1209						 tahiti_mgcg_cgcg_init,
1210						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1211		radeon_program_register_sequence(rdev,
1212						 tahiti_golden_registers2,
1213						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1214		break;
1215	case CHIP_PITCAIRN:
1216		radeon_program_register_sequence(rdev,
1217						 pitcairn_golden_registers,
1218						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1219		radeon_program_register_sequence(rdev,
1220						 pitcairn_golden_rlc_registers,
1221						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1222		radeon_program_register_sequence(rdev,
1223						 pitcairn_mgcg_cgcg_init,
1224						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1225		break;
1226	case CHIP_VERDE:
1227		radeon_program_register_sequence(rdev,
1228						 verde_golden_registers,
1229						 (const u32)ARRAY_SIZE(verde_golden_registers));
1230		radeon_program_register_sequence(rdev,
1231						 verde_golden_rlc_registers,
1232						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1233		radeon_program_register_sequence(rdev,
1234						 verde_mgcg_cgcg_init,
1235						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1236		radeon_program_register_sequence(rdev,
1237						 verde_pg_init,
1238						 (const u32)ARRAY_SIZE(verde_pg_init));
1239		break;
1240	case CHIP_OLAND:
1241		radeon_program_register_sequence(rdev,
1242						 oland_golden_registers,
1243						 (const u32)ARRAY_SIZE(oland_golden_registers));
1244		radeon_program_register_sequence(rdev,
1245						 oland_golden_rlc_registers,
1246						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1247		radeon_program_register_sequence(rdev,
1248						 oland_mgcg_cgcg_init,
1249						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1250		break;
1251	case CHIP_HAINAN:
1252		radeon_program_register_sequence(rdev,
1253						 hainan_golden_registers,
1254						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1255		radeon_program_register_sequence(rdev,
1256						 hainan_golden_registers2,
1257						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1258		radeon_program_register_sequence(rdev,
1259						 hainan_mgcg_cgcg_init,
1260						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1261		break;
1262	default:
1263		break;
1264	}
1265}
1266
1267/**
1268 * si_get_allowed_info_register - fetch the register for the info ioctl
1269 *
1270 * @rdev: radeon_device pointer
1271 * @reg: register offset in bytes
1272 * @val: register value
1273 *
1274 * Returns 0 for success or -EINVAL for an invalid register
1275 *
1276 */
1277int si_get_allowed_info_register(struct radeon_device *rdev,
1278				 u32 reg, u32 *val)
1279{
1280	switch (reg) {
1281	case GRBM_STATUS:
1282	case GRBM_STATUS2:
1283	case GRBM_STATUS_SE0:
1284	case GRBM_STATUS_SE1:
1285	case SRBM_STATUS:
1286	case SRBM_STATUS2:
1287	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1288	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1289	case UVD_STATUS:
1290		*val = RREG32(reg);
1291		return 0;
1292	default:
1293		return -EINVAL;
1294	}
1295}
1296
/*
 * Fixed clock constants used by si_get_xclk(): TCLK is one tenth of
 * PCIE_BUS_CLK and is what si_get_xclk() returns when MUX_TCLK_TO_XCLK is set.
 * NOTE(review): presumably expressed in the same units as
 * rdev->clock.spll.reference_freq - confirm.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1299
1300/**
1301 * si_get_xclk - get the xclk
1302 *
1303 * @rdev: radeon_device pointer
1304 *
1305 * Returns the reference clock used by the gfx engine
1306 * (SI).
1307 */
1308u32 si_get_xclk(struct radeon_device *rdev)
1309{
1310        u32 reference_clock = rdev->clock.spll.reference_freq;
1311	u32 tmp;
1312
1313	tmp = RREG32(CG_CLKPIN_CNTL_2);
1314	if (tmp & MUX_TCLK_TO_XCLK)
1315		return TCLK;
1316
1317	tmp = RREG32(CG_CLKPIN_CNTL);
1318	if (tmp & XTALIN_DIVIDE)
1319		return reference_clock / 4;
1320
1321	return reference_clock;
1322}
1323
1324/* get temperature in millidegrees */
1325int si_get_temp(struct radeon_device *rdev)
1326{
1327	u32 temp;
1328	int actual_temp = 0;
1329
1330	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1331		CTF_TEMP_SHIFT;
1332
1333	if (temp & 0x200)
1334		actual_temp = 255;
1335	else
1336		actual_temp = temp & 0x1ff;
1337
1338	actual_temp = (actual_temp * 1000);
1339
1340	return actual_temp;
1341}
1342
/* Number of {index, data} rows in each of the *_io_mc_regs tables below. */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC IO debug register settings for Tahiti, used by si_mc_load_microcode()
 * when the old-style (headerless) MC firmware is in use.  For each row,
 * element 0 is written to MC_SEQ_IO_DEBUG_INDEX and element 1 to
 * MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1383
/*
 * MC IO debug register settings for Pitcairn, used by
 * si_mc_load_microcode() when the old-style (headerless) MC firmware is in
 * use.  For each row, element 0 is written to MC_SEQ_IO_DEBUG_INDEX and
 * element 1 to MC_SEQ_IO_DEBUG_DATA.  The last row (index 0x9f) carries the
 * Pitcairn-specific value.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1422
/*
 * MC IO debug register settings for Verde, used by si_mc_load_microcode()
 * when the old-style (headerless) MC firmware is in use; that function also
 * falls back to this table for families it does not list explicitly.  For
 * each row, element 0 is written to MC_SEQ_IO_DEBUG_INDEX and element 1 to
 * MC_SEQ_IO_DEBUG_DATA.  The last row (index 0x9f) carries the
 * Verde-specific value.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1461
/*
 * MC IO debug register settings for Oland, used by si_mc_load_microcode()
 * when the old-style (headerless) MC firmware is in use.  For each row,
 * element 0 is written to MC_SEQ_IO_DEBUG_INDEX and element 1 to
 * MC_SEQ_IO_DEBUG_DATA.  The last row (index 0x9f) carries the
 * Oland-specific value.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1500
/*
 * MC IO debug register settings for Hainan, used by si_mc_load_microcode()
 * when the old-style (headerless) MC firmware is in use.  For each row,
 * element 0 is written to MC_SEQ_IO_DEBUG_INDEX and element 1 to
 * MC_SEQ_IO_DEBUG_DATA.  The last row (index 0x9f) carries the
 * Hainan-specific value.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1539
1540/* ucode loading */
1541int si_mc_load_microcode(struct radeon_device *rdev)
1542{
1543	const __be32 *fw_data = NULL;
1544	const __le32 *new_fw_data = NULL;
1545	u32 running, blackout = 0;
1546	u32 *io_mc_regs = NULL;
1547	const __le32 *new_io_mc_regs = NULL;
1548	int i, regs_size, ucode_size;
1549
1550	if (!rdev->mc_fw)
1551		return -EINVAL;
1552
1553	if (rdev->new_fw) {
1554		const struct mc_firmware_header_v1_0 *hdr =
1555			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1556
1557		radeon_ucode_print_mc_hdr(&hdr->header);
1558		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1559		new_io_mc_regs = (const __le32 *)
1560			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1561		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1562		new_fw_data = (const __le32 *)
1563			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1564	} else {
1565		ucode_size = rdev->mc_fw->size / 4;
1566
1567		switch (rdev->family) {
1568		case CHIP_TAHITI:
1569			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1570			regs_size = TAHITI_IO_MC_REGS_SIZE;
1571			break;
1572		case CHIP_PITCAIRN:
1573			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1574			regs_size = TAHITI_IO_MC_REGS_SIZE;
1575			break;
1576		case CHIP_VERDE:
1577		default:
1578			io_mc_regs = (u32 *)&verde_io_mc_regs;
1579			regs_size = TAHITI_IO_MC_REGS_SIZE;
1580			break;
1581		case CHIP_OLAND:
1582			io_mc_regs = (u32 *)&oland_io_mc_regs;
1583			regs_size = TAHITI_IO_MC_REGS_SIZE;
1584			break;
1585		case CHIP_HAINAN:
1586			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1587			regs_size = TAHITI_IO_MC_REGS_SIZE;
1588			break;
1589		}
1590		fw_data = (const __be32 *)rdev->mc_fw->data;
1591	}
1592
1593	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1594
1595	if (running == 0) {
1596		if (running) {
1597			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1598			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1599		}
1600
1601		/* reset the engine and set to writable */
1602		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1603		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1604
1605		/* load mc io regs */
1606		for (i = 0; i < regs_size; i++) {
1607			if (rdev->new_fw) {
1608				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1609				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1610			} else {
1611				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1612				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1613			}
1614		}
1615		/* load the MC ucode */
1616		for (i = 0; i < ucode_size; i++) {
1617			if (rdev->new_fw)
1618				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1619			else
1620				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1621		}
1622
1623		/* put the engine back into the active state */
1624		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1625		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1626		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1627
1628		/* wait for training to complete */
1629		for (i = 0; i < rdev->usec_timeout; i++) {
1630			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1631				break;
1632			udelay(1);
1633		}
1634		for (i = 0; i < rdev->usec_timeout; i++) {
1635			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1636				break;
1637			udelay(1);
1638		}
1639
1640		if (running)
1641			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1642	}
1643
1644	return 0;
1645}
1646
1647static int si_init_microcode(struct radeon_device *rdev)
1648{
1649	const char *chip_name;
1650	const char *new_chip_name;
1651	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1652	size_t smc_req_size, mc2_req_size;
1653	char fw_name[30];
1654	int err;
1655	int new_fw = 0;
1656
1657	DRM_DEBUG("\n");
1658
1659	switch (rdev->family) {
1660	case CHIP_TAHITI:
1661		chip_name = "TAHITI";
1662		new_chip_name = "tahiti";
1663		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1664		me_req_size = SI_PM4_UCODE_SIZE * 4;
1665		ce_req_size = SI_CE_UCODE_SIZE * 4;
1666		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1667		mc_req_size = SI_MC_UCODE_SIZE * 4;
1668		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1669		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1670		break;
1671	case CHIP_PITCAIRN:
1672		chip_name = "PITCAIRN";
1673		new_chip_name = "pitcairn";
1674		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1675		me_req_size = SI_PM4_UCODE_SIZE * 4;
1676		ce_req_size = SI_CE_UCODE_SIZE * 4;
1677		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1678		mc_req_size = SI_MC_UCODE_SIZE * 4;
1679		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1680		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1681		break;
1682	case CHIP_VERDE:
1683		chip_name = "VERDE";
1684		new_chip_name = "verde";
1685		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1686		me_req_size = SI_PM4_UCODE_SIZE * 4;
1687		ce_req_size = SI_CE_UCODE_SIZE * 4;
1688		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1689		mc_req_size = SI_MC_UCODE_SIZE * 4;
1690		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1691		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1692		break;
1693	case CHIP_OLAND:
1694		chip_name = "OLAND";
1695		new_chip_name = "oland";
1696		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1697		me_req_size = SI_PM4_UCODE_SIZE * 4;
1698		ce_req_size = SI_CE_UCODE_SIZE * 4;
1699		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1700		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1701		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1702		break;
1703	case CHIP_HAINAN:
1704		chip_name = "HAINAN";
1705		new_chip_name = "hainan";
1706		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1707		me_req_size = SI_PM4_UCODE_SIZE * 4;
1708		ce_req_size = SI_CE_UCODE_SIZE * 4;
1709		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1710		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1711		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1712		break;
1713	default: BUG();
1714	}
1715
1716	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1717
1718	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1719	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1720	if (err) {
1721		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1722		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1723		if (err)
1724			goto out;
1725		if (rdev->pfp_fw->size != pfp_req_size) {
1726			printk(KERN_ERR
1727			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1728			       rdev->pfp_fw->size, fw_name);
1729			err = -EINVAL;
1730			goto out;
1731		}
1732	} else {
1733		err = radeon_ucode_validate(rdev->pfp_fw);
1734		if (err) {
1735			printk(KERN_ERR
1736			       "si_cp: validation failed for firmware \"%s\"\n",
1737			       fw_name);
1738			goto out;
1739		} else {
1740			new_fw++;
1741		}
1742	}
1743
1744	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1745	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1746	if (err) {
1747		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1748		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1749		if (err)
1750			goto out;
1751		if (rdev->me_fw->size != me_req_size) {
1752			printk(KERN_ERR
1753			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1754			       rdev->me_fw->size, fw_name);
1755			err = -EINVAL;
1756		}
1757	} else {
1758		err = radeon_ucode_validate(rdev->me_fw);
1759		if (err) {
1760			printk(KERN_ERR
1761			       "si_cp: validation failed for firmware \"%s\"\n",
1762			       fw_name);
1763			goto out;
1764		} else {
1765			new_fw++;
1766		}
1767	}
1768
1769	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1770	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1771	if (err) {
1772		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1773		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1774		if (err)
1775			goto out;
1776		if (rdev->ce_fw->size != ce_req_size) {
1777			printk(KERN_ERR
1778			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1779			       rdev->ce_fw->size, fw_name);
1780			err = -EINVAL;
1781		}
1782	} else {
1783		err = radeon_ucode_validate(rdev->ce_fw);
1784		if (err) {
1785			printk(KERN_ERR
1786			       "si_cp: validation failed for firmware \"%s\"\n",
1787			       fw_name);
1788			goto out;
1789		} else {
1790			new_fw++;
1791		}
1792	}
1793
1794	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1795	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1796	if (err) {
1797		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1798		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1799		if (err)
1800			goto out;
1801		if (rdev->rlc_fw->size != rlc_req_size) {
1802			printk(KERN_ERR
1803			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1804			       rdev->rlc_fw->size, fw_name);
1805			err = -EINVAL;
1806		}
1807	} else {
1808		err = radeon_ucode_validate(rdev->rlc_fw);
1809		if (err) {
1810			printk(KERN_ERR
1811			       "si_cp: validation failed for firmware \"%s\"\n",
1812			       fw_name);
1813			goto out;
1814		} else {
1815			new_fw++;
1816		}
1817	}
1818
1819	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1820	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1821	if (err) {
1822		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1823		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1824		if (err) {
1825			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1826			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1827			if (err)
1828				goto out;
1829		}
1830		if ((rdev->mc_fw->size != mc_req_size) &&
1831		    (rdev->mc_fw->size != mc2_req_size)) {
1832			printk(KERN_ERR
1833			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1834			       rdev->mc_fw->size, fw_name);
1835			err = -EINVAL;
1836		}
1837		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1838	} else {
1839		err = radeon_ucode_validate(rdev->mc_fw);
1840		if (err) {
1841			printk(KERN_ERR
1842			       "si_cp: validation failed for firmware \"%s\"\n",
1843			       fw_name);
1844			goto out;
1845		} else {
1846			new_fw++;
1847		}
1848	}
1849
1850	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1851	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1852	if (err) {
1853		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1854		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1855		if (err) {
1856			printk(KERN_ERR
1857			       "smc: error loading firmware \"%s\"\n",
1858			       fw_name);
1859			release_firmware(rdev->smc_fw);
1860			rdev->smc_fw = NULL;
1861			err = 0;
1862		} else if (rdev->smc_fw->size != smc_req_size) {
1863			printk(KERN_ERR
1864			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1865			       rdev->smc_fw->size, fw_name);
1866			err = -EINVAL;
1867		}
1868	} else {
1869		err = radeon_ucode_validate(rdev->smc_fw);
1870		if (err) {
1871			printk(KERN_ERR
1872			       "si_cp: validation failed for firmware \"%s\"\n",
1873			       fw_name);
1874			goto out;
1875		} else {
1876			new_fw++;
1877		}
1878	}
1879
1880	if (new_fw == 0) {
1881		rdev->new_fw = false;
1882	} else if (new_fw < 6) {
1883		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1884		err = -EINVAL;
1885	} else {
1886		rdev->new_fw = true;
1887	}
1888out:
1889	if (err) {
1890		if (err != -EINVAL)
1891			printk(KERN_ERR
1892			       "si_cp: Failed to load firmware \"%s\"\n",
1893			       fw_name);
1894		release_firmware(rdev->pfp_fw);
1895		rdev->pfp_fw = NULL;
1896		release_firmware(rdev->me_fw);
1897		rdev->me_fw = NULL;
1898		release_firmware(rdev->ce_fw);
1899		rdev->ce_fw = NULL;
1900		release_firmware(rdev->rlc_fw);
1901		rdev->rlc_fw = NULL;
1902		release_firmware(rdev->mc_fw);
1903		rdev->mc_fw = NULL;
1904		release_firmware(rdev->smc_fw);
1905		rdev->smc_fw = NULL;
1906	}
1907	return err;
1908}
1909
1910/* watermark setup */
1911static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1912				   struct radeon_crtc *radeon_crtc,
1913				   struct drm_display_mode *mode,
1914				   struct drm_display_mode *other_mode)
1915{
1916	u32 tmp, buffer_alloc, i;
1917	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1918	/*
1919	 * Line Buffer Setup
1920	 * There are 3 line buffers, each one shared by 2 display controllers.
1921	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1922	 * the display controllers.  The paritioning is done via one of four
1923	 * preset allocations specified in bits 21:20:
1924	 *  0 - half lb
1925	 *  2 - whole lb, other crtc must be disabled
1926	 */
1927	/* this can get tricky if we have two large displays on a paired group
1928	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1929	 * non-linked crtcs for maximum line buffer allocation.
1930	 */
1931	if (radeon_crtc->base.enabled && mode) {
1932		if (other_mode) {
1933			tmp = 0; /* 1/2 */
1934			buffer_alloc = 1;
1935		} else {
1936			tmp = 2; /* whole */
1937			buffer_alloc = 2;
1938		}
1939	} else {
1940		tmp = 0;
1941		buffer_alloc = 0;
1942	}
1943
1944	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1945	       DC_LB_MEMORY_CONFIG(tmp));
1946
1947	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1948	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1949	for (i = 0; i < rdev->usec_timeout; i++) {
1950		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1951		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1952			break;
1953		udelay(1);
1954	}
1955
1956	if (radeon_crtc->base.enabled && mode) {
1957		switch (tmp) {
1958		case 0:
1959		default:
1960			return 4096 * 2;
1961		case 2:
1962			return 8192 * 2;
1963		}
1964	}
1965
1966	/* controller not enabled, so no lb used */
1967	return 0;
1968}
1969
1970static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1971{
1972	u32 tmp = RREG32(MC_SHARED_CHMAP);
1973
1974	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1975	case 0:
1976	default:
1977		return 1;
1978	case 1:
1979		return 2;
1980	case 2:
1981		return 4;
1982	case 3:
1983		return 8;
1984	case 4:
1985		return 3;
1986	case 5:
1987		return 6;
1988	case 6:
1989		return 10;
1990	case 7:
1991		return 12;
1992	case 8:
1993		return 16;
1994	}
1995}
1996
1997struct dce6_wm_params {
1998	u32 dram_channels; /* number of dram channels */
1999	u32 yclk;          /* bandwidth per dram data pin in kHz */
2000	u32 sclk;          /* engine clock in kHz */
2001	u32 disp_clk;      /* display clock in kHz */
2002	u32 src_width;     /* viewport width */
2003	u32 active_time;   /* active display time in ns */
2004	u32 blank_time;    /* blank time in ns */
2005	bool interlaced;    /* mode is interlaced */
2006	fixed20_12 vsc;    /* vertical scale ratio */
2007	u32 num_heads;     /* number of active crtcs */
2008	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2009	u32 lb_size;       /* line buffer allocated to pipe */
2010	u32 vtaps;         /* vertical scaler taps */
2011};
2012
2013static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2014{
2015	/* Calculate raw DRAM Bandwidth */
2016	fixed20_12 dram_efficiency; /* 0.7 */
2017	fixed20_12 yclk, dram_channels, bandwidth;
2018	fixed20_12 a;
2019
2020	a.full = dfixed_const(1000);
2021	yclk.full = dfixed_const(wm->yclk);
2022	yclk.full = dfixed_div(yclk, a);
2023	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2024	a.full = dfixed_const(10);
2025	dram_efficiency.full = dfixed_const(7);
2026	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2027	bandwidth.full = dfixed_mul(dram_channels, yclk);
2028	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2029
2030	return dfixed_trunc(bandwidth);
2031}
2032
2033static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2034{
2035	/* Calculate DRAM Bandwidth and the part allocated to display. */
2036	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2037	fixed20_12 yclk, dram_channels, bandwidth;
2038	fixed20_12 a;
2039
2040	a.full = dfixed_const(1000);
2041	yclk.full = dfixed_const(wm->yclk);
2042	yclk.full = dfixed_div(yclk, a);
2043	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2044	a.full = dfixed_const(10);
2045	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2046	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2047	bandwidth.full = dfixed_mul(dram_channels, yclk);
2048	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2049
2050	return dfixed_trunc(bandwidth);
2051}
2052
2053static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2054{
2055	/* Calculate the display Data return Bandwidth */
2056	fixed20_12 return_efficiency; /* 0.8 */
2057	fixed20_12 sclk, bandwidth;
2058	fixed20_12 a;
2059
2060	a.full = dfixed_const(1000);
2061	sclk.full = dfixed_const(wm->sclk);
2062	sclk.full = dfixed_div(sclk, a);
2063	a.full = dfixed_const(10);
2064	return_efficiency.full = dfixed_const(8);
2065	return_efficiency.full = dfixed_div(return_efficiency, a);
2066	a.full = dfixed_const(32);
2067	bandwidth.full = dfixed_mul(a, sclk);
2068	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2069
2070	return dfixed_trunc(bandwidth);
2071}
2072
2073static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2074{
2075	return 32;
2076}
2077
2078static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2079{
2080	/* Calculate the DMIF Request Bandwidth */
2081	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2082	fixed20_12 disp_clk, sclk, bandwidth;
2083	fixed20_12 a, b1, b2;
2084	u32 min_bandwidth;
2085
2086	a.full = dfixed_const(1000);
2087	disp_clk.full = dfixed_const(wm->disp_clk);
2088	disp_clk.full = dfixed_div(disp_clk, a);
2089	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2090	b1.full = dfixed_mul(a, disp_clk);
2091
2092	a.full = dfixed_const(1000);
2093	sclk.full = dfixed_const(wm->sclk);
2094	sclk.full = dfixed_div(sclk, a);
2095	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2096	b2.full = dfixed_mul(a, sclk);
2097
2098	a.full = dfixed_const(10);
2099	disp_clk_request_efficiency.full = dfixed_const(8);
2100	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2101
2102	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2103
2104	a.full = dfixed_const(min_bandwidth);
2105	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2106
2107	return dfixed_trunc(bandwidth);
2108}
2109
2110static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2111{
2112	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2113	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2114	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2115	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2116
2117	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2118}
2119
2120static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2121{
2122	/* Calculate the display mode Average Bandwidth
2123	 * DisplayMode should contain the source and destination dimensions,
2124	 * timing, etc.
2125	 */
2126	fixed20_12 bpp;
2127	fixed20_12 line_time;
2128	fixed20_12 src_width;
2129	fixed20_12 bandwidth;
2130	fixed20_12 a;
2131
2132	a.full = dfixed_const(1000);
2133	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2134	line_time.full = dfixed_div(line_time, a);
2135	bpp.full = dfixed_const(wm->bytes_per_pixel);
2136	src_width.full = dfixed_const(wm->src_width);
2137	bandwidth.full = dfixed_mul(src_width, bpp);
2138	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2139	bandwidth.full = dfixed_div(bandwidth, line_time);
2140
2141	return dfixed_trunc(bandwidth);
2142}
2143
2144static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2145{
2146	/* First calcualte the latency in ns */
2147	u32 mc_latency = 2000; /* 2000 ns. */
2148	u32 available_bandwidth = dce6_available_bandwidth(wm);
2149	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2150	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2151	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2152	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2153		(wm->num_heads * cursor_line_pair_return_time);
2154	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2155	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2156	u32 tmp, dmif_size = 12288;
2157	fixed20_12 a, b, c;
2158
2159	if (wm->num_heads == 0)
2160		return 0;
2161
2162	a.full = dfixed_const(2);
2163	b.full = dfixed_const(1);
2164	if ((wm->vsc.full > a.full) ||
2165	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2166	    (wm->vtaps >= 5) ||
2167	    ((wm->vsc.full >= a.full) && wm->interlaced))
2168		max_src_lines_per_dst_line = 4;
2169	else
2170		max_src_lines_per_dst_line = 2;
2171
2172	a.full = dfixed_const(available_bandwidth);
2173	b.full = dfixed_const(wm->num_heads);
2174	a.full = dfixed_div(a, b);
2175
2176	b.full = dfixed_const(mc_latency + 512);
2177	c.full = dfixed_const(wm->disp_clk);
2178	b.full = dfixed_div(b, c);
2179
2180	c.full = dfixed_const(dmif_size);
2181	b.full = dfixed_div(c, b);
2182
2183	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2184
2185	b.full = dfixed_const(1000);
2186	c.full = dfixed_const(wm->disp_clk);
2187	b.full = dfixed_div(c, b);
2188	c.full = dfixed_const(wm->bytes_per_pixel);
2189	b.full = dfixed_mul(b, c);
2190
2191	lb_fill_bw = min(tmp, dfixed_trunc(b));
2192
2193	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2194	b.full = dfixed_const(1000);
2195	c.full = dfixed_const(lb_fill_bw);
2196	b.full = dfixed_div(c, b);
2197	a.full = dfixed_div(a, b);
2198	line_fill_time = dfixed_trunc(a);
2199
2200	if (line_fill_time < wm->active_time)
2201		return latency;
2202	else
2203		return latency + (line_fill_time - wm->active_time);
2204
2205}
2206
2207static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2208{
2209	if (dce6_average_bandwidth(wm) <=
2210	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2211		return true;
2212	else
2213		return false;
2214};
2215
2216static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2217{
2218	if (dce6_average_bandwidth(wm) <=
2219	    (dce6_available_bandwidth(wm) / wm->num_heads))
2220		return true;
2221	else
2222		return false;
2223};
2224
2225static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2226{
2227	u32 lb_partitions = wm->lb_size / wm->src_width;
2228	u32 line_time = wm->active_time + wm->blank_time;
2229	u32 latency_tolerant_lines;
2230	u32 latency_hiding;
2231	fixed20_12 a;
2232
2233	a.full = dfixed_const(1);
2234	if (wm->vsc.full > a.full)
2235		latency_tolerant_lines = 1;
2236	else {
2237		if (lb_partitions <= (wm->vtaps + 1))
2238			latency_tolerant_lines = 1;
2239		else
2240			latency_tolerant_lines = 2;
2241	}
2242
2243	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2244
2245	if (dce6_latency_watermark(wm) <= latency_hiding)
2246		return true;
2247	else
2248		return false;
2249}
2250
2251static void dce6_program_watermarks(struct radeon_device *rdev,
2252					 struct radeon_crtc *radeon_crtc,
2253					 u32 lb_size, u32 num_heads)
2254{
2255	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2256	struct dce6_wm_params wm_low, wm_high;
2257	u32 dram_channels;
2258	u32 pixel_period;
2259	u32 line_time = 0;
2260	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2261	u32 priority_a_mark = 0, priority_b_mark = 0;
2262	u32 priority_a_cnt = PRIORITY_OFF;
2263	u32 priority_b_cnt = PRIORITY_OFF;
2264	u32 tmp, arb_control3;
2265	fixed20_12 a, b, c;
2266
2267	if (radeon_crtc->base.enabled && num_heads && mode) {
2268		pixel_period = 1000000 / (u32)mode->clock;
2269		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2270		priority_a_cnt = 0;
2271		priority_b_cnt = 0;
2272
2273		if (rdev->family == CHIP_ARUBA)
2274			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2275		else
2276			dram_channels = si_get_number_of_dram_channels(rdev);
2277
2278		/* watermark for high clocks */
2279		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2280			wm_high.yclk =
2281				radeon_dpm_get_mclk(rdev, false) * 10;
2282			wm_high.sclk =
2283				radeon_dpm_get_sclk(rdev, false) * 10;
2284		} else {
2285			wm_high.yclk = rdev->pm.current_mclk * 10;
2286			wm_high.sclk = rdev->pm.current_sclk * 10;
2287		}
2288
2289		wm_high.disp_clk = mode->clock;
2290		wm_high.src_width = mode->crtc_hdisplay;
2291		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2292		wm_high.blank_time = line_time - wm_high.active_time;
2293		wm_high.interlaced = false;
2294		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2295			wm_high.interlaced = true;
2296		wm_high.vsc = radeon_crtc->vsc;
2297		wm_high.vtaps = 1;
2298		if (radeon_crtc->rmx_type != RMX_OFF)
2299			wm_high.vtaps = 2;
2300		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2301		wm_high.lb_size = lb_size;
2302		wm_high.dram_channels = dram_channels;
2303		wm_high.num_heads = num_heads;
2304
2305		/* watermark for low clocks */
2306		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2307			wm_low.yclk =
2308				radeon_dpm_get_mclk(rdev, true) * 10;
2309			wm_low.sclk =
2310				radeon_dpm_get_sclk(rdev, true) * 10;
2311		} else {
2312			wm_low.yclk = rdev->pm.current_mclk * 10;
2313			wm_low.sclk = rdev->pm.current_sclk * 10;
2314		}
2315
2316		wm_low.disp_clk = mode->clock;
2317		wm_low.src_width = mode->crtc_hdisplay;
2318		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2319		wm_low.blank_time = line_time - wm_low.active_time;
2320		wm_low.interlaced = false;
2321		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2322			wm_low.interlaced = true;
2323		wm_low.vsc = radeon_crtc->vsc;
2324		wm_low.vtaps = 1;
2325		if (radeon_crtc->rmx_type != RMX_OFF)
2326			wm_low.vtaps = 2;
2327		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2328		wm_low.lb_size = lb_size;
2329		wm_low.dram_channels = dram_channels;
2330		wm_low.num_heads = num_heads;
2331
2332		/* set for high clocks */
2333		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2334		/* set for low clocks */
2335		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2336
2337		/* possibly force display priority to high */
2338		/* should really do this at mode validation time... */
2339		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2340		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2341		    !dce6_check_latency_hiding(&wm_high) ||
2342		    (rdev->disp_priority == 2)) {
2343			DRM_DEBUG_KMS("force priority to high\n");
2344			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2345			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2346		}
2347		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2348		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2349		    !dce6_check_latency_hiding(&wm_low) ||
2350		    (rdev->disp_priority == 2)) {
2351			DRM_DEBUG_KMS("force priority to high\n");
2352			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2353			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2354		}
2355
2356		a.full = dfixed_const(1000);
2357		b.full = dfixed_const(mode->clock);
2358		b.full = dfixed_div(b, a);
2359		c.full = dfixed_const(latency_watermark_a);
2360		c.full = dfixed_mul(c, b);
2361		c.full = dfixed_mul(c, radeon_crtc->hsc);
2362		c.full = dfixed_div(c, a);
2363		a.full = dfixed_const(16);
2364		c.full = dfixed_div(c, a);
2365		priority_a_mark = dfixed_trunc(c);
2366		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2367
2368		a.full = dfixed_const(1000);
2369		b.full = dfixed_const(mode->clock);
2370		b.full = dfixed_div(b, a);
2371		c.full = dfixed_const(latency_watermark_b);
2372		c.full = dfixed_mul(c, b);
2373		c.full = dfixed_mul(c, radeon_crtc->hsc);
2374		c.full = dfixed_div(c, a);
2375		a.full = dfixed_const(16);
2376		c.full = dfixed_div(c, a);
2377		priority_b_mark = dfixed_trunc(c);
2378		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2379	}
2380
2381	/* select wm A */
2382	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2383	tmp = arb_control3;
2384	tmp &= ~LATENCY_WATERMARK_MASK(3);
2385	tmp |= LATENCY_WATERMARK_MASK(1);
2386	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2387	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2388	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2389		LATENCY_HIGH_WATERMARK(line_time)));
2390	/* select wm B */
2391	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2392	tmp &= ~LATENCY_WATERMARK_MASK(3);
2393	tmp |= LATENCY_WATERMARK_MASK(2);
2394	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2395	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2396	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2397		LATENCY_HIGH_WATERMARK(line_time)));
2398	/* restore original selection */
2399	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2400
2401	/* write the priority marks */
2402	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2403	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2404
2405	/* save values for DPM */
2406	radeon_crtc->line_time = line_time;
2407	radeon_crtc->wm_high = latency_watermark_a;
2408	radeon_crtc->wm_low = latency_watermark_b;
2409}
2410
2411void dce6_bandwidth_update(struct radeon_device *rdev)
2412{
2413	struct drm_display_mode *mode0 = NULL;
2414	struct drm_display_mode *mode1 = NULL;
2415	u32 num_heads = 0, lb_size;
2416	int i;
2417
2418	if (!rdev->mode_info.mode_config_initialized)
2419		return;
2420
2421	radeon_update_display_priority(rdev);
2422
2423	for (i = 0; i < rdev->num_crtc; i++) {
2424		if (rdev->mode_info.crtcs[i]->base.enabled)
2425			num_heads++;
2426	}
2427	for (i = 0; i < rdev->num_crtc; i += 2) {
2428		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2429		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2430		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2431		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2432		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2433		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2434	}
2435}
2436
2437/*
2438 * Core functions
2439 */
2440static void si_tiling_mode_table_init(struct radeon_device *rdev)
2441{
2442	const u32 num_tile_mode_states = 32;
2443	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2444
2445	switch (rdev->config.si.mem_row_size_in_kb) {
2446	case 1:
2447		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2448		break;
2449	case 2:
2450	default:
2451		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2452		break;
2453	case 4:
2454		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2455		break;
2456	}
2457
2458	if ((rdev->family == CHIP_TAHITI) ||
2459	    (rdev->family == CHIP_PITCAIRN)) {
2460		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2461			switch (reg_offset) {
2462			case 0:  /* non-AA compressed depth or any compressed stencil */
2463				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2467						 NUM_BANKS(ADDR_SURF_16_BANK) |
2468						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471				break;
2472			case 1:  /* 2xAA/4xAA compressed depth only */
2473				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2474						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2475						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2477						 NUM_BANKS(ADDR_SURF_16_BANK) |
2478						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481				break;
2482			case 2:  /* 8xAA compressed depth only */
2483				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2487						 NUM_BANKS(ADDR_SURF_16_BANK) |
2488						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491				break;
2492			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2493				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2495						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2497						 NUM_BANKS(ADDR_SURF_16_BANK) |
2498						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2500						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2501				break;
2502			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2503				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2504						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2507						 NUM_BANKS(ADDR_SURF_16_BANK) |
2508						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511				break;
2512			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2513				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2515						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516						 TILE_SPLIT(split_equal_to_row_size) |
2517						 NUM_BANKS(ADDR_SURF_16_BANK) |
2518						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521				break;
2522			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2523				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2525						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526						 TILE_SPLIT(split_equal_to_row_size) |
2527						 NUM_BANKS(ADDR_SURF_16_BANK) |
2528						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2530						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2531				break;
2532			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2533				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536						 TILE_SPLIT(split_equal_to_row_size) |
2537						 NUM_BANKS(ADDR_SURF_16_BANK) |
2538						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541				break;
2542			case 8:  /* 1D and 1D Array Surfaces */
2543				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2544						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2547						 NUM_BANKS(ADDR_SURF_16_BANK) |
2548						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551				break;
2552			case 9:  /* Displayable maps. */
2553				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2557						 NUM_BANKS(ADDR_SURF_16_BANK) |
2558						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2560						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2561				break;
2562			case 10:  /* Display 8bpp. */
2563				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2565						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2567						 NUM_BANKS(ADDR_SURF_16_BANK) |
2568						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2570						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571				break;
2572			case 11:  /* Display 16bpp. */
2573				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577						 NUM_BANKS(ADDR_SURF_16_BANK) |
2578						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2580						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2581				break;
2582			case 12:  /* Display 32bpp. */
2583				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2585						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2587						 NUM_BANKS(ADDR_SURF_16_BANK) |
2588						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2590						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2591				break;
2592			case 13:  /* Thin. */
2593				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2594						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2597						 NUM_BANKS(ADDR_SURF_16_BANK) |
2598						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2600						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2601				break;
2602			case 14:  /* Thin 8 bpp. */
2603				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2605						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2607						 NUM_BANKS(ADDR_SURF_16_BANK) |
2608						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2610						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2611				break;
2612			case 15:  /* Thin 16 bpp. */
2613				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617						 NUM_BANKS(ADDR_SURF_16_BANK) |
2618						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2621				break;
2622			case 16:  /* Thin 32 bpp. */
2623				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627						 NUM_BANKS(ADDR_SURF_16_BANK) |
2628						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2631				break;
2632			case 17:  /* Thin 64 bpp. */
2633				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636						 TILE_SPLIT(split_equal_to_row_size) |
2637						 NUM_BANKS(ADDR_SURF_16_BANK) |
2638						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2641				break;
2642			case 21:  /* 8 bpp PRT. */
2643				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2646						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2647						 NUM_BANKS(ADDR_SURF_16_BANK) |
2648						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2649						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651				break;
2652			case 22:  /* 16 bpp PRT */
2653				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2657						 NUM_BANKS(ADDR_SURF_16_BANK) |
2658						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2660						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2661				break;
2662			case 23:  /* 32 bpp PRT */
2663				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2664						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2665						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2666						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2667						 NUM_BANKS(ADDR_SURF_16_BANK) |
2668						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2670						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2671				break;
2672			case 24:  /* 64 bpp PRT */
2673				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2676						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2677						 NUM_BANKS(ADDR_SURF_16_BANK) |
2678						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2681				break;
2682			case 25:  /* 128 bpp PRT */
2683				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2685						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2686						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2687						 NUM_BANKS(ADDR_SURF_8_BANK) |
2688						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2690						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2691				break;
2692			default:
2693				gb_tile_moden = 0;
2694				break;
2695			}
2696			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2697			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2698		}
2699	} else if ((rdev->family == CHIP_VERDE) ||
2700		   (rdev->family == CHIP_OLAND) ||
2701		   (rdev->family == CHIP_HAINAN)) {
2702		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2703			switch (reg_offset) {
2704			case 0:  /* non-AA compressed depth or any compressed stencil */
2705				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2709						 NUM_BANKS(ADDR_SURF_16_BANK) |
2710						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713				break;
2714			case 1:  /* 2xAA/4xAA compressed depth only */
2715				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2717						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2719						 NUM_BANKS(ADDR_SURF_16_BANK) |
2720						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2722						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2723				break;
2724			case 2:  /* 8xAA compressed depth only */
2725				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2729						 NUM_BANKS(ADDR_SURF_16_BANK) |
2730						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2733				break;
2734			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2735				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2739						 NUM_BANKS(ADDR_SURF_16_BANK) |
2740						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2743				break;
2744			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2745				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2746						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2749						 NUM_BANKS(ADDR_SURF_16_BANK) |
2750						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2753				break;
2754			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2755				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2757						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758						 TILE_SPLIT(split_equal_to_row_size) |
2759						 NUM_BANKS(ADDR_SURF_16_BANK) |
2760						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2762						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2763				break;
2764			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2765				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2767						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768						 TILE_SPLIT(split_equal_to_row_size) |
2769						 NUM_BANKS(ADDR_SURF_16_BANK) |
2770						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773				break;
2774			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2775				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778						 TILE_SPLIT(split_equal_to_row_size) |
2779						 NUM_BANKS(ADDR_SURF_16_BANK) |
2780						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2782						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2783				break;
2784			case 8:  /* 1D and 1D Array Surfaces */
2785				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2786						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2789						 NUM_BANKS(ADDR_SURF_16_BANK) |
2790						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793				break;
2794			case 9:  /* Displayable maps. */
2795				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2796						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2799						 NUM_BANKS(ADDR_SURF_16_BANK) |
2800						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2802						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803				break;
2804			case 10:  /* Display 8bpp. */
2805				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2809						 NUM_BANKS(ADDR_SURF_16_BANK) |
2810						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2812						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2813				break;
2814			case 11:  /* Display 16bpp. */
2815				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2817						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2819						 NUM_BANKS(ADDR_SURF_16_BANK) |
2820						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2822						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823				break;
2824			case 12:  /* Display 32bpp. */
2825				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2829						 NUM_BANKS(ADDR_SURF_16_BANK) |
2830						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2832						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833				break;
2834			case 13:  /* Thin. */
2835				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2839						 NUM_BANKS(ADDR_SURF_16_BANK) |
2840						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2842						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843				break;
2844			case 14:  /* Thin 8 bpp. */
2845				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2849						 NUM_BANKS(ADDR_SURF_16_BANK) |
2850						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2852						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853				break;
2854			case 15:  /* Thin 16 bpp. */
2855				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2858						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859						 NUM_BANKS(ADDR_SURF_16_BANK) |
2860						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2862						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863				break;
2864			case 16:  /* Thin 32 bpp. */
2865				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2869						 NUM_BANKS(ADDR_SURF_16_BANK) |
2870						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2872						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873				break;
2874			case 17:  /* Thin 64 bpp. */
2875				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2878						 TILE_SPLIT(split_equal_to_row_size) |
2879						 NUM_BANKS(ADDR_SURF_16_BANK) |
2880						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883				break;
2884			case 21:  /* 8 bpp PRT. */
2885				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2887						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2888						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2889						 NUM_BANKS(ADDR_SURF_16_BANK) |
2890						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2891						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2893				break;
2894			case 22:  /* 16 bpp PRT */
2895				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2899						 NUM_BANKS(ADDR_SURF_16_BANK) |
2900						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2902						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2903				break;
2904			case 23:  /* 32 bpp PRT */
2905				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2907						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2908						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2909						 NUM_BANKS(ADDR_SURF_16_BANK) |
2910						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2912						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2913				break;
2914			case 24:  /* 64 bpp PRT */
2915				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2917						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2918						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2919						 NUM_BANKS(ADDR_SURF_16_BANK) |
2920						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2922						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2923				break;
2924			case 25:  /* 128 bpp PRT */
2925				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2927						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2928						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2929						 NUM_BANKS(ADDR_SURF_8_BANK) |
2930						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2932						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2933				break;
2934			default:
2935				gb_tile_moden = 0;
2936				break;
2937			}
2938			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2939			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2940		}
2941	} else
2942		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2943}
2944
2945static void si_select_se_sh(struct radeon_device *rdev,
2946			    u32 se_num, u32 sh_num)
2947{
2948	u32 data = INSTANCE_BROADCAST_WRITES;
2949
2950	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2951		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2952	else if (se_num == 0xffffffff)
2953		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2954	else if (sh_num == 0xffffffff)
2955		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2956	else
2957		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2958	WREG32(GRBM_GFX_INDEX, data);
2959}
2960
/*
 * Return a mask with the low @bit_width bits set.  Widths of 32 or more
 * yield an all-ones mask.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	/* shifting by the full width of the type is undefined, so saturate */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
2971
2972static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2973{
2974	u32 data, mask;
2975
2976	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2977	if (data & 1)
2978		data &= INACTIVE_CUS_MASK;
2979	else
2980		data = 0;
2981	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2982
2983	data >>= INACTIVE_CUS_SHIFT;
2984
2985	mask = si_create_bitmask(cu_per_sh);
2986
2987	return ~data & mask;
2988}
2989
2990static void si_setup_spi(struct radeon_device *rdev,
2991			 u32 se_num, u32 sh_per_se,
2992			 u32 cu_per_sh)
2993{
2994	int i, j, k;
2995	u32 data, mask, active_cu;
2996
2997	for (i = 0; i < se_num; i++) {
2998		for (j = 0; j < sh_per_se; j++) {
2999			si_select_se_sh(rdev, i, j);
3000			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3001			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3002
3003			mask = 1;
3004			for (k = 0; k < 16; k++) {
3005				mask <<= k;
3006				if (active_cu & mask) {
3007					data &= ~mask;
3008					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3009					break;
3010				}
3011			}
3012		}
3013	}
3014	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3015}
3016
3017static u32 si_get_rb_disabled(struct radeon_device *rdev,
3018			      u32 max_rb_num_per_se,
3019			      u32 sh_per_se)
3020{
3021	u32 data, mask;
3022
3023	data = RREG32(CC_RB_BACKEND_DISABLE);
3024	if (data & 1)
3025		data &= BACKEND_DISABLE_MASK;
3026	else
3027		data = 0;
3028	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3029
3030	data >>= BACKEND_DISABLE_SHIFT;
3031
3032	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3033
3034	return data & mask;
3035}
3036
3037static void si_setup_rb(struct radeon_device *rdev,
3038			u32 se_num, u32 sh_per_se,
3039			u32 max_rb_num_per_se)
3040{
3041	int i, j;
3042	u32 data, mask;
3043	u32 disabled_rbs = 0;
3044	u32 enabled_rbs = 0;
3045
3046	for (i = 0; i < se_num; i++) {
3047		for (j = 0; j < sh_per_se; j++) {
3048			si_select_se_sh(rdev, i, j);
3049			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3050			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3051		}
3052	}
3053	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3054
3055	mask = 1;
3056	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3057		if (!(disabled_rbs & mask))
3058			enabled_rbs |= mask;
3059		mask <<= 1;
3060	}
3061
3062	rdev->config.si.backend_enable_mask = enabled_rbs;
3063
3064	for (i = 0; i < se_num; i++) {
3065		si_select_se_sh(rdev, i, 0xffffffff);
3066		data = 0;
3067		for (j = 0; j < sh_per_se; j++) {
3068			switch (enabled_rbs & 3) {
3069			case 1:
3070				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3071				break;
3072			case 2:
3073				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3074				break;
3075			case 3:
3076			default:
3077				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3078				break;
3079			}
3080			enabled_rbs >>= 2;
3081		}
3082		WREG32(PA_SC_RASTER_CONFIG, data);
3083	}
3084	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3085}
3086
/*
 * si_gpu_init - program the static graphics configuration of an SI ASIC.
 *
 * Fills rdev->config.si with per-family limits, derives the address/tiling
 * configuration from the golden GB_ADDR_CONFIG value and MC_ARB_RAMCFG,
 * distributes that configuration to the blocks that need it, sets up the
 * tiling table, render backends and SPI, counts the active CUs and finally
 * writes the default state of a set of 3D engine registers.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family limits; unknown families fall through to the Verde values */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* zero five consecutive registers in each of 32 groups spaced 0x18 apart */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read here but not used further in this function */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* row size in KB from the NOOFCOLS field: 4 * 2^(8 + field) bytes, capped at 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	/* replace the golden ROW_SIZE field with the value derived above */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	/* num_pipes field: any pipe count other than 1, 2 or 4 is encoded as 3 */
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	/* num_banks field, taken from the NOOFBANK field of MC_ARB_RAMCFG */
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	/* group_size and row_size fields are copied out of gb_addr_config */
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config value is written to every consumer below */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* total active CUs = sum of set bits in every SE/SH active-CU bitmap */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* SX_DEBUG_1 is read and written back unmodified */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* FIFO sizes come from the per-family values chosen at the top */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* clear all CB performance counter selects */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	/* read-modify-write: set HDP_FLUSH_INVALIDATE_CACHE in HDP_MISC_CNTL */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* HDP_HOST_PATH_CNTL is read and written back unmodified */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}
3352
3353/*
3354 * GPU scratch registers helpers function.
3355 */
3356static void si_scratch_init(struct radeon_device *rdev)
3357{
3358	int i;
3359
3360	rdev->scratch.num_reg = 7;
3361	rdev->scratch.reg_base = SCRATCH_REG0;
3362	for (i = 0; i < rdev->scratch.num_reg; i++) {
3363		rdev->scratch.free[i] = true;
3364		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3365	}
3366}
3367
3368void si_fence_ring_emit(struct radeon_device *rdev,
3369			struct radeon_fence *fence)
3370{
3371	struct radeon_ring *ring = &rdev->ring[fence->ring];
3372	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3373
3374	/* flush read cache over gart */
3375	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3376	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3377	radeon_ring_write(ring, 0);
3378	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3379	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3380			  PACKET3_TC_ACTION_ENA |
3381			  PACKET3_SH_KCACHE_ACTION_ENA |
3382			  PACKET3_SH_ICACHE_ACTION_ENA);
3383	radeon_ring_write(ring, 0xFFFFFFFF);
3384	radeon_ring_write(ring, 0);
3385	radeon_ring_write(ring, 10); /* poll interval */
3386	/* EVENT_WRITE_EOP - flush caches, send int */
3387	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3388	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3389	radeon_ring_write(ring, lower_32_bits(addr));
3390	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3391	radeon_ring_write(ring, fence->seq);
3392	radeon_ring_write(ring, 0);
3393}
3394
3395/*
3396 * IB stuff
3397 */
3398void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3399{
3400	struct radeon_ring *ring = &rdev->ring[ib->ring];
3401	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3402	u32 header;
3403
3404	if (ib->is_const_ib) {
3405		/* set switch buffer packet before const IB */
3406		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3407		radeon_ring_write(ring, 0);
3408
3409		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3410	} else {
3411		u32 next_rptr;
3412		if (ring->rptr_save_reg) {
3413			next_rptr = ring->wptr + 3 + 4 + 8;
3414			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3415			radeon_ring_write(ring, ((ring->rptr_save_reg -
3416						  PACKET3_SET_CONFIG_REG_START) >> 2));
3417			radeon_ring_write(ring, next_rptr);
3418		} else if (rdev->wb.enabled) {
3419			next_rptr = ring->wptr + 5 + 4 + 8;
3420			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3421			radeon_ring_write(ring, (1 << 8));
3422			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3423			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3424			radeon_ring_write(ring, next_rptr);
3425		}
3426
3427		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3428	}
3429
3430	radeon_ring_write(ring, header);
3431	radeon_ring_write(ring,
3432#ifdef __BIG_ENDIAN
3433			  (2 << 0) |
3434#endif
3435			  (ib->gpu_addr & 0xFFFFFFFC));
3436	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3437	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3438
3439	if (!ib->is_const_ib) {
3440		/* flush read cache over gart for this vmid */
3441		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3442		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3443		radeon_ring_write(ring, vm_id);
3444		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3445		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3446				  PACKET3_TC_ACTION_ENA |
3447				  PACKET3_SH_KCACHE_ACTION_ENA |
3448				  PACKET3_SH_ICACHE_ACTION_ENA);
3449		radeon_ring_write(ring, 0xFFFFFFFF);
3450		radeon_ring_write(ring, 0);
3451		radeon_ring_write(ring, 10); /* poll interval */
3452	}
3453}
3454
3455/*
3456 * CP.
3457 */
3458static void si_cp_enable(struct radeon_device *rdev, bool enable)
3459{
3460	if (enable)
3461		WREG32(CP_ME_CNTL, 0);
3462	else {
3463		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3464			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3465		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3466		WREG32(SCRATCH_UMSK, 0);
3467		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3468		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3469		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3470	}
3471	udelay(50);
3472}
3473
3474static int si_cp_load_microcode(struct radeon_device *rdev)
3475{
3476	int i;
3477
3478	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3479		return -EINVAL;
3480
3481	si_cp_enable(rdev, false);
3482
3483	if (rdev->new_fw) {
3484		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3485			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3486		const struct gfx_firmware_header_v1_0 *ce_hdr =
3487			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3488		const struct gfx_firmware_header_v1_0 *me_hdr =
3489			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3490		const __le32 *fw_data;
3491		u32 fw_size;
3492
3493		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3494		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3495		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3496
3497		/* PFP */
3498		fw_data = (const __le32 *)
3499			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3500		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3501		WREG32(CP_PFP_UCODE_ADDR, 0);
3502		for (i = 0; i < fw_size; i++)
3503			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3504		WREG32(CP_PFP_UCODE_ADDR, 0);
3505
3506		/* CE */
3507		fw_data = (const __le32 *)
3508			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3509		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3510		WREG32(CP_CE_UCODE_ADDR, 0);
3511		for (i = 0; i < fw_size; i++)
3512			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3513		WREG32(CP_CE_UCODE_ADDR, 0);
3514
3515		/* ME */
3516		fw_data = (const __be32 *)
3517			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3518		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3519		WREG32(CP_ME_RAM_WADDR, 0);
3520		for (i = 0; i < fw_size; i++)
3521			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3522		WREG32(CP_ME_RAM_WADDR, 0);
3523	} else {
3524		const __be32 *fw_data;
3525
3526		/* PFP */
3527		fw_data = (const __be32 *)rdev->pfp_fw->data;
3528		WREG32(CP_PFP_UCODE_ADDR, 0);
3529		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3530			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3531		WREG32(CP_PFP_UCODE_ADDR, 0);
3532
3533		/* CE */
3534		fw_data = (const __be32 *)rdev->ce_fw->data;
3535		WREG32(CP_CE_UCODE_ADDR, 0);
3536		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3537			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3538		WREG32(CP_CE_UCODE_ADDR, 0);
3539
3540		/* ME */
3541		fw_data = (const __be32 *)rdev->me_fw->data;
3542		WREG32(CP_ME_RAM_WADDR, 0);
3543		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3544			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3545		WREG32(CP_ME_RAM_WADDR, 0);
3546	}
3547
3548	WREG32(CP_PFP_UCODE_ADDR, 0);
3549	WREG32(CP_CE_UCODE_ADDR, 0);
3550	WREG32(CP_ME_RAM_WADDR, 0);
3551	WREG32(CP_ME_RAM_RADDR, 0);
3552	return 0;
3553}
3554
3555static int si_cp_start(struct radeon_device *rdev)
3556{
3557	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3558	int r, i;
3559
3560	r = radeon_ring_lock(rdev, ring, 7 + 4);
3561	if (r) {
3562		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3563		return r;
3564	}
3565	/* init the CP */
3566	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3567	radeon_ring_write(ring, 0x1);
3568	radeon_ring_write(ring, 0x0);
3569	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3570	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3571	radeon_ring_write(ring, 0);
3572	radeon_ring_write(ring, 0);
3573
3574	/* init the CE partitions */
3575	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3576	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3577	radeon_ring_write(ring, 0xc000);
3578	radeon_ring_write(ring, 0xe000);
3579	radeon_ring_unlock_commit(rdev, ring, false);
3580
3581	si_cp_enable(rdev, true);
3582
3583	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3584	if (r) {
3585		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3586		return r;
3587	}
3588
3589	/* setup clear context state */
3590	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3591	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3592
3593	for (i = 0; i < si_default_size; i++)
3594		radeon_ring_write(ring, si_default_state[i]);
3595
3596	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3597	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3598
3599	/* set clear context state */
3600	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3601	radeon_ring_write(ring, 0);
3602
3603	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3604	radeon_ring_write(ring, 0x00000316);
3605	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3606	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3607
3608	radeon_ring_unlock_commit(rdev, ring, false);
3609
3610	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3611		ring = &rdev->ring[i];
3612		r = radeon_ring_lock(rdev, ring, 2);
3613
3614		/* clear the compute context state */
3615		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3616		radeon_ring_write(ring, 0);
3617
3618		radeon_ring_unlock_commit(rdev, ring, false);
3619	}
3620
3621	return 0;
3622}
3623
3624static void si_cp_fini(struct radeon_device *rdev)
3625{
3626	struct radeon_ring *ring;
3627	si_cp_enable(rdev, false);
3628
3629	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3630	radeon_ring_fini(rdev, ring);
3631	radeon_scratch_free(rdev, ring->rptr_save_reg);
3632
3633	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3634	radeon_ring_fini(rdev, ring);
3635	radeon_scratch_free(rdev, ring->rptr_save_reg);
3636
3637	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3638	radeon_ring_fini(rdev, ring);
3639	radeon_scratch_free(rdev, ring->rptr_save_reg);
3640}
3641
3642static int si_cp_resume(struct radeon_device *rdev)
3643{
3644	struct radeon_ring *ring;
3645	u32 tmp;
3646	u32 rb_bufsz;
3647	int r;
3648
3649	si_enable_gui_idle_interrupt(rdev, false);
3650
3651	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3652	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3653
3654	/* Set the write pointer delay */
3655	WREG32(CP_RB_WPTR_DELAY, 0);
3656
3657	WREG32(CP_DEBUG, 0);
3658	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3659
3660	/* ring 0 - compute and gfx */
3661	/* Set ring buffer size */
3662	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3663	rb_bufsz = order_base_2(ring->ring_size / 8);
3664	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3665#ifdef __BIG_ENDIAN
3666	tmp |= BUF_SWAP_32BIT;
3667#endif
3668	WREG32(CP_RB0_CNTL, tmp);
3669
3670	/* Initialize the ring buffer's read and write pointers */
3671	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3672	ring->wptr = 0;
3673	WREG32(CP_RB0_WPTR, ring->wptr);
3674
3675	/* set the wb address whether it's enabled or not */
3676	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3677	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3678
3679	if (rdev->wb.enabled)
3680		WREG32(SCRATCH_UMSK, 0xff);
3681	else {
3682		tmp |= RB_NO_UPDATE;
3683		WREG32(SCRATCH_UMSK, 0);
3684	}
3685
3686	mdelay(1);
3687	WREG32(CP_RB0_CNTL, tmp);
3688
3689	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3690
3691	/* ring1  - compute only */
3692	/* Set ring buffer size */
3693	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3694	rb_bufsz = order_base_2(ring->ring_size / 8);
3695	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3696#ifdef __BIG_ENDIAN
3697	tmp |= BUF_SWAP_32BIT;
3698#endif
3699	WREG32(CP_RB1_CNTL, tmp);
3700
3701	/* Initialize the ring buffer's read and write pointers */
3702	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3703	ring->wptr = 0;
3704	WREG32(CP_RB1_WPTR, ring->wptr);
3705
3706	/* set the wb address whether it's enabled or not */
3707	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3708	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3709
3710	mdelay(1);
3711	WREG32(CP_RB1_CNTL, tmp);
3712
3713	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3714
3715	/* ring2 - compute only */
3716	/* Set ring buffer size */
3717	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3718	rb_bufsz = order_base_2(ring->ring_size / 8);
3719	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3720#ifdef __BIG_ENDIAN
3721	tmp |= BUF_SWAP_32BIT;
3722#endif
3723	WREG32(CP_RB2_CNTL, tmp);
3724
3725	/* Initialize the ring buffer's read and write pointers */
3726	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3727	ring->wptr = 0;
3728	WREG32(CP_RB2_WPTR, ring->wptr);
3729
3730	/* set the wb address whether it's enabled or not */
3731	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3732	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3733
3734	mdelay(1);
3735	WREG32(CP_RB2_CNTL, tmp);
3736
3737	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3738
3739	/* start the rings */
3740	si_cp_start(rdev);
3741	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3742	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3743	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3744	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3745	if (r) {
3746		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3747		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3748		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3749		return r;
3750	}
3751	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3752	if (r) {
3753		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3754	}
3755	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3756	if (r) {
3757		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3758	}
3759
3760	si_enable_gui_idle_interrupt(rdev, true);
3761
3762	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3763		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3764
3765	return 0;
3766}
3767
3768u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3769{
3770	u32 reset_mask = 0;
3771	u32 tmp;
3772
3773	/* GRBM_STATUS */
3774	tmp = RREG32(GRBM_STATUS);
3775	if (tmp & (PA_BUSY | SC_BUSY |
3776		   BCI_BUSY | SX_BUSY |
3777		   TA_BUSY | VGT_BUSY |
3778		   DB_BUSY | CB_BUSY |
3779		   GDS_BUSY | SPI_BUSY |
3780		   IA_BUSY | IA_BUSY_NO_DMA))
3781		reset_mask |= RADEON_RESET_GFX;
3782
3783	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3784		   CP_BUSY | CP_COHERENCY_BUSY))
3785		reset_mask |= RADEON_RESET_CP;
3786
3787	if (tmp & GRBM_EE_BUSY)
3788		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3789
3790	/* GRBM_STATUS2 */
3791	tmp = RREG32(GRBM_STATUS2);
3792	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3793		reset_mask |= RADEON_RESET_RLC;
3794
3795	/* DMA_STATUS_REG 0 */
3796	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3797	if (!(tmp & DMA_IDLE))
3798		reset_mask |= RADEON_RESET_DMA;
3799
3800	/* DMA_STATUS_REG 1 */
3801	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3802	if (!(tmp & DMA_IDLE))
3803		reset_mask |= RADEON_RESET_DMA1;
3804
3805	/* SRBM_STATUS2 */
3806	tmp = RREG32(SRBM_STATUS2);
3807	if (tmp & DMA_BUSY)
3808		reset_mask |= RADEON_RESET_DMA;
3809
3810	if (tmp & DMA1_BUSY)
3811		reset_mask |= RADEON_RESET_DMA1;
3812
3813	/* SRBM_STATUS */
3814	tmp = RREG32(SRBM_STATUS);
3815
3816	if (tmp & IH_BUSY)
3817		reset_mask |= RADEON_RESET_IH;
3818
3819	if (tmp & SEM_BUSY)
3820		reset_mask |= RADEON_RESET_SEM;
3821
3822	if (tmp & GRBM_RQ_PENDING)
3823		reset_mask |= RADEON_RESET_GRBM;
3824
3825	if (tmp & VMC_BUSY)
3826		reset_mask |= RADEON_RESET_VMC;
3827
3828	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3829		   MCC_BUSY | MCD_BUSY))
3830		reset_mask |= RADEON_RESET_MC;
3831
3832	if (evergreen_is_display_hung(rdev))
3833		reset_mask |= RADEON_RESET_DISPLAY;
3834
3835	/* VM_L2_STATUS */
3836	tmp = RREG32(VM_L2_STATUS);
3837	if (tmp & L2_BUSY)
3838		reset_mask |= RADEON_RESET_VMC;
3839
3840	/* Skip MC reset as it's mostly likely not hung, just busy */
3841	if (reset_mask & RADEON_RESET_MC) {
3842		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3843		reset_mask &= ~RADEON_RESET_MC;
3844	}
3845
3846	return reset_mask;
3847}
3848
3849static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3850{
3851	struct evergreen_mc_save save;
3852	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3853	u32 tmp;
3854
3855	if (reset_mask == 0)
3856		return;
3857
3858	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3859
3860	evergreen_print_gpu_status_regs(rdev);
3861	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3862		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3863	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3864		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3865
3866	/* disable PG/CG */
3867	si_fini_pg(rdev);
3868	si_fini_cg(rdev);
3869
3870	/* stop the rlc */
3871	si_rlc_stop(rdev);
3872
3873	/* Disable CP parsing/prefetching */
3874	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3875
3876	if (reset_mask & RADEON_RESET_DMA) {
3877		/* dma0 */
3878		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3879		tmp &= ~DMA_RB_ENABLE;
3880		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3881	}
3882	if (reset_mask & RADEON_RESET_DMA1) {
3883		/* dma1 */
3884		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3885		tmp &= ~DMA_RB_ENABLE;
3886		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3887	}
3888
3889	udelay(50);
3890
3891	evergreen_mc_stop(rdev, &save);
3892	if (evergreen_mc_wait_for_idle(rdev)) {
3893		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3894	}
3895
3896	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3897		grbm_soft_reset = SOFT_RESET_CB |
3898			SOFT_RESET_DB |
3899			SOFT_RESET_GDS |
3900			SOFT_RESET_PA |
3901			SOFT_RESET_SC |
3902			SOFT_RESET_BCI |
3903			SOFT_RESET_SPI |
3904			SOFT_RESET_SX |
3905			SOFT_RESET_TC |
3906			SOFT_RESET_TA |
3907			SOFT_RESET_VGT |
3908			SOFT_RESET_IA;
3909	}
3910
3911	if (reset_mask & RADEON_RESET_CP) {
3912		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3913
3914		srbm_soft_reset |= SOFT_RESET_GRBM;
3915	}
3916
3917	if (reset_mask & RADEON_RESET_DMA)
3918		srbm_soft_reset |= SOFT_RESET_DMA;
3919
3920	if (reset_mask & RADEON_RESET_DMA1)
3921		srbm_soft_reset |= SOFT_RESET_DMA1;
3922
3923	if (reset_mask & RADEON_RESET_DISPLAY)
3924		srbm_soft_reset |= SOFT_RESET_DC;
3925
3926	if (reset_mask & RADEON_RESET_RLC)
3927		grbm_soft_reset |= SOFT_RESET_RLC;
3928
3929	if (reset_mask & RADEON_RESET_SEM)
3930		srbm_soft_reset |= SOFT_RESET_SEM;
3931
3932	if (reset_mask & RADEON_RESET_IH)
3933		srbm_soft_reset |= SOFT_RESET_IH;
3934
3935	if (reset_mask & RADEON_RESET_GRBM)
3936		srbm_soft_reset |= SOFT_RESET_GRBM;
3937
3938	if (reset_mask & RADEON_RESET_VMC)
3939		srbm_soft_reset |= SOFT_RESET_VMC;
3940
3941	if (reset_mask & RADEON_RESET_MC)
3942		srbm_soft_reset |= SOFT_RESET_MC;
3943
3944	if (grbm_soft_reset) {
3945		tmp = RREG32(GRBM_SOFT_RESET);
3946		tmp |= grbm_soft_reset;
3947		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3948		WREG32(GRBM_SOFT_RESET, tmp);
3949		tmp = RREG32(GRBM_SOFT_RESET);
3950
3951		udelay(50);
3952
3953		tmp &= ~grbm_soft_reset;
3954		WREG32(GRBM_SOFT_RESET, tmp);
3955		tmp = RREG32(GRBM_SOFT_RESET);
3956	}
3957
3958	if (srbm_soft_reset) {
3959		tmp = RREG32(SRBM_SOFT_RESET);
3960		tmp |= srbm_soft_reset;
3961		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3962		WREG32(SRBM_SOFT_RESET, tmp);
3963		tmp = RREG32(SRBM_SOFT_RESET);
3964
3965		udelay(50);
3966
3967		tmp &= ~srbm_soft_reset;
3968		WREG32(SRBM_SOFT_RESET, tmp);
3969		tmp = RREG32(SRBM_SOFT_RESET);
3970	}
3971
3972	/* Wait a little for things to settle down */
3973	udelay(50);
3974
3975	evergreen_mc_resume(rdev, &save);
3976	udelay(50);
3977
3978	evergreen_print_gpu_status_regs(rdev);
3979}
3980
3981static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3982{
3983	u32 tmp, i;
3984
3985	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3986	tmp |= SPLL_BYPASS_EN;
3987	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3988
3989	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3990	tmp |= SPLL_CTLREQ_CHG;
3991	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3992
3993	for (i = 0; i < rdev->usec_timeout; i++) {
3994		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3995			break;
3996		udelay(1);
3997	}
3998
3999	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4000	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4001	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4002
4003	tmp = RREG32(MPLL_CNTL_MODE);
4004	tmp &= ~MPLL_MCLK_SEL;
4005	WREG32(MPLL_CNTL_MODE, tmp);
4006}
4007
4008static void si_spll_powerdown(struct radeon_device *rdev)
4009{
4010	u32 tmp;
4011
4012	tmp = RREG32(SPLL_CNTL_MODE);
4013	tmp |= SPLL_SW_DIR_CONTROL;
4014	WREG32(SPLL_CNTL_MODE, tmp);
4015
4016	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4017	tmp |= SPLL_RESET;
4018	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4019
4020	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4021	tmp |= SPLL_SLEEP;
4022	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4023
4024	tmp = RREG32(SPLL_CNTL_MODE);
4025	tmp &= ~SPLL_SW_DIR_CONTROL;
4026	WREG32(SPLL_CNTL_MODE, tmp);
4027}
4028
4029static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4030{
4031	struct evergreen_mc_save save;
4032	u32 tmp, i;
4033
4034	dev_info(rdev->dev, "GPU pci config reset\n");
4035
4036	/* disable dpm? */
4037
4038	/* disable cg/pg */
4039	si_fini_pg(rdev);
4040	si_fini_cg(rdev);
4041
4042	/* Disable CP parsing/prefetching */
4043	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4044	/* dma0 */
4045	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4046	tmp &= ~DMA_RB_ENABLE;
4047	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4048	/* dma1 */
4049	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4050	tmp &= ~DMA_RB_ENABLE;
4051	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4052	/* XXX other engines? */
4053
4054	/* halt the rlc, disable cp internal ints */
4055	si_rlc_stop(rdev);
4056
4057	udelay(50);
4058
4059	/* disable mem access */
4060	evergreen_mc_stop(rdev, &save);
4061	if (evergreen_mc_wait_for_idle(rdev)) {
4062		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4063	}
4064
4065	/* set mclk/sclk to bypass */
4066	si_set_clk_bypass_mode(rdev);
4067	/* powerdown spll */
4068	si_spll_powerdown(rdev);
4069	/* disable BM */
4070	pci_clear_master(rdev->pdev);
4071	/* reset */
4072	radeon_pci_config_reset(rdev);
4073	/* wait for asic to come out of reset */
4074	for (i = 0; i < rdev->usec_timeout; i++) {
4075		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4076			break;
4077		udelay(1);
4078	}
4079}
4080
4081int si_asic_reset(struct radeon_device *rdev)
4082{
4083	u32 reset_mask;
4084
4085	reset_mask = si_gpu_check_soft_reset(rdev);
4086
4087	if (reset_mask)
4088		r600_set_bios_scratch_engine_hung(rdev, true);
4089
4090	/* try soft reset */
4091	si_gpu_soft_reset(rdev, reset_mask);
4092
4093	reset_mask = si_gpu_check_soft_reset(rdev);
4094
4095	/* try pci config reset */
4096	if (reset_mask && radeon_hard_reset)
4097		si_gpu_pci_config_reset(rdev);
4098
4099	reset_mask = si_gpu_check_soft_reset(rdev);
4100
4101	if (!reset_mask)
4102		r600_set_bios_scratch_engine_hung(rdev, false);
4103
4104	return 0;
4105}
4106
4107/**
4108 * si_gfx_is_lockup - Check if the GFX engine is locked up
4109 *
4110 * @rdev: radeon_device pointer
4111 * @ring: radeon_ring structure holding ring information
4112 *
4113 * Check if the GFX engine is locked up.
4114 * Returns true if the engine appears to be locked up, false if not.
4115 */
4116bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4117{
4118	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4119
4120	if (!(reset_mask & (RADEON_RESET_GFX |
4121			    RADEON_RESET_COMPUTE |
4122			    RADEON_RESET_CP))) {
4123		radeon_ring_lockup_update(rdev, ring);
4124		return false;
4125	}
4126	return radeon_ring_test_lockup(rdev, ring);
4127}
4128
4129/* MC */
4130static void si_mc_program(struct radeon_device *rdev)
4131{
4132	struct evergreen_mc_save save;
4133	u32 tmp;
4134	int i, j;
4135
4136	/* Initialize HDP */
4137	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4138		WREG32((0x2c14 + j), 0x00000000);
4139		WREG32((0x2c18 + j), 0x00000000);
4140		WREG32((0x2c1c + j), 0x00000000);
4141		WREG32((0x2c20 + j), 0x00000000);
4142		WREG32((0x2c24 + j), 0x00000000);
4143	}
4144	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4145
4146	evergreen_mc_stop(rdev, &save);
4147	if (radeon_mc_wait_for_idle(rdev)) {
4148		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4149	}
4150	if (!ASIC_IS_NODCE(rdev))
4151		/* Lockout access through VGA aperture*/
4152		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4153	/* Update configuration */
4154	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4155	       rdev->mc.vram_start >> 12);
4156	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4157	       rdev->mc.vram_end >> 12);
4158	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4159	       rdev->vram_scratch.gpu_addr >> 12);
4160	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4161	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4162	WREG32(MC_VM_FB_LOCATION, tmp);
4163	/* XXX double check these! */
4164	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4165	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4166	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4167	WREG32(MC_VM_AGP_BASE, 0);
4168	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4169	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4170	if (radeon_mc_wait_for_idle(rdev)) {
4171		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4172	}
4173	evergreen_mc_resume(rdev, &save);
4174	if (!ASIC_IS_NODCE(rdev)) {
4175		/* we need to own VRAM, so turn off the VGA renderer here
4176		 * to stop it overwriting our objects */
4177		rv515_vga_render_disable(rdev);
4178	}
4179}
4180
4181void si_vram_gtt_location(struct radeon_device *rdev,
4182			  struct radeon_mc *mc)
4183{
4184	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4185		/* leave room for at least 1024M GTT */
4186		dev_warn(rdev->dev, "limiting VRAM\n");
4187		mc->real_vram_size = 0xFFC0000000ULL;
4188		mc->mc_vram_size = 0xFFC0000000ULL;
4189	}
4190	radeon_vram_location(rdev, &rdev->mc, 0);
4191	rdev->mc.gtt_base_align = 0;
4192	radeon_gtt_location(rdev, mc);
4193}
4194
4195static int si_mc_init(struct radeon_device *rdev)
4196{
4197	u32 tmp;
4198	int chansize, numchan;
4199
4200	/* Get VRAM informations */
4201	rdev->mc.vram_is_ddr = true;
4202	tmp = RREG32(MC_ARB_RAMCFG);
4203	if (tmp & CHANSIZE_OVERRIDE) {
4204		chansize = 16;
4205	} else if (tmp & CHANSIZE_MASK) {
4206		chansize = 64;
4207	} else {
4208		chansize = 32;
4209	}
4210	tmp = RREG32(MC_SHARED_CHMAP);
4211	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4212	case 0:
4213	default:
4214		numchan = 1;
4215		break;
4216	case 1:
4217		numchan = 2;
4218		break;
4219	case 2:
4220		numchan = 4;
4221		break;
4222	case 3:
4223		numchan = 8;
4224		break;
4225	case 4:
4226		numchan = 3;
4227		break;
4228	case 5:
4229		numchan = 6;
4230		break;
4231	case 6:
4232		numchan = 10;
4233		break;
4234	case 7:
4235		numchan = 12;
4236		break;
4237	case 8:
4238		numchan = 16;
4239		break;
4240	}
4241	rdev->mc.vram_width = numchan * chansize;
4242	/* Could aper size report 0 ? */
4243	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4244	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4245	/* size in MB on si */
4246	tmp = RREG32(CONFIG_MEMSIZE);
4247	/* some boards may have garbage in the upper 16 bits */
4248	if (tmp & 0xffff0000) {
4249		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4250		if (tmp & 0xffff)
4251			tmp &= 0xffff;
4252	}
4253	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4254	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4255	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4256	si_vram_gtt_location(rdev, &rdev->mc);
4257	radeon_update_bandwidth_info(rdev);
4258
4259	return 0;
4260}
4261
4262/*
4263 * GART
4264 */
4265void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4266{
4267	/* flush hdp cache */
4268	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4269
4270	/* bits 0-15 are the VM contexts0-15 */
4271	WREG32(VM_INVALIDATE_REQUEST, 1);
4272}
4273
4274static int si_pcie_gart_enable(struct radeon_device *rdev)
4275{
4276	int r, i;
4277
4278	if (rdev->gart.robj == NULL) {
4279		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4280		return -EINVAL;
4281	}
4282	r = radeon_gart_table_vram_pin(rdev);
4283	if (r)
4284		return r;
4285	/* Setup TLB control */
4286	WREG32(MC_VM_MX_L1_TLB_CNTL,
4287	       (0xA << 7) |
4288	       ENABLE_L1_TLB |
4289	       ENABLE_L1_FRAGMENT_PROCESSING |
4290	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4291	       ENABLE_ADVANCED_DRIVER_MODEL |
4292	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4293	/* Setup L2 cache */
4294	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4295	       ENABLE_L2_FRAGMENT_PROCESSING |
4296	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4297	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4298	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4299	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4300	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4301	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4302	       BANK_SELECT(4) |
4303	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4304	/* setup context0 */
4305	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4306	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4307	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4308	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4309			(u32)(rdev->dummy_page.addr >> 12));
4310	WREG32(VM_CONTEXT0_CNTL2, 0);
4311	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4312				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4313
4314	WREG32(0x15D4, 0);
4315	WREG32(0x15D8, 0);
4316	WREG32(0x15DC, 0);
4317
4318	/* empty context1-15 */
4319	/* set vm size, must be a multiple of 4 */
4320	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4321	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4322	/* Assign the pt base to something valid for now; the pts used for
4323	 * the VMs are determined by the application and setup and assigned
4324	 * on the fly in the vm part of radeon_gart.c
4325	 */
4326	for (i = 1; i < 16; i++) {
4327		if (i < 8)
4328			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4329			       rdev->vm_manager.saved_table_addr[i]);
4330		else
4331			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4332			       rdev->vm_manager.saved_table_addr[i]);
4333	}
4334
4335	/* enable context1-15 */
4336	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4337	       (u32)(rdev->dummy_page.addr >> 12));
4338	WREG32(VM_CONTEXT1_CNTL2, 4);
4339	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4340				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4341				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4342				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4343				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4344				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4345				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4346				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4347				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4349				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4350				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4351				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4352				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4353
4354	si_pcie_gart_tlb_flush(rdev);
4355	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4356		 (unsigned)(rdev->mc.gtt_size >> 20),
4357		 (unsigned long long)rdev->gart.table_addr);
4358	rdev->gart.ready = true;
4359	return 0;
4360}
4361
4362static void si_pcie_gart_disable(struct radeon_device *rdev)
4363{
4364	unsigned i;
4365
4366	for (i = 1; i < 16; ++i) {
4367		uint32_t reg;
4368		if (i < 8)
4369			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4370		else
4371			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4372		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4373	}
4374
4375	/* Disable all tables */
4376	WREG32(VM_CONTEXT0_CNTL, 0);
4377	WREG32(VM_CONTEXT1_CNTL, 0);
4378	/* Setup TLB control */
4379	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4380	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4381	/* Setup L2 cache */
4382	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4383	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4384	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4385	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4386	WREG32(VM_L2_CNTL2, 0);
4387	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4388	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4389	radeon_gart_table_vram_unpin(rdev);
4390}
4391
/*
 * Final GART teardown: disable it in hardware, free the VRAM-backed page
 * table, then release the common GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware off before the table backing it goes away */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4398
4399/* vm parser */
4400static bool si_vm_reg_valid(u32 reg)
4401{
4402	/* context regs are fine */
4403	if (reg >= 0x28000)
4404		return true;
4405
4406	/* check config regs */
4407	switch (reg) {
4408	case GRBM_GFX_INDEX:
4409	case CP_STRMOUT_CNTL:
4410	case VGT_VTX_VECT_EJECT_REG:
4411	case VGT_CACHE_INVALIDATION:
4412	case VGT_ESGS_RING_SIZE:
4413	case VGT_GSVS_RING_SIZE:
4414	case VGT_GS_VERTEX_REUSE:
4415	case VGT_PRIMITIVE_TYPE:
4416	case VGT_INDEX_TYPE:
4417	case VGT_NUM_INDICES:
4418	case VGT_NUM_INSTANCES:
4419	case VGT_TF_RING_SIZE:
4420	case VGT_HS_OFFCHIP_PARAM:
4421	case VGT_TF_MEMORY_BASE:
4422	case PA_CL_ENHANCE:
4423	case PA_SU_LINE_STIPPLE_VALUE:
4424	case PA_SC_LINE_STIPPLE_STATE:
4425	case PA_SC_ENHANCE:
4426	case SQC_CACHES:
4427	case SPI_STATIC_THREAD_MGMT_1:
4428	case SPI_STATIC_THREAD_MGMT_2:
4429	case SPI_STATIC_THREAD_MGMT_3:
4430	case SPI_PS_MAX_WAVE_ID:
4431	case SPI_CONFIG_CNTL:
4432	case SPI_CONFIG_CNTL_1:
4433	case TA_CNTL_AUX:
4434		return true;
4435	default:
4436		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4437		return false;
4438	}
4439}
4440
4441static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4442				  u32 *ib, struct radeon_cs_packet *pkt)
4443{
4444	switch (pkt->opcode) {
4445	case PACKET3_NOP:
4446	case PACKET3_SET_BASE:
4447	case PACKET3_SET_CE_DE_COUNTERS:
4448	case PACKET3_LOAD_CONST_RAM:
4449	case PACKET3_WRITE_CONST_RAM:
4450	case PACKET3_WRITE_CONST_RAM_OFFSET:
4451	case PACKET3_DUMP_CONST_RAM:
4452	case PACKET3_INCREMENT_CE_COUNTER:
4453	case PACKET3_WAIT_ON_DE_COUNTER:
4454	case PACKET3_CE_WRITE:
4455		break;
4456	default:
4457		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4458		return -EINVAL;
4459	}
4460	return 0;
4461}
4462
4463static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4464{
4465	u32 start_reg, reg, i;
4466	u32 command = ib[idx + 4];
4467	u32 info = ib[idx + 1];
4468	u32 idx_value = ib[idx];
4469	if (command & PACKET3_CP_DMA_CMD_SAS) {
4470		/* src address space is register */
4471		if (((info & 0x60000000) >> 29) == 0) {
4472			start_reg = idx_value << 2;
4473			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4474				reg = start_reg;
4475				if (!si_vm_reg_valid(reg)) {
4476					DRM_ERROR("CP DMA Bad SRC register\n");
4477					return -EINVAL;
4478				}
4479			} else {
4480				for (i = 0; i < (command & 0x1fffff); i++) {
4481					reg = start_reg + (4 * i);
4482					if (!si_vm_reg_valid(reg)) {
4483						DRM_ERROR("CP DMA Bad SRC register\n");
4484						return -EINVAL;
4485					}
4486				}
4487			}
4488		}
4489	}
4490	if (command & PACKET3_CP_DMA_CMD_DAS) {
4491		/* dst address space is register */
4492		if (((info & 0x00300000) >> 20) == 0) {
4493			start_reg = ib[idx + 2];
4494			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4495				reg = start_reg;
4496				if (!si_vm_reg_valid(reg)) {
4497					DRM_ERROR("CP DMA Bad DST register\n");
4498					return -EINVAL;
4499				}
4500			} else {
4501				for (i = 0; i < (command & 0x1fffff); i++) {
4502					reg = start_reg + (4 * i);
4503				if (!si_vm_reg_valid(reg)) {
4504						DRM_ERROR("CP DMA Bad DST register\n");
4505						return -EINVAL;
4506					}
4507				}
4508			}
4509		}
4510	}
4511	return 0;
4512}
4513
4514static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4515				   u32 *ib, struct radeon_cs_packet *pkt)
4516{
4517	int r;
4518	u32 idx = pkt->idx + 1;
4519	u32 idx_value = ib[idx];
4520	u32 start_reg, end_reg, reg, i;
4521
4522	switch (pkt->opcode) {
4523	case PACKET3_NOP:
4524	case PACKET3_SET_BASE:
4525	case PACKET3_CLEAR_STATE:
4526	case PACKET3_INDEX_BUFFER_SIZE:
4527	case PACKET3_DISPATCH_DIRECT:
4528	case PACKET3_DISPATCH_INDIRECT:
4529	case PACKET3_ALLOC_GDS:
4530	case PACKET3_WRITE_GDS_RAM:
4531	case PACKET3_ATOMIC_GDS:
4532	case PACKET3_ATOMIC:
4533	case PACKET3_OCCLUSION_QUERY:
4534	case PACKET3_SET_PREDICATION:
4535	case PACKET3_COND_EXEC:
4536	case PACKET3_PRED_EXEC:
4537	case PACKET3_DRAW_INDIRECT:
4538	case PACKET3_DRAW_INDEX_INDIRECT:
4539	case PACKET3_INDEX_BASE:
4540	case PACKET3_DRAW_INDEX_2:
4541	case PACKET3_CONTEXT_CONTROL:
4542	case PACKET3_INDEX_TYPE:
4543	case PACKET3_DRAW_INDIRECT_MULTI:
4544	case PACKET3_DRAW_INDEX_AUTO:
4545	case PACKET3_DRAW_INDEX_IMMD:
4546	case PACKET3_NUM_INSTANCES:
4547	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4548	case PACKET3_STRMOUT_BUFFER_UPDATE:
4549	case PACKET3_DRAW_INDEX_OFFSET_2:
4550	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4551	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4552	case PACKET3_MPEG_INDEX:
4553	case PACKET3_WAIT_REG_MEM:
4554	case PACKET3_MEM_WRITE:
4555	case PACKET3_PFP_SYNC_ME:
4556	case PACKET3_SURFACE_SYNC:
4557	case PACKET3_EVENT_WRITE:
4558	case PACKET3_EVENT_WRITE_EOP:
4559	case PACKET3_EVENT_WRITE_EOS:
4560	case PACKET3_SET_CONTEXT_REG:
4561	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4562	case PACKET3_SET_SH_REG:
4563	case PACKET3_SET_SH_REG_OFFSET:
4564	case PACKET3_INCREMENT_DE_COUNTER:
4565	case PACKET3_WAIT_ON_CE_COUNTER:
4566	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4567	case PACKET3_ME_WRITE:
4568		break;
4569	case PACKET3_COPY_DATA:
4570		if ((idx_value & 0xf00) == 0) {
4571			reg = ib[idx + 3] * 4;
4572			if (!si_vm_reg_valid(reg))
4573				return -EINVAL;
4574		}
4575		break;
4576	case PACKET3_WRITE_DATA:
4577		if ((idx_value & 0xf00) == 0) {
4578			start_reg = ib[idx + 1] * 4;
4579			if (idx_value & 0x10000) {
4580				if (!si_vm_reg_valid(start_reg))
4581					return -EINVAL;
4582			} else {
4583				for (i = 0; i < (pkt->count - 2); i++) {
4584					reg = start_reg + (4 * i);
4585					if (!si_vm_reg_valid(reg))
4586						return -EINVAL;
4587				}
4588			}
4589		}
4590		break;
4591	case PACKET3_COND_WRITE:
4592		if (idx_value & 0x100) {
4593			reg = ib[idx + 5] * 4;
4594			if (!si_vm_reg_valid(reg))
4595				return -EINVAL;
4596		}
4597		break;
4598	case PACKET3_COPY_DW:
4599		if (idx_value & 0x2) {
4600			reg = ib[idx + 3] * 4;
4601			if (!si_vm_reg_valid(reg))
4602				return -EINVAL;
4603		}
4604		break;
4605	case PACKET3_SET_CONFIG_REG:
4606		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4607		end_reg = 4 * pkt->count + start_reg - 4;
4608		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4609		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4610		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4611			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4612			return -EINVAL;
4613		}
4614		for (i = 0; i < pkt->count; i++) {
4615			reg = start_reg + (4 * i);
4616			if (!si_vm_reg_valid(reg))
4617				return -EINVAL;
4618		}
4619		break;
4620	case PACKET3_CP_DMA:
4621		r = si_vm_packet3_cp_dma_check(ib, idx);
4622		if (r)
4623			return r;
4624		break;
4625	default:
4626		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4627		return -EINVAL;
4628	}
4629	return 0;
4630}
4631
4632static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4633				       u32 *ib, struct radeon_cs_packet *pkt)
4634{
4635	int r;
4636	u32 idx = pkt->idx + 1;
4637	u32 idx_value = ib[idx];
4638	u32 start_reg, reg, i;
4639
4640	switch (pkt->opcode) {
4641	case PACKET3_NOP:
4642	case PACKET3_SET_BASE:
4643	case PACKET3_CLEAR_STATE:
4644	case PACKET3_DISPATCH_DIRECT:
4645	case PACKET3_DISPATCH_INDIRECT:
4646	case PACKET3_ALLOC_GDS:
4647	case PACKET3_WRITE_GDS_RAM:
4648	case PACKET3_ATOMIC_GDS:
4649	case PACKET3_ATOMIC:
4650	case PACKET3_OCCLUSION_QUERY:
4651	case PACKET3_SET_PREDICATION:
4652	case PACKET3_COND_EXEC:
4653	case PACKET3_PRED_EXEC:
4654	case PACKET3_CONTEXT_CONTROL:
4655	case PACKET3_STRMOUT_BUFFER_UPDATE:
4656	case PACKET3_WAIT_REG_MEM:
4657	case PACKET3_MEM_WRITE:
4658	case PACKET3_PFP_SYNC_ME:
4659	case PACKET3_SURFACE_SYNC:
4660	case PACKET3_EVENT_WRITE:
4661	case PACKET3_EVENT_WRITE_EOP:
4662	case PACKET3_EVENT_WRITE_EOS:
4663	case PACKET3_SET_CONTEXT_REG:
4664	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4665	case PACKET3_SET_SH_REG:
4666	case PACKET3_SET_SH_REG_OFFSET:
4667	case PACKET3_INCREMENT_DE_COUNTER:
4668	case PACKET3_WAIT_ON_CE_COUNTER:
4669	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4670	case PACKET3_ME_WRITE:
4671		break;
4672	case PACKET3_COPY_DATA:
4673		if ((idx_value & 0xf00) == 0) {
4674			reg = ib[idx + 3] * 4;
4675			if (!si_vm_reg_valid(reg))
4676				return -EINVAL;
4677		}
4678		break;
4679	case PACKET3_WRITE_DATA:
4680		if ((idx_value & 0xf00) == 0) {
4681			start_reg = ib[idx + 1] * 4;
4682			if (idx_value & 0x10000) {
4683				if (!si_vm_reg_valid(start_reg))
4684					return -EINVAL;
4685			} else {
4686				for (i = 0; i < (pkt->count - 2); i++) {
4687					reg = start_reg + (4 * i);
4688					if (!si_vm_reg_valid(reg))
4689						return -EINVAL;
4690				}
4691			}
4692		}
4693		break;
4694	case PACKET3_COND_WRITE:
4695		if (idx_value & 0x100) {
4696			reg = ib[idx + 5] * 4;
4697			if (!si_vm_reg_valid(reg))
4698				return -EINVAL;
4699		}
4700		break;
4701	case PACKET3_COPY_DW:
4702		if (idx_value & 0x2) {
4703			reg = ib[idx + 3] * 4;
4704			if (!si_vm_reg_valid(reg))
4705				return -EINVAL;
4706		}
4707		break;
4708	case PACKET3_CP_DMA:
4709		r = si_vm_packet3_cp_dma_check(ib, idx);
4710		if (r)
4711			return r;
4712		break;
4713	default:
4714		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4715		return -EINVAL;
4716	}
4717	return 0;
4718}
4719
4720int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4721{
4722	int ret = 0;
4723	u32 idx = 0, i;
4724	struct radeon_cs_packet pkt;
4725
4726	do {
4727		pkt.idx = idx;
4728		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4729		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4730		pkt.one_reg_wr = 0;
4731		switch (pkt.type) {
4732		case RADEON_PACKET_TYPE0:
4733			dev_err(rdev->dev, "Packet0 not allowed!\n");
4734			ret = -EINVAL;
4735			break;
4736		case RADEON_PACKET_TYPE2:
4737			idx += 1;
4738			break;
4739		case RADEON_PACKET_TYPE3:
4740			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4741			if (ib->is_const_ib)
4742				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4743			else {
4744				switch (ib->ring) {
4745				case RADEON_RING_TYPE_GFX_INDEX:
4746					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4747					break;
4748				case CAYMAN_RING_TYPE_CP1_INDEX:
4749				case CAYMAN_RING_TYPE_CP2_INDEX:
4750					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4751					break;
4752				default:
4753					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4754					ret = -EINVAL;
4755					break;
4756				}
4757			}
4758			idx += pkt.count + 2;
4759			break;
4760		default:
4761			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4762			ret = -EINVAL;
4763			break;
4764		}
4765		if (ret) {
4766			for (i = 0; i < ib->length_dw; i++) {
4767				if (i == idx)
4768					printk("\t0x%08x <---\n", ib->ptr[i]);
4769				else
4770					printk("\t0x%08x\n", ib->ptr[i]);
4771			}
4772			break;
4773		}
4774	} while (idx < ib->length_dw);
4775
4776	return ret;
4777}
4778
4779/*
4780 * vm
4781 */
4782int si_vm_init(struct radeon_device *rdev)
4783{
4784	/* number of VMs */
4785	rdev->vm_manager.nvm = 16;
4786	/* base offset of vram pages */
4787	rdev->vm_manager.vram_base_offset = 0;
4788
4789	return 0;
4790}
4791
/* VM teardown hook; intentionally empty, there is nothing to release here. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4795
4796/**
4797 * si_vm_decode_fault - print human readable fault info
4798 *
4799 * @rdev: radeon_device pointer
4800 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4801 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4802 *
4803 * Print human readable fault information (SI).
4804 */
4805static void si_vm_decode_fault(struct radeon_device *rdev,
4806			       u32 status, u32 addr)
4807{
4808	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4809	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4810	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4811	char *block;
4812
4813	if (rdev->family == CHIP_TAHITI) {
4814		switch (mc_id) {
4815		case 160:
4816		case 144:
4817		case 96:
4818		case 80:
4819		case 224:
4820		case 208:
4821		case 32:
4822		case 16:
4823			block = "CB";
4824			break;
4825		case 161:
4826		case 145:
4827		case 97:
4828		case 81:
4829		case 225:
4830		case 209:
4831		case 33:
4832		case 17:
4833			block = "CB_FMASK";
4834			break;
4835		case 162:
4836		case 146:
4837		case 98:
4838		case 82:
4839		case 226:
4840		case 210:
4841		case 34:
4842		case 18:
4843			block = "CB_CMASK";
4844			break;
4845		case 163:
4846		case 147:
4847		case 99:
4848		case 83:
4849		case 227:
4850		case 211:
4851		case 35:
4852		case 19:
4853			block = "CB_IMMED";
4854			break;
4855		case 164:
4856		case 148:
4857		case 100:
4858		case 84:
4859		case 228:
4860		case 212:
4861		case 36:
4862		case 20:
4863			block = "DB";
4864			break;
4865		case 165:
4866		case 149:
4867		case 101:
4868		case 85:
4869		case 229:
4870		case 213:
4871		case 37:
4872		case 21:
4873			block = "DB_HTILE";
4874			break;
4875		case 167:
4876		case 151:
4877		case 103:
4878		case 87:
4879		case 231:
4880		case 215:
4881		case 39:
4882		case 23:
4883			block = "DB_STEN";
4884			break;
4885		case 72:
4886		case 68:
4887		case 64:
4888		case 8:
4889		case 4:
4890		case 0:
4891		case 136:
4892		case 132:
4893		case 128:
4894		case 200:
4895		case 196:
4896		case 192:
4897			block = "TC";
4898			break;
4899		case 112:
4900		case 48:
4901			block = "CP";
4902			break;
4903		case 49:
4904		case 177:
4905		case 50:
4906		case 178:
4907			block = "SH";
4908			break;
4909		case 53:
4910		case 190:
4911			block = "VGT";
4912			break;
4913		case 117:
4914			block = "IH";
4915			break;
4916		case 51:
4917		case 115:
4918			block = "RLC";
4919			break;
4920		case 119:
4921		case 183:
4922			block = "DMA0";
4923			break;
4924		case 61:
4925			block = "DMA1";
4926			break;
4927		case 248:
4928		case 120:
4929			block = "HDP";
4930			break;
4931		default:
4932			block = "unknown";
4933			break;
4934		}
4935	} else {
4936		switch (mc_id) {
4937		case 32:
4938		case 16:
4939		case 96:
4940		case 80:
4941		case 160:
4942		case 144:
4943		case 224:
4944		case 208:
4945			block = "CB";
4946			break;
4947		case 33:
4948		case 17:
4949		case 97:
4950		case 81:
4951		case 161:
4952		case 145:
4953		case 225:
4954		case 209:
4955			block = "CB_FMASK";
4956			break;
4957		case 34:
4958		case 18:
4959		case 98:
4960		case 82:
4961		case 162:
4962		case 146:
4963		case 226:
4964		case 210:
4965			block = "CB_CMASK";
4966			break;
4967		case 35:
4968		case 19:
4969		case 99:
4970		case 83:
4971		case 163:
4972		case 147:
4973		case 227:
4974		case 211:
4975			block = "CB_IMMED";
4976			break;
4977		case 36:
4978		case 20:
4979		case 100:
4980		case 84:
4981		case 164:
4982		case 148:
4983		case 228:
4984		case 212:
4985			block = "DB";
4986			break;
4987		case 37:
4988		case 21:
4989		case 101:
4990		case 85:
4991		case 165:
4992		case 149:
4993		case 229:
4994		case 213:
4995			block = "DB_HTILE";
4996			break;
4997		case 39:
4998		case 23:
4999		case 103:
5000		case 87:
5001		case 167:
5002		case 151:
5003		case 231:
5004		case 215:
5005			block = "DB_STEN";
5006			break;
5007		case 72:
5008		case 68:
5009		case 8:
5010		case 4:
5011		case 136:
5012		case 132:
5013		case 200:
5014		case 196:
5015			block = "TC";
5016			break;
5017		case 112:
5018		case 48:
5019			block = "CP";
5020			break;
5021		case 49:
5022		case 177:
5023		case 50:
5024		case 178:
5025			block = "SH";
5026			break;
5027		case 53:
5028			block = "VGT";
5029			break;
5030		case 117:
5031			block = "IH";
5032			break;
5033		case 51:
5034		case 115:
5035			block = "RLC";
5036			break;
5037		case 119:
5038		case 183:
5039			block = "DMA0";
5040			break;
5041		case 61:
5042			block = "DMA1";
5043			break;
5044		case 248:
5045		case 120:
5046			block = "HDP";
5047			break;
5048		default:
5049			block = "unknown";
5050			break;
5051		}
5052	}
5053
5054	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5055	       protections, vmid, addr,
5056	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5057	       block, mc_id);
5058}
5059
5060void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5061		 unsigned vm_id, uint64_t pd_addr)
5062{
5063	/* write new base address */
5064	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5065	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5066				 WRITE_DATA_DST_SEL(0)));
5067
5068	if (vm_id < 8) {
5069		radeon_ring_write(ring,
5070				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5071	} else {
5072		radeon_ring_write(ring,
5073				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5074	}
5075	radeon_ring_write(ring, 0);
5076	radeon_ring_write(ring, pd_addr >> 12);
5077
5078	/* flush hdp cache */
5079	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5080	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5081				 WRITE_DATA_DST_SEL(0)));
5082	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5083	radeon_ring_write(ring, 0);
5084	radeon_ring_write(ring, 0x1);
5085
5086	/* bits 0-15 are the VM contexts0-15 */
5087	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5088	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5089				 WRITE_DATA_DST_SEL(0)));
5090	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5091	radeon_ring_write(ring, 0);
5092	radeon_ring_write(ring, 1 << vm_id);
5093
5094	/* wait for the invalidate to complete */
5095	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5096	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5097				 WAIT_REG_MEM_ENGINE(0))); /* me */
5098	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5099	radeon_ring_write(ring, 0);
5100	radeon_ring_write(ring, 0); /* ref */
5101	radeon_ring_write(ring, 0); /* mask */
5102	radeon_ring_write(ring, 0x20); /* poll interval */
5103
5104	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5105	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5106	radeon_ring_write(ring, 0x0);
5107}
5108
5109/*
5110 *  Power and clock gating
5111 */
5112static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5113{
5114	int i;
5115
5116	for (i = 0; i < rdev->usec_timeout; i++) {
5117		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5118			break;
5119		udelay(1);
5120	}
5121
5122	for (i = 0; i < rdev->usec_timeout; i++) {
5123		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5124			break;
5125		udelay(1);
5126	}
5127}
5128
5129static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5130					 bool enable)
5131{
5132	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5133	u32 mask;
5134	int i;
5135
5136	if (enable)
5137		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5138	else
5139		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5140	WREG32(CP_INT_CNTL_RING0, tmp);
5141
5142	if (!enable) {
5143		/* read a gfx register */
5144		tmp = RREG32(DB_DEPTH_INFO);
5145
5146		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5147		for (i = 0; i < rdev->usec_timeout; i++) {
5148			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5149				break;
5150			udelay(1);
5151		}
5152	}
5153}
5154
5155static void si_set_uvd_dcm(struct radeon_device *rdev,
5156			   bool sw_mode)
5157{
5158	u32 tmp, tmp2;
5159
5160	tmp = RREG32(UVD_CGC_CTRL);
5161	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5162	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5163
5164	if (sw_mode) {
5165		tmp &= ~0x7ffff800;
5166		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5167	} else {
5168		tmp |= 0x7ffff800;
5169		tmp2 = 0;
5170	}
5171
5172	WREG32(UVD_CGC_CTRL, tmp);
5173	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5174}
5175
5176void si_init_uvd_internal_cg(struct radeon_device *rdev)
5177{
5178	bool hw_mode = true;
5179
5180	if (hw_mode) {
5181		si_set_uvd_dcm(rdev, false);
5182	} else {
5183		u32 tmp = RREG32(UVD_CGC_CTRL);
5184		tmp &= ~DCM;
5185		WREG32(UVD_CGC_CTRL, tmp);
5186	}
5187}
5188
5189static u32 si_halt_rlc(struct radeon_device *rdev)
5190{
5191	u32 data, orig;
5192
5193	orig = data = RREG32(RLC_CNTL);
5194
5195	if (data & RLC_ENABLE) {
5196		data &= ~RLC_ENABLE;
5197		WREG32(RLC_CNTL, data);
5198
5199		si_wait_for_rlc_serdes(rdev);
5200	}
5201
5202	return orig;
5203}
5204
5205static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5206{
5207	u32 tmp;
5208
5209	tmp = RREG32(RLC_CNTL);
5210	if (tmp != rlc)
5211		WREG32(RLC_CNTL, rlc);
5212}
5213
5214static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5215{
5216	u32 data, orig;
5217
5218	orig = data = RREG32(DMA_PG);
5219	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5220		data |= PG_CNTL_ENABLE;
5221	else
5222		data &= ~PG_CNTL_ENABLE;
5223	if (orig != data)
5224		WREG32(DMA_PG, data);
5225}
5226
5227static void si_init_dma_pg(struct radeon_device *rdev)
5228{
5229	u32 tmp;
5230
5231	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5232	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5233
5234	for (tmp = 0; tmp < 5; tmp++)
5235		WREG32(DMA_PGFSM_WRITE, 0);
5236}
5237
5238static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5239			       bool enable)
5240{
5241	u32 tmp;
5242
5243	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5244		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5245		WREG32(RLC_TTOP_D, tmp);
5246
5247		tmp = RREG32(RLC_PG_CNTL);
5248		tmp |= GFX_PG_ENABLE;
5249		WREG32(RLC_PG_CNTL, tmp);
5250
5251		tmp = RREG32(RLC_AUTO_PG_CTRL);
5252		tmp |= AUTO_PG_EN;
5253		WREG32(RLC_AUTO_PG_CTRL, tmp);
5254	} else {
5255		tmp = RREG32(RLC_AUTO_PG_CTRL);
5256		tmp &= ~AUTO_PG_EN;
5257		WREG32(RLC_AUTO_PG_CTRL, tmp);
5258
5259		tmp = RREG32(DB_RENDER_CONTROL);
5260	}
5261}
5262
5263static void si_init_gfx_cgpg(struct radeon_device *rdev)
5264{
5265	u32 tmp;
5266
5267	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5268
5269	tmp = RREG32(RLC_PG_CNTL);
5270	tmp |= GFX_PG_SRC;
5271	WREG32(RLC_PG_CNTL, tmp);
5272
5273	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5274
5275	tmp = RREG32(RLC_AUTO_PG_CTRL);
5276
5277	tmp &= ~GRBM_REG_SGIT_MASK;
5278	tmp |= GRBM_REG_SGIT(0x700);
5279	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5280	WREG32(RLC_AUTO_PG_CTRL, tmp);
5281}
5282
5283static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5284{
5285	u32 mask = 0, tmp, tmp1;
5286	int i;
5287
5288	si_select_se_sh(rdev, se, sh);
5289	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5290	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5291	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5292
5293	tmp &= 0xffff0000;
5294
5295	tmp |= tmp1;
5296	tmp >>= 16;
5297
5298	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5299		mask <<= 1;
5300		mask |= 1;
5301	}
5302
5303	return (~tmp) & mask;
5304}
5305
5306static void si_init_ao_cu_mask(struct radeon_device *rdev)
5307{
5308	u32 i, j, k, active_cu_number = 0;
5309	u32 mask, counter, cu_bitmap;
5310	u32 tmp = 0;
5311
5312	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5313		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5314			mask = 1;
5315			cu_bitmap = 0;
5316			counter  = 0;
5317			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5318				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5319					if (counter < 2)
5320						cu_bitmap |= mask;
5321					counter++;
5322				}
5323				mask <<= 1;
5324			}
5325
5326			active_cu_number += counter;
5327			tmp |= (cu_bitmap << (i * 16 + j * 8));
5328		}
5329	}
5330
5331	WREG32(RLC_PG_AO_CU_MASK, tmp);
5332
5333	tmp = RREG32(RLC_MAX_PG_CU);
5334	tmp &= ~MAX_PU_CU_MASK;
5335	tmp |= MAX_PU_CU(active_cu_number);
5336	WREG32(RLC_MAX_PG_CU, tmp);
5337}
5338
/*
 * Enable or disable CGCG/CGLS in RLC_CGCG_CGLS_CTRL.  Enabling requires
 * RADEON_CG_SUPPORT_GFX_CGCG and runs a serdes write sequence with the RLC
 * halted; any other case takes the disable path.  The control register is
 * only written if its value changes.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* tmp keeps the RLC_CNTL value to restore below */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* put RLC_CNTL back to what it was before the halt */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* four back-to-back reads; the values are discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5378
/*
 * Enable or disable MGCG.  Enabling requires RADEON_CG_SUPPORT_GFX_MGCG;
 * CP memory light sleep is additionally turned on when
 * RADEON_CG_SUPPORT_GFX_CP_LS is set.  Both paths finish with a serdes
 * write issued while the RLC is halted, after which RLC_CNTL is restored.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* the register is set to a fixed value; orig only avoids a redundant write */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low six override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* tmp keeps the RLC_CNTL value to restore below */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the low two override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* tmp keeps the RLC_CNTL value to restore below */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5434
5435static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5436			       bool enable)
5437{
5438	u32 orig, data, tmp;
5439
5440	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5441		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5442		tmp |= 0x3fff;
5443		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5444
5445		orig = data = RREG32(UVD_CGC_CTRL);
5446		data |= DCM;
5447		if (orig != data)
5448			WREG32(UVD_CGC_CTRL, data);
5449
5450		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5451		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5452	} else {
5453		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5454		tmp &= ~0x3fff;
5455		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5456
5457		orig = data = RREG32(UVD_CGC_CTRL);
5458		data &= ~DCM;
5459		if (orig != data)
5460			WREG32(UVD_CGC_CTRL, data);
5461
5462		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5463		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5464	}
5465}
5466
/*
 * Registers iterated by si_enable_mc_ls() and si_enable_mc_mgcg(), which
 * toggle MC_LS_ENABLE and MC_CG_ENABLE respectively in each of them.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5479
5480static void si_enable_mc_ls(struct radeon_device *rdev,
5481			    bool enable)
5482{
5483	int i;
5484	u32 orig, data;
5485
5486	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5487		orig = data = RREG32(mc_cg_registers[i]);
5488		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5489			data |= MC_LS_ENABLE;
5490		else
5491			data &= ~MC_LS_ENABLE;
5492		if (data != orig)
5493			WREG32(mc_cg_registers[i], data);
5494	}
5495}
5496
5497static void si_enable_mc_mgcg(struct radeon_device *rdev,
5498			       bool enable)
5499{
5500	int i;
5501	u32 orig, data;
5502
5503	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5504		orig = data = RREG32(mc_cg_registers[i]);
5505		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5506			data |= MC_CG_ENABLE;
5507		else
5508			data &= ~MC_CG_ENABLE;
5509		if (data != orig)
5510			WREG32(mc_cg_registers[i], data);
5511	}
5512}
5513
5514static void si_enable_dma_mgcg(struct radeon_device *rdev,
5515			       bool enable)
5516{
5517	u32 orig, data, offset;
5518	int i;
5519
5520	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5521		for (i = 0; i < 2; i++) {
5522			if (i == 0)
5523				offset = DMA0_REGISTER_OFFSET;
5524			else
5525				offset = DMA1_REGISTER_OFFSET;
5526			orig = data = RREG32(DMA_POWER_CNTL + offset);
5527			data &= ~MEM_POWER_OVERRIDE;
5528			if (data != orig)
5529				WREG32(DMA_POWER_CNTL + offset, data);
5530			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5531		}
5532	} else {
5533		for (i = 0; i < 2; i++) {
5534			if (i == 0)
5535				offset = DMA0_REGISTER_OFFSET;
5536			else
5537				offset = DMA1_REGISTER_OFFSET;
5538			orig = data = RREG32(DMA_POWER_CNTL + offset);
5539			data |= MEM_POWER_OVERRIDE;
5540			if (data != orig)
5541				WREG32(DMA_POWER_CNTL + offset, data);
5542
5543			orig = data = RREG32(DMA_CLK_CTRL + offset);
5544			data = 0xff000000;
5545			if (data != orig)
5546				WREG32(DMA_CLK_CTRL + offset, data);
5547		}
5548	}
5549}
5550
5551static void si_enable_bif_mgls(struct radeon_device *rdev,
5552			       bool enable)
5553{
5554	u32 orig, data;
5555
5556	orig = data = RREG32_PCIE(PCIE_CNTL2);
5557
5558	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5559		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5560			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5561	else
5562		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5563			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5564
5565	if (orig != data)
5566		WREG32_PCIE(PCIE_CNTL2, data);
5567}
5568
5569static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5570			       bool enable)
5571{
5572	u32 orig, data;
5573
5574	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5575
5576	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5577		data &= ~CLOCK_GATING_DIS;
5578	else
5579		data |= CLOCK_GATING_DIS;
5580
5581	if (orig != data)
5582		WREG32(HDP_HOST_PATH_CNTL, data);
5583}
5584
5585static void si_enable_hdp_ls(struct radeon_device *rdev,
5586			     bool enable)
5587{
5588	u32 orig, data;
5589
5590	orig = data = RREG32(HDP_MEM_POWER_LS);
5591
5592	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5593		data |= HDP_LS_ENABLE;
5594	else
5595		data &= ~HDP_LS_ENABLE;
5596
5597	if (orig != data)
5598		WREG32(HDP_MEM_POWER_LS, data);
5599}
5600
5601static void si_update_cg(struct radeon_device *rdev,
5602			 u32 block, bool enable)
5603{
5604	if (block & RADEON_CG_BLOCK_GFX) {
5605		si_enable_gui_idle_interrupt(rdev, false);
5606		/* order matters! */
5607		if (enable) {
5608			si_enable_mgcg(rdev, true);
5609			si_enable_cgcg(rdev, true);
5610		} else {
5611			si_enable_cgcg(rdev, false);
5612			si_enable_mgcg(rdev, false);
5613		}
5614		si_enable_gui_idle_interrupt(rdev, true);
5615	}
5616
5617	if (block & RADEON_CG_BLOCK_MC) {
5618		si_enable_mc_mgcg(rdev, enable);
5619		si_enable_mc_ls(rdev, enable);
5620	}
5621
5622	if (block & RADEON_CG_BLOCK_SDMA) {
5623		si_enable_dma_mgcg(rdev, enable);
5624	}
5625
5626	if (block & RADEON_CG_BLOCK_BIF) {
5627		si_enable_bif_mgls(rdev, enable);
5628	}
5629
5630	if (block & RADEON_CG_BLOCK_UVD) {
5631		if (rdev->has_uvd) {
5632			si_enable_uvd_mgcg(rdev, enable);
5633		}
5634	}
5635
5636	if (block & RADEON_CG_BLOCK_HDP) {
5637		si_enable_hdp_mgcg(rdev, enable);
5638		si_enable_hdp_ls(rdev, enable);
5639	}
5640}
5641
5642static void si_init_cg(struct radeon_device *rdev)
5643{
5644	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5645			    RADEON_CG_BLOCK_MC |
5646			    RADEON_CG_BLOCK_SDMA |
5647			    RADEON_CG_BLOCK_BIF |
5648			    RADEON_CG_BLOCK_HDP), true);
5649	if (rdev->has_uvd) {
5650		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5651		si_init_uvd_internal_cg(rdev);
5652	}
5653}
5654
5655static void si_fini_cg(struct radeon_device *rdev)
5656{
5657	if (rdev->has_uvd) {
5658		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5659	}
5660	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5661			    RADEON_CG_BLOCK_MC |
5662			    RADEON_CG_BLOCK_SDMA |
5663			    RADEON_CG_BLOCK_BIF |
5664			    RADEON_CG_BLOCK_HDP), false);
5665}
5666
5667u32 si_get_csb_size(struct radeon_device *rdev)
5668{
5669	u32 count = 0;
5670	const struct cs_section_def *sect = NULL;
5671	const struct cs_extent_def *ext = NULL;
5672
5673	if (rdev->rlc.cs_data == NULL)
5674		return 0;
5675
5676	/* begin clear state */
5677	count += 2;
5678	/* context control state */
5679	count += 3;
5680
5681	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5682		for (ext = sect->section; ext->extent != NULL; ++ext) {
5683			if (sect->id == SECT_CONTEXT)
5684				count += 2 + ext->reg_count;
5685			else
5686				return 0;
5687		}
5688	}
5689	/* pa_sc_raster_config */
5690	count += 3;
5691	/* end clear state */
5692	count += 2;
5693	/* clear state */
5694	count += 2;
5695
5696	return count;
5697}
5698
5699void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5700{
5701	u32 count = 0, i;
5702	const struct cs_section_def *sect = NULL;
5703	const struct cs_extent_def *ext = NULL;
5704
5705	if (rdev->rlc.cs_data == NULL)
5706		return;
5707	if (buffer == NULL)
5708		return;
5709
5710	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5711	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5712
5713	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5714	buffer[count++] = cpu_to_le32(0x80000000);
5715	buffer[count++] = cpu_to_le32(0x80000000);
5716
5717	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5718		for (ext = sect->section; ext->extent != NULL; ++ext) {
5719			if (sect->id == SECT_CONTEXT) {
5720				buffer[count++] =
5721					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5722				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5723				for (i = 0; i < ext->reg_count; i++)
5724					buffer[count++] = cpu_to_le32(ext->extent[i]);
5725			} else {
5726				return;
5727			}
5728		}
5729	}
5730
5731	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5732	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5733	switch (rdev->family) {
5734	case CHIP_TAHITI:
5735	case CHIP_PITCAIRN:
5736		buffer[count++] = cpu_to_le32(0x2a00126a);
5737		break;
5738	case CHIP_VERDE:
5739		buffer[count++] = cpu_to_le32(0x0000124a);
5740		break;
5741	case CHIP_OLAND:
5742		buffer[count++] = cpu_to_le32(0x00000082);
5743		break;
5744	case CHIP_HAINAN:
5745		buffer[count++] = cpu_to_le32(0x00000000);
5746		break;
5747	default:
5748		buffer[count++] = cpu_to_le32(0x00000000);
5749		break;
5750	}
5751
5752	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5753	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5754
5755	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5756	buffer[count++] = cpu_to_le32(0);
5757}
5758
5759static void si_init_pg(struct radeon_device *rdev)
5760{
5761	if (rdev->pg_flags) {
5762		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5763			si_init_dma_pg(rdev);
5764		}
5765		si_init_ao_cu_mask(rdev);
5766		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5767			si_init_gfx_cgpg(rdev);
5768		} else {
5769			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5770			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5771		}
5772		si_enable_dma_pg(rdev, true);
5773		si_enable_gfx_cgpg(rdev, true);
5774	} else {
5775		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5776		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5777	}
5778}
5779
5780static void si_fini_pg(struct radeon_device *rdev)
5781{
5782	if (rdev->pg_flags) {
5783		si_enable_dma_pg(rdev, false);
5784		si_enable_gfx_cgpg(rdev, false);
5785	}
5786}
5787
5788/*
5789 * RLC
5790 */
5791void si_rlc_reset(struct radeon_device *rdev)
5792{
5793	u32 tmp = RREG32(GRBM_SOFT_RESET);
5794
5795	tmp |= SOFT_RESET_RLC;
5796	WREG32(GRBM_SOFT_RESET, tmp);
5797	udelay(50);
5798	tmp &= ~SOFT_RESET_RLC;
5799	WREG32(GRBM_SOFT_RESET, tmp);
5800	udelay(50);
5801}
5802
/*
 * Stop the RLC: zero RLC_CNTL, disable the GUI idle interrupt and wait for
 * the serdes masters to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5811
/*
 * Start the RLC: write RLC_ENABLE to RLC_CNTL, enable the GUI idle
 * interrupt and wait 50us.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5820
5821static bool si_lbpw_supported(struct radeon_device *rdev)
5822{
5823	u32 tmp;
5824
5825	/* Enable LBPW only for DDR3 */
5826	tmp = RREG32(MC_SEQ_MISC0);
5827	if ((tmp & 0xF0000000) == 0xB0000000)
5828		return true;
5829	return false;
5830}
5831
5832static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5833{
5834	u32 tmp;
5835
5836	tmp = RREG32(RLC_LB_CNTL);
5837	if (enable)
5838		tmp |= LOAD_BALANCE_ENABLE;
5839	else
5840		tmp &= ~LOAD_BALANCE_ENABLE;
5841	WREG32(RLC_LB_CNTL, tmp);
5842
5843	if (!enable) {
5844		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5845		WREG32(SPI_LB_CU_MASK, 0x00ff);
5846	}
5847}
5848
/*
 * Bring up the RLC: stop and reset it, initialize power and clock gating,
 * clear the RLC list/load-balance registers, upload the RLC microcode one
 * dword at a time, configure LBPW and start the RLC.
 *
 * Returns -EINVAL if no RLC firmware is loaded, 0 otherwise.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style image: little endian, size and offset come from the header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* old-style image: big endian, fixed SI_RLC_UCODE_SIZE dwords */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	/* rewind the ucode address after the upload */
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5903
5904static void si_enable_interrupts(struct radeon_device *rdev)
5905{
5906	u32 ih_cntl = RREG32(IH_CNTL);
5907	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5908
5909	ih_cntl |= ENABLE_INTR;
5910	ih_rb_cntl |= IH_RB_ENABLE;
5911	WREG32(IH_CNTL, ih_cntl);
5912	WREG32(IH_RB_CNTL, ih_rb_cntl);
5913	rdev->ih.enabled = true;
5914}
5915
5916static void si_disable_interrupts(struct radeon_device *rdev)
5917{
5918	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5919	u32 ih_cntl = RREG32(IH_CNTL);
5920
5921	ih_rb_cntl &= ~IH_RB_ENABLE;
5922	ih_cntl &= ~ENABLE_INTR;
5923	WREG32(IH_RB_CNTL, ih_rb_cntl);
5924	WREG32(IH_CNTL, ih_cntl);
5925	/* set rptr, wptr to 0 */
5926	WREG32(IH_RB_RPTR, 0);
5927	WREG32(IH_RB_WPTR, 0);
5928	rdev->ih.enabled = false;
5929	rdev->ih.rptr = 0;
5930}
5931
5932static void si_disable_interrupt_state(struct radeon_device *rdev)
5933{
5934	u32 tmp;
5935
5936	tmp = RREG32(CP_INT_CNTL_RING0) &
5937		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5938	WREG32(CP_INT_CNTL_RING0, tmp);
5939	WREG32(CP_INT_CNTL_RING1, 0);
5940	WREG32(CP_INT_CNTL_RING2, 0);
5941	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5942	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5943	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5944	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5945	WREG32(GRBM_INT_CNTL, 0);
5946	WREG32(SRBM_INT_CNTL, 0);
5947	if (rdev->num_crtc >= 2) {
5948		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5949		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5950	}
5951	if (rdev->num_crtc >= 4) {
5952		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5953		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5954	}
5955	if (rdev->num_crtc >= 6) {
5956		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5957		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5958	}
5959
5960	if (rdev->num_crtc >= 2) {
5961		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5962		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5963	}
5964	if (rdev->num_crtc >= 4) {
5965		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5966		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5967	}
5968	if (rdev->num_crtc >= 6) {
5969		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5970		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5971	}
5972
5973	if (!ASIC_IS_NODCE(rdev)) {
5974		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5975
5976		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5977		WREG32(DC_HPD1_INT_CONTROL, tmp);
5978		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5979		WREG32(DC_HPD2_INT_CONTROL, tmp);
5980		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5981		WREG32(DC_HPD3_INT_CONTROL, tmp);
5982		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5983		WREG32(DC_HPD4_INT_CONTROL, tmp);
5984		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5985		WREG32(DC_HPD5_INT_CONTROL, tmp);
5986		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5987		WREG32(DC_HPD6_INT_CONTROL, tmp);
5988	}
5989}
5990
5991static int si_irq_init(struct radeon_device *rdev)
5992{
5993	int ret = 0;
5994	int rb_bufsz;
5995	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5996
5997	/* allocate ring */
5998	ret = r600_ih_ring_alloc(rdev);
5999	if (ret)
6000		return ret;
6001
6002	/* disable irqs */
6003	si_disable_interrupts(rdev);
6004
6005	/* init rlc */
6006	ret = si_rlc_resume(rdev);
6007	if (ret) {
6008		r600_ih_ring_fini(rdev);
6009		return ret;
6010	}
6011
6012	/* setup interrupt control */
6013	/* set dummy read address to ring address */
6014	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6015	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6016	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6017	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6018	 */
6019	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6020	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6021	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6022	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6023
6024	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6025	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6026
6027	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6028		      IH_WPTR_OVERFLOW_CLEAR |
6029		      (rb_bufsz << 1));
6030
6031	if (rdev->wb.enabled)
6032		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6033
6034	/* set the writeback address whether it's enabled or not */
6035	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6036	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6037
6038	WREG32(IH_RB_CNTL, ih_rb_cntl);
6039
6040	/* set rptr, wptr to 0 */
6041	WREG32(IH_RB_RPTR, 0);
6042	WREG32(IH_RB_WPTR, 0);
6043
6044	/* Default settings for IH_CNTL (disabled at first) */
6045	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6046	/* RPTR_REARM only works if msi's are enabled */
6047	if (rdev->msi_enabled)
6048		ih_cntl |= RPTR_REARM;
6049	WREG32(IH_CNTL, ih_cntl);
6050
6051	/* force the active interrupt state to all disabled */
6052	si_disable_interrupt_state(rdev);
6053
6054	pci_set_master(rdev->pdev);
6055
6056	/* enable irqs */
6057	si_enable_interrupts(rdev);
6058
6059	return ret;
6060}
6061
6062int si_irq_set(struct radeon_device *rdev)
6063{
6064	u32 cp_int_cntl;
6065	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6066	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6067	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6068	u32 grbm_int_cntl = 0;
6069	u32 dma_cntl, dma_cntl1;
6070	u32 thermal_int = 0;
6071
6072	if (!rdev->irq.installed) {
6073		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6074		return -EINVAL;
6075	}
6076	/* don't enable anything if the ih is disabled */
6077	if (!rdev->ih.enabled) {
6078		si_disable_interrupts(rdev);
6079		/* force the active interrupt state to all disabled */
6080		si_disable_interrupt_state(rdev);
6081		return 0;
6082	}
6083
6084	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6085		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6086
6087	if (!ASIC_IS_NODCE(rdev)) {
6088		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6089		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6090		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6091		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6092		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6093		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6094	}
6095
6096	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6097	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6098
6099	thermal_int = RREG32(CG_THERMAL_INT) &
6100		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6101
6102	/* enable CP interrupts on all rings */
6103	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6104		DRM_DEBUG("si_irq_set: sw int gfx\n");
6105		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6106	}
6107	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6108		DRM_DEBUG("si_irq_set: sw int cp1\n");
6109		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6110	}
6111	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6112		DRM_DEBUG("si_irq_set: sw int cp2\n");
6113		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6114	}
6115	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6116		DRM_DEBUG("si_irq_set: sw int dma\n");
6117		dma_cntl |= TRAP_ENABLE;
6118	}
6119
6120	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6121		DRM_DEBUG("si_irq_set: sw int dma1\n");
6122		dma_cntl1 |= TRAP_ENABLE;
6123	}
6124	if (rdev->irq.crtc_vblank_int[0] ||
6125	    atomic_read(&rdev->irq.pflip[0])) {
6126		DRM_DEBUG("si_irq_set: vblank 0\n");
6127		crtc1 |= VBLANK_INT_MASK;
6128	}
6129	if (rdev->irq.crtc_vblank_int[1] ||
6130	    atomic_read(&rdev->irq.pflip[1])) {
6131		DRM_DEBUG("si_irq_set: vblank 1\n");
6132		crtc2 |= VBLANK_INT_MASK;
6133	}
6134	if (rdev->irq.crtc_vblank_int[2] ||
6135	    atomic_read(&rdev->irq.pflip[2])) {
6136		DRM_DEBUG("si_irq_set: vblank 2\n");
6137		crtc3 |= VBLANK_INT_MASK;
6138	}
6139	if (rdev->irq.crtc_vblank_int[3] ||
6140	    atomic_read(&rdev->irq.pflip[3])) {
6141		DRM_DEBUG("si_irq_set: vblank 3\n");
6142		crtc4 |= VBLANK_INT_MASK;
6143	}
6144	if (rdev->irq.crtc_vblank_int[4] ||
6145	    atomic_read(&rdev->irq.pflip[4])) {
6146		DRM_DEBUG("si_irq_set: vblank 4\n");
6147		crtc5 |= VBLANK_INT_MASK;
6148	}
6149	if (rdev->irq.crtc_vblank_int[5] ||
6150	    atomic_read(&rdev->irq.pflip[5])) {
6151		DRM_DEBUG("si_irq_set: vblank 5\n");
6152		crtc6 |= VBLANK_INT_MASK;
6153	}
6154	if (rdev->irq.hpd[0]) {
6155		DRM_DEBUG("si_irq_set: hpd 1\n");
6156		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6157	}
6158	if (rdev->irq.hpd[1]) {
6159		DRM_DEBUG("si_irq_set: hpd 2\n");
6160		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6161	}
6162	if (rdev->irq.hpd[2]) {
6163		DRM_DEBUG("si_irq_set: hpd 3\n");
6164		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6165	}
6166	if (rdev->irq.hpd[3]) {
6167		DRM_DEBUG("si_irq_set: hpd 4\n");
6168		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6169	}
6170	if (rdev->irq.hpd[4]) {
6171		DRM_DEBUG("si_irq_set: hpd 5\n");
6172		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6173	}
6174	if (rdev->irq.hpd[5]) {
6175		DRM_DEBUG("si_irq_set: hpd 6\n");
6176		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6177	}
6178
6179	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6180	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6181	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6182
6183	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6184	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6185
6186	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6187
6188	if (rdev->irq.dpm_thermal) {
6189		DRM_DEBUG("dpm thermal\n");
6190		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6191	}
6192
6193	if (rdev->num_crtc >= 2) {
6194		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6195		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6196	}
6197	if (rdev->num_crtc >= 4) {
6198		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6199		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6200	}
6201	if (rdev->num_crtc >= 6) {
6202		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6203		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6204	}
6205
6206	if (rdev->num_crtc >= 2) {
6207		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6208		       GRPH_PFLIP_INT_MASK);
6209		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6210		       GRPH_PFLIP_INT_MASK);
6211	}
6212	if (rdev->num_crtc >= 4) {
6213		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6214		       GRPH_PFLIP_INT_MASK);
6215		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6216		       GRPH_PFLIP_INT_MASK);
6217	}
6218	if (rdev->num_crtc >= 6) {
6219		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6220		       GRPH_PFLIP_INT_MASK);
6221		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6222		       GRPH_PFLIP_INT_MASK);
6223	}
6224
6225	if (!ASIC_IS_NODCE(rdev)) {
6226		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6227		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6228		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6229		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6230		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6231		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6232	}
6233
6234	WREG32(CG_THERMAL_INT, thermal_int);
6235
6236	/* posting read */
6237	RREG32(SRBM_STATUS);
6238
6239	return 0;
6240}
6241
6242static inline void si_irq_ack(struct radeon_device *rdev)
6243{
6244	u32 tmp;
6245
6246	if (ASIC_IS_NODCE(rdev))
6247		return;
6248
6249	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6250	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6251	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6252	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6253	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6254	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6255	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6256	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6257	if (rdev->num_crtc >= 4) {
6258		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6259		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6260	}
6261	if (rdev->num_crtc >= 6) {
6262		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6263		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6264	}
6265
6266	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6267		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6268	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6269		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6270	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6271		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6272	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6273		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6274	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6275		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6276	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6277		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6278
6279	if (rdev->num_crtc >= 4) {
6280		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6281			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6282		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6283			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6284		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6285			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6286		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6287			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6288		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6289			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6290		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6291			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6292	}
6293
6294	if (rdev->num_crtc >= 6) {
6295		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6296			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6297		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6298			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6299		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6300			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6301		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6302			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6303		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6304			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6305		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6306			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6307	}
6308
6309	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6310		tmp = RREG32(DC_HPD1_INT_CONTROL);
6311		tmp |= DC_HPDx_INT_ACK;
6312		WREG32(DC_HPD1_INT_CONTROL, tmp);
6313	}
6314	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6315		tmp = RREG32(DC_HPD2_INT_CONTROL);
6316		tmp |= DC_HPDx_INT_ACK;
6317		WREG32(DC_HPD2_INT_CONTROL, tmp);
6318	}
6319	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6320		tmp = RREG32(DC_HPD3_INT_CONTROL);
6321		tmp |= DC_HPDx_INT_ACK;
6322		WREG32(DC_HPD3_INT_CONTROL, tmp);
6323	}
6324	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6325		tmp = RREG32(DC_HPD4_INT_CONTROL);
6326		tmp |= DC_HPDx_INT_ACK;
6327		WREG32(DC_HPD4_INT_CONTROL, tmp);
6328	}
6329	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6330		tmp = RREG32(DC_HPD5_INT_CONTROL);
6331		tmp |= DC_HPDx_INT_ACK;
6332		WREG32(DC_HPD5_INT_CONTROL, tmp);
6333	}
6334	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6335		tmp = RREG32(DC_HPD5_INT_CONTROL);
6336		tmp |= DC_HPDx_INT_ACK;
6337		WREG32(DC_HPD6_INT_CONTROL, tmp);
6338	}
6339
6340	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6341		tmp = RREG32(DC_HPD1_INT_CONTROL);
6342		tmp |= DC_HPDx_RX_INT_ACK;
6343		WREG32(DC_HPD1_INT_CONTROL, tmp);
6344	}
6345	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6346		tmp = RREG32(DC_HPD2_INT_CONTROL);
6347		tmp |= DC_HPDx_RX_INT_ACK;
6348		WREG32(DC_HPD2_INT_CONTROL, tmp);
6349	}
6350	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6351		tmp = RREG32(DC_HPD3_INT_CONTROL);
6352		tmp |= DC_HPDx_RX_INT_ACK;
6353		WREG32(DC_HPD3_INT_CONTROL, tmp);
6354	}
6355	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6356		tmp = RREG32(DC_HPD4_INT_CONTROL);
6357		tmp |= DC_HPDx_RX_INT_ACK;
6358		WREG32(DC_HPD4_INT_CONTROL, tmp);
6359	}
6360	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6361		tmp = RREG32(DC_HPD5_INT_CONTROL);
6362		tmp |= DC_HPDx_RX_INT_ACK;
6363		WREG32(DC_HPD5_INT_CONTROL, tmp);
6364	}
6365	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6366		tmp = RREG32(DC_HPD5_INT_CONTROL);
6367		tmp |= DC_HPDx_RX_INT_ACK;
6368		WREG32(DC_HPD6_INT_CONTROL, tmp);
6369	}
6370}
6371
/**
 * si_irq_disable - stop interrupt delivery and mask all sources
 * @rdev: radeon device
 *
 * Turns the IH off, waits one millisecond, acknowledges whatever display
 * interrupts are still flagged and then masks every interrupt source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* give in-flight interrupts time to land, then acknowledge them */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
6380
/**
 * si_irq_suspend - quiesce interrupts and stop the RLC
 * @rdev: radeon device
 *
 * Disables interrupt handling via si_irq_disable() and then stops the RLC.
 * The IH ring itself is left allocated.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts first, then the RLC */
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6386
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon device
 *
 * Suspends interrupt handling via si_irq_suspend() and releases the IH
 * ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	/* the ring may only go away once the hardware is quiet */
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6392
6393static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6394{
6395	u32 wptr, tmp;
6396
6397	if (rdev->wb.enabled)
6398		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6399	else
6400		wptr = RREG32(IH_RB_WPTR);
6401
6402	if (wptr & RB_OVERFLOW) {
6403		wptr &= ~RB_OVERFLOW;
6404		/* When a ring buffer overflow happen start parsing interrupt
6405		 * from the last not overwritten vector (wptr + 16). Hopefully
6406		 * this should allow us to catchup.
6407		 */
6408		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6409			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6410		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6411		tmp = RREG32(IH_RB_CNTL);
6412		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6413		WREG32(IH_RB_CNTL, tmp);
6414	}
6415	return (wptr & rdev->ih.ptr_mask);
6416}
6417
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/**
 * si_irq_process - drain the IH ring and dispatch each interrupt vector
 * @rdev: radeon device
 *
 * Walks the IH ring from rdev->ih.rptr up to the current write pointer in
 * 16-byte steps.  For every entry the source id, source data and ring id
 * are decoded and the event is dispatched: vblank/vline and page flip
 * handling per CRTC, hotplug and HPD RX events, SRBM read errors, UVD, CP
 * and DMA fence processing, VM protection faults and thermal events.
 * Hotplug, DP and thermal handling is deferred to work items that are
 * scheduled once the ring has been drained.  The write pointer is sampled
 * again afterwards and processing restarts if new entries arrived.
 *
 * Only one caller processes the ring at a time; rdev->ih.lock is taken
 * with atomic_xchg() and released when the ring has been drained.
 *
 * Returns IRQ_NONE if the IH is disabled, the device is shutting down or
 * another caller already holds rdev->ih.lock, IRQ_HANDLED otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* dwords 0, 1 and 2 of the entry carry source id, source data and ring id */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[0]) {
					drm_handle_vblank(rdev->ddev, 0);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[0]))
					radeon_crtc_handle_vblank(rdev, 0);
				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D1 vblank\n");

				break;
			case 1: /* D1 vline */
				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D1 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[1]) {
					drm_handle_vblank(rdev->ddev, 1);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[1]))
					radeon_crtc_handle_vblank(rdev, 1);
				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D2 vblank\n");

				break;
			case 1: /* D2 vline */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D2 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[2]) {
					drm_handle_vblank(rdev->ddev, 2);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[2]))
					radeon_crtc_handle_vblank(rdev, 2);
				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D3 vblank\n");

				break;
			case 1: /* D3 vline */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D3 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[3]) {
					drm_handle_vblank(rdev->ddev, 3);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[3]))
					radeon_crtc_handle_vblank(rdev, 3);
				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D4 vblank\n");

				break;
			case 1: /* D4 vline */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D4 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[4]) {
					drm_handle_vblank(rdev->ddev, 4);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[4]))
					radeon_crtc_handle_vblank(rdev, 4);
				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D5 vblank\n");

				break;
			case 1: /* D5 vline */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D5 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[5]) {
					drm_handle_vblank(rdev->ddev, 5);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[5]))
					radeon_crtc_handle_vblank(rdev, 5);
				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D6 vblank\n");

				break;
			case 1: /* D6 vline */
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D6 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		/* page flip source ids are the even numbers 8..18; (src_id - 8) / 2 is the CRTC index */
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			/* src_data 0-5: HPD1-6 hotplug, 6-11: HPD1-6 RX */
			switch (src_data) {
			case 0:
				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD1\n");

				break;
			case 1:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD2\n");

				break;
			case 2:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD3\n");

				break;
			case 3:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD4\n");

				break;
			case 4:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD5\n");

				break;
			case 5:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD6\n");

				break;
			case 6:
				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 1\n");

				break;
			case 7:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 2\n");

				break;
			case 8:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 3\n");

				break;
			case 9:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 4\n");

				break;
			case 10:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 5\n");

				break;
			case 11:
				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 6\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 96:
			/* log the SRBM read error register, then acknowledge via SRBM_INT_ACK */
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM context1 protection fault: capture address and status before resetting them */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			/* nothing to report when both registers read back as zero */
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* the ring id selects which CP ring's fences to process; other ids are ignored */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		/* advance by one 16-byte entry and report the new position to the hardware */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	/* kick off the work deferred while draining the ring */
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	/* publish the new read pointer, then release the processing lock */
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6864
6865/*
6866 * startup/shutdown callbacks
6867 */
6868static int si_startup(struct radeon_device *rdev)
6869{
6870	struct radeon_ring *ring;
6871	int r;
6872
6873	/* enable pcie gen2/3 link */
6874	si_pcie_gen3_enable(rdev);
6875	/* enable aspm */
6876	si_program_aspm(rdev);
6877
6878	/* scratch needs to be initialized before MC */
6879	r = r600_vram_scratch_init(rdev);
6880	if (r)
6881		return r;
6882
6883	si_mc_program(rdev);
6884
6885	if (!rdev->pm.dpm_enabled) {
6886		r = si_mc_load_microcode(rdev);
6887		if (r) {
6888			DRM_ERROR("Failed to load MC firmware!\n");
6889			return r;
6890		}
6891	}
6892
6893	r = si_pcie_gart_enable(rdev);
6894	if (r)
6895		return r;
6896	si_gpu_init(rdev);
6897
6898	/* allocate rlc buffers */
6899	if (rdev->family == CHIP_VERDE) {
6900		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6901		rdev->rlc.reg_list_size =
6902			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6903	}
6904	rdev->rlc.cs_data = si_cs_data;
6905	r = sumo_rlc_init(rdev);
6906	if (r) {
6907		DRM_ERROR("Failed to init rlc BOs!\n");
6908		return r;
6909	}
6910
6911	/* allocate wb buffer */
6912	r = radeon_wb_init(rdev);
6913	if (r)
6914		return r;
6915
6916	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6917	if (r) {
6918		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6919		return r;
6920	}
6921
6922	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6923	if (r) {
6924		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6925		return r;
6926	}
6927
6928	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6929	if (r) {
6930		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6931		return r;
6932	}
6933
6934	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6935	if (r) {
6936		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6937		return r;
6938	}
6939
6940	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6941	if (r) {
6942		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6943		return r;
6944	}
6945
6946	if (rdev->has_uvd) {
6947		r = uvd_v2_2_resume(rdev);
6948		if (!r) {
6949			r = radeon_fence_driver_start_ring(rdev,
6950							   R600_RING_TYPE_UVD_INDEX);
6951			if (r)
6952				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6953		}
6954		if (r)
6955			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6956	}
6957
6958	/* Enable IRQ */
6959	if (!rdev->irq.installed) {
6960		r = radeon_irq_kms_init(rdev);
6961		if (r)
6962			return r;
6963	}
6964
6965	r = si_irq_init(rdev);
6966	if (r) {
6967		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6968		radeon_irq_kms_fini(rdev);
6969		return r;
6970	}
6971	si_irq_set(rdev);
6972
6973	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6974	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6975			     RADEON_CP_PACKET2);
6976	if (r)
6977		return r;
6978
6979	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6980	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6981			     RADEON_CP_PACKET2);
6982	if (r)
6983		return r;
6984
6985	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6986	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6987			     RADEON_CP_PACKET2);
6988	if (r)
6989		return r;
6990
6991	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6992	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6993			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6994	if (r)
6995		return r;
6996
6997	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6998	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6999			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7000	if (r)
7001		return r;
7002
7003	r = si_cp_load_microcode(rdev);
7004	if (r)
7005		return r;
7006	r = si_cp_resume(rdev);
7007	if (r)
7008		return r;
7009
7010	r = cayman_dma_resume(rdev);
7011	if (r)
7012		return r;
7013
7014	if (rdev->has_uvd) {
7015		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7016		if (ring->ring_size) {
7017			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7018					     RADEON_CP_PACKET2);
7019			if (!r)
7020				r = uvd_v1_0_init(rdev);
7021			if (r)
7022				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7023		}
7024	}
7025
7026	r = radeon_ib_pool_init(rdev);
7027	if (r) {
7028		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7029		return r;
7030	}
7031
7032	r = radeon_vm_manager_init(rdev);
7033	if (r) {
7034		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7035		return r;
7036	}
7037
7038	r = radeon_audio_init(rdev);
7039	if (r)
7040		return r;
7041
7042	return 0;
7043}
7044
7045int si_resume(struct radeon_device *rdev)
7046{
7047	int r;
7048
7049	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
7050	 * posting will perform necessary task to bring back GPU into good
7051	 * shape.
7052	 */
7053	/* post card */
7054	atom_asic_init(rdev->mode_info.atom_context);
7055
7056	/* init golden registers */
7057	si_init_golden_registers(rdev);
7058
7059	if (rdev->pm.pm_method == PM_METHOD_DPM)
7060		radeon_pm_resume(rdev);
7061
7062	rdev->accel_working = true;
7063	r = si_startup(rdev);
7064	if (r) {
7065		DRM_ERROR("si startup failed on resume\n");
7066		rdev->accel_working = false;
7067		return r;
7068	}
7069
7070	return r;
7071
7072}
7073
/*
 * si_suspend - quiesce the hardware before suspend
 *
 * @rdev: radeon_device pointer
 *
 * Calls the per-block suspend/stop/disable helpers in a fixed order.
 * NOTE(review): the order is presumably significant to the hardware;
 * do not reorder without verifying.
 *
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop the command processor and both DMA engines */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	/* UVD is only touched on parts that have it */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
7092
/* The plan is to move initialization into this function and use
 * helper functions, so that radeon_device_init does little more
 * than call the ASIC-specific function. This should also allow
 * removing a bunch of callback functions such as vram_info.
 */
7099int si_init(struct radeon_device *rdev)
7100{
7101	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7102	int r;
7103
7104	/* Read BIOS */
7105	if (!radeon_get_bios(rdev)) {
7106		if (ASIC_IS_AVIVO(rdev))
7107			return -EINVAL;
7108	}
7109	/* Must be an ATOMBIOS */
7110	if (!rdev->is_atom_bios) {
7111		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7112		return -EINVAL;
7113	}
7114	r = radeon_atombios_init(rdev);
7115	if (r)
7116		return r;
7117
7118	/* Post card if necessary */
7119	if (!radeon_card_posted(rdev)) {
7120		if (!rdev->bios) {
7121			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7122			return -EINVAL;
7123		}
7124		DRM_INFO("GPU not posted. posting now...\n");
7125		atom_asic_init(rdev->mode_info.atom_context);
7126	}
7127	/* init golden registers */
7128	si_init_golden_registers(rdev);
7129	/* Initialize scratch registers */
7130	si_scratch_init(rdev);
7131	/* Initialize surface registers */
7132	radeon_surface_init(rdev);
7133	/* Initialize clocks */
7134	radeon_get_clock_info(rdev->ddev);
7135
7136	/* Fence driver */
7137	r = radeon_fence_driver_init(rdev);
7138	if (r)
7139		return r;
7140
7141	/* initialize memory controller */
7142	r = si_mc_init(rdev);
7143	if (r)
7144		return r;
7145	/* Memory manager */
7146	r = radeon_bo_init(rdev);
7147	if (r)
7148		return r;
7149
7150	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7151	    !rdev->rlc_fw || !rdev->mc_fw) {
7152		r = si_init_microcode(rdev);
7153		if (r) {
7154			DRM_ERROR("Failed to load firmware!\n");
7155			return r;
7156		}
7157	}
7158
7159	/* Initialize power management */
7160	radeon_pm_init(rdev);
7161
7162	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7163	ring->ring_obj = NULL;
7164	r600_ring_init(rdev, ring, 1024 * 1024);
7165
7166	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7167	ring->ring_obj = NULL;
7168	r600_ring_init(rdev, ring, 1024 * 1024);
7169
7170	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7171	ring->ring_obj = NULL;
7172	r600_ring_init(rdev, ring, 1024 * 1024);
7173
7174	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7175	ring->ring_obj = NULL;
7176	r600_ring_init(rdev, ring, 64 * 1024);
7177
7178	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7179	ring->ring_obj = NULL;
7180	r600_ring_init(rdev, ring, 64 * 1024);
7181
7182	if (rdev->has_uvd) {
7183		r = radeon_uvd_init(rdev);
7184		if (!r) {
7185			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7186			ring->ring_obj = NULL;
7187			r600_ring_init(rdev, ring, 4096);
7188		}
7189	}
7190
7191	rdev->ih.ring_obj = NULL;
7192	r600_ih_ring_init(rdev, 64 * 1024);
7193
7194	r = r600_pcie_gart_init(rdev);
7195	if (r)
7196		return r;
7197
7198	rdev->accel_working = true;
7199	r = si_startup(rdev);
7200	if (r) {
7201		dev_err(rdev->dev, "disabling GPU acceleration\n");
7202		si_cp_fini(rdev);
7203		cayman_dma_fini(rdev);
7204		si_irq_fini(rdev);
7205		sumo_rlc_fini(rdev);
7206		radeon_wb_fini(rdev);
7207		radeon_ib_pool_fini(rdev);
7208		radeon_vm_manager_fini(rdev);
7209		radeon_irq_kms_fini(rdev);
7210		si_pcie_gart_fini(rdev);
7211		rdev->accel_working = false;
7212	}
7213
7214	/* Don't start up if the MC ucode is missing.
7215	 * The default clocks and voltages before the MC ucode
7216	 * is loaded are not suffient for advanced operations.
7217	 */
7218	if (!rdev->mc_fw) {
7219		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7220		return -EINVAL;
7221	}
7222
7223	return 0;
7224}
7225
/*
 * si_fini - tear down everything set up by si_init()/si_startup()
 *
 * @rdev: radeon_device pointer
 *
 * Calls the per-block fini helpers in a fixed order, then frees the
 * BIOS copy and clears the pointer.
 * NOTE(review): the order is presumably significant; do not reorder
 * without verifying.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD is only torn down on parts that have it */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS image and avoid leaving a dangling pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7252
7253/**
7254 * si_get_gpu_clock_counter - return GPU clock counter snapshot
7255 *
7256 * @rdev: radeon_device pointer
7257 *
7258 * Fetches a GPU clock counter snapshot (SI).
7259 * Returns the 64 bit clock counter snapshot.
7260 */
7261uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7262{
7263	uint64_t clock;
7264
7265	mutex_lock(&rdev->gpu_clock_mutex);
7266	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7267	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7268	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7269	mutex_unlock(&rdev->gpu_clock_mutex);
7270	return clock;
7271}
7272
7273int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7274{
7275	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7276	int r;
7277
7278	/* bypass vclk and dclk with bclk */
7279	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7280		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7281		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7282
7283	/* put PLL in bypass mode */
7284	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7285
7286	if (!vclk || !dclk) {
7287		/* keep the Bypass mode */
7288		return 0;
7289	}
7290
7291	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7292					  16384, 0x03FFFFFF, 0, 128, 5,
7293					  &fb_div, &vclk_div, &dclk_div);
7294	if (r)
7295		return r;
7296
7297	/* set RESET_ANTI_MUX to 0 */
7298	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7299
7300	/* set VCO_MODE to 1 */
7301	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7302
7303	/* disable sleep mode */
7304	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7305
7306	/* deassert UPLL_RESET */
7307	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7308
7309	mdelay(1);
7310
7311	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7312	if (r)
7313		return r;
7314
7315	/* assert UPLL_RESET again */
7316	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7317
7318	/* disable spread spectrum. */
7319	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7320
7321	/* set feedback divider */
7322	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7323
7324	/* set ref divider to 0 */
7325	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7326
7327	if (fb_div < 307200)
7328		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7329	else
7330		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7331
7332	/* set PDIV_A and PDIV_B */
7333	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7334		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7335		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7336
7337	/* give the PLL some time to settle */
7338	mdelay(15);
7339
7340	/* deassert PLL_RESET */
7341	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7342
7343	mdelay(15);
7344
7345	/* switch from bypass mode to normal mode */
7346	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7347
7348	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7349	if (r)
7350		return r;
7351
7352	/* switch VCLK and DCLK selection */
7353	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7354		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7355		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7356
7357	mdelay(100);
7358
7359	return 0;
7360}
7361
7362static void si_pcie_gen3_enable(struct radeon_device *rdev)
7363{
7364	struct pci_dev *root = rdev->pdev->bus->self;
7365	int bridge_pos, gpu_pos;
7366	u32 speed_cntl, mask, current_data_rate;
7367	int ret, i;
7368	u16 tmp16;
7369
7370	if (pci_is_root_bus(rdev->pdev->bus))
7371		return;
7372
7373	if (radeon_pcie_gen2 == 0)
7374		return;
7375
7376	if (rdev->flags & RADEON_IS_IGP)
7377		return;
7378
7379	if (!(rdev->flags & RADEON_IS_PCIE))
7380		return;
7381
7382	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7383	if (ret != 0)
7384		return;
7385
7386	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7387		return;
7388
7389	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7390	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7391		LC_CURRENT_DATA_RATE_SHIFT;
7392	if (mask & DRM_PCIE_SPEED_80) {
7393		if (current_data_rate == 2) {
7394			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7395			return;
7396		}
7397		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7398	} else if (mask & DRM_PCIE_SPEED_50) {
7399		if (current_data_rate == 1) {
7400			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7401			return;
7402		}
7403		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7404	}
7405
7406	bridge_pos = pci_pcie_cap(root);
7407	if (!bridge_pos)
7408		return;
7409
7410	gpu_pos = pci_pcie_cap(rdev->pdev);
7411	if (!gpu_pos)
7412		return;
7413
7414	if (mask & DRM_PCIE_SPEED_80) {
7415		/* re-try equalization if gen3 is not already enabled */
7416		if (current_data_rate != 2) {
7417			u16 bridge_cfg, gpu_cfg;
7418			u16 bridge_cfg2, gpu_cfg2;
7419			u32 max_lw, current_lw, tmp;
7420
7421			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7422			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7423
7424			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7425			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7426
7427			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7428			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7429
7430			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7431			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7432			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7433
7434			if (current_lw < max_lw) {
7435				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7436				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7437					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7438					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7439					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7440					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7441				}
7442			}
7443
7444			for (i = 0; i < 10; i++) {
7445				/* check status */
7446				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7447				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7448					break;
7449
7450				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7451				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7452
7453				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7454				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7455
7456				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7457				tmp |= LC_SET_QUIESCE;
7458				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7459
7460				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7461				tmp |= LC_REDO_EQ;
7462				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7463
7464				mdelay(100);
7465
7466				/* linkctl */
7467				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7468				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7469				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7470				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7471
7472				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7473				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7474				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7475				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7476
7477				/* linkctl2 */
7478				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7479				tmp16 &= ~((1 << 4) | (7 << 9));
7480				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7481				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7482
7483				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7484				tmp16 &= ~((1 << 4) | (7 << 9));
7485				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7486				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7487
7488				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7489				tmp &= ~LC_SET_QUIESCE;
7490				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7491			}
7492		}
7493	}
7494
7495	/* set the link speed */
7496	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7497	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7498	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7499
7500	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7501	tmp16 &= ~0xf;
7502	if (mask & DRM_PCIE_SPEED_80)
7503		tmp16 |= 3; /* gen3 */
7504	else if (mask & DRM_PCIE_SPEED_50)
7505		tmp16 |= 2; /* gen2 */
7506	else
7507		tmp16 |= 1; /* gen1 */
7508	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7509
7510	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7511	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7512	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7513
7514	for (i = 0; i < rdev->usec_timeout; i++) {
7515		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7516		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7517			break;
7518		udelay(1);
7519	}
7520}
7521
/*
 * si_program_aspm - program the ASPM related link and PHY registers
 *
 * @rdev: radeon_device pointer
 *
 * Does nothing when radeon_aspm is 0 or the device is not PCIE.
 *
 * Every register update below follows the same pattern: read the
 * register into both orig and data, modify data, and write it back only
 * if the value actually changed.
 *
 * The disable_* locals are all initialized to false and never modified
 * in this function, so every "!disable_*" branch is always taken and
 * the "else" branch of the L1 check is never reached as written.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value with 0x24 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/*
	 * Build the new PCIE_LC_CNTL value: clear both inactivity fields,
	 * then set them to 7 for each of L0s/L1 that is not disabled.
	 * The register is written inside the L1 branches below.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* PLL power state fields set to 7 on both PHYs, PWRDOWN_0 and PWRDOWN_1 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* all families except Oland and Hainan: clear the PLL ramp-up time fields */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time: 5 on Oland/Hainan, field cleared on everything else */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/*
			 * CLKREQ is considered supported only when the GPU is not
			 * on a root bus and the upstream bridge advertises
			 * PCI_EXP_LNKCAP_CLKPM in its link capabilities.
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			/* with CLKREQ: allow power-down in L1/L23 and reselect several clock sources */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: still commit the PCIE_LC_CNTL value built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the slave, master and replay memories */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/*
	 * Clear the L0s inactivity field again (undoing the value set above)
	 * when the N_FTS field reads back as all ones and both the reverse
	 * transmit and reverse receive status bits are set.
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7726