1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
24#include "drmP.h"
25#include "amdgpu.h"
26#include "amdgpu_ih.h"
27#include "amdgpu_gfx.h"
28#include "cikd.h"
29#include "cik.h"
30#include "atom.h"
31#include "amdgpu_ucode.h"
32#include "clearstate_ci.h"
33
34#include "uvd/uvd_4_2_d.h"
35
36#include "dce/dce_8_0_d.h"
37#include "dce/dce_8_0_sh_mask.h"
38
39#include "bif/bif_4_1_d.h"
40#include "bif/bif_4_1_sh_mask.h"
41
42#include "gca/gfx_7_0_d.h"
43#include "gca/gfx_7_2_enum.h"
44#include "gca/gfx_7_2_sh_mask.h"
45
46#include "gmc/gmc_7_0_d.h"
47#include "gmc/gmc_7_0_sh_mask.h"
48
49#include "oss/oss_2_0_d.h"
50#include "oss/oss_2_0_sh_mask.h"
51
52#define GFX7_NUM_GFX_RINGS     1
53#define GFX7_NUM_COMPUTE_RINGS 8
54
55static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
56static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
57static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
58int gfx_v7_0_get_cu_info(struct amdgpu_device *, struct amdgpu_cu_info *);
59
60MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
61MODULE_FIRMWARE("radeon/bonaire_me.bin");
62MODULE_FIRMWARE("radeon/bonaire_ce.bin");
63MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
64MODULE_FIRMWARE("radeon/bonaire_mec.bin");
65
66MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67MODULE_FIRMWARE("radeon/hawaii_me.bin");
68MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
70MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71
72MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
73MODULE_FIRMWARE("radeon/kaveri_me.bin");
74MODULE_FIRMWARE("radeon/kaveri_ce.bin");
75MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
76MODULE_FIRMWARE("radeon/kaveri_mec.bin");
77MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
78
79MODULE_FIRMWARE("radeon/kabini_pfp.bin");
80MODULE_FIRMWARE("radeon/kabini_me.bin");
81MODULE_FIRMWARE("radeon/kabini_ce.bin");
82MODULE_FIRMWARE("radeon/kabini_rlc.bin");
83MODULE_FIRMWARE("radeon/kabini_mec.bin");
84
85MODULE_FIRMWARE("radeon/mullins_pfp.bin");
86MODULE_FIRMWARE("radeon/mullins_me.bin");
87MODULE_FIRMWARE("radeon/mullins_ce.bin");
88MODULE_FIRMWARE("radeon/mullins_rlc.bin");
89MODULE_FIRMWARE("radeon/mullins_mec.bin");
90
91static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
92{
93	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
94	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
95	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
96	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
97	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
98	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
99	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
100	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
101	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
102	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
103	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
104	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
105	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
106	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
107	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
108	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
109};
110
111static const u32 spectre_rlc_save_restore_register_list[] =
112{
113	(0x0e00 << 16) | (0xc12c >> 2),
114	0x00000000,
115	(0x0e00 << 16) | (0xc140 >> 2),
116	0x00000000,
117	(0x0e00 << 16) | (0xc150 >> 2),
118	0x00000000,
119	(0x0e00 << 16) | (0xc15c >> 2),
120	0x00000000,
121	(0x0e00 << 16) | (0xc168 >> 2),
122	0x00000000,
123	(0x0e00 << 16) | (0xc170 >> 2),
124	0x00000000,
125	(0x0e00 << 16) | (0xc178 >> 2),
126	0x00000000,
127	(0x0e00 << 16) | (0xc204 >> 2),
128	0x00000000,
129	(0x0e00 << 16) | (0xc2b4 >> 2),
130	0x00000000,
131	(0x0e00 << 16) | (0xc2b8 >> 2),
132	0x00000000,
133	(0x0e00 << 16) | (0xc2bc >> 2),
134	0x00000000,
135	(0x0e00 << 16) | (0xc2c0 >> 2),
136	0x00000000,
137	(0x0e00 << 16) | (0x8228 >> 2),
138	0x00000000,
139	(0x0e00 << 16) | (0x829c >> 2),
140	0x00000000,
141	(0x0e00 << 16) | (0x869c >> 2),
142	0x00000000,
143	(0x0600 << 16) | (0x98f4 >> 2),
144	0x00000000,
145	(0x0e00 << 16) | (0x98f8 >> 2),
146	0x00000000,
147	(0x0e00 << 16) | (0x9900 >> 2),
148	0x00000000,
149	(0x0e00 << 16) | (0xc260 >> 2),
150	0x00000000,
151	(0x0e00 << 16) | (0x90e8 >> 2),
152	0x00000000,
153	(0x0e00 << 16) | (0x3c000 >> 2),
154	0x00000000,
155	(0x0e00 << 16) | (0x3c00c >> 2),
156	0x00000000,
157	(0x0e00 << 16) | (0x8c1c >> 2),
158	0x00000000,
159	(0x0e00 << 16) | (0x9700 >> 2),
160	0x00000000,
161	(0x0e00 << 16) | (0xcd20 >> 2),
162	0x00000000,
163	(0x4e00 << 16) | (0xcd20 >> 2),
164	0x00000000,
165	(0x5e00 << 16) | (0xcd20 >> 2),
166	0x00000000,
167	(0x6e00 << 16) | (0xcd20 >> 2),
168	0x00000000,
169	(0x7e00 << 16) | (0xcd20 >> 2),
170	0x00000000,
171	(0x8e00 << 16) | (0xcd20 >> 2),
172	0x00000000,
173	(0x9e00 << 16) | (0xcd20 >> 2),
174	0x00000000,
175	(0xae00 << 16) | (0xcd20 >> 2),
176	0x00000000,
177	(0xbe00 << 16) | (0xcd20 >> 2),
178	0x00000000,
179	(0x0e00 << 16) | (0x89bc >> 2),
180	0x00000000,
181	(0x0e00 << 16) | (0x8900 >> 2),
182	0x00000000,
183	0x3,
184	(0x0e00 << 16) | (0xc130 >> 2),
185	0x00000000,
186	(0x0e00 << 16) | (0xc134 >> 2),
187	0x00000000,
188	(0x0e00 << 16) | (0xc1fc >> 2),
189	0x00000000,
190	(0x0e00 << 16) | (0xc208 >> 2),
191	0x00000000,
192	(0x0e00 << 16) | (0xc264 >> 2),
193	0x00000000,
194	(0x0e00 << 16) | (0xc268 >> 2),
195	0x00000000,
196	(0x0e00 << 16) | (0xc26c >> 2),
197	0x00000000,
198	(0x0e00 << 16) | (0xc270 >> 2),
199	0x00000000,
200	(0x0e00 << 16) | (0xc274 >> 2),
201	0x00000000,
202	(0x0e00 << 16) | (0xc278 >> 2),
203	0x00000000,
204	(0x0e00 << 16) | (0xc27c >> 2),
205	0x00000000,
206	(0x0e00 << 16) | (0xc280 >> 2),
207	0x00000000,
208	(0x0e00 << 16) | (0xc284 >> 2),
209	0x00000000,
210	(0x0e00 << 16) | (0xc288 >> 2),
211	0x00000000,
212	(0x0e00 << 16) | (0xc28c >> 2),
213	0x00000000,
214	(0x0e00 << 16) | (0xc290 >> 2),
215	0x00000000,
216	(0x0e00 << 16) | (0xc294 >> 2),
217	0x00000000,
218	(0x0e00 << 16) | (0xc298 >> 2),
219	0x00000000,
220	(0x0e00 << 16) | (0xc29c >> 2),
221	0x00000000,
222	(0x0e00 << 16) | (0xc2a0 >> 2),
223	0x00000000,
224	(0x0e00 << 16) | (0xc2a4 >> 2),
225	0x00000000,
226	(0x0e00 << 16) | (0xc2a8 >> 2),
227	0x00000000,
228	(0x0e00 << 16) | (0xc2ac  >> 2),
229	0x00000000,
230	(0x0e00 << 16) | (0xc2b0 >> 2),
231	0x00000000,
232	(0x0e00 << 16) | (0x301d0 >> 2),
233	0x00000000,
234	(0x0e00 << 16) | (0x30238 >> 2),
235	0x00000000,
236	(0x0e00 << 16) | (0x30250 >> 2),
237	0x00000000,
238	(0x0e00 << 16) | (0x30254 >> 2),
239	0x00000000,
240	(0x0e00 << 16) | (0x30258 >> 2),
241	0x00000000,
242	(0x0e00 << 16) | (0x3025c >> 2),
243	0x00000000,
244	(0x4e00 << 16) | (0xc900 >> 2),
245	0x00000000,
246	(0x5e00 << 16) | (0xc900 >> 2),
247	0x00000000,
248	(0x6e00 << 16) | (0xc900 >> 2),
249	0x00000000,
250	(0x7e00 << 16) | (0xc900 >> 2),
251	0x00000000,
252	(0x8e00 << 16) | (0xc900 >> 2),
253	0x00000000,
254	(0x9e00 << 16) | (0xc900 >> 2),
255	0x00000000,
256	(0xae00 << 16) | (0xc900 >> 2),
257	0x00000000,
258	(0xbe00 << 16) | (0xc900 >> 2),
259	0x00000000,
260	(0x4e00 << 16) | (0xc904 >> 2),
261	0x00000000,
262	(0x5e00 << 16) | (0xc904 >> 2),
263	0x00000000,
264	(0x6e00 << 16) | (0xc904 >> 2),
265	0x00000000,
266	(0x7e00 << 16) | (0xc904 >> 2),
267	0x00000000,
268	(0x8e00 << 16) | (0xc904 >> 2),
269	0x00000000,
270	(0x9e00 << 16) | (0xc904 >> 2),
271	0x00000000,
272	(0xae00 << 16) | (0xc904 >> 2),
273	0x00000000,
274	(0xbe00 << 16) | (0xc904 >> 2),
275	0x00000000,
276	(0x4e00 << 16) | (0xc908 >> 2),
277	0x00000000,
278	(0x5e00 << 16) | (0xc908 >> 2),
279	0x00000000,
280	(0x6e00 << 16) | (0xc908 >> 2),
281	0x00000000,
282	(0x7e00 << 16) | (0xc908 >> 2),
283	0x00000000,
284	(0x8e00 << 16) | (0xc908 >> 2),
285	0x00000000,
286	(0x9e00 << 16) | (0xc908 >> 2),
287	0x00000000,
288	(0xae00 << 16) | (0xc908 >> 2),
289	0x00000000,
290	(0xbe00 << 16) | (0xc908 >> 2),
291	0x00000000,
292	(0x4e00 << 16) | (0xc90c >> 2),
293	0x00000000,
294	(0x5e00 << 16) | (0xc90c >> 2),
295	0x00000000,
296	(0x6e00 << 16) | (0xc90c >> 2),
297	0x00000000,
298	(0x7e00 << 16) | (0xc90c >> 2),
299	0x00000000,
300	(0x8e00 << 16) | (0xc90c >> 2),
301	0x00000000,
302	(0x9e00 << 16) | (0xc90c >> 2),
303	0x00000000,
304	(0xae00 << 16) | (0xc90c >> 2),
305	0x00000000,
306	(0xbe00 << 16) | (0xc90c >> 2),
307	0x00000000,
308	(0x4e00 << 16) | (0xc910 >> 2),
309	0x00000000,
310	(0x5e00 << 16) | (0xc910 >> 2),
311	0x00000000,
312	(0x6e00 << 16) | (0xc910 >> 2),
313	0x00000000,
314	(0x7e00 << 16) | (0xc910 >> 2),
315	0x00000000,
316	(0x8e00 << 16) | (0xc910 >> 2),
317	0x00000000,
318	(0x9e00 << 16) | (0xc910 >> 2),
319	0x00000000,
320	(0xae00 << 16) | (0xc910 >> 2),
321	0x00000000,
322	(0xbe00 << 16) | (0xc910 >> 2),
323	0x00000000,
324	(0x0e00 << 16) | (0xc99c >> 2),
325	0x00000000,
326	(0x0e00 << 16) | (0x9834 >> 2),
327	0x00000000,
328	(0x0000 << 16) | (0x30f00 >> 2),
329	0x00000000,
330	(0x0001 << 16) | (0x30f00 >> 2),
331	0x00000000,
332	(0x0000 << 16) | (0x30f04 >> 2),
333	0x00000000,
334	(0x0001 << 16) | (0x30f04 >> 2),
335	0x00000000,
336	(0x0000 << 16) | (0x30f08 >> 2),
337	0x00000000,
338	(0x0001 << 16) | (0x30f08 >> 2),
339	0x00000000,
340	(0x0000 << 16) | (0x30f0c >> 2),
341	0x00000000,
342	(0x0001 << 16) | (0x30f0c >> 2),
343	0x00000000,
344	(0x0600 << 16) | (0x9b7c >> 2),
345	0x00000000,
346	(0x0e00 << 16) | (0x8a14 >> 2),
347	0x00000000,
348	(0x0e00 << 16) | (0x8a18 >> 2),
349	0x00000000,
350	(0x0600 << 16) | (0x30a00 >> 2),
351	0x00000000,
352	(0x0e00 << 16) | (0x8bf0 >> 2),
353	0x00000000,
354	(0x0e00 << 16) | (0x8bcc >> 2),
355	0x00000000,
356	(0x0e00 << 16) | (0x8b24 >> 2),
357	0x00000000,
358	(0x0e00 << 16) | (0x30a04 >> 2),
359	0x00000000,
360	(0x0600 << 16) | (0x30a10 >> 2),
361	0x00000000,
362	(0x0600 << 16) | (0x30a14 >> 2),
363	0x00000000,
364	(0x0600 << 16) | (0x30a18 >> 2),
365	0x00000000,
366	(0x0600 << 16) | (0x30a2c >> 2),
367	0x00000000,
368	(0x0e00 << 16) | (0xc700 >> 2),
369	0x00000000,
370	(0x0e00 << 16) | (0xc704 >> 2),
371	0x00000000,
372	(0x0e00 << 16) | (0xc708 >> 2),
373	0x00000000,
374	(0x0e00 << 16) | (0xc768 >> 2),
375	0x00000000,
376	(0x0400 << 16) | (0xc770 >> 2),
377	0x00000000,
378	(0x0400 << 16) | (0xc774 >> 2),
379	0x00000000,
380	(0x0400 << 16) | (0xc778 >> 2),
381	0x00000000,
382	(0x0400 << 16) | (0xc77c >> 2),
383	0x00000000,
384	(0x0400 << 16) | (0xc780 >> 2),
385	0x00000000,
386	(0x0400 << 16) | (0xc784 >> 2),
387	0x00000000,
388	(0x0400 << 16) | (0xc788 >> 2),
389	0x00000000,
390	(0x0400 << 16) | (0xc78c >> 2),
391	0x00000000,
392	(0x0400 << 16) | (0xc798 >> 2),
393	0x00000000,
394	(0x0400 << 16) | (0xc79c >> 2),
395	0x00000000,
396	(0x0400 << 16) | (0xc7a0 >> 2),
397	0x00000000,
398	(0x0400 << 16) | (0xc7a4 >> 2),
399	0x00000000,
400	(0x0400 << 16) | (0xc7a8 >> 2),
401	0x00000000,
402	(0x0400 << 16) | (0xc7ac >> 2),
403	0x00000000,
404	(0x0400 << 16) | (0xc7b0 >> 2),
405	0x00000000,
406	(0x0400 << 16) | (0xc7b4 >> 2),
407	0x00000000,
408	(0x0e00 << 16) | (0x9100 >> 2),
409	0x00000000,
410	(0x0e00 << 16) | (0x3c010 >> 2),
411	0x00000000,
412	(0x0e00 << 16) | (0x92a8 >> 2),
413	0x00000000,
414	(0x0e00 << 16) | (0x92ac >> 2),
415	0x00000000,
416	(0x0e00 << 16) | (0x92b4 >> 2),
417	0x00000000,
418	(0x0e00 << 16) | (0x92b8 >> 2),
419	0x00000000,
420	(0x0e00 << 16) | (0x92bc >> 2),
421	0x00000000,
422	(0x0e00 << 16) | (0x92c0 >> 2),
423	0x00000000,
424	(0x0e00 << 16) | (0x92c4 >> 2),
425	0x00000000,
426	(0x0e00 << 16) | (0x92c8 >> 2),
427	0x00000000,
428	(0x0e00 << 16) | (0x92cc >> 2),
429	0x00000000,
430	(0x0e00 << 16) | (0x92d0 >> 2),
431	0x00000000,
432	(0x0e00 << 16) | (0x8c00 >> 2),
433	0x00000000,
434	(0x0e00 << 16) | (0x8c04 >> 2),
435	0x00000000,
436	(0x0e00 << 16) | (0x8c20 >> 2),
437	0x00000000,
438	(0x0e00 << 16) | (0x8c38 >> 2),
439	0x00000000,
440	(0x0e00 << 16) | (0x8c3c >> 2),
441	0x00000000,
442	(0x0e00 << 16) | (0xae00 >> 2),
443	0x00000000,
444	(0x0e00 << 16) | (0x9604 >> 2),
445	0x00000000,
446	(0x0e00 << 16) | (0xac08 >> 2),
447	0x00000000,
448	(0x0e00 << 16) | (0xac0c >> 2),
449	0x00000000,
450	(0x0e00 << 16) | (0xac10 >> 2),
451	0x00000000,
452	(0x0e00 << 16) | (0xac14 >> 2),
453	0x00000000,
454	(0x0e00 << 16) | (0xac58 >> 2),
455	0x00000000,
456	(0x0e00 << 16) | (0xac68 >> 2),
457	0x00000000,
458	(0x0e00 << 16) | (0xac6c >> 2),
459	0x00000000,
460	(0x0e00 << 16) | (0xac70 >> 2),
461	0x00000000,
462	(0x0e00 << 16) | (0xac74 >> 2),
463	0x00000000,
464	(0x0e00 << 16) | (0xac78 >> 2),
465	0x00000000,
466	(0x0e00 << 16) | (0xac7c >> 2),
467	0x00000000,
468	(0x0e00 << 16) | (0xac80 >> 2),
469	0x00000000,
470	(0x0e00 << 16) | (0xac84 >> 2),
471	0x00000000,
472	(0x0e00 << 16) | (0xac88 >> 2),
473	0x00000000,
474	(0x0e00 << 16) | (0xac8c >> 2),
475	0x00000000,
476	(0x0e00 << 16) | (0x970c >> 2),
477	0x00000000,
478	(0x0e00 << 16) | (0x9714 >> 2),
479	0x00000000,
480	(0x0e00 << 16) | (0x9718 >> 2),
481	0x00000000,
482	(0x0e00 << 16) | (0x971c >> 2),
483	0x00000000,
484	(0x0e00 << 16) | (0x31068 >> 2),
485	0x00000000,
486	(0x4e00 << 16) | (0x31068 >> 2),
487	0x00000000,
488	(0x5e00 << 16) | (0x31068 >> 2),
489	0x00000000,
490	(0x6e00 << 16) | (0x31068 >> 2),
491	0x00000000,
492	(0x7e00 << 16) | (0x31068 >> 2),
493	0x00000000,
494	(0x8e00 << 16) | (0x31068 >> 2),
495	0x00000000,
496	(0x9e00 << 16) | (0x31068 >> 2),
497	0x00000000,
498	(0xae00 << 16) | (0x31068 >> 2),
499	0x00000000,
500	(0xbe00 << 16) | (0x31068 >> 2),
501	0x00000000,
502	(0x0e00 << 16) | (0xcd10 >> 2),
503	0x00000000,
504	(0x0e00 << 16) | (0xcd14 >> 2),
505	0x00000000,
506	(0x0e00 << 16) | (0x88b0 >> 2),
507	0x00000000,
508	(0x0e00 << 16) | (0x88b4 >> 2),
509	0x00000000,
510	(0x0e00 << 16) | (0x88b8 >> 2),
511	0x00000000,
512	(0x0e00 << 16) | (0x88bc >> 2),
513	0x00000000,
514	(0x0400 << 16) | (0x89c0 >> 2),
515	0x00000000,
516	(0x0e00 << 16) | (0x88c4 >> 2),
517	0x00000000,
518	(0x0e00 << 16) | (0x88c8 >> 2),
519	0x00000000,
520	(0x0e00 << 16) | (0x88d0 >> 2),
521	0x00000000,
522	(0x0e00 << 16) | (0x88d4 >> 2),
523	0x00000000,
524	(0x0e00 << 16) | (0x88d8 >> 2),
525	0x00000000,
526	(0x0e00 << 16) | (0x8980 >> 2),
527	0x00000000,
528	(0x0e00 << 16) | (0x30938 >> 2),
529	0x00000000,
530	(0x0e00 << 16) | (0x3093c >> 2),
531	0x00000000,
532	(0x0e00 << 16) | (0x30940 >> 2),
533	0x00000000,
534	(0x0e00 << 16) | (0x89a0 >> 2),
535	0x00000000,
536	(0x0e00 << 16) | (0x30900 >> 2),
537	0x00000000,
538	(0x0e00 << 16) | (0x30904 >> 2),
539	0x00000000,
540	(0x0e00 << 16) | (0x89b4 >> 2),
541	0x00000000,
542	(0x0e00 << 16) | (0x3c210 >> 2),
543	0x00000000,
544	(0x0e00 << 16) | (0x3c214 >> 2),
545	0x00000000,
546	(0x0e00 << 16) | (0x3c218 >> 2),
547	0x00000000,
548	(0x0e00 << 16) | (0x8904 >> 2),
549	0x00000000,
550	0x5,
551	(0x0e00 << 16) | (0x8c28 >> 2),
552	(0x0e00 << 16) | (0x8c2c >> 2),
553	(0x0e00 << 16) | (0x8c30 >> 2),
554	(0x0e00 << 16) | (0x8c34 >> 2),
555	(0x0e00 << 16) | (0x9600 >> 2),
556};
557
558static const u32 kalindi_rlc_save_restore_register_list[] =
559{
560	(0x0e00 << 16) | (0xc12c >> 2),
561	0x00000000,
562	(0x0e00 << 16) | (0xc140 >> 2),
563	0x00000000,
564	(0x0e00 << 16) | (0xc150 >> 2),
565	0x00000000,
566	(0x0e00 << 16) | (0xc15c >> 2),
567	0x00000000,
568	(0x0e00 << 16) | (0xc168 >> 2),
569	0x00000000,
570	(0x0e00 << 16) | (0xc170 >> 2),
571	0x00000000,
572	(0x0e00 << 16) | (0xc204 >> 2),
573	0x00000000,
574	(0x0e00 << 16) | (0xc2b4 >> 2),
575	0x00000000,
576	(0x0e00 << 16) | (0xc2b8 >> 2),
577	0x00000000,
578	(0x0e00 << 16) | (0xc2bc >> 2),
579	0x00000000,
580	(0x0e00 << 16) | (0xc2c0 >> 2),
581	0x00000000,
582	(0x0e00 << 16) | (0x8228 >> 2),
583	0x00000000,
584	(0x0e00 << 16) | (0x829c >> 2),
585	0x00000000,
586	(0x0e00 << 16) | (0x869c >> 2),
587	0x00000000,
588	(0x0600 << 16) | (0x98f4 >> 2),
589	0x00000000,
590	(0x0e00 << 16) | (0x98f8 >> 2),
591	0x00000000,
592	(0x0e00 << 16) | (0x9900 >> 2),
593	0x00000000,
594	(0x0e00 << 16) | (0xc260 >> 2),
595	0x00000000,
596	(0x0e00 << 16) | (0x90e8 >> 2),
597	0x00000000,
598	(0x0e00 << 16) | (0x3c000 >> 2),
599	0x00000000,
600	(0x0e00 << 16) | (0x3c00c >> 2),
601	0x00000000,
602	(0x0e00 << 16) | (0x8c1c >> 2),
603	0x00000000,
604	(0x0e00 << 16) | (0x9700 >> 2),
605	0x00000000,
606	(0x0e00 << 16) | (0xcd20 >> 2),
607	0x00000000,
608	(0x4e00 << 16) | (0xcd20 >> 2),
609	0x00000000,
610	(0x5e00 << 16) | (0xcd20 >> 2),
611	0x00000000,
612	(0x6e00 << 16) | (0xcd20 >> 2),
613	0x00000000,
614	(0x7e00 << 16) | (0xcd20 >> 2),
615	0x00000000,
616	(0x0e00 << 16) | (0x89bc >> 2),
617	0x00000000,
618	(0x0e00 << 16) | (0x8900 >> 2),
619	0x00000000,
620	0x3,
621	(0x0e00 << 16) | (0xc130 >> 2),
622	0x00000000,
623	(0x0e00 << 16) | (0xc134 >> 2),
624	0x00000000,
625	(0x0e00 << 16) | (0xc1fc >> 2),
626	0x00000000,
627	(0x0e00 << 16) | (0xc208 >> 2),
628	0x00000000,
629	(0x0e00 << 16) | (0xc264 >> 2),
630	0x00000000,
631	(0x0e00 << 16) | (0xc268 >> 2),
632	0x00000000,
633	(0x0e00 << 16) | (0xc26c >> 2),
634	0x00000000,
635	(0x0e00 << 16) | (0xc270 >> 2),
636	0x00000000,
637	(0x0e00 << 16) | (0xc274 >> 2),
638	0x00000000,
639	(0x0e00 << 16) | (0xc28c >> 2),
640	0x00000000,
641	(0x0e00 << 16) | (0xc290 >> 2),
642	0x00000000,
643	(0x0e00 << 16) | (0xc294 >> 2),
644	0x00000000,
645	(0x0e00 << 16) | (0xc298 >> 2),
646	0x00000000,
647	(0x0e00 << 16) | (0xc2a0 >> 2),
648	0x00000000,
649	(0x0e00 << 16) | (0xc2a4 >> 2),
650	0x00000000,
651	(0x0e00 << 16) | (0xc2a8 >> 2),
652	0x00000000,
653	(0x0e00 << 16) | (0xc2ac >> 2),
654	0x00000000,
655	(0x0e00 << 16) | (0x301d0 >> 2),
656	0x00000000,
657	(0x0e00 << 16) | (0x30238 >> 2),
658	0x00000000,
659	(0x0e00 << 16) | (0x30250 >> 2),
660	0x00000000,
661	(0x0e00 << 16) | (0x30254 >> 2),
662	0x00000000,
663	(0x0e00 << 16) | (0x30258 >> 2),
664	0x00000000,
665	(0x0e00 << 16) | (0x3025c >> 2),
666	0x00000000,
667	(0x4e00 << 16) | (0xc900 >> 2),
668	0x00000000,
669	(0x5e00 << 16) | (0xc900 >> 2),
670	0x00000000,
671	(0x6e00 << 16) | (0xc900 >> 2),
672	0x00000000,
673	(0x7e00 << 16) | (0xc900 >> 2),
674	0x00000000,
675	(0x4e00 << 16) | (0xc904 >> 2),
676	0x00000000,
677	(0x5e00 << 16) | (0xc904 >> 2),
678	0x00000000,
679	(0x6e00 << 16) | (0xc904 >> 2),
680	0x00000000,
681	(0x7e00 << 16) | (0xc904 >> 2),
682	0x00000000,
683	(0x4e00 << 16) | (0xc908 >> 2),
684	0x00000000,
685	(0x5e00 << 16) | (0xc908 >> 2),
686	0x00000000,
687	(0x6e00 << 16) | (0xc908 >> 2),
688	0x00000000,
689	(0x7e00 << 16) | (0xc908 >> 2),
690	0x00000000,
691	(0x4e00 << 16) | (0xc90c >> 2),
692	0x00000000,
693	(0x5e00 << 16) | (0xc90c >> 2),
694	0x00000000,
695	(0x6e00 << 16) | (0xc90c >> 2),
696	0x00000000,
697	(0x7e00 << 16) | (0xc90c >> 2),
698	0x00000000,
699	(0x4e00 << 16) | (0xc910 >> 2),
700	0x00000000,
701	(0x5e00 << 16) | (0xc910 >> 2),
702	0x00000000,
703	(0x6e00 << 16) | (0xc910 >> 2),
704	0x00000000,
705	(0x7e00 << 16) | (0xc910 >> 2),
706	0x00000000,
707	(0x0e00 << 16) | (0xc99c >> 2),
708	0x00000000,
709	(0x0e00 << 16) | (0x9834 >> 2),
710	0x00000000,
711	(0x0000 << 16) | (0x30f00 >> 2),
712	0x00000000,
713	(0x0000 << 16) | (0x30f04 >> 2),
714	0x00000000,
715	(0x0000 << 16) | (0x30f08 >> 2),
716	0x00000000,
717	(0x0000 << 16) | (0x30f0c >> 2),
718	0x00000000,
719	(0x0600 << 16) | (0x9b7c >> 2),
720	0x00000000,
721	(0x0e00 << 16) | (0x8a14 >> 2),
722	0x00000000,
723	(0x0e00 << 16) | (0x8a18 >> 2),
724	0x00000000,
725	(0x0600 << 16) | (0x30a00 >> 2),
726	0x00000000,
727	(0x0e00 << 16) | (0x8bf0 >> 2),
728	0x00000000,
729	(0x0e00 << 16) | (0x8bcc >> 2),
730	0x00000000,
731	(0x0e00 << 16) | (0x8b24 >> 2),
732	0x00000000,
733	(0x0e00 << 16) | (0x30a04 >> 2),
734	0x00000000,
735	(0x0600 << 16) | (0x30a10 >> 2),
736	0x00000000,
737	(0x0600 << 16) | (0x30a14 >> 2),
738	0x00000000,
739	(0x0600 << 16) | (0x30a18 >> 2),
740	0x00000000,
741	(0x0600 << 16) | (0x30a2c >> 2),
742	0x00000000,
743	(0x0e00 << 16) | (0xc700 >> 2),
744	0x00000000,
745	(0x0e00 << 16) | (0xc704 >> 2),
746	0x00000000,
747	(0x0e00 << 16) | (0xc708 >> 2),
748	0x00000000,
749	(0x0e00 << 16) | (0xc768 >> 2),
750	0x00000000,
751	(0x0400 << 16) | (0xc770 >> 2),
752	0x00000000,
753	(0x0400 << 16) | (0xc774 >> 2),
754	0x00000000,
755	(0x0400 << 16) | (0xc798 >> 2),
756	0x00000000,
757	(0x0400 << 16) | (0xc79c >> 2),
758	0x00000000,
759	(0x0e00 << 16) | (0x9100 >> 2),
760	0x00000000,
761	(0x0e00 << 16) | (0x3c010 >> 2),
762	0x00000000,
763	(0x0e00 << 16) | (0x8c00 >> 2),
764	0x00000000,
765	(0x0e00 << 16) | (0x8c04 >> 2),
766	0x00000000,
767	(0x0e00 << 16) | (0x8c20 >> 2),
768	0x00000000,
769	(0x0e00 << 16) | (0x8c38 >> 2),
770	0x00000000,
771	(0x0e00 << 16) | (0x8c3c >> 2),
772	0x00000000,
773	(0x0e00 << 16) | (0xae00 >> 2),
774	0x00000000,
775	(0x0e00 << 16) | (0x9604 >> 2),
776	0x00000000,
777	(0x0e00 << 16) | (0xac08 >> 2),
778	0x00000000,
779	(0x0e00 << 16) | (0xac0c >> 2),
780	0x00000000,
781	(0x0e00 << 16) | (0xac10 >> 2),
782	0x00000000,
783	(0x0e00 << 16) | (0xac14 >> 2),
784	0x00000000,
785	(0x0e00 << 16) | (0xac58 >> 2),
786	0x00000000,
787	(0x0e00 << 16) | (0xac68 >> 2),
788	0x00000000,
789	(0x0e00 << 16) | (0xac6c >> 2),
790	0x00000000,
791	(0x0e00 << 16) | (0xac70 >> 2),
792	0x00000000,
793	(0x0e00 << 16) | (0xac74 >> 2),
794	0x00000000,
795	(0x0e00 << 16) | (0xac78 >> 2),
796	0x00000000,
797	(0x0e00 << 16) | (0xac7c >> 2),
798	0x00000000,
799	(0x0e00 << 16) | (0xac80 >> 2),
800	0x00000000,
801	(0x0e00 << 16) | (0xac84 >> 2),
802	0x00000000,
803	(0x0e00 << 16) | (0xac88 >> 2),
804	0x00000000,
805	(0x0e00 << 16) | (0xac8c >> 2),
806	0x00000000,
807	(0x0e00 << 16) | (0x970c >> 2),
808	0x00000000,
809	(0x0e00 << 16) | (0x9714 >> 2),
810	0x00000000,
811	(0x0e00 << 16) | (0x9718 >> 2),
812	0x00000000,
813	(0x0e00 << 16) | (0x971c >> 2),
814	0x00000000,
815	(0x0e00 << 16) | (0x31068 >> 2),
816	0x00000000,
817	(0x4e00 << 16) | (0x31068 >> 2),
818	0x00000000,
819	(0x5e00 << 16) | (0x31068 >> 2),
820	0x00000000,
821	(0x6e00 << 16) | (0x31068 >> 2),
822	0x00000000,
823	(0x7e00 << 16) | (0x31068 >> 2),
824	0x00000000,
825	(0x0e00 << 16) | (0xcd10 >> 2),
826	0x00000000,
827	(0x0e00 << 16) | (0xcd14 >> 2),
828	0x00000000,
829	(0x0e00 << 16) | (0x88b0 >> 2),
830	0x00000000,
831	(0x0e00 << 16) | (0x88b4 >> 2),
832	0x00000000,
833	(0x0e00 << 16) | (0x88b8 >> 2),
834	0x00000000,
835	(0x0e00 << 16) | (0x88bc >> 2),
836	0x00000000,
837	(0x0400 << 16) | (0x89c0 >> 2),
838	0x00000000,
839	(0x0e00 << 16) | (0x88c4 >> 2),
840	0x00000000,
841	(0x0e00 << 16) | (0x88c8 >> 2),
842	0x00000000,
843	(0x0e00 << 16) | (0x88d0 >> 2),
844	0x00000000,
845	(0x0e00 << 16) | (0x88d4 >> 2),
846	0x00000000,
847	(0x0e00 << 16) | (0x88d8 >> 2),
848	0x00000000,
849	(0x0e00 << 16) | (0x8980 >> 2),
850	0x00000000,
851	(0x0e00 << 16) | (0x30938 >> 2),
852	0x00000000,
853	(0x0e00 << 16) | (0x3093c >> 2),
854	0x00000000,
855	(0x0e00 << 16) | (0x30940 >> 2),
856	0x00000000,
857	(0x0e00 << 16) | (0x89a0 >> 2),
858	0x00000000,
859	(0x0e00 << 16) | (0x30900 >> 2),
860	0x00000000,
861	(0x0e00 << 16) | (0x30904 >> 2),
862	0x00000000,
863	(0x0e00 << 16) | (0x89b4 >> 2),
864	0x00000000,
865	(0x0e00 << 16) | (0x3e1fc >> 2),
866	0x00000000,
867	(0x0e00 << 16) | (0x3c210 >> 2),
868	0x00000000,
869	(0x0e00 << 16) | (0x3c214 >> 2),
870	0x00000000,
871	(0x0e00 << 16) | (0x3c218 >> 2),
872	0x00000000,
873	(0x0e00 << 16) | (0x8904 >> 2),
874	0x00000000,
875	0x5,
876	(0x0e00 << 16) | (0x8c28 >> 2),
877	(0x0e00 << 16) | (0x8c2c >> 2),
878	(0x0e00 << 16) | (0x8c30 >> 2),
879	(0x0e00 << 16) | (0x8c34 >> 2),
880	(0x0e00 << 16) | (0x9600 >> 2),
881};
882
883static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
884static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
885static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
886static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
887
888/*
889 * Core functions
890 */
891/**
892 * gfx_v7_0_init_microcode - load ucode images from disk
893 *
894 * @adev: amdgpu_device pointer
895 *
896 * Use the firmware interface to load the ucode images into
897 * the driver (not loaded into hw).
898 * Returns 0 on success, error on failure.
899 */
900static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
901{
902	const char *chip_name;
903	char fw_name[30];
904	int err;
905
906	DRM_DEBUG("\n");
907
908	switch (adev->asic_type) {
909	case CHIP_BONAIRE:
910		chip_name = "bonaire";
911		break;
912	case CHIP_HAWAII:
913		chip_name = "hawaii";
914		break;
915	case CHIP_KAVERI:
916		chip_name = "kaveri";
917		break;
918	case CHIP_KABINI:
919		chip_name = "kabini";
920		break;
921	case CHIP_MULLINS:
922		chip_name = "mullins";
923		break;
924	default: BUG();
925	}
926
927	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
928	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
929	if (err)
930		goto out;
931	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
932	if (err)
933		goto out;
934
935	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
936	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
937	if (err)
938		goto out;
939	err = amdgpu_ucode_validate(adev->gfx.me_fw);
940	if (err)
941		goto out;
942
943	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
944	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
945	if (err)
946		goto out;
947	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
948	if (err)
949		goto out;
950
951	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
952	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
953	if (err)
954		goto out;
955	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
956	if (err)
957		goto out;
958
959	if (adev->asic_type == CHIP_KAVERI) {
960		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name);
961		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
962		if (err)
963			goto out;
964		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
965		if (err)
966			goto out;
967	}
968
969	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
970	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
971	if (err)
972		goto out;
973	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
974
975out:
976	if (err) {
977		printk(KERN_ERR
978		       "gfx7: Failed to load firmware \"%s\"\n",
979		       fw_name);
980		release_firmware(adev->gfx.pfp_fw);
981		adev->gfx.pfp_fw = NULL;
982		release_firmware(adev->gfx.me_fw);
983		adev->gfx.me_fw = NULL;
984		release_firmware(adev->gfx.ce_fw);
985		adev->gfx.ce_fw = NULL;
986		release_firmware(adev->gfx.mec_fw);
987		adev->gfx.mec_fw = NULL;
988		release_firmware(adev->gfx.mec2_fw);
989		adev->gfx.mec2_fw = NULL;
990		release_firmware(adev->gfx.rlc_fw);
991		adev->gfx.rlc_fw = NULL;
992	}
993	return err;
994}
995
996/**
997 * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
998 *
999 * @adev: amdgpu_device pointer
1000 *
1001 * Starting with SI, the tiling setup is done globally in a
1002 * set of 32 tiling modes.  Rather than selecting each set of
1003 * parameters per surface as on older asics, we just select
1004 * which index in the tiling table we want to use, and the
1005 * surface uses those parameters (CIK).
1006 */
1007static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1008{
1009	const u32 num_tile_mode_states = 32;
1010	const u32 num_secondary_tile_mode_states = 16;
1011	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1012
1013	switch (adev->gfx.config.mem_row_size_in_kb) {
1014	case 1:
1015		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1016		break;
1017	case 2:
1018	default:
1019		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1020		break;
1021	case 4:
1022		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1023		break;
1024	}
1025
1026	switch (adev->asic_type) {
1027	case CHIP_BONAIRE:
1028		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1029			switch (reg_offset) {
1030			case 0:
1031				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1032						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1033						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1034						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1035				break;
1036			case 1:
1037				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1038						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1039						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1040						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1041				break;
1042			case 2:
1043				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1044						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1045						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1046						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1047				break;
1048			case 3:
1049				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1050						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1051						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1052						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1053				break;
1054			case 4:
1055				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1057						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1058						TILE_SPLIT(split_equal_to_row_size));
1059				break;
1060			case 5:
1061				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1062						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1063						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1064				break;
1065			case 6:
1066				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1067						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1068						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1069						TILE_SPLIT(split_equal_to_row_size));
1070				break;
1071			case 7:
1072				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1073				break;
1074
1075			case 8:
1076				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1077						PIPE_CONFIG(ADDR_SURF_P4_16x16));
1078				break;
1079			case 9:
1080				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1081						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1082						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1083				break;
1084			case 10:
1085				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1086						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1088						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1089				break;
1090			case 11:
1091				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1092						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1093						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1094						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1095				break;
1096			case 12:
1097				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1098				break;
1099			case 13:
1100				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1101						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1102						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1103				break;
1104			case 14:
1105				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1106						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1107						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1108						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1109				break;
1110			case 15:
1111				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1112						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1113						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1114						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1115				break;
1116			case 16:
1117				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1118						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1120						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1121				break;
1122			case 17:
1123				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1124				break;
1125			case 18:
1126				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1127						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1128						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1129						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1130				break;
1131			case 19:
1132				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1133						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1134						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1135				break;
1136			case 20:
1137				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1138						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1140						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1141				break;
1142			case 21:
1143				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1144						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1145						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1146						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1147				break;
1148			case 22:
1149				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1150						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1151						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1152						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1153				break;
1154			case 23:
1155				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1156				break;
1157			case 24:
1158				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1159						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1161						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1162				break;
1163			case 25:
1164				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1165						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1167						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1168				break;
1169			case 26:
1170				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1171						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1173						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1174				break;
1175			case 27:
1176				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1177						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1179				break;
1180			case 28:
1181				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1182						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1184						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1185				break;
1186			case 29:
1187				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1188						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1190						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1191				break;
1192			case 30:
1193				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1194				break;
1195			default:
1196				gb_tile_moden = 0;
1197				break;
1198			}
1199			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1200			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1201		}
1202		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1203			switch (reg_offset) {
1204			case 0:
1205				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1206						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1207						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1208						NUM_BANKS(ADDR_SURF_16_BANK));
1209				break;
1210			case 1:
1211				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1212						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1213						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1214						NUM_BANKS(ADDR_SURF_16_BANK));
1215				break;
1216			case 2:
1217				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1218						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1219						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1220						NUM_BANKS(ADDR_SURF_16_BANK));
1221				break;
1222			case 3:
1223				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1224						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1225						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1226						NUM_BANKS(ADDR_SURF_16_BANK));
1227				break;
1228			case 4:
1229				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1230						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1231						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1232						NUM_BANKS(ADDR_SURF_16_BANK));
1233				break;
1234			case 5:
1235				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1236						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1237						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1238						NUM_BANKS(ADDR_SURF_8_BANK));
1239				break;
1240			case 6:
1241				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1242						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1243						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1244						NUM_BANKS(ADDR_SURF_4_BANK));
1245				break;
1246			case 8:
1247				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1248						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1249						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1250						NUM_BANKS(ADDR_SURF_16_BANK));
1251				break;
1252			case 9:
1253				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1254						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1255						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1256						NUM_BANKS(ADDR_SURF_16_BANK));
1257				break;
1258			case 10:
1259				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1260						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1261						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1262						NUM_BANKS(ADDR_SURF_16_BANK));
1263				break;
1264			case 11:
1265				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1266						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1267						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1268						NUM_BANKS(ADDR_SURF_16_BANK));
1269				break;
1270			case 12:
1271				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1272						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1273						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1274						NUM_BANKS(ADDR_SURF_16_BANK));
1275				break;
1276			case 13:
1277				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1278						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1279						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1280						NUM_BANKS(ADDR_SURF_8_BANK));
1281				break;
1282			case 14:
1283				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1284						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1285						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1286						NUM_BANKS(ADDR_SURF_4_BANK));
1287				break;
1288			default:
1289				gb_tile_moden = 0;
1290				break;
1291			}
1292			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1293			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1294		}
1295		break;
1296	case CHIP_HAWAII:
1297		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1298			switch (reg_offset) {
1299			case 0:
1300				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1301						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1302						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1303						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1304				break;
1305			case 1:
1306				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1307						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1308						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1309						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1310				break;
1311			case 2:
1312				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1314						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1315						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1316				break;
1317			case 3:
1318				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1319						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1320						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1321						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1322				break;
1323			case 4:
1324				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1325						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1326						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1327						TILE_SPLIT(split_equal_to_row_size));
1328				break;
1329			case 5:
1330				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1331						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1332						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1333						TILE_SPLIT(split_equal_to_row_size));
1334				break;
1335			case 6:
1336				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1337						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1338						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1339						TILE_SPLIT(split_equal_to_row_size));
1340				break;
1341			case 7:
1342				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1343						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1344						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1345						TILE_SPLIT(split_equal_to_row_size));
1346				break;
1347
1348			case 8:
1349				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1350						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1351				break;
1352			case 9:
1353				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1354						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1355						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1356				break;
1357			case 10:
1358				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1359						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1360						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1361						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1362				break;
1363			case 11:
1364				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1365						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1366						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1367						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1368				break;
1369			case 12:
1370				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1372						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1373						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1374				break;
1375			case 13:
1376				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1377						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1378						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1379				break;
1380			case 14:
1381				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1382						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1383						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1384						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385				break;
1386			case 15:
1387				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1388						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1389						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1390						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391				break;
1392			case 16:
1393				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1394						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1395						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1396						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1397				break;
1398			case 17:
1399				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1400						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1401						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1402						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1403				break;
1404			case 18:
1405				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1406						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1407						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1408						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1409				break;
1410			case 19:
1411				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1412						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1413						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1414				break;
1415			case 20:
1416				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1417						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1418						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1419						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1420				break;
1421			case 21:
1422				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1423						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1424						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1425						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1426				break;
1427			case 22:
1428				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1429						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1430						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1431						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1432				break;
1433			case 23:
1434				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1435						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1436						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1437						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1438				break;
1439			case 24:
1440				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1441						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1442						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1443						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1444				break;
1445			case 25:
1446				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1447						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1448						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1449						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1450				break;
1451			case 26:
1452				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1453						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1454						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1455						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1456				break;
1457			case 27:
1458				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1459						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1460						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1461				break;
1462			case 28:
1463				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1464						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1465						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1466						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1467				break;
1468			case 29:
1469				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1470						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1471						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1472						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1473				break;
1474			case 30:
1475				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1476						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1477						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1478						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1479				break;
1480			default:
1481				gb_tile_moden = 0;
1482				break;
1483			}
1484			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1485			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1486		}
1487		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1488			switch (reg_offset) {
1489			case 0:
1490				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1491						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1492						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1493						NUM_BANKS(ADDR_SURF_16_BANK));
1494				break;
1495			case 1:
1496				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1497						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1498						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1499						NUM_BANKS(ADDR_SURF_16_BANK));
1500				break;
1501			case 2:
1502				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1503						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1504						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1505						NUM_BANKS(ADDR_SURF_16_BANK));
1506				break;
1507			case 3:
1508				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1509						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1510						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1511						NUM_BANKS(ADDR_SURF_16_BANK));
1512				break;
1513			case 4:
1514				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1515						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1516						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1517						NUM_BANKS(ADDR_SURF_8_BANK));
1518				break;
1519			case 5:
1520				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1521						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1522						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1523						NUM_BANKS(ADDR_SURF_4_BANK));
1524				break;
1525			case 6:
1526				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1527						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1528						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1529						NUM_BANKS(ADDR_SURF_4_BANK));
1530				break;
1531			case 8:
1532				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1533						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1534						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1535						NUM_BANKS(ADDR_SURF_16_BANK));
1536				break;
1537			case 9:
1538				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1539						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1540						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1541						NUM_BANKS(ADDR_SURF_16_BANK));
1542				break;
1543			case 10:
1544				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1545						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1546						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1547						NUM_BANKS(ADDR_SURF_16_BANK));
1548				break;
1549			case 11:
1550				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1551						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1552						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1553						NUM_BANKS(ADDR_SURF_8_BANK));
1554				break;
1555			case 12:
1556				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1557						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1558						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1559						NUM_BANKS(ADDR_SURF_16_BANK));
1560				break;
1561			case 13:
1562				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1563						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1564						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1565						NUM_BANKS(ADDR_SURF_8_BANK));
1566				break;
1567			case 14:
1568				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1569						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1570						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1571						NUM_BANKS(ADDR_SURF_4_BANK));
1572				break;
1573			default:
1574				gb_tile_moden = 0;
1575				break;
1576			}
1577			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1578			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1579		}
1580		break;
1581	case CHIP_KABINI:
1582	case CHIP_KAVERI:
1583	case CHIP_MULLINS:
1584	default:
1585		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1586			switch (reg_offset) {
1587			case 0:
1588				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1589						PIPE_CONFIG(ADDR_SURF_P2) |
1590						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1591						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1592				break;
1593			case 1:
1594				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1595						PIPE_CONFIG(ADDR_SURF_P2) |
1596						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1597						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1598				break;
1599			case 2:
1600				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1601						PIPE_CONFIG(ADDR_SURF_P2) |
1602						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1603						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1604				break;
1605			case 3:
1606				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1607						PIPE_CONFIG(ADDR_SURF_P2) |
1608						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1609						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1610				break;
1611			case 4:
1612				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1613						PIPE_CONFIG(ADDR_SURF_P2) |
1614						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1615						TILE_SPLIT(split_equal_to_row_size));
1616				break;
1617			case 5:
1618				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1619						PIPE_CONFIG(ADDR_SURF_P2) |
1620						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1621				break;
1622			case 6:
1623				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1624						PIPE_CONFIG(ADDR_SURF_P2) |
1625						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1626						TILE_SPLIT(split_equal_to_row_size));
1627				break;
1628			case 7:
1629				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1630				break;
1631
1632			case 8:
1633				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1634						PIPE_CONFIG(ADDR_SURF_P2));
1635				break;
1636			case 9:
1637				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1638						PIPE_CONFIG(ADDR_SURF_P2) |
1639						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1640				break;
1641			case 10:
1642				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1643						PIPE_CONFIG(ADDR_SURF_P2) |
1644						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1645						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1646				break;
1647			case 11:
1648				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1649						PIPE_CONFIG(ADDR_SURF_P2) |
1650						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1651						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1652				break;
1653			case 12:
1654				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1655				break;
1656			case 13:
1657				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1658						PIPE_CONFIG(ADDR_SURF_P2) |
1659						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1660				break;
1661			case 14:
1662				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1663						PIPE_CONFIG(ADDR_SURF_P2) |
1664						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1665						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1666				break;
1667			case 15:
1668				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1669						PIPE_CONFIG(ADDR_SURF_P2) |
1670						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1671						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1672				break;
1673			case 16:
1674				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1675						PIPE_CONFIG(ADDR_SURF_P2) |
1676						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1677						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1678				break;
1679			case 17:
1680				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1681				break;
1682			case 18:
1683				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1684						PIPE_CONFIG(ADDR_SURF_P2) |
1685						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1686						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1687				break;
1688			case 19:
1689				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1690						PIPE_CONFIG(ADDR_SURF_P2) |
1691						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1692				break;
1693			case 20:
1694				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1695						PIPE_CONFIG(ADDR_SURF_P2) |
1696						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1697						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1698				break;
1699			case 21:
1700				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1701						PIPE_CONFIG(ADDR_SURF_P2) |
1702						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1703						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1704				break;
1705			case 22:
1706				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1707						PIPE_CONFIG(ADDR_SURF_P2) |
1708						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1709						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1710				break;
1711			case 23:
1712				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1713				break;
1714			case 24:
1715				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1716						PIPE_CONFIG(ADDR_SURF_P2) |
1717						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1718						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1719				break;
1720			case 25:
1721				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1722						PIPE_CONFIG(ADDR_SURF_P2) |
1723						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1724						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1725				break;
1726			case 26:
1727				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1728						PIPE_CONFIG(ADDR_SURF_P2) |
1729						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1730						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1731				break;
1732			case 27:
1733				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1734						PIPE_CONFIG(ADDR_SURF_P2) |
1735						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1736				break;
1737			case 28:
1738				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1739						PIPE_CONFIG(ADDR_SURF_P2) |
1740						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1741						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1742				break;
1743			case 29:
1744				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1745						PIPE_CONFIG(ADDR_SURF_P2) |
1746						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1747						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1748				break;
1749			case 30:
1750				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1751				break;
1752			default:
1753				gb_tile_moden = 0;
1754				break;
1755			}
1756			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1757			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1758		}
1759		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1760			switch (reg_offset) {
1761			case 0:
1762				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1763						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1764						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1765						NUM_BANKS(ADDR_SURF_8_BANK));
1766				break;
1767			case 1:
1768				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1769						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1770						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1771						NUM_BANKS(ADDR_SURF_8_BANK));
1772				break;
1773			case 2:
1774				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1776						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1777						NUM_BANKS(ADDR_SURF_8_BANK));
1778				break;
1779			case 3:
1780				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1781						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1782						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1783						NUM_BANKS(ADDR_SURF_8_BANK));
1784				break;
1785			case 4:
1786				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1787						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1788						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789						NUM_BANKS(ADDR_SURF_8_BANK));
1790				break;
1791			case 5:
1792				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1793						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1794						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1795						NUM_BANKS(ADDR_SURF_8_BANK));
1796				break;
1797			case 6:
1798				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1800						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801						NUM_BANKS(ADDR_SURF_8_BANK));
1802				break;
1803			case 8:
1804				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1805						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1806						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1807						NUM_BANKS(ADDR_SURF_16_BANK));
1808				break;
1809			case 9:
1810				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813						NUM_BANKS(ADDR_SURF_16_BANK));
1814				break;
1815			case 10:
1816				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1817						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1818						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1819						NUM_BANKS(ADDR_SURF_16_BANK));
1820				break;
1821			case 11:
1822				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825						NUM_BANKS(ADDR_SURF_16_BANK));
1826				break;
1827			case 12:
1828				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1829						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1830						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1831						NUM_BANKS(ADDR_SURF_16_BANK));
1832				break;
1833			case 13:
1834				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1837						NUM_BANKS(ADDR_SURF_16_BANK));
1838				break;
1839			case 14:
1840				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1841						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1842						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1843						NUM_BANKS(ADDR_SURF_8_BANK));
1844				break;
1845			default:
1846				gb_tile_moden = 0;
1847				break;
1848			}
1849			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1850			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1851		}
1852		break;
1853	}
1854}
1855
1856/**
1857 * gfx_v7_0_select_se_sh - select which SE, SH to address
1858 *
1859 * @adev: amdgpu_device pointer
1860 * @se_num: shader engine to address
1861 * @sh_num: sh block to address
1862 *
1863 * Select which SE, SH combinations to address. Certain
1864 * registers are instanced per SE or SH.  0xffffffff means
1865 * broadcast to all SEs or SHs (CIK).
1866 */
1867void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
1868{
1869	u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;
1870
1871	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1872		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1873			GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1874	else if (se_num == 0xffffffff)
1875		data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1876			(sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1877	else if (sh_num == 0xffffffff)
1878		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1879			(se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1880	else
1881		data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1882			(se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1883	WREG32(mmGRBM_GFX_INDEX, data);
1884}
1885
1886/**
1887 * gfx_v7_0_create_bitmask - create a bitmask
1888 *
1889 * @bit_width: length of the mask
1890 *
1891 * create a variable length bit mask (CIK).
1892 * Returns the bitmask.
1893 */
1894static u32 gfx_v7_0_create_bitmask(u32 bit_width)
1895{
1896	u32 i, mask = 0;
1897
1898	for (i = 0; i < bit_width; i++) {
1899		mask <<= 1;
1900		mask |= 1;
1901	}
1902	return mask;
1903}
1904
1905/**
1906 * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs
1907 *
1908 * @adev: amdgpu_device pointer
1909 * @max_rb_num: max RBs (render backends) for the asic
1910 * @se_num: number of SEs (shader engines) for the asic
1911 * @sh_per_se: number of SH blocks per SE for the asic
1912 *
1913 * Calculates the bitmask of disabled RBs (CIK).
1914 * Returns the disabled RB bitmask.
1915 */
1916static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
1917				    u32 max_rb_num_per_se,
1918				    u32 sh_per_se)
1919{
1920	u32 data, mask;
1921
1922	data = RREG32(mmCC_RB_BACKEND_DISABLE);
1923	if (data & 1)
1924		data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1925	else
1926		data = 0;
1927
1928	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1929
1930	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1931
1932	mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se);
1933
1934	return data & mask;
1935}
1936
1937/**
1938 * gfx_v7_0_setup_rb - setup the RBs on the asic
1939 *
1940 * @adev: amdgpu_device pointer
1941 * @se_num: number of SEs (shader engines) for the asic
1942 * @sh_per_se: number of SH blocks per SE for the asic
1943 * @max_rb_num: max RBs (render backends) for the asic
1944 *
1945 * Configures per-SE/SH RB registers (CIK).
1946 */
1947static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
1948			      u32 se_num, u32 sh_per_se,
1949			      u32 max_rb_num_per_se)
1950{
1951	int i, j;
1952	u32 data, mask;
1953	u32 disabled_rbs = 0;
1954	u32 enabled_rbs = 0;
1955
1956	mutex_lock(&adev->grbm_idx_mutex);
1957	for (i = 0; i < se_num; i++) {
1958		for (j = 0; j < sh_per_se; j++) {
1959			gfx_v7_0_select_se_sh(adev, i, j);
1960			data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
1961			if (adev->asic_type == CHIP_HAWAII)
1962				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
1963			else
1964				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1965		}
1966	}
1967	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
1968	mutex_unlock(&adev->grbm_idx_mutex);
1969
1970	mask = 1;
1971	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
1972		if (!(disabled_rbs & mask))
1973			enabled_rbs |= mask;
1974		mask <<= 1;
1975	}
1976
1977	adev->gfx.config.backend_enable_mask = enabled_rbs;
1978
1979	mutex_lock(&adev->grbm_idx_mutex);
1980	for (i = 0; i < se_num; i++) {
1981		gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
1982		data = 0;
1983		for (j = 0; j < sh_per_se; j++) {
1984			switch (enabled_rbs & 3) {
1985			case 0:
1986				if (j == 0)
1987					data |= (RASTER_CONFIG_RB_MAP_3 <<
1988						PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1989				else
1990					data |= (RASTER_CONFIG_RB_MAP_0 <<
1991						PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1992				break;
1993			case 1:
1994				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1995				break;
1996			case 2:
1997				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1998				break;
1999			case 3:
2000			default:
2001				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2002				break;
2003			}
2004			enabled_rbs >>= 2;
2005		}
2006		WREG32(mmPA_SC_RASTER_CONFIG, data);
2007	}
2008	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2009	mutex_unlock(&adev->grbm_idx_mutex);
2010}
2011
2012/**
2013 * gmc_v7_0_init_compute_vmid - gart enable
2014 *
2015 * @rdev: amdgpu_device pointer
2016 *
2017 * Initialize compute vmid sh_mem registers
2018 *
2019 */
2020#define DEFAULT_SH_MEM_BASES	(0x6000)
2021#define FIRST_COMPUTE_VMID	(8)
2022#define LAST_COMPUTE_VMID	(16)
2023static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
2024{
2025	int i;
2026	uint32_t sh_mem_config;
2027	uint32_t sh_mem_bases;
2028
2029	/*
2030	 * Configure apertures:
2031	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2032	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2033	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2034	*/
2035	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2036	sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2037			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2038	sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
2039	mutex_lock(&adev->srbm_mutex);
2040	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2041		cik_srbm_select(adev, 0, 0, 0, i);
2042		/* CP and shaders */
2043		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2044		WREG32(mmSH_MEM_APE1_BASE, 1);
2045		WREG32(mmSH_MEM_APE1_LIMIT, 0);
2046		WREG32(mmSH_MEM_BASES, sh_mem_bases);
2047	}
2048	cik_srbm_select(adev, 0, 0, 0, 0);
2049	mutex_unlock(&adev->srbm_mutex);
2050}
2051
2052/**
2053 * gfx_v7_0_gpu_init - setup the 3D engine
2054 *
2055 * @adev: amdgpu_device pointer
2056 *
2057 * Configures the 3D engine and tiling configuration
2058 * registers so that the 3D engine is usable.
2059 */
2060static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
2061{
2062	u32 gb_addr_config;
2063	u32 mc_shared_chmap, mc_arb_ramcfg;
2064	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
2065	u32 sh_mem_cfg;
2066	u32 tmp;
2067	int i;
2068
2069	switch (adev->asic_type) {
2070	case CHIP_BONAIRE:
2071		adev->gfx.config.max_shader_engines = 2;
2072		adev->gfx.config.max_tile_pipes = 4;
2073		adev->gfx.config.max_cu_per_sh = 7;
2074		adev->gfx.config.max_sh_per_se = 1;
2075		adev->gfx.config.max_backends_per_se = 2;
2076		adev->gfx.config.max_texture_channel_caches = 4;
2077		adev->gfx.config.max_gprs = 256;
2078		adev->gfx.config.max_gs_threads = 32;
2079		adev->gfx.config.max_hw_contexts = 8;
2080
2081		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2082		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2083		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2084		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2085		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2086		break;
2087	case CHIP_HAWAII:
2088		adev->gfx.config.max_shader_engines = 4;
2089		adev->gfx.config.max_tile_pipes = 16;
2090		adev->gfx.config.max_cu_per_sh = 11;
2091		adev->gfx.config.max_sh_per_se = 1;
2092		adev->gfx.config.max_backends_per_se = 4;
2093		adev->gfx.config.max_texture_channel_caches = 16;
2094		adev->gfx.config.max_gprs = 256;
2095		adev->gfx.config.max_gs_threads = 32;
2096		adev->gfx.config.max_hw_contexts = 8;
2097
2098		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2099		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2100		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2101		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2102		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
2103		break;
2104	case CHIP_KAVERI:
2105		adev->gfx.config.max_shader_engines = 1;
2106		adev->gfx.config.max_tile_pipes = 4;
2107		if ((adev->pdev->device == 0x1304) ||
2108		    (adev->pdev->device == 0x1305) ||
2109		    (adev->pdev->device == 0x130C) ||
2110		    (adev->pdev->device == 0x130F) ||
2111		    (adev->pdev->device == 0x1310) ||
2112		    (adev->pdev->device == 0x1311) ||
2113		    (adev->pdev->device == 0x131C)) {
2114			adev->gfx.config.max_cu_per_sh = 8;
2115			adev->gfx.config.max_backends_per_se = 2;
2116		} else if ((adev->pdev->device == 0x1309) ||
2117			   (adev->pdev->device == 0x130A) ||
2118			   (adev->pdev->device == 0x130D) ||
2119			   (adev->pdev->device == 0x1313) ||
2120			   (adev->pdev->device == 0x131D)) {
2121			adev->gfx.config.max_cu_per_sh = 6;
2122			adev->gfx.config.max_backends_per_se = 2;
2123		} else if ((adev->pdev->device == 0x1306) ||
2124			   (adev->pdev->device == 0x1307) ||
2125			   (adev->pdev->device == 0x130B) ||
2126			   (adev->pdev->device == 0x130E) ||
2127			   (adev->pdev->device == 0x1315) ||
2128			   (adev->pdev->device == 0x131B)) {
2129			adev->gfx.config.max_cu_per_sh = 4;
2130			adev->gfx.config.max_backends_per_se = 1;
2131		} else {
2132			adev->gfx.config.max_cu_per_sh = 3;
2133			adev->gfx.config.max_backends_per_se = 1;
2134		}
2135		adev->gfx.config.max_sh_per_se = 1;
2136		adev->gfx.config.max_texture_channel_caches = 4;
2137		adev->gfx.config.max_gprs = 256;
2138		adev->gfx.config.max_gs_threads = 16;
2139		adev->gfx.config.max_hw_contexts = 8;
2140
2141		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2144		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2145		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2146		break;
2147	case CHIP_KABINI:
2148	case CHIP_MULLINS:
2149	default:
2150		adev->gfx.config.max_shader_engines = 1;
2151		adev->gfx.config.max_tile_pipes = 2;
2152		adev->gfx.config.max_cu_per_sh = 2;
2153		adev->gfx.config.max_sh_per_se = 1;
2154		adev->gfx.config.max_backends_per_se = 1;
2155		adev->gfx.config.max_texture_channel_caches = 2;
2156		adev->gfx.config.max_gprs = 256;
2157		adev->gfx.config.max_gs_threads = 16;
2158		adev->gfx.config.max_hw_contexts = 8;
2159
2160		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2161		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2162		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2163		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2164		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2165		break;
2166	}
2167
2168	WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
2169
2170	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2171	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2172	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2173
2174	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2175	adev->gfx.config.mem_max_burst_length_bytes = 256;
2176	if (adev->flags & AMD_IS_APU) {
2177		/* Get memory bank mapping mode. */
2178		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2179		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2180		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2181
2182		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2183		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2184		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2185
2186		/* Validate settings in case only one DIMM installed. */
2187		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2188			dimm00_addr_map = 0;
2189		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2190			dimm01_addr_map = 0;
2191		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2192			dimm10_addr_map = 0;
2193		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2194			dimm11_addr_map = 0;
2195
2196		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2197		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2198		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2199			adev->gfx.config.mem_row_size_in_kb = 2;
2200		else
2201			adev->gfx.config.mem_row_size_in_kb = 1;
2202	} else {
2203		tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
2204		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2205		if (adev->gfx.config.mem_row_size_in_kb > 4)
2206			adev->gfx.config.mem_row_size_in_kb = 4;
2207	}
2208	/* XXX use MC settings? */
2209	adev->gfx.config.shader_engine_tile_size = 32;
2210	adev->gfx.config.num_gpus = 1;
2211	adev->gfx.config.multi_gpu_tile_size = 64;
2212
2213	/* fix up row size */
2214	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
2215	switch (adev->gfx.config.mem_row_size_in_kb) {
2216	case 1:
2217	default:
2218		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2219		break;
2220	case 2:
2221		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2222		break;
2223	case 4:
2224		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2225		break;
2226	}
2227	adev->gfx.config.gb_addr_config = gb_addr_config;
2228
2229	WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
2230	WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
2231	WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
2232	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2233	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2234	WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
2235	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2236	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2237
2238	gfx_v7_0_tiling_mode_table_init(adev);
2239
2240	gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2241			  adev->gfx.config.max_sh_per_se,
2242			  adev->gfx.config.max_backends_per_se);
2243
2244	/* set HW defaults for 3D engine */
2245	WREG32(mmCP_MEQ_THRESHOLDS,
2246			(0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
2247			(0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
2248
2249	mutex_lock(&adev->grbm_idx_mutex);
2250	/*
2251	 * making sure that the following register writes will be broadcasted
2252	 * to all the shaders
2253	 */
2254	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2255
2256	/* XXX SH_MEM regs */
2257	/* where to put LDS, scratch, GPUVM in FSA64 space */
2258	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2259				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2260
2261	mutex_lock(&adev->srbm_mutex);
2262	for (i = 0; i < 16; i++) {
2263		cik_srbm_select(adev, 0, 0, 0, i);
2264		/* CP and shaders */
2265		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
2266		WREG32(mmSH_MEM_APE1_BASE, 1);
2267		WREG32(mmSH_MEM_APE1_LIMIT, 0);
2268		WREG32(mmSH_MEM_BASES, 0);
2269	}
2270	cik_srbm_select(adev, 0, 0, 0, 0);
2271	mutex_unlock(&adev->srbm_mutex);
2272
2273	gmc_v7_0_init_compute_vmid(adev);
2274
2275	WREG32(mmSX_DEBUG_1, 0x20);
2276
2277	WREG32(mmTA_CNTL_AUX, 0x00010000);
2278
2279	tmp = RREG32(mmSPI_CONFIG_CNTL);
2280	tmp |= 0x03000000;
2281	WREG32(mmSPI_CONFIG_CNTL, tmp);
2282
2283	WREG32(mmSQ_CONFIG, 1);
2284
2285	WREG32(mmDB_DEBUG, 0);
2286
2287	tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
2288	tmp |= 0x00000400;
2289	WREG32(mmDB_DEBUG2, tmp);
2290
2291	tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
2292	tmp |= 0x00020200;
2293	WREG32(mmDB_DEBUG3, tmp);
2294
2295	tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
2296	tmp |= 0x00018208;
2297	WREG32(mmCB_HW_CONTROL, tmp);
2298
2299	WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
2300
2301	WREG32(mmPA_SC_FIFO_SIZE,
2302		((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2303		(adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2304		(adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2305		(adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
2306
2307	WREG32(mmVGT_NUM_INSTANCES, 1);
2308
2309	WREG32(mmCP_PERFMON_CNTL, 0);
2310
2311	WREG32(mmSQ_CONFIG, 0);
2312
2313	WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
2314		((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
2315		(255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
2316
2317	WREG32(mmVGT_CACHE_INVALIDATION,
2318		(VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
2319		(ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
2320
2321	WREG32(mmVGT_GS_VERTEX_REUSE, 16);
2322	WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
2323
2324	WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
2325			(3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
2326	WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
2327	mutex_unlock(&adev->grbm_idx_mutex);
2328
2329	udelay(50);
2330}
2331
2332/*
2333 * GPU scratch registers helpers function.
2334 */
2335/**
2336 * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
2337 *
2338 * @adev: amdgpu_device pointer
2339 *
2340 * Set up the number and offset of the CP scratch registers.
2341 * NOTE: use of CP scratch registers is a legacy inferface and
2342 * is not used by default on newer asics (r6xx+).  On newer asics,
2343 * memory buffers are used for fences rather than scratch regs.
2344 */
2345static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2346{
2347	int i;
2348
2349	adev->gfx.scratch.num_reg = 7;
2350	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2351	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
2352		adev->gfx.scratch.free[i] = true;
2353		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
2354	}
2355}
2356
2357/**
2358 * gfx_v7_0_ring_test_ring - basic gfx ring test
2359 *
2360 * @adev: amdgpu_device pointer
2361 * @ring: amdgpu_ring structure holding ring information
2362 *
2363 * Allocate a scratch register and write to it using the gfx ring (CIK).
2364 * Provides a basic gfx ring test to verify that the ring is working.
2365 * Used by gfx_v7_0_cp_gfx_resume();
2366 * Returns 0 on success, error on failure.
2367 */
2368static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2369{
2370	struct amdgpu_device *adev = ring->adev;
2371	uint32_t scratch;
2372	uint32_t tmp = 0;
2373	unsigned i;
2374	int r;
2375
2376	r = amdgpu_gfx_scratch_get(adev, &scratch);
2377	if (r) {
2378		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
2379		return r;
2380	}
2381	WREG32(scratch, 0xCAFEDEAD);
2382	r = amdgpu_ring_lock(ring, 3);
2383	if (r) {
2384		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
2385		amdgpu_gfx_scratch_free(adev, scratch);
2386		return r;
2387	}
2388	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2389	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2390	amdgpu_ring_write(ring, 0xDEADBEEF);
2391	amdgpu_ring_unlock_commit(ring);
2392
2393	for (i = 0; i < adev->usec_timeout; i++) {
2394		tmp = RREG32(scratch);
2395		if (tmp == 0xDEADBEEF)
2396			break;
2397		DRM_UDELAY(1);
2398	}
2399	if (i < adev->usec_timeout) {
2400		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2401	} else {
2402		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2403			  ring->idx, scratch, tmp);
2404		r = -EINVAL;
2405	}
2406	amdgpu_gfx_scratch_free(adev, scratch);
2407	return r;
2408}
2409
2410/**
2411 * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp
2412 *
2413 * @adev: amdgpu_device pointer
2414 * @ridx: amdgpu ring index
2415 *
2416 * Emits an hdp flush on the cp.
2417 */
2418static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2419{
2420	u32 ref_and_mask;
2421	int usepfp = ring->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2422
2423	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
2424		switch (ring->me) {
2425		case 1:
2426			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2427			break;
2428		case 2:
2429			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2430			break;
2431		default:
2432			return;
2433		}
2434	} else {
2435		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2436	}
2437
2438	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2439	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
2440				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
2441				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
2442	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2443	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2444	amdgpu_ring_write(ring, ref_and_mask);
2445	amdgpu_ring_write(ring, ref_and_mask);
2446	amdgpu_ring_write(ring, 0x20); /* poll interval */
2447}
2448
2449/**
2450 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
2451 *
2452 * @adev: amdgpu_device pointer
2453 * @fence: amdgpu fence object
2454 *
2455 * Emits a fence sequnce number on the gfx ring and flushes
2456 * GPU caches.
2457 */
2458static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2459					 u64 seq, unsigned flags)
2460{
2461	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2462	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2463	/* Workaround for cache flush problems. First send a dummy EOP
2464	 * event down the pipe with seq one below.
2465	 */
2466	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2467	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2468				 EOP_TC_ACTION_EN |
2469				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2470				 EVENT_INDEX(5)));
2471	amdgpu_ring_write(ring, addr & 0xfffffffc);
2472	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2473				DATA_SEL(1) | INT_SEL(0));
2474	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2475	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2476
2477	/* Then send the real EOP event down the pipe. */
2478	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2479	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2480				 EOP_TC_ACTION_EN |
2481				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2482				 EVENT_INDEX(5)));
2483	amdgpu_ring_write(ring, addr & 0xfffffffc);
2484	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2485				DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2486	amdgpu_ring_write(ring, lower_32_bits(seq));
2487	amdgpu_ring_write(ring, upper_32_bits(seq));
2488}
2489
2490/**
2491 * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring
2492 *
2493 * @adev: amdgpu_device pointer
2494 * @fence: amdgpu fence object
2495 *
2496 * Emits a fence sequnce number on the compute ring and flushes
2497 * GPU caches.
2498 */
2499static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2500					     u64 addr, u64 seq,
2501					     unsigned flags)
2502{
2503	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2504	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2505
2506	/* RELEASE_MEM - flush caches, send int */
2507	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2508	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2509				 EOP_TC_ACTION_EN |
2510				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2511				 EVENT_INDEX(5)));
2512	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2513	amdgpu_ring_write(ring, addr & 0xfffffffc);
2514	amdgpu_ring_write(ring, upper_32_bits(addr));
2515	amdgpu_ring_write(ring, lower_32_bits(seq));
2516	amdgpu_ring_write(ring, upper_32_bits(seq));
2517}
2518
2519/**
2520 * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring
2521 *
2522 * @ring: amdgpu ring buffer object
2523 * @semaphore: amdgpu semaphore object
2524 * @emit_wait: Is this a sempahore wait?
2525 *
2526 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
2527 * from running ahead of semaphore waits.
2528 */
2529static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
2530					 struct amdgpu_semaphore *semaphore,
2531					 bool emit_wait)
2532{
2533	uint64_t addr = semaphore->gpu_addr;
2534	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2535
2536	amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2537	amdgpu_ring_write(ring, addr & 0xffffffff);
2538	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2539
2540	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
2541		/* Prevent the PFP from running ahead of the semaphore wait */
2542		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2543		amdgpu_ring_write(ring, 0x0);
2544	}
2545
2546	return true;
2547}
2548
2549/*
2550 * IB stuff
2551 */
2552/**
2553 * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring
2554 *
2555 * @ring: amdgpu_ring structure holding ring information
2556 * @ib: amdgpu indirect buffer object
2557 *
2558 * Emits an DE (drawing engine) or CE (constant engine) IB
2559 * on the gfx ring.  IBs are usually generated by userspace
2560 * acceleration drivers and submitted to the kernel for
2561 * sheduling on the ring.  This function schedules the IB
2562 * on the gfx ring for execution by the GPU.
2563 */
2564static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2565				  struct amdgpu_ib *ib)
2566{
2567	bool need_ctx_switch = ring->current_ctx != ib->ctx;
2568	u32 header, control = 0;
2569	u32 next_rptr = ring->wptr + 5;
2570
2571	/* drop the CE preamble IB for the same context */
2572	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
2573		return;
2574
2575	if (need_ctx_switch)
2576		next_rptr += 2;
2577
2578	next_rptr += 4;
2579	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2580	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
2581	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2582	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2583	amdgpu_ring_write(ring, next_rptr);
2584
2585	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
2586	if (need_ctx_switch) {
2587		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2588		amdgpu_ring_write(ring, 0);
2589	}
2590
2591	if (ib->flags & AMDGPU_IB_FLAG_CE)
2592		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2593	else
2594		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2595
2596	control |= ib->length_dw |
2597		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
2598
2599	amdgpu_ring_write(ring, header);
2600	amdgpu_ring_write(ring,
2601#ifdef __BIG_ENDIAN
2602			  (2 << 0) |
2603#endif
2604			  (ib->gpu_addr & 0xFFFFFFFC));
2605	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2606	amdgpu_ring_write(ring, control);
2607}
2608
2609static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2610				  struct amdgpu_ib *ib)
2611{
2612	u32 header, control = 0;
2613	u32 next_rptr = ring->wptr + 5;
2614
2615	control |= INDIRECT_BUFFER_VALID;
2616	next_rptr += 4;
2617	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2618	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
2619	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2620	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2621	amdgpu_ring_write(ring, next_rptr);
2622
2623	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2624
2625	control |= ib->length_dw |
2626			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
2627
2628	amdgpu_ring_write(ring, header);
2629	amdgpu_ring_write(ring,
2630#ifdef __BIG_ENDIAN
2631					  (2 << 0) |
2632#endif
2633					  (ib->gpu_addr & 0xFFFFFFFC));
2634	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2635	amdgpu_ring_write(ring, control);
2636}
2637
2638/**
2639 * gfx_v7_0_ring_test_ib - basic ring IB test
2640 *
2641 * @ring: amdgpu_ring structure holding ring information
2642 *
2643 * Allocate an IB and execute it on the gfx ring (CIK).
2644 * Provides a basic gfx ring test to verify that IBs are working.
2645 * Returns 0 on success, error on failure.
2646 */
2647static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
2648{
2649	struct amdgpu_device *adev = ring->adev;
2650	struct amdgpu_ib ib;
2651	struct fence *f = NULL;
2652	uint32_t scratch;
2653	uint32_t tmp = 0;
2654	unsigned i;
2655	int r;
2656
2657	r = amdgpu_gfx_scratch_get(adev, &scratch);
2658	if (r) {
2659		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
2660		return r;
2661	}
2662	WREG32(scratch, 0xCAFEDEAD);
2663	memset(&ib, 0, sizeof(ib));
2664	r = amdgpu_ib_get(ring, NULL, 256, &ib);
2665	if (r) {
2666		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
2667		goto err1;
2668	}
2669	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2670	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2671	ib.ptr[2] = 0xDEADBEEF;
2672	ib.length_dw = 3;
2673
2674	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
2675						 AMDGPU_FENCE_OWNER_UNDEFINED,
2676						 &f);
2677	if (r)
2678		goto err2;
2679
2680	r = fence_wait(f, false);
2681	if (r) {
2682		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
2683		goto err2;
2684	}
2685	for (i = 0; i < adev->usec_timeout; i++) {
2686		tmp = RREG32(scratch);
2687		if (tmp == 0xDEADBEEF)
2688			break;
2689		DRM_UDELAY(1);
2690	}
2691	if (i < adev->usec_timeout) {
2692		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
2693			 ring->idx, i);
2694		goto err2;
2695	} else {
2696		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
2697			  scratch, tmp);
2698		r = -EINVAL;
2699	}
2700
2701err2:
2702	fence_put(f);
2703	amdgpu_ib_free(adev, &ib);
2704err1:
2705	amdgpu_gfx_scratch_free(adev, scratch);
2706	return r;
2707}
2708
2709/*
2710 * CP.
2711 * On CIK, gfx and compute now have independant command processors.
2712 *
2713 * GFX
2714 * Gfx consists of a single ring and can process both gfx jobs and
2715 * compute jobs.  The gfx CP consists of three microengines (ME):
2716 * PFP - Pre-Fetch Parser
2717 * ME - Micro Engine
2718 * CE - Constant Engine
2719 * The PFP and ME make up what is considered the Drawing Engine (DE).
2720 * The CE is an asynchronous engine used for updating buffer desciptors
2721 * used by the DE so that they can be loaded into cache in parallel
2722 * while the DE is processing state update packets.
2723 *
2724 * Compute
2725 * The compute CP consists of two microengines (ME):
2726 * MEC1 - Compute MicroEngine 1
2727 * MEC2 - Compute MicroEngine 2
2728 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2729 * The queues are exposed to userspace and are programmed directly
2730 * by the compute runtime.
2731 */
2732/**
2733 * gfx_v7_0_cp_gfx_enable - enable/disable the gfx CP MEs
2734 *
2735 * @adev: amdgpu_device pointer
2736 * @enable: enable or disable the MEs
2737 *
2738 * Halts or unhalts the gfx MEs.
2739 */
2740static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2741{
2742	int i;
2743
2744	if (enable) {
2745		WREG32(mmCP_ME_CNTL, 0);
2746	} else {
2747		WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2748		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2749			adev->gfx.gfx_ring[i].ready = false;
2750	}
2751	udelay(50);
2752}
2753
2754/**
2755 * gfx_v7_0_cp_gfx_load_microcode - load the gfx CP ME ucode
2756 *
2757 * @adev: amdgpu_device pointer
2758 *
2759 * Loads the gfx PFP, ME, and CE ucode.
2760 * Returns 0 for success, -EINVAL if the ucode is not available.
2761 */
2762static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2763{
2764	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2765	const struct gfx_firmware_header_v1_0 *ce_hdr;
2766	const struct gfx_firmware_header_v1_0 *me_hdr;
2767	const __le32 *fw_data;
2768	unsigned i, fw_size;
2769
2770	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2771		return -EINVAL;
2772
2773	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2774	ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2775	me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2776
2777	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2778	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2779	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2780	adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2781	adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2782	adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2783	adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2784	adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2785	adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2786
2787	gfx_v7_0_cp_gfx_enable(adev, false);
2788
2789	/* PFP */
2790	fw_data = (const __le32 *)
2791		(adev->gfx.pfp_fw->data +
2792		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2793	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2794	WREG32(mmCP_PFP_UCODE_ADDR, 0);
2795	for (i = 0; i < fw_size; i++)
2796		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2797	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2798
2799	/* CE */
2800	fw_data = (const __le32 *)
2801		(adev->gfx.ce_fw->data +
2802		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2803	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2804	WREG32(mmCP_CE_UCODE_ADDR, 0);
2805	for (i = 0; i < fw_size; i++)
2806		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2807	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2808
2809	/* ME */
2810	fw_data = (const __le32 *)
2811		(adev->gfx.me_fw->data +
2812		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2813	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2814	WREG32(mmCP_ME_RAM_WADDR, 0);
2815	for (i = 0; i < fw_size; i++)
2816		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2817	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2818
2819	return 0;
2820}
2821
2822/**
2823 * gfx_v7_0_cp_gfx_start - start the gfx ring
2824 *
2825 * @adev: amdgpu_device pointer
2826 *
2827 * Enables the ring and loads the clear state context and other
2828 * packets required to init the ring.
2829 * Returns 0 for success, error for failure.
2830 */
2831static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2832{
2833	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2834	const struct cs_section_def *sect = NULL;
2835	const struct cs_extent_def *ext = NULL;
2836	int r, i;
2837
2838	/* init the CP */
2839	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2840	WREG32(mmCP_ENDIAN_SWAP, 0);
2841	WREG32(mmCP_DEVICE_ID, 1);
2842
2843	gfx_v7_0_cp_gfx_enable(adev, true);
2844
2845	r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8);
2846	if (r) {
2847		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2848		return r;
2849	}
2850
2851	/* init the CE partitions.  CE only used for gfx on CIK */
2852	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2853	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2854	amdgpu_ring_write(ring, 0x8000);
2855	amdgpu_ring_write(ring, 0x8000);
2856
2857	/* clear state buffer */
2858	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2859	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2860
2861	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2862	amdgpu_ring_write(ring, 0x80000000);
2863	amdgpu_ring_write(ring, 0x80000000);
2864
2865	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2866		for (ext = sect->section; ext->extent != NULL; ++ext) {
2867			if (sect->id == SECT_CONTEXT) {
2868				amdgpu_ring_write(ring,
2869						  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2870				amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2871				for (i = 0; i < ext->reg_count; i++)
2872					amdgpu_ring_write(ring, ext->extent[i]);
2873			}
2874		}
2875	}
2876
2877	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2878	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2879	switch (adev->asic_type) {
2880	case CHIP_BONAIRE:
2881		amdgpu_ring_write(ring, 0x16000012);
2882		amdgpu_ring_write(ring, 0x00000000);
2883		break;
2884	case CHIP_KAVERI:
2885		amdgpu_ring_write(ring, 0x00000000); /* XXX */
2886		amdgpu_ring_write(ring, 0x00000000);
2887		break;
2888	case CHIP_KABINI:
2889	case CHIP_MULLINS:
2890		amdgpu_ring_write(ring, 0x00000000); /* XXX */
2891		amdgpu_ring_write(ring, 0x00000000);
2892		break;
2893	case CHIP_HAWAII:
2894		amdgpu_ring_write(ring, 0x3a00161a);
2895		amdgpu_ring_write(ring, 0x0000002e);
2896		break;
2897	default:
2898		amdgpu_ring_write(ring, 0x00000000);
2899		amdgpu_ring_write(ring, 0x00000000);
2900		break;
2901	}
2902
2903	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2904	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2905
2906	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2907	amdgpu_ring_write(ring, 0);
2908
2909	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2910	amdgpu_ring_write(ring, 0x00000316);
2911	amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2912	amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2913
2914	amdgpu_ring_unlock_commit(ring);
2915
2916	return 0;
2917}
2918
2919/**
2920 * gfx_v7_0_cp_gfx_resume - setup the gfx ring buffer registers
2921 *
2922 * @adev: amdgpu_device pointer
2923 *
2924 * Program the location and size of the gfx ring buffer
2925 * and test it to make sure it's working.
2926 * Returns 0 for success, error for failure.
2927 */
2928static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2929{
2930	struct amdgpu_ring *ring;
2931	u32 tmp;
2932	u32 rb_bufsz;
2933	u64 rb_addr, rptr_addr;
2934	int r;
2935
2936	WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2937	if (adev->asic_type != CHIP_HAWAII)
2938		WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2939
2940	/* Set the write pointer delay */
2941	WREG32(mmCP_RB_WPTR_DELAY, 0);
2942
2943	/* set the RB to use vmid 0 */
2944	WREG32(mmCP_RB_VMID, 0);
2945
2946	WREG32(mmSCRATCH_ADDR, 0);
2947
2948	/* ring 0 - compute and gfx */
2949	/* Set ring buffer size */
2950	ring = &adev->gfx.gfx_ring[0];
2951	rb_bufsz = order_base_2(ring->ring_size / 8);
2952	tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2953#ifdef __BIG_ENDIAN
2954	tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2955#endif
2956	WREG32(mmCP_RB0_CNTL, tmp);
2957
2958	/* Initialize the ring buffer's read and write pointers */
2959	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2960	ring->wptr = 0;
2961	WREG32(mmCP_RB0_WPTR, ring->wptr);
2962
2963	/* set the wb address wether it's enabled or not */
2964	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2965	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2966	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2967
2968	/* scratch register shadowing is no longer supported */
2969	WREG32(mmSCRATCH_UMSK, 0);
2970
2971	mdelay(1);
2972	WREG32(mmCP_RB0_CNTL, tmp);
2973
2974	rb_addr = ring->gpu_addr >> 8;
2975	WREG32(mmCP_RB0_BASE, rb_addr);
2976	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2977
2978	/* start the ring */
2979	gfx_v7_0_cp_gfx_start(adev);
2980	ring->ready = true;
2981	r = amdgpu_ring_test_ring(ring);
2982	if (r) {
2983		ring->ready = false;
2984		return r;
2985	}
2986
2987	return 0;
2988}
2989
2990static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
2991{
2992	u32 rptr;
2993
2994	rptr = ring->adev->wb.wb[ring->rptr_offs];
2995
2996	return rptr;
2997}
2998
2999static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3000{
3001	struct amdgpu_device *adev = ring->adev;
3002	u32 wptr;
3003
3004	wptr = RREG32(mmCP_RB0_WPTR);
3005
3006	return wptr;
3007}
3008
3009static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3010{
3011	struct amdgpu_device *adev = ring->adev;
3012
3013	WREG32(mmCP_RB0_WPTR, ring->wptr);
3014	(void)RREG32(mmCP_RB0_WPTR);
3015}
3016
3017static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3018{
3019	u32 rptr;
3020
3021	rptr = ring->adev->wb.wb[ring->rptr_offs];
3022
3023	return rptr;
3024}
3025
3026static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3027{
3028	u32 wptr;
3029
3030	/* XXX check if swapping is necessary on BE */
3031	wptr = ring->adev->wb.wb[ring->wptr_offs];
3032
3033	return wptr;
3034}
3035
3036static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3037{
3038	struct amdgpu_device *adev = ring->adev;
3039
3040	/* XXX check if swapping is necessary on BE */
3041	adev->wb.wb[ring->wptr_offs] = ring->wptr;
3042	WDOORBELL32(ring->doorbell_index, ring->wptr);
3043}
3044
3045/**
3046 * gfx_v7_0_cp_compute_enable - enable/disable the compute CP MEs
3047 *
3048 * @adev: amdgpu_device pointer
3049 * @enable: enable or disable the MEs
3050 *
3051 * Halts or unhalts the compute MEs.
3052 */
3053static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3054{
3055	int i;
3056
3057	if (enable) {
3058		WREG32(mmCP_MEC_CNTL, 0);
3059	} else {
3060		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3061		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3062			adev->gfx.compute_ring[i].ready = false;
3063	}
3064	udelay(50);
3065}
3066
3067/**
3068 * gfx_v7_0_cp_compute_load_microcode - load the compute CP ME ucode
3069 *
3070 * @adev: amdgpu_device pointer
3071 *
3072 * Loads the compute MEC1&2 ucode.
3073 * Returns 0 for success, -EINVAL if the ucode is not available.
3074 */
3075static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3076{
3077	const struct gfx_firmware_header_v1_0 *mec_hdr;
3078	const __le32 *fw_data;
3079	unsigned i, fw_size;
3080
3081	if (!adev->gfx.mec_fw)
3082		return -EINVAL;
3083
3084	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3085	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3086	adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
3087	adev->gfx.mec_feature_version = le32_to_cpu(
3088					mec_hdr->ucode_feature_version);
3089
3090	gfx_v7_0_cp_compute_enable(adev, false);
3091
3092	/* MEC1 */
3093	fw_data = (const __le32 *)
3094		(adev->gfx.mec_fw->data +
3095		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3096	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3097	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3098	for (i = 0; i < fw_size; i++)
3099		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
3100	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3101
3102	if (adev->asic_type == CHIP_KAVERI) {
3103		const struct gfx_firmware_header_v1_0 *mec2_hdr;
3104
3105		if (!adev->gfx.mec2_fw)
3106			return -EINVAL;
3107
3108		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3109		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3110		adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
3111		adev->gfx.mec2_feature_version = le32_to_cpu(
3112				mec2_hdr->ucode_feature_version);
3113
3114		/* MEC2 */
3115		fw_data = (const __le32 *)
3116			(adev->gfx.mec2_fw->data +
3117			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3118		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3119		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3120		for (i = 0; i < fw_size; i++)
3121			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
3122		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3123	}
3124
3125	return 0;
3126}
3127
3128/**
3129 * gfx_v7_0_cp_compute_start - start the compute queues
3130 *
3131 * @adev: amdgpu_device pointer
3132 *
3133 * Enable the compute queues.
3134 * Returns 0 for success, error for failure.
3135 */
3136static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev)
3137{
3138	gfx_v7_0_cp_compute_enable(adev, true);
3139
3140	return 0;
3141}
3142
3143/**
3144 * gfx_v7_0_cp_compute_fini - stop the compute queues
3145 *
3146 * @adev: amdgpu_device pointer
3147 *
3148 * Stop the compute queues and tear down the driver queue
3149 * info.
3150 */
3151static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
3152{
3153	int i, r;
3154
3155	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3156		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3157
3158		if (ring->mqd_obj) {
3159			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3160			if (unlikely(r != 0))
3161				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3162
3163			amdgpu_bo_unpin(ring->mqd_obj);
3164			amdgpu_bo_unreserve(ring->mqd_obj);
3165
3166			amdgpu_bo_unref(&ring->mqd_obj);
3167			ring->mqd_obj = NULL;
3168		}
3169	}
3170}
3171
3172static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
3173{
3174	int r;
3175
3176	if (adev->gfx.mec.hpd_eop_obj) {
3177		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
3178		if (unlikely(r != 0))
3179			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3180		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
3181		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
3182
3183		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
3184		adev->gfx.mec.hpd_eop_obj = NULL;
3185	}
3186}
3187
3188#define MEC_HPD_SIZE 2048
3189
3190static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
3191{
3192	int r;
3193	u32 *hpd;
3194
3195	/*
3196	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3197	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3198	 * Nonetheless, we assign only 1 pipe because all other pipes will
3199	 * be handled by KFD
3200	 */
3201	adev->gfx.mec.num_mec = 1;
3202	adev->gfx.mec.num_pipe = 1;
3203	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
3204
3205	if (adev->gfx.mec.hpd_eop_obj == NULL) {
3206		r = amdgpu_bo_create(adev,
3207				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
3208				     PAGE_SIZE, true,
3209				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
3210				     &adev->gfx.mec.hpd_eop_obj);
3211		if (r) {
3212			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
3213			return r;
3214		}
3215	}
3216
3217	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
3218	if (unlikely(r != 0)) {
3219		gfx_v7_0_mec_fini(adev);
3220		return r;
3221	}
3222	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
3223			  &adev->gfx.mec.hpd_eop_gpu_addr);
3224	if (r) {
3225		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
3226		gfx_v7_0_mec_fini(adev);
3227		return r;
3228	}
3229	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
3230	if (r) {
3231		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
3232		gfx_v7_0_mec_fini(adev);
3233		return r;
3234	}
3235
3236	/* clear memory.  Not sure if this is required or not */
3237	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
3238
3239	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
3240	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
3241
3242	return 0;
3243}
3244
3245struct hqd_registers
3246{
3247	u32 cp_mqd_base_addr;
3248	u32 cp_mqd_base_addr_hi;
3249	u32 cp_hqd_active;
3250	u32 cp_hqd_vmid;
3251	u32 cp_hqd_persistent_state;
3252	u32 cp_hqd_pipe_priority;
3253	u32 cp_hqd_queue_priority;
3254	u32 cp_hqd_quantum;
3255	u32 cp_hqd_pq_base;
3256	u32 cp_hqd_pq_base_hi;
3257	u32 cp_hqd_pq_rptr;
3258	u32 cp_hqd_pq_rptr_report_addr;
3259	u32 cp_hqd_pq_rptr_report_addr_hi;
3260	u32 cp_hqd_pq_wptr_poll_addr;
3261	u32 cp_hqd_pq_wptr_poll_addr_hi;
3262	u32 cp_hqd_pq_doorbell_control;
3263	u32 cp_hqd_pq_wptr;
3264	u32 cp_hqd_pq_control;
3265	u32 cp_hqd_ib_base_addr;
3266	u32 cp_hqd_ib_base_addr_hi;
3267	u32 cp_hqd_ib_rptr;
3268	u32 cp_hqd_ib_control;
3269	u32 cp_hqd_iq_timer;
3270	u32 cp_hqd_iq_rptr;
3271	u32 cp_hqd_dequeue_request;
3272	u32 cp_hqd_dma_offload;
3273	u32 cp_hqd_sema_cmd;
3274	u32 cp_hqd_msg_type;
3275	u32 cp_hqd_atomic0_preop_lo;
3276	u32 cp_hqd_atomic0_preop_hi;
3277	u32 cp_hqd_atomic1_preop_lo;
3278	u32 cp_hqd_atomic1_preop_hi;
3279	u32 cp_hqd_hq_scheduler0;
3280	u32 cp_hqd_hq_scheduler1;
3281	u32 cp_mqd_control;
3282};
3283
3284struct bonaire_mqd
3285{
3286	u32 header;
3287	u32 dispatch_initiator;
3288	u32 dimensions[3];
3289	u32 start_idx[3];
3290	u32 num_threads[3];
3291	u32 pipeline_stat_enable;
3292	u32 perf_counter_enable;
3293	u32 pgm[2];
3294	u32 tba[2];
3295	u32 tma[2];
3296	u32 pgm_rsrc[2];
3297	u32 vmid;
3298	u32 resource_limits;
3299	u32 static_thread_mgmt01[2];
3300	u32 tmp_ring_size;
3301	u32 static_thread_mgmt23[2];
3302	u32 restart[3];
3303	u32 thread_trace_enable;
3304	u32 reserved1;
3305	u32 user_data[16];
3306	u32 vgtcs_invoke_count[2];
3307	struct hqd_registers queue_state;
3308	u32 dequeue_cntr;
3309	u32 interrupt_queue[64];
3310};
3311
3312/**
3313 * gfx_v7_0_cp_compute_resume - setup the compute queue registers
3314 *
3315 * @adev: amdgpu_device pointer
3316 *
3317 * Program the compute queues and test them to make sure they
3318 * are working.
3319 * Returns 0 for success, error for failure.
3320 */
3321static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3322{
3323	int r, i, j;
3324	u32 tmp;
3325	bool use_doorbell = true;
3326	u64 hqd_gpu_addr;
3327	u64 mqd_gpu_addr;
3328	u64 eop_gpu_addr;
3329	u64 wb_gpu_addr;
3330	u32 *buf;
3331	struct bonaire_mqd *mqd;
3332
3333	r = gfx_v7_0_cp_compute_start(adev);
3334	if (r)
3335		return r;
3336
3337	/* fix up chicken bits */
3338	tmp = RREG32(mmCP_CPF_DEBUG);
3339	tmp |= (1 << 23);
3340	WREG32(mmCP_CPF_DEBUG, tmp);
3341
3342	/* init the pipes */
3343	mutex_lock(&adev->srbm_mutex);
3344	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3345		int me = (i < 4) ? 1 : 2;
3346		int pipe = (i < 4) ? i : (i - 4);
3347
3348		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3349
3350		cik_srbm_select(adev, me, pipe, 0, 0);
3351
3352		/* write the EOP addr */
3353		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3354		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3355
3356		/* set the VMID assigned */
3357		WREG32(mmCP_HPD_EOP_VMID, 0);
3358
3359		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3360		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
3361		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
3362		tmp |= order_base_2(MEC_HPD_SIZE / 8);
3363		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
3364	}
3365	cik_srbm_select(adev, 0, 0, 0, 0);
3366	mutex_unlock(&adev->srbm_mutex);
3367
3368	/* init the queues.  Just two for now. */
3369	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3370		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3371
3372		if (ring->mqd_obj == NULL) {
3373			r = amdgpu_bo_create(adev,
3374					     sizeof(struct bonaire_mqd),
3375					     PAGE_SIZE, true,
3376					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
3377					     &ring->mqd_obj);
3378			if (r) {
3379				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3380				return r;
3381			}
3382		}
3383
3384		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3385		if (unlikely(r != 0)) {
3386			gfx_v7_0_cp_compute_fini(adev);
3387			return r;
3388		}
3389		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3390				  &mqd_gpu_addr);
3391		if (r) {
3392			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3393			gfx_v7_0_cp_compute_fini(adev);
3394			return r;
3395		}
3396		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3397		if (r) {
3398			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3399			gfx_v7_0_cp_compute_fini(adev);
3400			return r;
3401		}
3402
3403		/* init the mqd struct */
3404		memset(buf, 0, sizeof(struct bonaire_mqd));
3405
3406		mqd = (struct bonaire_mqd *)buf;
3407		mqd->header = 0xC0310800;
3408		mqd->static_thread_mgmt01[0] = 0xffffffff;
3409		mqd->static_thread_mgmt01[1] = 0xffffffff;
3410		mqd->static_thread_mgmt23[0] = 0xffffffff;
3411		mqd->static_thread_mgmt23[1] = 0xffffffff;
3412
3413		mutex_lock(&adev->srbm_mutex);
3414		cik_srbm_select(adev, ring->me,
3415				ring->pipe,
3416				ring->queue, 0);
3417
3418		/* disable wptr polling */
3419		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3420		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
3421		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3422
3423		/* enable doorbell? */
3424		mqd->queue_state.cp_hqd_pq_doorbell_control =
3425			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3426		if (use_doorbell)
3427			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3428		else
3429			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3430		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3431		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3432
3433		/* disable the queue if it's active */
3434		mqd->queue_state.cp_hqd_dequeue_request = 0;
3435		mqd->queue_state.cp_hqd_pq_rptr = 0;
3436		mqd->queue_state.cp_hqd_pq_wptr= 0;
3437		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3438			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3439			for (j = 0; j < adev->usec_timeout; j++) {
3440				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3441					break;
3442				udelay(1);
3443			}
3444			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3445			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3446			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3447		}
3448
3449		/* set the pointer to the MQD */
3450		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3451		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3452		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3453		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3454		/* set MQD vmid to 0 */
3455		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
3456		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
3457		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3458
3459		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3460		hqd_gpu_addr = ring->gpu_addr >> 8;
3461		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3462		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3463		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3464		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3465
3466		/* set up the HQD, this is similar to CP_RB0_CNTL */
3467		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
3468		mqd->queue_state.cp_hqd_pq_control &=
3469			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
3470					CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
3471
3472		mqd->queue_state.cp_hqd_pq_control |=
3473			order_base_2(ring->ring_size / 8);
3474		mqd->queue_state.cp_hqd_pq_control |=
3475			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
3476#ifdef __BIG_ENDIAN
3477		mqd->queue_state.cp_hqd_pq_control |=
3478			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
3479#endif
3480		mqd->queue_state.cp_hqd_pq_control &=
3481			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
3482				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
3483				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
3484		mqd->queue_state.cp_hqd_pq_control |=
3485			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
3486			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
3487		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3488
3489		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3490		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3491		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3492		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3493		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3494		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3495		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3496
3497		/* set the wb address wether it's enabled or not */
3498		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3499		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3500		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3501			upper_32_bits(wb_gpu_addr) & 0xffff;
3502		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3503		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3504		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3505		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3506
3507		/* enable the doorbell if requested */
3508		if (use_doorbell) {
3509			mqd->queue_state.cp_hqd_pq_doorbell_control =
3510				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3511			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3512				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
3513			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3514				(ring->doorbell_index <<
3515				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
3516			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3517				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3518			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3519				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
3520				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
3521
3522		} else {
3523			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3524		}
3525		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3526		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3527
3528		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3529		ring->wptr = 0;
3530		mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
3531		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3532		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3533
3534		/* set the vmid for the queue */
3535		mqd->queue_state.cp_hqd_vmid = 0;
3536		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3537
3538		/* activate the queue */
3539		mqd->queue_state.cp_hqd_active = 1;
3540		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3541
3542		cik_srbm_select(adev, 0, 0, 0, 0);
3543		mutex_unlock(&adev->srbm_mutex);
3544
3545		amdgpu_bo_kunmap(ring->mqd_obj);
3546		amdgpu_bo_unreserve(ring->mqd_obj);
3547
3548		ring->ready = true;
3549		r = amdgpu_ring_test_ring(ring);
3550		if (r)
3551			ring->ready = false;
3552	}
3553
3554	return 0;
3555}
3556
3557static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
3558{
3559	gfx_v7_0_cp_gfx_enable(adev, enable);
3560	gfx_v7_0_cp_compute_enable(adev, enable);
3561}
3562
3563static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
3564{
3565	int r;
3566
3567	r = gfx_v7_0_cp_gfx_load_microcode(adev);
3568	if (r)
3569		return r;
3570	r = gfx_v7_0_cp_compute_load_microcode(adev);
3571	if (r)
3572		return r;
3573
3574	return 0;
3575}
3576
3577static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3578					       bool enable)
3579{
3580	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3581
3582	if (enable)
3583		tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3584				CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3585	else
3586		tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3587				CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3588	WREG32(mmCP_INT_CNTL_RING0, tmp);
3589}
3590
3591static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3592{
3593	int r;
3594
3595	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3596
3597	r = gfx_v7_0_cp_load_microcode(adev);
3598	if (r)
3599		return r;
3600
3601	r = gfx_v7_0_cp_gfx_resume(adev);
3602	if (r)
3603		return r;
3604	r = gfx_v7_0_cp_compute_resume(adev);
3605	if (r)
3606		return r;
3607
3608	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3609
3610	return 0;
3611}
3612
3613/*
3614 * vm
3615 * VMID 0 is the physical GPU addresses as used by the kernel.
3616 * VMIDs 1-15 are used for userspace clients and are handled
3617 * by the amdgpu vm/hsa code.
3618 */
3619/**
3620 * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
3621 *
3622 * @adev: amdgpu_device pointer
3623 *
3624 * Update the page table base and flush the VM TLB
3625 * using the CP (CIK).
3626 */
3627static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3628					unsigned vm_id, uint64_t pd_addr)
3629{
3630	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
3631	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
3632	uint64_t addr = ring->fence_drv.gpu_addr;
3633
3634	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3635	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
3636				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
3637				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
3638	amdgpu_ring_write(ring, addr & 0xfffffffc);
3639	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3640	amdgpu_ring_write(ring, seq);
3641	amdgpu_ring_write(ring, 0xffffffff);
3642	amdgpu_ring_write(ring, 4); /* poll interval */
3643
3644	if (usepfp) {
3645		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
3646		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3647		amdgpu_ring_write(ring, 0);
3648		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3649		amdgpu_ring_write(ring, 0);
3650	}
3651
3652	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3653	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3654				 WRITE_DATA_DST_SEL(0)));
3655	if (vm_id < 8) {
3656		amdgpu_ring_write(ring,
3657				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
3658	} else {
3659		amdgpu_ring_write(ring,
3660				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
3661	}
3662	amdgpu_ring_write(ring, 0);
3663	amdgpu_ring_write(ring, pd_addr >> 12);
3664
3665	/* bits 0-15 are the VM contexts0-15 */
3666	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3667	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3668				 WRITE_DATA_DST_SEL(0)));
3669	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3670	amdgpu_ring_write(ring, 0);
3671	amdgpu_ring_write(ring, 1 << vm_id);
3672
3673	/* wait for the invalidate to complete */
3674	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3675	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
3676				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
3677				 WAIT_REG_MEM_ENGINE(0))); /* me */
3678	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3679	amdgpu_ring_write(ring, 0);
3680	amdgpu_ring_write(ring, 0); /* ref */
3681	amdgpu_ring_write(ring, 0); /* mask */
3682	amdgpu_ring_write(ring, 0x20); /* poll interval */
3683
3684	/* compute doesn't have PFP */
3685	if (usepfp) {
3686		/* sync PFP to ME, otherwise we might get invalid PFP reads */
3687		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3688		amdgpu_ring_write(ring, 0x0);
3689
3690		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
3691		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3692		amdgpu_ring_write(ring, 0);
3693		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3694		amdgpu_ring_write(ring, 0);
3695	}
3696}
3697
3698/*
3699 * RLC
3700 * The RLC is a multi-purpose microengine that handles a
3701 * variety of functions.
3702 */
3703static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
3704{
3705	int r;
3706
3707	/* save restore block */
3708	if (adev->gfx.rlc.save_restore_obj) {
3709		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
3710		if (unlikely(r != 0))
3711			dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
3712		amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
3713		amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3714
3715		amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj);
3716		adev->gfx.rlc.save_restore_obj = NULL;
3717	}
3718
3719	/* clear state block */
3720	if (adev->gfx.rlc.clear_state_obj) {
3721		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
3722		if (unlikely(r != 0))
3723			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
3724		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
3725		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3726
3727		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
3728		adev->gfx.rlc.clear_state_obj = NULL;
3729	}
3730
3731	/* clear state block */
3732	if (adev->gfx.rlc.cp_table_obj) {
3733		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
3734		if (unlikely(r != 0))
3735			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
3736		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
3737		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3738
3739		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
3740		adev->gfx.rlc.cp_table_obj = NULL;
3741	}
3742}
3743
3744static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3745{
3746	const u32 *src_ptr;
3747	volatile u32 *dst_ptr;
3748	u32 dws, i;
3749	const struct cs_section_def *cs_data;
3750	int r;
3751
3752	/* allocate rlc buffers */
3753	if (adev->flags & AMD_IS_APU) {
3754		if (adev->asic_type == CHIP_KAVERI) {
3755			adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3756			adev->gfx.rlc.reg_list_size =
3757				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3758		} else {
3759			adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3760			adev->gfx.rlc.reg_list_size =
3761				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3762		}
3763	}
3764	adev->gfx.rlc.cs_data = ci_cs_data;
3765	adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
3766
3767	src_ptr = adev->gfx.rlc.reg_list;
3768	dws = adev->gfx.rlc.reg_list_size;
3769	dws += (5 * 16) + 48 + 48 + 64;
3770
3771	cs_data = adev->gfx.rlc.cs_data;
3772
3773	if (src_ptr) {
3774		/* save restore block */
3775		if (adev->gfx.rlc.save_restore_obj == NULL) {
3776			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
3777					     AMDGPU_GEM_DOMAIN_VRAM,
3778					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
3779					     NULL, NULL,
3780					     &adev->gfx.rlc.save_restore_obj);
3781			if (r) {
3782				dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
3783				return r;
3784			}
3785		}
3786
3787		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
3788		if (unlikely(r != 0)) {
3789			gfx_v7_0_rlc_fini(adev);
3790			return r;
3791		}
3792		r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM,
3793				  &adev->gfx.rlc.save_restore_gpu_addr);
3794		if (r) {
3795			amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3796			dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r);
3797			gfx_v7_0_rlc_fini(adev);
3798			return r;
3799		}
3800
3801		r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr);
3802		if (r) {
3803			dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r);
3804			gfx_v7_0_rlc_fini(adev);
3805			return r;
3806		}
3807		/* write the sr buffer */
3808		dst_ptr = adev->gfx.rlc.sr_ptr;
3809		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3810			dst_ptr[i] = cpu_to_le32(src_ptr[i]);
3811		amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
3812		amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3813	}
3814
3815	if (cs_data) {
3816		/* clear state block */
3817		adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
3818
3819		if (adev->gfx.rlc.clear_state_obj == NULL) {
3820			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
3821					     AMDGPU_GEM_DOMAIN_VRAM,
3822					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
3823					     NULL, NULL,
3824					     &adev->gfx.rlc.clear_state_obj);
3825			if (r) {
3826				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
3827				gfx_v7_0_rlc_fini(adev);
3828				return r;
3829			}
3830		}
3831		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
3832		if (unlikely(r != 0)) {
3833			gfx_v7_0_rlc_fini(adev);
3834			return r;
3835		}
3836		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
3837				  &adev->gfx.rlc.clear_state_gpu_addr);
3838		if (r) {
3839			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3840			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
3841			gfx_v7_0_rlc_fini(adev);
3842			return r;
3843		}
3844
3845		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
3846		if (r) {
3847			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
3848			gfx_v7_0_rlc_fini(adev);
3849			return r;
3850		}
3851		/* set up the cs buffer */
3852		dst_ptr = adev->gfx.rlc.cs_ptr;
3853		gfx_v7_0_get_csb_buffer(adev, dst_ptr);
3854		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
3855		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3856	}
3857
3858	if (adev->gfx.rlc.cp_table_size) {
3859		if (adev->gfx.rlc.cp_table_obj == NULL) {
3860			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
3861					     AMDGPU_GEM_DOMAIN_VRAM,
3862					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
3863					     NULL, NULL,
3864					     &adev->gfx.rlc.cp_table_obj);
3865			if (r) {
3866				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
3867				gfx_v7_0_rlc_fini(adev);
3868				return r;
3869			}
3870		}
3871
3872		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
3873		if (unlikely(r != 0)) {
3874			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
3875			gfx_v7_0_rlc_fini(adev);
3876			return r;
3877		}
3878		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
3879				  &adev->gfx.rlc.cp_table_gpu_addr);
3880		if (r) {
3881			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3882			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
3883			gfx_v7_0_rlc_fini(adev);
3884			return r;
3885		}
3886		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
3887		if (r) {
3888			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
3889			gfx_v7_0_rlc_fini(adev);
3890			return r;
3891		}
3892
3893		gfx_v7_0_init_cp_pg_table(adev);
3894
3895		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
3896		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3897
3898	}
3899
3900	return 0;
3901}
3902
3903static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3904{
3905	u32 tmp;
3906
3907	tmp = RREG32(mmRLC_LB_CNTL);
3908	if (enable)
3909		tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3910	else
3911		tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3912	WREG32(mmRLC_LB_CNTL, tmp);
3913}
3914
3915static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3916{
3917	u32 i, j, k;
3918	u32 mask;
3919
3920	mutex_lock(&adev->grbm_idx_mutex);
3921	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3922		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3923			gfx_v7_0_select_se_sh(adev, i, j);
3924			for (k = 0; k < adev->usec_timeout; k++) {
3925				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3926					break;
3927				udelay(1);
3928			}
3929		}
3930	}
3931	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3932	mutex_unlock(&adev->grbm_idx_mutex);
3933
3934	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3935		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3936		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3937		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3938	for (k = 0; k < adev->usec_timeout; k++) {
3939		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3940			break;
3941		udelay(1);
3942	}
3943}
3944
3945static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3946{
3947	u32 tmp;
3948
3949	tmp = RREG32(mmRLC_CNTL);
3950	if (tmp != rlc)
3951		WREG32(mmRLC_CNTL, rlc);
3952}
3953
3954static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3955{
3956	u32 data, orig;
3957
3958	orig = data = RREG32(mmRLC_CNTL);
3959
3960	if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3961		u32 i;
3962
3963		data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3964		WREG32(mmRLC_CNTL, data);
3965
3966		for (i = 0; i < adev->usec_timeout; i++) {
3967			if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3968				break;
3969			udelay(1);
3970		}
3971
3972		gfx_v7_0_wait_for_rlc_serdes(adev);
3973	}
3974
3975	return orig;
3976}
3977
3978void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3979{
3980	u32 tmp, i, mask;
3981
3982	tmp = 0x1 | (1 << 1);
3983	WREG32(mmRLC_GPR_REG2, tmp);
3984
3985	mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3986		RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3987	for (i = 0; i < adev->usec_timeout; i++) {
3988		if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3989			break;
3990		udelay(1);
3991	}
3992
3993	for (i = 0; i < adev->usec_timeout; i++) {
3994		if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3995			break;
3996		udelay(1);
3997	}
3998}
3999
4000void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
4001{
4002	u32 tmp;
4003
4004	tmp = 0x1 | (0 << 1);
4005	WREG32(mmRLC_GPR_REG2, tmp);
4006}
4007
4008/**
4009 * gfx_v7_0_rlc_stop - stop the RLC ME
4010 *
4011 * @adev: amdgpu_device pointer
4012 *
4013 * Halt the RLC ME (MicroEngine) (CIK).
4014 */
4015void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
4016{
4017	WREG32(mmRLC_CNTL, 0);
4018
4019	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4020
4021	gfx_v7_0_wait_for_rlc_serdes(adev);
4022}
4023
4024/**
4025 * gfx_v7_0_rlc_start - start the RLC ME
4026 *
4027 * @adev: amdgpu_device pointer
4028 *
4029 * Unhalt the RLC ME (MicroEngine) (CIK).
4030 */
4031static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
4032{
4033	WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
4034
4035	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4036
4037	udelay(50);
4038}
4039
4040static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
4041{
4042	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
4043
4044	tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4045	WREG32(mmGRBM_SOFT_RESET, tmp);
4046	udelay(50);
4047	tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4048	WREG32(mmGRBM_SOFT_RESET, tmp);
4049	udelay(50);
4050}
4051
4052/**
4053 * gfx_v7_0_rlc_resume - setup the RLC hw
4054 *
4055 * @adev: amdgpu_device pointer
4056 *
4057 * Initialize the RLC registers, load the ucode,
4058 * and start the RLC (CIK).
4059 * Returns 0 for success, -EINVAL if the ucode is not available.
4060 */
4061static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
4062{
4063	const struct rlc_firmware_header_v1_0 *hdr;
4064	const __le32 *fw_data;
4065	unsigned i, fw_size;
4066	u32 tmp;
4067
4068	if (!adev->gfx.rlc_fw)
4069		return -EINVAL;
4070
4071	hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
4072	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4073	adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
4074	adev->gfx.rlc_feature_version = le32_to_cpu(
4075					hdr->ucode_feature_version);
4076
4077	gfx_v7_0_rlc_stop(adev);
4078
4079	/* disable CG */
4080	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4081	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4082
4083	gfx_v7_0_rlc_reset(adev);
4084
4085	gfx_v7_0_init_pg(adev);
4086
4087	WREG32(mmRLC_LB_CNTR_INIT, 0);
4088	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
4089
4090	mutex_lock(&adev->grbm_idx_mutex);
4091	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4092	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
4093	WREG32(mmRLC_LB_PARAMS, 0x00600408);
4094	WREG32(mmRLC_LB_CNTL, 0x80000004);
4095	mutex_unlock(&adev->grbm_idx_mutex);
4096
4097	WREG32(mmRLC_MC_CNTL, 0);
4098	WREG32(mmRLC_UCODE_CNTL, 0);
4099
4100	fw_data = (const __le32 *)
4101		(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4102	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4103	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4104	for (i = 0; i < fw_size; i++)
4105		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4106	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4107
4108	/* XXX - find out what chips support lbpw */
4109	gfx_v7_0_enable_lbpw(adev, false);
4110
4111	if (adev->asic_type == CHIP_BONAIRE)
4112		WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
4113
4114	gfx_v7_0_rlc_start(adev);
4115
4116	return 0;
4117}
4118
4119static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
4120{
4121	u32 data, orig, tmp, tmp2;
4122
4123	orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4124
4125	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGCG)) {
4126		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4127
4128		tmp = gfx_v7_0_halt_rlc(adev);
4129
4130		mutex_lock(&adev->grbm_idx_mutex);
4131		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4132		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4133		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4134		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
4135			RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
4136			RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
4137		WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
4138		mutex_unlock(&adev->grbm_idx_mutex);
4139
4140		gfx_v7_0_update_rlc(adev, tmp);
4141
4142		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4143	} else {
4144		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4145
4146		RREG32(mmCB_CGTT_SCLK_CTRL);
4147		RREG32(mmCB_CGTT_SCLK_CTRL);
4148		RREG32(mmCB_CGTT_SCLK_CTRL);
4149		RREG32(mmCB_CGTT_SCLK_CTRL);
4150
4151		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4152	}
4153
4154	if (orig != data)
4155		WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4156
4157}
4158
4159static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
4160{
4161	u32 data, orig, tmp = 0;
4162
4163	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGCG)) {
4164		if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) {
4165			if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CP_LS) {
4166				orig = data = RREG32(mmCP_MEM_SLP_CNTL);
4167				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4168				if (orig != data)
4169					WREG32(mmCP_MEM_SLP_CNTL, data);
4170			}
4171		}
4172
4173		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4174		data |= 0x00000001;
4175		data &= 0xfffffffd;
4176		if (orig != data)
4177			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4178
4179		tmp = gfx_v7_0_halt_rlc(adev);
4180
4181		mutex_lock(&adev->grbm_idx_mutex);
4182		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4183		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4184		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4185		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
4186			RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
4187		WREG32(mmRLC_SERDES_WR_CTRL, data);
4188		mutex_unlock(&adev->grbm_idx_mutex);
4189
4190		gfx_v7_0_update_rlc(adev, tmp);
4191
4192		if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS) {
4193			orig = data = RREG32(mmCGTS_SM_CTRL_REG);
4194			data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
4195			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4196			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4197			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4198			if ((adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) &&
4199			    (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS_LS))
4200				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4201			data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
4202			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4203			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4204			if (orig != data)
4205				WREG32(mmCGTS_SM_CTRL_REG, data);
4206		}
4207	} else {
4208		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4209		data |= 0x00000003;
4210		if (orig != data)
4211			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4212
4213		data = RREG32(mmRLC_MEM_SLP_CNTL);
4214		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4215			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4216			WREG32(mmRLC_MEM_SLP_CNTL, data);
4217		}
4218
4219		data = RREG32(mmCP_MEM_SLP_CNTL);
4220		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4221			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4222			WREG32(mmCP_MEM_SLP_CNTL, data);
4223		}
4224
4225		orig = data = RREG32(mmCGTS_SM_CTRL_REG);
4226		data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4227		if (orig != data)
4228			WREG32(mmCGTS_SM_CTRL_REG, data);
4229
4230		tmp = gfx_v7_0_halt_rlc(adev);
4231
4232		mutex_lock(&adev->grbm_idx_mutex);
4233		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4234		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4235		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4236		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
4237		WREG32(mmRLC_SERDES_WR_CTRL, data);
4238		mutex_unlock(&adev->grbm_idx_mutex);
4239
4240		gfx_v7_0_update_rlc(adev, tmp);
4241	}
4242}
4243
4244static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
4245			       bool enable)
4246{
4247	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4248	/* order matters! */
4249	if (enable) {
4250		gfx_v7_0_enable_mgcg(adev, true);
4251		gfx_v7_0_enable_cgcg(adev, true);
4252	} else {
4253		gfx_v7_0_enable_cgcg(adev, false);
4254		gfx_v7_0_enable_mgcg(adev, false);
4255	}
4256	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4257}
4258
4259static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
4260						bool enable)
4261{
4262	u32 data, orig;
4263
4264	orig = data = RREG32(mmRLC_PG_CNTL);
4265	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
4266		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
4267	else
4268		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
4269	if (orig != data)
4270		WREG32(mmRLC_PG_CNTL, data);
4271}
4272
4273static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
4274						bool enable)
4275{
4276	u32 data, orig;
4277
4278	orig = data = RREG32(mmRLC_PG_CNTL);
4279	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
4280		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
4281	else
4282		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
4283	if (orig != data)
4284		WREG32(mmRLC_PG_CNTL, data);
4285}
4286
4287static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
4288{
4289	u32 data, orig;
4290
4291	orig = data = RREG32(mmRLC_PG_CNTL);
4292	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_CP))
4293		data &= ~0x8000;
4294	else
4295		data |= 0x8000;
4296	if (orig != data)
4297		WREG32(mmRLC_PG_CNTL, data);
4298}
4299
4300static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
4301{
4302	u32 data, orig;
4303
4304	orig = data = RREG32(mmRLC_PG_CNTL);
4305	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GDS))
4306		data &= ~0x2000;
4307	else
4308		data |= 0x2000;
4309	if (orig != data)
4310		WREG32(mmRLC_PG_CNTL, data);
4311}
4312
4313static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
4314{
4315	const __le32 *fw_data;
4316	volatile u32 *dst_ptr;
4317	int me, i, max_me = 4;
4318	u32 bo_offset = 0;
4319	u32 table_offset, table_size;
4320
4321	if (adev->asic_type == CHIP_KAVERI)
4322		max_me = 5;
4323
4324	if (adev->gfx.rlc.cp_table_ptr == NULL)
4325		return;
4326
4327	/* write the cp table buffer */
4328	dst_ptr = adev->gfx.rlc.cp_table_ptr;
4329	for (me = 0; me < max_me; me++) {
4330		if (me == 0) {
4331			const struct gfx_firmware_header_v1_0 *hdr =
4332				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
4333			fw_data = (const __le32 *)
4334				(adev->gfx.ce_fw->data +
4335				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4336			table_offset = le32_to_cpu(hdr->jt_offset);
4337			table_size = le32_to_cpu(hdr->jt_size);
4338		} else if (me == 1) {
4339			const struct gfx_firmware_header_v1_0 *hdr =
4340				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
4341			fw_data = (const __le32 *)
4342				(adev->gfx.pfp_fw->data +
4343				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4344			table_offset = le32_to_cpu(hdr->jt_offset);
4345			table_size = le32_to_cpu(hdr->jt_size);
4346		} else if (me == 2) {
4347			const struct gfx_firmware_header_v1_0 *hdr =
4348				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
4349			fw_data = (const __le32 *)
4350				(adev->gfx.me_fw->data +
4351				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4352			table_offset = le32_to_cpu(hdr->jt_offset);
4353			table_size = le32_to_cpu(hdr->jt_size);
4354		} else if (me == 3) {
4355			const struct gfx_firmware_header_v1_0 *hdr =
4356				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4357			fw_data = (const __le32 *)
4358				(adev->gfx.mec_fw->data +
4359				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4360			table_offset = le32_to_cpu(hdr->jt_offset);
4361			table_size = le32_to_cpu(hdr->jt_size);
4362		} else {
4363			const struct gfx_firmware_header_v1_0 *hdr =
4364				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4365			fw_data = (const __le32 *)
4366				(adev->gfx.mec2_fw->data +
4367				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4368			table_offset = le32_to_cpu(hdr->jt_offset);
4369			table_size = le32_to_cpu(hdr->jt_size);
4370		}
4371
4372		for (i = 0; i < table_size; i ++) {
4373			dst_ptr[bo_offset + i] =
4374				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
4375		}
4376
4377		bo_offset += table_size;
4378	}
4379}
4380
4381static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
4382				     bool enable)
4383{
4384	u32 data, orig;
4385
4386	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) {
4387		orig = data = RREG32(mmRLC_PG_CNTL);
4388		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
4389		if (orig != data)
4390			WREG32(mmRLC_PG_CNTL, data);
4391
4392		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
4393		data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
4394		if (orig != data)
4395			WREG32(mmRLC_AUTO_PG_CTRL, data);
4396	} else {
4397		orig = data = RREG32(mmRLC_PG_CNTL);
4398		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
4399		if (orig != data)
4400			WREG32(mmRLC_PG_CNTL, data);
4401
4402		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
4403		data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
4404		if (orig != data)
4405			WREG32(mmRLC_AUTO_PG_CTRL, data);
4406
4407		data = RREG32(mmDB_RENDER_CONTROL);
4408	}
4409}
4410
4411static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev,
4412					 u32 se, u32 sh)
4413{
4414	u32 mask = 0, tmp, tmp1;
4415	int i;
4416
4417	gfx_v7_0_select_se_sh(adev, se, sh);
4418	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4419	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4420	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4421
4422	tmp &= 0xffff0000;
4423
4424	tmp |= tmp1;
4425	tmp >>= 16;
4426
4427	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
4428		mask <<= 1;
4429		mask |= 1;
4430	}
4431
4432	return (~tmp) & mask;
4433}
4434
4435static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
4436{
4437	uint32_t tmp, active_cu_number;
4438	struct amdgpu_cu_info cu_info;
4439
4440	gfx_v7_0_get_cu_info(adev, &cu_info);
4441	tmp = cu_info.ao_cu_mask;
4442	active_cu_number = cu_info.number;
4443
4444	WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, tmp);
4445
4446	tmp = RREG32(mmRLC_MAX_PG_CU);
4447	tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
4448	tmp |= (active_cu_number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
4449	WREG32(mmRLC_MAX_PG_CU, tmp);
4450}
4451
4452static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
4453					    bool enable)
4454{
4455	u32 data, orig;
4456
4457	orig = data = RREG32(mmRLC_PG_CNTL);
4458	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG))
4459		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
4460	else
4461		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
4462	if (orig != data)
4463		WREG32(mmRLC_PG_CNTL, data);
4464}
4465
4466static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
4467					     bool enable)
4468{
4469	u32 data, orig;
4470
4471	orig = data = RREG32(mmRLC_PG_CNTL);
4472	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_DMG))
4473		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
4474	else
4475		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
4476	if (orig != data)
4477		WREG32(mmRLC_PG_CNTL, data);
4478}
4479
4480#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
4481#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
4482
4483static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
4484{
4485	u32 data, orig;
4486	u32 i;
4487
4488	if (adev->gfx.rlc.cs_data) {
4489		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
4490		WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
4491		WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
4492		WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
4493	} else {
4494		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
4495		for (i = 0; i < 3; i++)
4496			WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
4497	}
4498	if (adev->gfx.rlc.reg_list) {
4499		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
4500		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
4501			WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
4502	}
4503
4504	orig = data = RREG32(mmRLC_PG_CNTL);
4505	data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
4506	if (orig != data)
4507		WREG32(mmRLC_PG_CNTL, data);
4508
4509	WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
4510	WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4511
4512	data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
4513	data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
4514	data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4515	WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
4516
4517	data = 0x10101010;
4518	WREG32(mmRLC_PG_DELAY, data);
4519
4520	data = RREG32(mmRLC_PG_DELAY_2);
4521	data &= ~0xff;
4522	data |= 0x3;
4523	WREG32(mmRLC_PG_DELAY_2, data);
4524
4525	data = RREG32(mmRLC_AUTO_PG_CTRL);
4526	data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
4527	data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
4528	WREG32(mmRLC_AUTO_PG_CTRL, data);
4529
4530}
4531
4532static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
4533{
4534	gfx_v7_0_enable_gfx_cgpg(adev, enable);
4535	gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
4536	gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
4537}
4538
4539static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
4540{
4541	u32 count = 0;
4542	const struct cs_section_def *sect = NULL;
4543	const struct cs_extent_def *ext = NULL;
4544
4545	if (adev->gfx.rlc.cs_data == NULL)
4546		return 0;
4547
4548	/* begin clear state */
4549	count += 2;
4550	/* context control state */
4551	count += 3;
4552
4553	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4554		for (ext = sect->section; ext->extent != NULL; ++ext) {
4555			if (sect->id == SECT_CONTEXT)
4556				count += 2 + ext->reg_count;
4557			else
4558				return 0;
4559		}
4560	}
4561	/* pa_sc_raster_config/pa_sc_raster_config1 */
4562	count += 4;
4563	/* end clear state */
4564	count += 2;
4565	/* clear state */
4566	count += 2;
4567
4568	return count;
4569}
4570
4571static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
4572				    volatile u32 *buffer)
4573{
4574	u32 count = 0, i;
4575	const struct cs_section_def *sect = NULL;
4576	const struct cs_extent_def *ext = NULL;
4577
4578	if (adev->gfx.rlc.cs_data == NULL)
4579		return;
4580	if (buffer == NULL)
4581		return;
4582
4583	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4584	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4585
4586	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4587	buffer[count++] = cpu_to_le32(0x80000000);
4588	buffer[count++] = cpu_to_le32(0x80000000);
4589
4590	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4591		for (ext = sect->section; ext->extent != NULL; ++ext) {
4592			if (sect->id == SECT_CONTEXT) {
4593				buffer[count++] =
4594					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
4595				buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4596				for (i = 0; i < ext->reg_count; i++)
4597					buffer[count++] = cpu_to_le32(ext->extent[i]);
4598			} else {
4599				return;
4600			}
4601		}
4602	}
4603
4604	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4605	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4606	switch (adev->asic_type) {
4607	case CHIP_BONAIRE:
4608		buffer[count++] = cpu_to_le32(0x16000012);
4609		buffer[count++] = cpu_to_le32(0x00000000);
4610		break;
4611	case CHIP_KAVERI:
4612		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
4613		buffer[count++] = cpu_to_le32(0x00000000);
4614		break;
4615	case CHIP_KABINI:
4616	case CHIP_MULLINS:
4617		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
4618		buffer[count++] = cpu_to_le32(0x00000000);
4619		break;
4620	case CHIP_HAWAII:
4621		buffer[count++] = cpu_to_le32(0x3a00161a);
4622		buffer[count++] = cpu_to_le32(0x0000002e);
4623		break;
4624	default:
4625		buffer[count++] = cpu_to_le32(0x00000000);
4626		buffer[count++] = cpu_to_le32(0x00000000);
4627		break;
4628	}
4629
4630	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4631	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4632
4633	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4634	buffer[count++] = cpu_to_le32(0);
4635}
4636
4637static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4638{
4639	if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
4640			      AMDGPU_PG_SUPPORT_GFX_SMG |
4641			      AMDGPU_PG_SUPPORT_GFX_DMG |
4642			      AMDGPU_PG_SUPPORT_CP |
4643			      AMDGPU_PG_SUPPORT_GDS |
4644			      AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
4645		gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4646		gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4647		if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
4648			gfx_v7_0_init_gfx_cgpg(adev);
4649			gfx_v7_0_enable_cp_pg(adev, true);
4650			gfx_v7_0_enable_gds_pg(adev, true);
4651		}
4652		gfx_v7_0_init_ao_cu_mask(adev);
4653		gfx_v7_0_update_gfx_pg(adev, true);
4654	}
4655}
4656
4657static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4658{
4659	if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
4660			      AMDGPU_PG_SUPPORT_GFX_SMG |
4661			      AMDGPU_PG_SUPPORT_GFX_DMG |
4662			      AMDGPU_PG_SUPPORT_CP |
4663			      AMDGPU_PG_SUPPORT_GDS |
4664			      AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
4665		gfx_v7_0_update_gfx_pg(adev, false);
4666		if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
4667			gfx_v7_0_enable_cp_pg(adev, false);
4668			gfx_v7_0_enable_gds_pg(adev, false);
4669		}
4670	}
4671}
4672
4673/**
4674 * gfx_v7_0_get_gpu_clock_counter - return GPU clock counter snapshot
4675 *
4676 * @adev: amdgpu_device pointer
4677 *
4678 * Fetches a GPU clock counter snapshot (SI).
4679 * Returns the 64 bit clock counter snapshot.
4680 */
4681uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4682{
4683	uint64_t clock;
4684
4685	mutex_lock(&adev->gfx.gpu_clock_mutex);
4686	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4687	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4688		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4689	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4690	return clock;
4691}
4692
4693static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4694					  uint32_t vmid,
4695					  uint32_t gds_base, uint32_t gds_size,
4696					  uint32_t gws_base, uint32_t gws_size,
4697					  uint32_t oa_base, uint32_t oa_size)
4698{
4699	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4700	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4701
4702	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4703	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4704
4705	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4706	oa_size = oa_size >> AMDGPU_OA_SHIFT;
4707
4708	/* GDS Base */
4709	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4710	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4711				WRITE_DATA_DST_SEL(0)));
4712	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4713	amdgpu_ring_write(ring, 0);
4714	amdgpu_ring_write(ring, gds_base);
4715
4716	/* GDS Size */
4717	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4718	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4719				WRITE_DATA_DST_SEL(0)));
4720	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4721	amdgpu_ring_write(ring, 0);
4722	amdgpu_ring_write(ring, gds_size);
4723
4724	/* GWS */
4725	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4726	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4727				WRITE_DATA_DST_SEL(0)));
4728	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4729	amdgpu_ring_write(ring, 0);
4730	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4731
4732	/* OA */
4733	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4734	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4735				WRITE_DATA_DST_SEL(0)));
4736	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4737	amdgpu_ring_write(ring, 0);
4738	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4739}
4740
4741static int gfx_v7_0_early_init(void *handle)
4742{
4743	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4744
4745	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4746	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
4747	gfx_v7_0_set_ring_funcs(adev);
4748	gfx_v7_0_set_irq_funcs(adev);
4749	gfx_v7_0_set_gds_init(adev);
4750
4751	return 0;
4752}
4753
4754static int gfx_v7_0_sw_init(void *handle)
4755{
4756	struct amdgpu_ring *ring;
4757	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4758	int i, r;
4759
4760	/* EOP Event */
4761	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
4762	if (r)
4763		return r;
4764
4765	/* Privileged reg */
4766	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
4767	if (r)
4768		return r;
4769
4770	/* Privileged inst */
4771	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
4772	if (r)
4773		return r;
4774
4775	gfx_v7_0_scratch_init(adev);
4776
4777	r = gfx_v7_0_init_microcode(adev);
4778	if (r) {
4779		DRM_ERROR("Failed to load gfx firmware!\n");
4780		return r;
4781	}
4782
4783	r = gfx_v7_0_rlc_init(adev);
4784	if (r) {
4785		DRM_ERROR("Failed to init rlc BOs!\n");
4786		return r;
4787	}
4788
4789	/* allocate mec buffers */
4790	r = gfx_v7_0_mec_init(adev);
4791	if (r) {
4792		DRM_ERROR("Failed to init MEC BOs!\n");
4793		return r;
4794	}
4795
4796	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4797		ring = &adev->gfx.gfx_ring[i];
4798		ring->ring_obj = NULL;
4799		sprintf(ring->name, "gfx");
4800		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
4801				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
4802				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
4803				     AMDGPU_RING_TYPE_GFX);
4804		if (r)
4805			return r;
4806	}
4807
4808	/* set up the compute queues */
4809	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4810		unsigned irq_type;
4811
4812		/* max 32 queues per MEC */
4813		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
4814			DRM_ERROR("Too many (%d) compute rings!\n", i);
4815			break;
4816		}
4817		ring = &adev->gfx.compute_ring[i];
4818		ring->ring_obj = NULL;
4819		ring->use_doorbell = true;
4820		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
4821		ring->me = 1; /* first MEC */
4822		ring->pipe = i / 8;
4823		ring->queue = i % 8;
4824		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
4825		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
4826		/* type-2 packets are deprecated on MEC, use type-3 instead */
4827		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
4828				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
4829				     &adev->gfx.eop_irq, irq_type,
4830				     AMDGPU_RING_TYPE_COMPUTE);
4831		if (r)
4832			return r;
4833	}
4834
4835	/* reserve GDS, GWS and OA resource for gfx */
4836	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
4837			PAGE_SIZE, true,
4838			AMDGPU_GEM_DOMAIN_GDS, 0,
4839			NULL, NULL, &adev->gds.gds_gfx_bo);
4840	if (r)
4841		return r;
4842
4843	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
4844		PAGE_SIZE, true,
4845		AMDGPU_GEM_DOMAIN_GWS, 0,
4846		NULL, NULL, &adev->gds.gws_gfx_bo);
4847	if (r)
4848		return r;
4849
4850	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
4851			PAGE_SIZE, true,
4852			AMDGPU_GEM_DOMAIN_OA, 0,
4853			NULL, NULL, &adev->gds.oa_gfx_bo);
4854	if (r)
4855		return r;
4856
4857	return r;
4858}
4859
4860static int gfx_v7_0_sw_fini(void *handle)
4861{
4862	int i;
4863	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4864
4865	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
4866	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
4867	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
4868
4869	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4870		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4871	for (i = 0; i < adev->gfx.num_compute_rings; i++)
4872		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4873
4874	gfx_v7_0_cp_compute_fini(adev);
4875	gfx_v7_0_rlc_fini(adev);
4876	gfx_v7_0_mec_fini(adev);
4877
4878	return 0;
4879}
4880
4881static int gfx_v7_0_hw_init(void *handle)
4882{
4883	int r;
4884	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4885
4886	gfx_v7_0_gpu_init(adev);
4887
4888	/* init rlc */
4889	r = gfx_v7_0_rlc_resume(adev);
4890	if (r)
4891		return r;
4892
4893	r = gfx_v7_0_cp_resume(adev);
4894	if (r)
4895		return r;
4896
4897	adev->gfx.ce_ram_size = 0x8000;
4898
4899	return r;
4900}
4901
4902static int gfx_v7_0_hw_fini(void *handle)
4903{
4904	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4905
4906	gfx_v7_0_cp_enable(adev, false);
4907	gfx_v7_0_rlc_stop(adev);
4908	gfx_v7_0_fini_pg(adev);
4909
4910	return 0;
4911}
4912
4913static int gfx_v7_0_suspend(void *handle)
4914{
4915	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4916
4917	return gfx_v7_0_hw_fini(adev);
4918}
4919
4920static int gfx_v7_0_resume(void *handle)
4921{
4922	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4923
4924	return gfx_v7_0_hw_init(adev);
4925}
4926
4927static bool gfx_v7_0_is_idle(void *handle)
4928{
4929	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4930
4931	if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4932		return false;
4933	else
4934		return true;
4935}
4936
4937static int gfx_v7_0_wait_for_idle(void *handle)
4938{
4939	unsigned i;
4940	u32 tmp;
4941	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4942
4943	for (i = 0; i < adev->usec_timeout; i++) {
4944		/* read MC_STATUS */
4945		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4946
4947		if (!tmp)
4948			return 0;
4949		udelay(1);
4950	}
4951	return -ETIMEDOUT;
4952}
4953
4954static void gfx_v7_0_print_status(void *handle)
4955{
4956	int i;
4957	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4958
4959	dev_info(adev->dev, "GFX 7.x registers\n");
4960	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
4961		RREG32(mmGRBM_STATUS));
4962	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
4963		RREG32(mmGRBM_STATUS2));
4964	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4965		RREG32(mmGRBM_STATUS_SE0));
4966	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4967		RREG32(mmGRBM_STATUS_SE1));
4968	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4969		RREG32(mmGRBM_STATUS_SE2));
4970	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4971		RREG32(mmGRBM_STATUS_SE3));
4972	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
4973	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4974		 RREG32(mmCP_STALLED_STAT1));
4975	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4976		 RREG32(mmCP_STALLED_STAT2));
4977	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4978		 RREG32(mmCP_STALLED_STAT3));
4979	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4980		 RREG32(mmCP_CPF_BUSY_STAT));
4981	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4982		 RREG32(mmCP_CPF_STALLED_STAT1));
4983	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
4984	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
4985	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4986		 RREG32(mmCP_CPC_STALLED_STAT1));
4987	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
4988
4989	for (i = 0; i < 32; i++) {
4990		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
4991			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
4992	}
4993	for (i = 0; i < 16; i++) {
4994		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
4995			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
4996	}
4997	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4998		dev_info(adev->dev, "  se: %d\n", i);
4999		gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
5000		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
5001			 RREG32(mmPA_SC_RASTER_CONFIG));
5002		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
5003			 RREG32(mmPA_SC_RASTER_CONFIG_1));
5004	}
5005	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5006
5007	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
5008		 RREG32(mmGB_ADDR_CONFIG));
5009	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
5010		 RREG32(mmHDP_ADDR_CONFIG));
5011	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
5012		 RREG32(mmDMIF_ADDR_CALC));
5013	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
5014		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
5015	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
5016		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
5017	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
5018		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
5019	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
5020		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
5021	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
5022		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
5023
5024	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
5025		 RREG32(mmCP_MEQ_THRESHOLDS));
5026	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
5027		 RREG32(mmSX_DEBUG_1));
5028	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
5029		 RREG32(mmTA_CNTL_AUX));
5030	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
5031		 RREG32(mmSPI_CONFIG_CNTL));
5032	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
5033		 RREG32(mmSQ_CONFIG));
5034	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
5035		 RREG32(mmDB_DEBUG));
5036	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
5037		 RREG32(mmDB_DEBUG2));
5038	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
5039		 RREG32(mmDB_DEBUG3));
5040	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
5041		 RREG32(mmCB_HW_CONTROL));
5042	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
5043		 RREG32(mmSPI_CONFIG_CNTL_1));
5044	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
5045		 RREG32(mmPA_SC_FIFO_SIZE));
5046	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
5047		 RREG32(mmVGT_NUM_INSTANCES));
5048	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
5049		 RREG32(mmCP_PERFMON_CNTL));
5050	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
5051		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
5052	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
5053		 RREG32(mmVGT_CACHE_INVALIDATION));
5054	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
5055		 RREG32(mmVGT_GS_VERTEX_REUSE));
5056	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
5057		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
5058	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
5059		 RREG32(mmPA_CL_ENHANCE));
5060	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
5061		 RREG32(mmPA_SC_ENHANCE));
5062
5063	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
5064		 RREG32(mmCP_ME_CNTL));
5065	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
5066		 RREG32(mmCP_MAX_CONTEXT));
5067	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
5068		 RREG32(mmCP_ENDIAN_SWAP));
5069	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
5070		 RREG32(mmCP_DEVICE_ID));
5071
5072	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
5073		 RREG32(mmCP_SEM_WAIT_TIMER));
5074	if (adev->asic_type != CHIP_HAWAII)
5075		dev_info(adev->dev, "  CP_SEM_INCOMPLETE_TIMER_CNTL=0x%08X\n",
5076			 RREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL));
5077
5078	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
5079		 RREG32(mmCP_RB_WPTR_DELAY));
5080	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
5081		 RREG32(mmCP_RB_VMID));
5082	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
5083		 RREG32(mmCP_RB0_CNTL));
5084	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
5085		 RREG32(mmCP_RB0_WPTR));
5086	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
5087		 RREG32(mmCP_RB0_RPTR_ADDR));
5088	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
5089		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
5090	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
5091		 RREG32(mmCP_RB0_CNTL));
5092	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
5093		 RREG32(mmCP_RB0_BASE));
5094	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
5095		 RREG32(mmCP_RB0_BASE_HI));
5096	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
5097		 RREG32(mmCP_MEC_CNTL));
5098	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
5099		 RREG32(mmCP_CPF_DEBUG));
5100
5101	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
5102		 RREG32(mmSCRATCH_ADDR));
5103	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
5104		 RREG32(mmSCRATCH_UMSK));
5105
5106	/* init the pipes */
5107	mutex_lock(&adev->srbm_mutex);
5108	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
5109		int me = (i < 4) ? 1 : 2;
5110		int pipe = (i < 4) ? i : (i - 4);
5111		int queue;
5112
5113		dev_info(adev->dev, "  me: %d, pipe: %d\n", me, pipe);
5114		cik_srbm_select(adev, me, pipe, 0, 0);
5115		dev_info(adev->dev, "  CP_HPD_EOP_BASE_ADDR=0x%08X\n",
5116			 RREG32(mmCP_HPD_EOP_BASE_ADDR));
5117		dev_info(adev->dev, "  CP_HPD_EOP_BASE_ADDR_HI=0x%08X\n",
5118			 RREG32(mmCP_HPD_EOP_BASE_ADDR_HI));
5119		dev_info(adev->dev, "  CP_HPD_EOP_VMID=0x%08X\n",
5120			 RREG32(mmCP_HPD_EOP_VMID));
5121		dev_info(adev->dev, "  CP_HPD_EOP_CONTROL=0x%08X\n",
5122			 RREG32(mmCP_HPD_EOP_CONTROL));
5123
5124		for (queue = 0; queue < 8; queue++) {
5125			cik_srbm_select(adev, me, pipe, queue, 0);
5126			dev_info(adev->dev, "  queue: %d\n", queue);
5127			dev_info(adev->dev, "  CP_PQ_WPTR_POLL_CNTL=0x%08X\n",
5128				 RREG32(mmCP_PQ_WPTR_POLL_CNTL));
5129			dev_info(adev->dev, "  CP_HQD_PQ_DOORBELL_CONTROL=0x%08X\n",
5130				 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL));
5131			dev_info(adev->dev, "  CP_HQD_ACTIVE=0x%08X\n",
5132				 RREG32(mmCP_HQD_ACTIVE));
5133			dev_info(adev->dev, "  CP_HQD_DEQUEUE_REQUEST=0x%08X\n",
5134				 RREG32(mmCP_HQD_DEQUEUE_REQUEST));
5135			dev_info(adev->dev, "  CP_HQD_PQ_RPTR=0x%08X\n",
5136				 RREG32(mmCP_HQD_PQ_RPTR));
5137			dev_info(adev->dev, "  CP_HQD_PQ_WPTR=0x%08X\n",
5138				 RREG32(mmCP_HQD_PQ_WPTR));
5139			dev_info(adev->dev, "  CP_HQD_PQ_BASE=0x%08X\n",
5140				 RREG32(mmCP_HQD_PQ_BASE));
5141			dev_info(adev->dev, "  CP_HQD_PQ_BASE_HI=0x%08X\n",
5142				 RREG32(mmCP_HQD_PQ_BASE_HI));
5143			dev_info(adev->dev, "  CP_HQD_PQ_CONTROL=0x%08X\n",
5144				 RREG32(mmCP_HQD_PQ_CONTROL));
5145			dev_info(adev->dev, "  CP_HQD_PQ_WPTR_POLL_ADDR=0x%08X\n",
5146				 RREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR));
5147			dev_info(adev->dev, "  CP_HQD_PQ_WPTR_POLL_ADDR_HI=0x%08X\n",
5148				 RREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI));
5149			dev_info(adev->dev, "  CP_HQD_PQ_RPTR_REPORT_ADDR=0x%08X\n",
5150				 RREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR));
5151			dev_info(adev->dev, "  CP_HQD_PQ_RPTR_REPORT_ADDR_HI=0x%08X\n",
5152				 RREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI));
5153			dev_info(adev->dev, "  CP_HQD_PQ_DOORBELL_CONTROL=0x%08X\n",
5154				 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL));
5155			dev_info(adev->dev, "  CP_HQD_PQ_WPTR=0x%08X\n",
5156				 RREG32(mmCP_HQD_PQ_WPTR));
5157			dev_info(adev->dev, "  CP_HQD_VMID=0x%08X\n",
5158				 RREG32(mmCP_HQD_VMID));
5159			dev_info(adev->dev, "  CP_MQD_BASE_ADDR=0x%08X\n",
5160				 RREG32(mmCP_MQD_BASE_ADDR));
5161			dev_info(adev->dev, "  CP_MQD_BASE_ADDR_HI=0x%08X\n",
5162				 RREG32(mmCP_MQD_BASE_ADDR_HI));
5163			dev_info(adev->dev, "  CP_MQD_CONTROL=0x%08X\n",
5164				 RREG32(mmCP_MQD_CONTROL));
5165		}
5166	}
5167	cik_srbm_select(adev, 0, 0, 0, 0);
5168	mutex_unlock(&adev->srbm_mutex);
5169
5170	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
5171		 RREG32(mmCP_INT_CNTL_RING0));
5172	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
5173		 RREG32(mmRLC_LB_CNTL));
5174	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
5175		 RREG32(mmRLC_CNTL));
5176	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
5177		 RREG32(mmRLC_CGCG_CGLS_CTRL));
5178	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
5179		 RREG32(mmRLC_LB_CNTR_INIT));
5180	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
5181		 RREG32(mmRLC_LB_CNTR_MAX));
5182	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
5183		 RREG32(mmRLC_LB_INIT_CU_MASK));
5184	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
5185		 RREG32(mmRLC_LB_PARAMS));
5186	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
5187		 RREG32(mmRLC_LB_CNTL));
5188	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
5189		 RREG32(mmRLC_MC_CNTL));
5190	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
5191		 RREG32(mmRLC_UCODE_CNTL));
5192
5193	if (adev->asic_type == CHIP_BONAIRE)
5194		dev_info(adev->dev, "  RLC_DRIVER_CPDMA_STATUS=0x%08X\n",
5195			 RREG32(mmRLC_DRIVER_CPDMA_STATUS));
5196
5197	mutex_lock(&adev->srbm_mutex);
5198	for (i = 0; i < 16; i++) {
5199		cik_srbm_select(adev, 0, 0, 0, i);
5200		dev_info(adev->dev, "  VM %d:\n", i);
5201		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
5202			 RREG32(mmSH_MEM_CONFIG));
5203		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
5204			 RREG32(mmSH_MEM_APE1_BASE));
5205		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
5206			 RREG32(mmSH_MEM_APE1_LIMIT));
5207		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
5208			 RREG32(mmSH_MEM_BASES));
5209	}
5210	cik_srbm_select(adev, 0, 0, 0, 0);
5211	mutex_unlock(&adev->srbm_mutex);
5212}
5213
5214static int gfx_v7_0_soft_reset(void *handle)
5215{
5216	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5217	u32 tmp;
5218	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5219
5220	/* GRBM_STATUS */
5221	tmp = RREG32(mmGRBM_STATUS);
5222	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5223		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5224		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5225		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5226		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5227		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
5228		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
5229			GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
5230
5231	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5232		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
5233		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
5234	}
5235
5236	/* GRBM_STATUS2 */
5237	tmp = RREG32(mmGRBM_STATUS2);
5238	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
5239		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
5240
5241	/* SRBM_STATUS */
5242	tmp = RREG32(mmSRBM_STATUS);
5243	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
5244		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
5245
5246	if (grbm_soft_reset || srbm_soft_reset) {
5247		gfx_v7_0_print_status((void *)adev);
5248		/* disable CG/PG */
5249		gfx_v7_0_fini_pg(adev);
5250		gfx_v7_0_update_cg(adev, false);
5251
5252		/* stop the rlc */
5253		gfx_v7_0_rlc_stop(adev);
5254
5255		/* Disable GFX parsing/prefetching */
5256		WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
5257
5258		/* Disable MEC parsing/prefetching */
5259		WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
5260
5261		if (grbm_soft_reset) {
5262			tmp = RREG32(mmGRBM_SOFT_RESET);
5263			tmp |= grbm_soft_reset;
5264			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5265			WREG32(mmGRBM_SOFT_RESET, tmp);
5266			tmp = RREG32(mmGRBM_SOFT_RESET);
5267
5268			udelay(50);
5269
5270			tmp &= ~grbm_soft_reset;
5271			WREG32(mmGRBM_SOFT_RESET, tmp);
5272			tmp = RREG32(mmGRBM_SOFT_RESET);
5273		}
5274
5275		if (srbm_soft_reset) {
5276			tmp = RREG32(mmSRBM_SOFT_RESET);
5277			tmp |= srbm_soft_reset;
5278			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5279			WREG32(mmSRBM_SOFT_RESET, tmp);
5280			tmp = RREG32(mmSRBM_SOFT_RESET);
5281
5282			udelay(50);
5283
5284			tmp &= ~srbm_soft_reset;
5285			WREG32(mmSRBM_SOFT_RESET, tmp);
5286			tmp = RREG32(mmSRBM_SOFT_RESET);
5287		}
5288		/* Wait a little for things to settle down */
5289		udelay(50);
5290		gfx_v7_0_print_status((void *)adev);
5291	}
5292	return 0;
5293}
5294
5295static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5296						 enum amdgpu_interrupt_state state)
5297{
5298	u32 cp_int_cntl;
5299
5300	switch (state) {
5301	case AMDGPU_IRQ_STATE_DISABLE:
5302		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5303		cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5304		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5305		break;
5306	case AMDGPU_IRQ_STATE_ENABLE:
5307		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5308		cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5309		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5310		break;
5311	default:
5312		break;
5313	}
5314}
5315
5316static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5317						     int me, int pipe,
5318						     enum amdgpu_interrupt_state state)
5319{
5320	u32 mec_int_cntl, mec_int_cntl_reg;
5321
5322	/*
5323	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5324	 * handles the setting of interrupts for this specific pipe. All other
5325	 * pipes' interrupts are set by amdkfd.
5326	 */
5327
5328	if (me == 1) {
5329		switch (pipe) {
5330		case 0:
5331			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5332			break;
5333		default:
5334			DRM_DEBUG("invalid pipe %d\n", pipe);
5335			return;
5336		}
5337	} else {
5338		DRM_DEBUG("invalid me %d\n", me);
5339		return;
5340	}
5341
5342	switch (state) {
5343	case AMDGPU_IRQ_STATE_DISABLE:
5344		mec_int_cntl = RREG32(mec_int_cntl_reg);
5345		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5346		WREG32(mec_int_cntl_reg, mec_int_cntl);
5347		break;
5348	case AMDGPU_IRQ_STATE_ENABLE:
5349		mec_int_cntl = RREG32(mec_int_cntl_reg);
5350		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5351		WREG32(mec_int_cntl_reg, mec_int_cntl);
5352		break;
5353	default:
5354		break;
5355	}
5356}
5357
5358static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5359					     struct amdgpu_irq_src *src,
5360					     unsigned type,
5361					     enum amdgpu_interrupt_state state)
5362{
5363	u32 cp_int_cntl;
5364
5365	switch (state) {
5366	case AMDGPU_IRQ_STATE_DISABLE:
5367		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5368		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
5369		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5370		break;
5371	case AMDGPU_IRQ_STATE_ENABLE:
5372		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5373		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
5374		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5375		break;
5376	default:
5377		break;
5378	}
5379
5380	return 0;
5381}
5382
5383static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5384					      struct amdgpu_irq_src *src,
5385					      unsigned type,
5386					      enum amdgpu_interrupt_state state)
5387{
5388	u32 cp_int_cntl;
5389
5390	switch (state) {
5391	case AMDGPU_IRQ_STATE_DISABLE:
5392		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5393		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
5394		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5395		break;
5396	case AMDGPU_IRQ_STATE_ENABLE:
5397		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5398		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
5399		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5400		break;
5401	default:
5402		break;
5403	}
5404
5405	return 0;
5406}
5407
5408static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5409					    struct amdgpu_irq_src *src,
5410					    unsigned type,
5411					    enum amdgpu_interrupt_state state)
5412{
5413	switch (type) {
5414	case AMDGPU_CP_IRQ_GFX_EOP:
5415		gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
5416		break;
5417	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5418		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5419		break;
5420	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5421		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5422		break;
5423	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5424		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5425		break;
5426	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5427		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5428		break;
5429	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5430		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5431		break;
5432	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5433		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5434		break;
5435	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5436		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5437		break;
5438	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5439		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5440		break;
5441	default:
5442		break;
5443	}
5444	return 0;
5445}
5446
5447static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
5448			    struct amdgpu_irq_src *source,
5449			    struct amdgpu_iv_entry *entry)
5450{
5451	u8 me_id, pipe_id;
5452	struct amdgpu_ring *ring;
5453	int i;
5454
5455	DRM_DEBUG("IH: CP EOP\n");
5456	me_id = (entry->ring_id & 0x0c) >> 2;
5457	pipe_id = (entry->ring_id & 0x03) >> 0;
5458	switch (me_id) {
5459	case 0:
5460		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5461		break;
5462	case 1:
5463	case 2:
5464		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5465			ring = &adev->gfx.compute_ring[i];
5466			if ((ring->me == me_id) & (ring->pipe == pipe_id))
5467				amdgpu_fence_process(ring);
5468		}
5469		break;
5470	}
5471	return 0;
5472}
5473
5474static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
5475				 struct amdgpu_irq_src *source,
5476				 struct amdgpu_iv_entry *entry)
5477{
5478	DRM_ERROR("Illegal register access in command stream\n");
5479	schedule_work(&adev->reset_work);
5480	return 0;
5481}
5482
5483static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
5484				  struct amdgpu_irq_src *source,
5485				  struct amdgpu_iv_entry *entry)
5486{
5487	DRM_ERROR("Illegal instruction in command stream\n");
5488	// XXX soft reset the gfx block only
5489	schedule_work(&adev->reset_work);
5490	return 0;
5491}
5492
5493static int gfx_v7_0_set_clockgating_state(void *handle,
5494					  enum amd_clockgating_state state)
5495{
5496	bool gate = false;
5497	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5498
5499	if (state == AMD_CG_STATE_GATE)
5500		gate = true;
5501
5502	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
5503	/* order matters! */
5504	if (gate) {
5505		gfx_v7_0_enable_mgcg(adev, true);
5506		gfx_v7_0_enable_cgcg(adev, true);
5507	} else {
5508		gfx_v7_0_enable_cgcg(adev, false);
5509		gfx_v7_0_enable_mgcg(adev, false);
5510	}
5511	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
5512
5513	return 0;
5514}
5515
5516static int gfx_v7_0_set_powergating_state(void *handle,
5517					  enum amd_powergating_state state)
5518{
5519	bool gate = false;
5520	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5521
5522	if (state == AMD_PG_STATE_GATE)
5523		gate = true;
5524
5525	if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
5526			      AMDGPU_PG_SUPPORT_GFX_SMG |
5527			      AMDGPU_PG_SUPPORT_GFX_DMG |
5528			      AMDGPU_PG_SUPPORT_CP |
5529			      AMDGPU_PG_SUPPORT_GDS |
5530			      AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
5531		gfx_v7_0_update_gfx_pg(adev, gate);
5532		if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
5533			gfx_v7_0_enable_cp_pg(adev, gate);
5534			gfx_v7_0_enable_gds_pg(adev, gate);
5535		}
5536	}
5537
5538	return 0;
5539}
5540
5541const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
5542	.early_init = gfx_v7_0_early_init,
5543	.late_init = NULL,
5544	.sw_init = gfx_v7_0_sw_init,
5545	.sw_fini = gfx_v7_0_sw_fini,
5546	.hw_init = gfx_v7_0_hw_init,
5547	.hw_fini = gfx_v7_0_hw_fini,
5548	.suspend = gfx_v7_0_suspend,
5549	.resume = gfx_v7_0_resume,
5550	.is_idle = gfx_v7_0_is_idle,
5551	.wait_for_idle = gfx_v7_0_wait_for_idle,
5552	.soft_reset = gfx_v7_0_soft_reset,
5553	.print_status = gfx_v7_0_print_status,
5554	.set_clockgating_state = gfx_v7_0_set_clockgating_state,
5555	.set_powergating_state = gfx_v7_0_set_powergating_state,
5556};
5557
5558static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5559	.get_rptr = gfx_v7_0_ring_get_rptr_gfx,
5560	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
5561	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
5562	.parse_cs = NULL,
5563	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
5564	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
5565	.emit_semaphore = gfx_v7_0_ring_emit_semaphore,
5566	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5567	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5568	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5569	.test_ring = gfx_v7_0_ring_test_ring,
5570	.test_ib = gfx_v7_0_ring_test_ib,
5571	.insert_nop = amdgpu_ring_insert_nop,
5572};
5573
5574static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5575	.get_rptr = gfx_v7_0_ring_get_rptr_compute,
5576	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
5577	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
5578	.parse_cs = NULL,
5579	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
5580	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
5581	.emit_semaphore = gfx_v7_0_ring_emit_semaphore,
5582	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5583	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5584	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5585	.test_ring = gfx_v7_0_ring_test_ring,
5586	.test_ib = gfx_v7_0_ring_test_ib,
5587	.insert_nop = amdgpu_ring_insert_nop,
5588};
5589
5590static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5591{
5592	int i;
5593
5594	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5595		adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5596	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5597		adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5598}
5599
5600static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
5601	.set = gfx_v7_0_set_eop_interrupt_state,
5602	.process = gfx_v7_0_eop_irq,
5603};
5604
5605static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
5606	.set = gfx_v7_0_set_priv_reg_fault_state,
5607	.process = gfx_v7_0_priv_reg_irq,
5608};
5609
5610static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
5611	.set = gfx_v7_0_set_priv_inst_fault_state,
5612	.process = gfx_v7_0_priv_inst_irq,
5613};
5614
5615static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5616{
5617	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5618	adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5619
5620	adev->gfx.priv_reg_irq.num_types = 1;
5621	adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5622
5623	adev->gfx.priv_inst_irq.num_types = 1;
5624	adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5625}
5626
5627static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5628{
5629	/* init asci gds info */
5630	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5631	adev->gds.gws.total_size = 64;
5632	adev->gds.oa.total_size = 16;
5633
5634	if (adev->gds.mem.total_size == 64 * 1024) {
5635		adev->gds.mem.gfx_partition_size = 4096;
5636		adev->gds.mem.cs_partition_size = 4096;
5637
5638		adev->gds.gws.gfx_partition_size = 4;
5639		adev->gds.gws.cs_partition_size = 4;
5640
5641		adev->gds.oa.gfx_partition_size = 4;
5642		adev->gds.oa.cs_partition_size = 1;
5643	} else {
5644		adev->gds.mem.gfx_partition_size = 1024;
5645		adev->gds.mem.cs_partition_size = 1024;
5646
5647		adev->gds.gws.gfx_partition_size = 16;
5648		adev->gds.gws.cs_partition_size = 16;
5649
5650		adev->gds.oa.gfx_partition_size = 4;
5651		adev->gds.oa.cs_partition_size = 4;
5652	}
5653}
5654
5655
5656int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
5657								   struct amdgpu_cu_info *cu_info)
5658{
5659	int i, j, k, counter, active_cu_number = 0;
5660	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5661
5662	if (!adev || !cu_info)
5663		return -EINVAL;
5664
5665	mutex_lock(&adev->grbm_idx_mutex);
5666	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5667		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5668			mask = 1;
5669			ao_bitmap = 0;
5670			counter = 0;
5671			bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j);
5672			cu_info->bitmap[i][j] = bitmap;
5673
5674			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5675				if (bitmap & mask) {
5676					if (counter < 2)
5677						ao_bitmap |= mask;
5678					counter ++;
5679				}
5680				mask <<= 1;
5681			}
5682			active_cu_number += counter;
5683			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5684		}
5685	}
5686
5687	cu_info->number = active_cu_number;
5688	cu_info->ao_cu_mask = ao_cu_mask;
5689	mutex_unlock(&adev->grbm_idx_mutex);
5690	return 0;
5691}
5692