This source file includes the following definitions.
- gfx_v7_0_init_microcode
- gfx_v7_0_free_microcode
- gfx_v7_0_tiling_mode_table_init
- gfx_v7_0_select_se_sh
- gfx_v7_0_get_rb_active_bitmap
- gfx_v7_0_raster_config
- gfx_v7_0_write_harvested_raster_configs
- gfx_v7_0_setup_rb
- gfx_v7_0_init_compute_vmid
- gfx_v7_0_init_gds_vmid
- gfx_v7_0_config_init
- gfx_v7_0_constants_init
- gfx_v7_0_scratch_init
- gfx_v7_0_ring_test_ring
- gfx_v7_0_ring_emit_hdp_flush
- gfx_v7_0_ring_emit_vgt_flush
- gfx_v7_0_ring_emit_fence_gfx
- gfx_v7_0_ring_emit_fence_compute
- gfx_v7_0_ring_emit_ib_gfx
- gfx_v7_0_ring_emit_ib_compute
- gfx_v7_ring_emit_cntxcntl
- gfx_v7_0_ring_test_ib
- gfx_v7_0_cp_gfx_enable
- gfx_v7_0_cp_gfx_load_microcode
- gfx_v7_0_cp_gfx_start
- gfx_v7_0_cp_gfx_resume
- gfx_v7_0_ring_get_rptr
- gfx_v7_0_ring_get_wptr_gfx
- gfx_v7_0_ring_set_wptr_gfx
- gfx_v7_0_ring_get_wptr_compute
- gfx_v7_0_ring_set_wptr_compute
- gfx_v7_0_cp_compute_enable
- gfx_v7_0_cp_compute_load_microcode
- gfx_v7_0_cp_compute_fini
- gfx_v7_0_mec_fini
- gfx_v7_0_mec_init
- gfx_v7_0_compute_pipe_init
- gfx_v7_0_mqd_deactivate
- gfx_v7_0_mqd_init
- gfx_v7_0_mqd_commit
- gfx_v7_0_compute_queue_init
- gfx_v7_0_cp_compute_resume
- gfx_v7_0_cp_enable
- gfx_v7_0_cp_load_microcode
- gfx_v7_0_enable_gui_idle_interrupt
- gfx_v7_0_cp_resume
- gfx_v7_0_ring_emit_pipeline_sync
- gfx_v7_0_ring_emit_vm_flush
- gfx_v7_0_ring_emit_wreg
- gfx_v7_0_rlc_init
- gfx_v7_0_enable_lbpw
- gfx_v7_0_wait_for_rlc_serdes
- gfx_v7_0_update_rlc
- gfx_v7_0_halt_rlc
- gfx_v7_0_is_rlc_enabled
- gfx_v7_0_set_safe_mode
- gfx_v7_0_unset_safe_mode
- gfx_v7_0_rlc_stop
- gfx_v7_0_rlc_start
- gfx_v7_0_rlc_reset
- gfx_v7_0_rlc_resume
- gfx_v7_0_enable_cgcg
- gfx_v7_0_enable_mgcg
- gfx_v7_0_update_cg
- gfx_v7_0_enable_sclk_slowdown_on_pu
- gfx_v7_0_enable_sclk_slowdown_on_pd
- gfx_v7_0_enable_cp_pg
- gfx_v7_0_enable_gds_pg
- gfx_v7_0_cp_pg_table_num
- gfx_v7_0_enable_gfx_cgpg
- gfx_v7_0_set_user_cu_inactive_bitmap
- gfx_v7_0_get_cu_active_bitmap
- gfx_v7_0_init_ao_cu_mask
- gfx_v7_0_enable_gfx_static_mgpg
- gfx_v7_0_enable_gfx_dynamic_mgpg
- gfx_v7_0_init_gfx_cgpg
- gfx_v7_0_update_gfx_pg
- gfx_v7_0_get_csb_size
- gfx_v7_0_get_csb_buffer
- gfx_v7_0_init_pg
- gfx_v7_0_fini_pg
- gfx_v7_0_get_gpu_clock_counter
- gfx_v7_0_ring_emit_gds_switch
- gfx_v7_0_ring_soft_recovery
- wave_read_ind
- wave_read_regs
- gfx_v7_0_read_wave_data
- gfx_v7_0_read_wave_sgprs
- gfx_v7_0_select_me_pipe_q
- gfx_v7_0_early_init
- gfx_v7_0_late_init
- gfx_v7_0_gpu_early_init
- gfx_v7_0_compute_ring_init
- gfx_v7_0_sw_init
- gfx_v7_0_sw_fini
- gfx_v7_0_hw_init
- gfx_v7_0_hw_fini
- gfx_v7_0_suspend
- gfx_v7_0_resume
- gfx_v7_0_is_idle
- gfx_v7_0_wait_for_idle
- gfx_v7_0_soft_reset
- gfx_v7_0_set_gfx_eop_interrupt_state
- gfx_v7_0_set_compute_eop_interrupt_state
- gfx_v7_0_set_priv_reg_fault_state
- gfx_v7_0_set_priv_inst_fault_state
- gfx_v7_0_set_eop_interrupt_state
- gfx_v7_0_eop_irq
- gfx_v7_0_fault
- gfx_v7_0_priv_reg_irq
- gfx_v7_0_priv_inst_irq
- gfx_v7_0_set_clockgating_state
- gfx_v7_0_set_powergating_state
- gfx_v7_0_set_ring_funcs
- gfx_v7_0_set_irq_funcs
- gfx_v7_0_set_gds_init
- gfx_v7_0_get_cu_info
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26
27 #include "amdgpu.h"
28 #include "amdgpu_ih.h"
29 #include "amdgpu_gfx.h"
30 #include "cikd.h"
31 #include "cik.h"
32 #include "cik_structs.h"
33 #include "atom.h"
34 #include "amdgpu_ucode.h"
35 #include "clearstate_ci.h"
36
37 #include "dce/dce_8_0_d.h"
38 #include "dce/dce_8_0_sh_mask.h"
39
40 #include "bif/bif_4_1_d.h"
41 #include "bif/bif_4_1_sh_mask.h"
42
43 #include "gca/gfx_7_0_d.h"
44 #include "gca/gfx_7_2_enum.h"
45 #include "gca/gfx_7_2_sh_mask.h"
46
47 #include "gmc/gmc_7_0_d.h"
48 #include "gmc/gmc_7_0_sh_mask.h"
49
50 #include "oss/oss_2_0_d.h"
51 #include "oss/oss_2_0_sh_mask.h"
52
53 #define NUM_SIMD_PER_CU 0x4
54
55 #define GFX7_NUM_GFX_RINGS 1
56 #define GFX7_MEC_HPD_SIZE 2048
57
58 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
59 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
60 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
61
62 MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
63 MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
64 MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
65 MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
66 MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
67
68 MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
70 MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
71 MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
72 MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
73
74 MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
76 MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
77 MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
78 MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
79 MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
80
81 MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/kabini_me.bin");
83 MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
84 MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
85 MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
86
87 MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
88 MODULE_FIRMWARE("amdgpu/mullins_me.bin");
89 MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
90 MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
91 MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
92
93 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
94 {
95 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
96 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
97 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
98 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
99 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
100 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
101 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
102 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
103 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
104 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
105 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
106 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
107 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
108 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
109 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
110 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
111 };
112
113 static const u32 spectre_rlc_save_restore_register_list[] =
114 {
115 (0x0e00 << 16) | (0xc12c >> 2),
116 0x00000000,
117 (0x0e00 << 16) | (0xc140 >> 2),
118 0x00000000,
119 (0x0e00 << 16) | (0xc150 >> 2),
120 0x00000000,
121 (0x0e00 << 16) | (0xc15c >> 2),
122 0x00000000,
123 (0x0e00 << 16) | (0xc168 >> 2),
124 0x00000000,
125 (0x0e00 << 16) | (0xc170 >> 2),
126 0x00000000,
127 (0x0e00 << 16) | (0xc178 >> 2),
128 0x00000000,
129 (0x0e00 << 16) | (0xc204 >> 2),
130 0x00000000,
131 (0x0e00 << 16) | (0xc2b4 >> 2),
132 0x00000000,
133 (0x0e00 << 16) | (0xc2b8 >> 2),
134 0x00000000,
135 (0x0e00 << 16) | (0xc2bc >> 2),
136 0x00000000,
137 (0x0e00 << 16) | (0xc2c0 >> 2),
138 0x00000000,
139 (0x0e00 << 16) | (0x8228 >> 2),
140 0x00000000,
141 (0x0e00 << 16) | (0x829c >> 2),
142 0x00000000,
143 (0x0e00 << 16) | (0x869c >> 2),
144 0x00000000,
145 (0x0600 << 16) | (0x98f4 >> 2),
146 0x00000000,
147 (0x0e00 << 16) | (0x98f8 >> 2),
148 0x00000000,
149 (0x0e00 << 16) | (0x9900 >> 2),
150 0x00000000,
151 (0x0e00 << 16) | (0xc260 >> 2),
152 0x00000000,
153 (0x0e00 << 16) | (0x90e8 >> 2),
154 0x00000000,
155 (0x0e00 << 16) | (0x3c000 >> 2),
156 0x00000000,
157 (0x0e00 << 16) | (0x3c00c >> 2),
158 0x00000000,
159 (0x0e00 << 16) | (0x8c1c >> 2),
160 0x00000000,
161 (0x0e00 << 16) | (0x9700 >> 2),
162 0x00000000,
163 (0x0e00 << 16) | (0xcd20 >> 2),
164 0x00000000,
165 (0x4e00 << 16) | (0xcd20 >> 2),
166 0x00000000,
167 (0x5e00 << 16) | (0xcd20 >> 2),
168 0x00000000,
169 (0x6e00 << 16) | (0xcd20 >> 2),
170 0x00000000,
171 (0x7e00 << 16) | (0xcd20 >> 2),
172 0x00000000,
173 (0x8e00 << 16) | (0xcd20 >> 2),
174 0x00000000,
175 (0x9e00 << 16) | (0xcd20 >> 2),
176 0x00000000,
177 (0xae00 << 16) | (0xcd20 >> 2),
178 0x00000000,
179 (0xbe00 << 16) | (0xcd20 >> 2),
180 0x00000000,
181 (0x0e00 << 16) | (0x89bc >> 2),
182 0x00000000,
183 (0x0e00 << 16) | (0x8900 >> 2),
184 0x00000000,
185 0x3,
186 (0x0e00 << 16) | (0xc130 >> 2),
187 0x00000000,
188 (0x0e00 << 16) | (0xc134 >> 2),
189 0x00000000,
190 (0x0e00 << 16) | (0xc1fc >> 2),
191 0x00000000,
192 (0x0e00 << 16) | (0xc208 >> 2),
193 0x00000000,
194 (0x0e00 << 16) | (0xc264 >> 2),
195 0x00000000,
196 (0x0e00 << 16) | (0xc268 >> 2),
197 0x00000000,
198 (0x0e00 << 16) | (0xc26c >> 2),
199 0x00000000,
200 (0x0e00 << 16) | (0xc270 >> 2),
201 0x00000000,
202 (0x0e00 << 16) | (0xc274 >> 2),
203 0x00000000,
204 (0x0e00 << 16) | (0xc278 >> 2),
205 0x00000000,
206 (0x0e00 << 16) | (0xc27c >> 2),
207 0x00000000,
208 (0x0e00 << 16) | (0xc280 >> 2),
209 0x00000000,
210 (0x0e00 << 16) | (0xc284 >> 2),
211 0x00000000,
212 (0x0e00 << 16) | (0xc288 >> 2),
213 0x00000000,
214 (0x0e00 << 16) | (0xc28c >> 2),
215 0x00000000,
216 (0x0e00 << 16) | (0xc290 >> 2),
217 0x00000000,
218 (0x0e00 << 16) | (0xc294 >> 2),
219 0x00000000,
220 (0x0e00 << 16) | (0xc298 >> 2),
221 0x00000000,
222 (0x0e00 << 16) | (0xc29c >> 2),
223 0x00000000,
224 (0x0e00 << 16) | (0xc2a0 >> 2),
225 0x00000000,
226 (0x0e00 << 16) | (0xc2a4 >> 2),
227 0x00000000,
228 (0x0e00 << 16) | (0xc2a8 >> 2),
229 0x00000000,
230 (0x0e00 << 16) | (0xc2ac >> 2),
231 0x00000000,
232 (0x0e00 << 16) | (0xc2b0 >> 2),
233 0x00000000,
234 (0x0e00 << 16) | (0x301d0 >> 2),
235 0x00000000,
236 (0x0e00 << 16) | (0x30238 >> 2),
237 0x00000000,
238 (0x0e00 << 16) | (0x30250 >> 2),
239 0x00000000,
240 (0x0e00 << 16) | (0x30254 >> 2),
241 0x00000000,
242 (0x0e00 << 16) | (0x30258 >> 2),
243 0x00000000,
244 (0x0e00 << 16) | (0x3025c >> 2),
245 0x00000000,
246 (0x4e00 << 16) | (0xc900 >> 2),
247 0x00000000,
248 (0x5e00 << 16) | (0xc900 >> 2),
249 0x00000000,
250 (0x6e00 << 16) | (0xc900 >> 2),
251 0x00000000,
252 (0x7e00 << 16) | (0xc900 >> 2),
253 0x00000000,
254 (0x8e00 << 16) | (0xc900 >> 2),
255 0x00000000,
256 (0x9e00 << 16) | (0xc900 >> 2),
257 0x00000000,
258 (0xae00 << 16) | (0xc900 >> 2),
259 0x00000000,
260 (0xbe00 << 16) | (0xc900 >> 2),
261 0x00000000,
262 (0x4e00 << 16) | (0xc904 >> 2),
263 0x00000000,
264 (0x5e00 << 16) | (0xc904 >> 2),
265 0x00000000,
266 (0x6e00 << 16) | (0xc904 >> 2),
267 0x00000000,
268 (0x7e00 << 16) | (0xc904 >> 2),
269 0x00000000,
270 (0x8e00 << 16) | (0xc904 >> 2),
271 0x00000000,
272 (0x9e00 << 16) | (0xc904 >> 2),
273 0x00000000,
274 (0xae00 << 16) | (0xc904 >> 2),
275 0x00000000,
276 (0xbe00 << 16) | (0xc904 >> 2),
277 0x00000000,
278 (0x4e00 << 16) | (0xc908 >> 2),
279 0x00000000,
280 (0x5e00 << 16) | (0xc908 >> 2),
281 0x00000000,
282 (0x6e00 << 16) | (0xc908 >> 2),
283 0x00000000,
284 (0x7e00 << 16) | (0xc908 >> 2),
285 0x00000000,
286 (0x8e00 << 16) | (0xc908 >> 2),
287 0x00000000,
288 (0x9e00 << 16) | (0xc908 >> 2),
289 0x00000000,
290 (0xae00 << 16) | (0xc908 >> 2),
291 0x00000000,
292 (0xbe00 << 16) | (0xc908 >> 2),
293 0x00000000,
294 (0x4e00 << 16) | (0xc90c >> 2),
295 0x00000000,
296 (0x5e00 << 16) | (0xc90c >> 2),
297 0x00000000,
298 (0x6e00 << 16) | (0xc90c >> 2),
299 0x00000000,
300 (0x7e00 << 16) | (0xc90c >> 2),
301 0x00000000,
302 (0x8e00 << 16) | (0xc90c >> 2),
303 0x00000000,
304 (0x9e00 << 16) | (0xc90c >> 2),
305 0x00000000,
306 (0xae00 << 16) | (0xc90c >> 2),
307 0x00000000,
308 (0xbe00 << 16) | (0xc90c >> 2),
309 0x00000000,
310 (0x4e00 << 16) | (0xc910 >> 2),
311 0x00000000,
312 (0x5e00 << 16) | (0xc910 >> 2),
313 0x00000000,
314 (0x6e00 << 16) | (0xc910 >> 2),
315 0x00000000,
316 (0x7e00 << 16) | (0xc910 >> 2),
317 0x00000000,
318 (0x8e00 << 16) | (0xc910 >> 2),
319 0x00000000,
320 (0x9e00 << 16) | (0xc910 >> 2),
321 0x00000000,
322 (0xae00 << 16) | (0xc910 >> 2),
323 0x00000000,
324 (0xbe00 << 16) | (0xc910 >> 2),
325 0x00000000,
326 (0x0e00 << 16) | (0xc99c >> 2),
327 0x00000000,
328 (0x0e00 << 16) | (0x9834 >> 2),
329 0x00000000,
330 (0x0000 << 16) | (0x30f00 >> 2),
331 0x00000000,
332 (0x0001 << 16) | (0x30f00 >> 2),
333 0x00000000,
334 (0x0000 << 16) | (0x30f04 >> 2),
335 0x00000000,
336 (0x0001 << 16) | (0x30f04 >> 2),
337 0x00000000,
338 (0x0000 << 16) | (0x30f08 >> 2),
339 0x00000000,
340 (0x0001 << 16) | (0x30f08 >> 2),
341 0x00000000,
342 (0x0000 << 16) | (0x30f0c >> 2),
343 0x00000000,
344 (0x0001 << 16) | (0x30f0c >> 2),
345 0x00000000,
346 (0x0600 << 16) | (0x9b7c >> 2),
347 0x00000000,
348 (0x0e00 << 16) | (0x8a14 >> 2),
349 0x00000000,
350 (0x0e00 << 16) | (0x8a18 >> 2),
351 0x00000000,
352 (0x0600 << 16) | (0x30a00 >> 2),
353 0x00000000,
354 (0x0e00 << 16) | (0x8bf0 >> 2),
355 0x00000000,
356 (0x0e00 << 16) | (0x8bcc >> 2),
357 0x00000000,
358 (0x0e00 << 16) | (0x8b24 >> 2),
359 0x00000000,
360 (0x0e00 << 16) | (0x30a04 >> 2),
361 0x00000000,
362 (0x0600 << 16) | (0x30a10 >> 2),
363 0x00000000,
364 (0x0600 << 16) | (0x30a14 >> 2),
365 0x00000000,
366 (0x0600 << 16) | (0x30a18 >> 2),
367 0x00000000,
368 (0x0600 << 16) | (0x30a2c >> 2),
369 0x00000000,
370 (0x0e00 << 16) | (0xc700 >> 2),
371 0x00000000,
372 (0x0e00 << 16) | (0xc704 >> 2),
373 0x00000000,
374 (0x0e00 << 16) | (0xc708 >> 2),
375 0x00000000,
376 (0x0e00 << 16) | (0xc768 >> 2),
377 0x00000000,
378 (0x0400 << 16) | (0xc770 >> 2),
379 0x00000000,
380 (0x0400 << 16) | (0xc774 >> 2),
381 0x00000000,
382 (0x0400 << 16) | (0xc778 >> 2),
383 0x00000000,
384 (0x0400 << 16) | (0xc77c >> 2),
385 0x00000000,
386 (0x0400 << 16) | (0xc780 >> 2),
387 0x00000000,
388 (0x0400 << 16) | (0xc784 >> 2),
389 0x00000000,
390 (0x0400 << 16) | (0xc788 >> 2),
391 0x00000000,
392 (0x0400 << 16) | (0xc78c >> 2),
393 0x00000000,
394 (0x0400 << 16) | (0xc798 >> 2),
395 0x00000000,
396 (0x0400 << 16) | (0xc79c >> 2),
397 0x00000000,
398 (0x0400 << 16) | (0xc7a0 >> 2),
399 0x00000000,
400 (0x0400 << 16) | (0xc7a4 >> 2),
401 0x00000000,
402 (0x0400 << 16) | (0xc7a8 >> 2),
403 0x00000000,
404 (0x0400 << 16) | (0xc7ac >> 2),
405 0x00000000,
406 (0x0400 << 16) | (0xc7b0 >> 2),
407 0x00000000,
408 (0x0400 << 16) | (0xc7b4 >> 2),
409 0x00000000,
410 (0x0e00 << 16) | (0x9100 >> 2),
411 0x00000000,
412 (0x0e00 << 16) | (0x3c010 >> 2),
413 0x00000000,
414 (0x0e00 << 16) | (0x92a8 >> 2),
415 0x00000000,
416 (0x0e00 << 16) | (0x92ac >> 2),
417 0x00000000,
418 (0x0e00 << 16) | (0x92b4 >> 2),
419 0x00000000,
420 (0x0e00 << 16) | (0x92b8 >> 2),
421 0x00000000,
422 (0x0e00 << 16) | (0x92bc >> 2),
423 0x00000000,
424 (0x0e00 << 16) | (0x92c0 >> 2),
425 0x00000000,
426 (0x0e00 << 16) | (0x92c4 >> 2),
427 0x00000000,
428 (0x0e00 << 16) | (0x92c8 >> 2),
429 0x00000000,
430 (0x0e00 << 16) | (0x92cc >> 2),
431 0x00000000,
432 (0x0e00 << 16) | (0x92d0 >> 2),
433 0x00000000,
434 (0x0e00 << 16) | (0x8c00 >> 2),
435 0x00000000,
436 (0x0e00 << 16) | (0x8c04 >> 2),
437 0x00000000,
438 (0x0e00 << 16) | (0x8c20 >> 2),
439 0x00000000,
440 (0x0e00 << 16) | (0x8c38 >> 2),
441 0x00000000,
442 (0x0e00 << 16) | (0x8c3c >> 2),
443 0x00000000,
444 (0x0e00 << 16) | (0xae00 >> 2),
445 0x00000000,
446 (0x0e00 << 16) | (0x9604 >> 2),
447 0x00000000,
448 (0x0e00 << 16) | (0xac08 >> 2),
449 0x00000000,
450 (0x0e00 << 16) | (0xac0c >> 2),
451 0x00000000,
452 (0x0e00 << 16) | (0xac10 >> 2),
453 0x00000000,
454 (0x0e00 << 16) | (0xac14 >> 2),
455 0x00000000,
456 (0x0e00 << 16) | (0xac58 >> 2),
457 0x00000000,
458 (0x0e00 << 16) | (0xac68 >> 2),
459 0x00000000,
460 (0x0e00 << 16) | (0xac6c >> 2),
461 0x00000000,
462 (0x0e00 << 16) | (0xac70 >> 2),
463 0x00000000,
464 (0x0e00 << 16) | (0xac74 >> 2),
465 0x00000000,
466 (0x0e00 << 16) | (0xac78 >> 2),
467 0x00000000,
468 (0x0e00 << 16) | (0xac7c >> 2),
469 0x00000000,
470 (0x0e00 << 16) | (0xac80 >> 2),
471 0x00000000,
472 (0x0e00 << 16) | (0xac84 >> 2),
473 0x00000000,
474 (0x0e00 << 16) | (0xac88 >> 2),
475 0x00000000,
476 (0x0e00 << 16) | (0xac8c >> 2),
477 0x00000000,
478 (0x0e00 << 16) | (0x970c >> 2),
479 0x00000000,
480 (0x0e00 << 16) | (0x9714 >> 2),
481 0x00000000,
482 (0x0e00 << 16) | (0x9718 >> 2),
483 0x00000000,
484 (0x0e00 << 16) | (0x971c >> 2),
485 0x00000000,
486 (0x0e00 << 16) | (0x31068 >> 2),
487 0x00000000,
488 (0x4e00 << 16) | (0x31068 >> 2),
489 0x00000000,
490 (0x5e00 << 16) | (0x31068 >> 2),
491 0x00000000,
492 (0x6e00 << 16) | (0x31068 >> 2),
493 0x00000000,
494 (0x7e00 << 16) | (0x31068 >> 2),
495 0x00000000,
496 (0x8e00 << 16) | (0x31068 >> 2),
497 0x00000000,
498 (0x9e00 << 16) | (0x31068 >> 2),
499 0x00000000,
500 (0xae00 << 16) | (0x31068 >> 2),
501 0x00000000,
502 (0xbe00 << 16) | (0x31068 >> 2),
503 0x00000000,
504 (0x0e00 << 16) | (0xcd10 >> 2),
505 0x00000000,
506 (0x0e00 << 16) | (0xcd14 >> 2),
507 0x00000000,
508 (0x0e00 << 16) | (0x88b0 >> 2),
509 0x00000000,
510 (0x0e00 << 16) | (0x88b4 >> 2),
511 0x00000000,
512 (0x0e00 << 16) | (0x88b8 >> 2),
513 0x00000000,
514 (0x0e00 << 16) | (0x88bc >> 2),
515 0x00000000,
516 (0x0400 << 16) | (0x89c0 >> 2),
517 0x00000000,
518 (0x0e00 << 16) | (0x88c4 >> 2),
519 0x00000000,
520 (0x0e00 << 16) | (0x88c8 >> 2),
521 0x00000000,
522 (0x0e00 << 16) | (0x88d0 >> 2),
523 0x00000000,
524 (0x0e00 << 16) | (0x88d4 >> 2),
525 0x00000000,
526 (0x0e00 << 16) | (0x88d8 >> 2),
527 0x00000000,
528 (0x0e00 << 16) | (0x8980 >> 2),
529 0x00000000,
530 (0x0e00 << 16) | (0x30938 >> 2),
531 0x00000000,
532 (0x0e00 << 16) | (0x3093c >> 2),
533 0x00000000,
534 (0x0e00 << 16) | (0x30940 >> 2),
535 0x00000000,
536 (0x0e00 << 16) | (0x89a0 >> 2),
537 0x00000000,
538 (0x0e00 << 16) | (0x30900 >> 2),
539 0x00000000,
540 (0x0e00 << 16) | (0x30904 >> 2),
541 0x00000000,
542 (0x0e00 << 16) | (0x89b4 >> 2),
543 0x00000000,
544 (0x0e00 << 16) | (0x3c210 >> 2),
545 0x00000000,
546 (0x0e00 << 16) | (0x3c214 >> 2),
547 0x00000000,
548 (0x0e00 << 16) | (0x3c218 >> 2),
549 0x00000000,
550 (0x0e00 << 16) | (0x8904 >> 2),
551 0x00000000,
552 0x5,
553 (0x0e00 << 16) | (0x8c28 >> 2),
554 (0x0e00 << 16) | (0x8c2c >> 2),
555 (0x0e00 << 16) | (0x8c30 >> 2),
556 (0x0e00 << 16) | (0x8c34 >> 2),
557 (0x0e00 << 16) | (0x9600 >> 2),
558 };
559
560 static const u32 kalindi_rlc_save_restore_register_list[] =
561 {
562 (0x0e00 << 16) | (0xc12c >> 2),
563 0x00000000,
564 (0x0e00 << 16) | (0xc140 >> 2),
565 0x00000000,
566 (0x0e00 << 16) | (0xc150 >> 2),
567 0x00000000,
568 (0x0e00 << 16) | (0xc15c >> 2),
569 0x00000000,
570 (0x0e00 << 16) | (0xc168 >> 2),
571 0x00000000,
572 (0x0e00 << 16) | (0xc170 >> 2),
573 0x00000000,
574 (0x0e00 << 16) | (0xc204 >> 2),
575 0x00000000,
576 (0x0e00 << 16) | (0xc2b4 >> 2),
577 0x00000000,
578 (0x0e00 << 16) | (0xc2b8 >> 2),
579 0x00000000,
580 (0x0e00 << 16) | (0xc2bc >> 2),
581 0x00000000,
582 (0x0e00 << 16) | (0xc2c0 >> 2),
583 0x00000000,
584 (0x0e00 << 16) | (0x8228 >> 2),
585 0x00000000,
586 (0x0e00 << 16) | (0x829c >> 2),
587 0x00000000,
588 (0x0e00 << 16) | (0x869c >> 2),
589 0x00000000,
590 (0x0600 << 16) | (0x98f4 >> 2),
591 0x00000000,
592 (0x0e00 << 16) | (0x98f8 >> 2),
593 0x00000000,
594 (0x0e00 << 16) | (0x9900 >> 2),
595 0x00000000,
596 (0x0e00 << 16) | (0xc260 >> 2),
597 0x00000000,
598 (0x0e00 << 16) | (0x90e8 >> 2),
599 0x00000000,
600 (0x0e00 << 16) | (0x3c000 >> 2),
601 0x00000000,
602 (0x0e00 << 16) | (0x3c00c >> 2),
603 0x00000000,
604 (0x0e00 << 16) | (0x8c1c >> 2),
605 0x00000000,
606 (0x0e00 << 16) | (0x9700 >> 2),
607 0x00000000,
608 (0x0e00 << 16) | (0xcd20 >> 2),
609 0x00000000,
610 (0x4e00 << 16) | (0xcd20 >> 2),
611 0x00000000,
612 (0x5e00 << 16) | (0xcd20 >> 2),
613 0x00000000,
614 (0x6e00 << 16) | (0xcd20 >> 2),
615 0x00000000,
616 (0x7e00 << 16) | (0xcd20 >> 2),
617 0x00000000,
618 (0x0e00 << 16) | (0x89bc >> 2),
619 0x00000000,
620 (0x0e00 << 16) | (0x8900 >> 2),
621 0x00000000,
622 0x3,
623 (0x0e00 << 16) | (0xc130 >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0xc134 >> 2),
626 0x00000000,
627 (0x0e00 << 16) | (0xc1fc >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0xc208 >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0xc264 >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0xc268 >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0xc26c >> 2),
636 0x00000000,
637 (0x0e00 << 16) | (0xc270 >> 2),
638 0x00000000,
639 (0x0e00 << 16) | (0xc274 >> 2),
640 0x00000000,
641 (0x0e00 << 16) | (0xc28c >> 2),
642 0x00000000,
643 (0x0e00 << 16) | (0xc290 >> 2),
644 0x00000000,
645 (0x0e00 << 16) | (0xc294 >> 2),
646 0x00000000,
647 (0x0e00 << 16) | (0xc298 >> 2),
648 0x00000000,
649 (0x0e00 << 16) | (0xc2a0 >> 2),
650 0x00000000,
651 (0x0e00 << 16) | (0xc2a4 >> 2),
652 0x00000000,
653 (0x0e00 << 16) | (0xc2a8 >> 2),
654 0x00000000,
655 (0x0e00 << 16) | (0xc2ac >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0x301d0 >> 2),
658 0x00000000,
659 (0x0e00 << 16) | (0x30238 >> 2),
660 0x00000000,
661 (0x0e00 << 16) | (0x30250 >> 2),
662 0x00000000,
663 (0x0e00 << 16) | (0x30254 >> 2),
664 0x00000000,
665 (0x0e00 << 16) | (0x30258 >> 2),
666 0x00000000,
667 (0x0e00 << 16) | (0x3025c >> 2),
668 0x00000000,
669 (0x4e00 << 16) | (0xc900 >> 2),
670 0x00000000,
671 (0x5e00 << 16) | (0xc900 >> 2),
672 0x00000000,
673 (0x6e00 << 16) | (0xc900 >> 2),
674 0x00000000,
675 (0x7e00 << 16) | (0xc900 >> 2),
676 0x00000000,
677 (0x4e00 << 16) | (0xc904 >> 2),
678 0x00000000,
679 (0x5e00 << 16) | (0xc904 >> 2),
680 0x00000000,
681 (0x6e00 << 16) | (0xc904 >> 2),
682 0x00000000,
683 (0x7e00 << 16) | (0xc904 >> 2),
684 0x00000000,
685 (0x4e00 << 16) | (0xc908 >> 2),
686 0x00000000,
687 (0x5e00 << 16) | (0xc908 >> 2),
688 0x00000000,
689 (0x6e00 << 16) | (0xc908 >> 2),
690 0x00000000,
691 (0x7e00 << 16) | (0xc908 >> 2),
692 0x00000000,
693 (0x4e00 << 16) | (0xc90c >> 2),
694 0x00000000,
695 (0x5e00 << 16) | (0xc90c >> 2),
696 0x00000000,
697 (0x6e00 << 16) | (0xc90c >> 2),
698 0x00000000,
699 (0x7e00 << 16) | (0xc90c >> 2),
700 0x00000000,
701 (0x4e00 << 16) | (0xc910 >> 2),
702 0x00000000,
703 (0x5e00 << 16) | (0xc910 >> 2),
704 0x00000000,
705 (0x6e00 << 16) | (0xc910 >> 2),
706 0x00000000,
707 (0x7e00 << 16) | (0xc910 >> 2),
708 0x00000000,
709 (0x0e00 << 16) | (0xc99c >> 2),
710 0x00000000,
711 (0x0e00 << 16) | (0x9834 >> 2),
712 0x00000000,
713 (0x0000 << 16) | (0x30f00 >> 2),
714 0x00000000,
715 (0x0000 << 16) | (0x30f04 >> 2),
716 0x00000000,
717 (0x0000 << 16) | (0x30f08 >> 2),
718 0x00000000,
719 (0x0000 << 16) | (0x30f0c >> 2),
720 0x00000000,
721 (0x0600 << 16) | (0x9b7c >> 2),
722 0x00000000,
723 (0x0e00 << 16) | (0x8a14 >> 2),
724 0x00000000,
725 (0x0e00 << 16) | (0x8a18 >> 2),
726 0x00000000,
727 (0x0600 << 16) | (0x30a00 >> 2),
728 0x00000000,
729 (0x0e00 << 16) | (0x8bf0 >> 2),
730 0x00000000,
731 (0x0e00 << 16) | (0x8bcc >> 2),
732 0x00000000,
733 (0x0e00 << 16) | (0x8b24 >> 2),
734 0x00000000,
735 (0x0e00 << 16) | (0x30a04 >> 2),
736 0x00000000,
737 (0x0600 << 16) | (0x30a10 >> 2),
738 0x00000000,
739 (0x0600 << 16) | (0x30a14 >> 2),
740 0x00000000,
741 (0x0600 << 16) | (0x30a18 >> 2),
742 0x00000000,
743 (0x0600 << 16) | (0x30a2c >> 2),
744 0x00000000,
745 (0x0e00 << 16) | (0xc700 >> 2),
746 0x00000000,
747 (0x0e00 << 16) | (0xc704 >> 2),
748 0x00000000,
749 (0x0e00 << 16) | (0xc708 >> 2),
750 0x00000000,
751 (0x0e00 << 16) | (0xc768 >> 2),
752 0x00000000,
753 (0x0400 << 16) | (0xc770 >> 2),
754 0x00000000,
755 (0x0400 << 16) | (0xc774 >> 2),
756 0x00000000,
757 (0x0400 << 16) | (0xc798 >> 2),
758 0x00000000,
759 (0x0400 << 16) | (0xc79c >> 2),
760 0x00000000,
761 (0x0e00 << 16) | (0x9100 >> 2),
762 0x00000000,
763 (0x0e00 << 16) | (0x3c010 >> 2),
764 0x00000000,
765 (0x0e00 << 16) | (0x8c00 >> 2),
766 0x00000000,
767 (0x0e00 << 16) | (0x8c04 >> 2),
768 0x00000000,
769 (0x0e00 << 16) | (0x8c20 >> 2),
770 0x00000000,
771 (0x0e00 << 16) | (0x8c38 >> 2),
772 0x00000000,
773 (0x0e00 << 16) | (0x8c3c >> 2),
774 0x00000000,
775 (0x0e00 << 16) | (0xae00 >> 2),
776 0x00000000,
777 (0x0e00 << 16) | (0x9604 >> 2),
778 0x00000000,
779 (0x0e00 << 16) | (0xac08 >> 2),
780 0x00000000,
781 (0x0e00 << 16) | (0xac0c >> 2),
782 0x00000000,
783 (0x0e00 << 16) | (0xac10 >> 2),
784 0x00000000,
785 (0x0e00 << 16) | (0xac14 >> 2),
786 0x00000000,
787 (0x0e00 << 16) | (0xac58 >> 2),
788 0x00000000,
789 (0x0e00 << 16) | (0xac68 >> 2),
790 0x00000000,
791 (0x0e00 << 16) | (0xac6c >> 2),
792 0x00000000,
793 (0x0e00 << 16) | (0xac70 >> 2),
794 0x00000000,
795 (0x0e00 << 16) | (0xac74 >> 2),
796 0x00000000,
797 (0x0e00 << 16) | (0xac78 >> 2),
798 0x00000000,
799 (0x0e00 << 16) | (0xac7c >> 2),
800 0x00000000,
801 (0x0e00 << 16) | (0xac80 >> 2),
802 0x00000000,
803 (0x0e00 << 16) | (0xac84 >> 2),
804 0x00000000,
805 (0x0e00 << 16) | (0xac88 >> 2),
806 0x00000000,
807 (0x0e00 << 16) | (0xac8c >> 2),
808 0x00000000,
809 (0x0e00 << 16) | (0x970c >> 2),
810 0x00000000,
811 (0x0e00 << 16) | (0x9714 >> 2),
812 0x00000000,
813 (0x0e00 << 16) | (0x9718 >> 2),
814 0x00000000,
815 (0x0e00 << 16) | (0x971c >> 2),
816 0x00000000,
817 (0x0e00 << 16) | (0x31068 >> 2),
818 0x00000000,
819 (0x4e00 << 16) | (0x31068 >> 2),
820 0x00000000,
821 (0x5e00 << 16) | (0x31068 >> 2),
822 0x00000000,
823 (0x6e00 << 16) | (0x31068 >> 2),
824 0x00000000,
825 (0x7e00 << 16) | (0x31068 >> 2),
826 0x00000000,
827 (0x0e00 << 16) | (0xcd10 >> 2),
828 0x00000000,
829 (0x0e00 << 16) | (0xcd14 >> 2),
830 0x00000000,
831 (0x0e00 << 16) | (0x88b0 >> 2),
832 0x00000000,
833 (0x0e00 << 16) | (0x88b4 >> 2),
834 0x00000000,
835 (0x0e00 << 16) | (0x88b8 >> 2),
836 0x00000000,
837 (0x0e00 << 16) | (0x88bc >> 2),
838 0x00000000,
839 (0x0400 << 16) | (0x89c0 >> 2),
840 0x00000000,
841 (0x0e00 << 16) | (0x88c4 >> 2),
842 0x00000000,
843 (0x0e00 << 16) | (0x88c8 >> 2),
844 0x00000000,
845 (0x0e00 << 16) | (0x88d0 >> 2),
846 0x00000000,
847 (0x0e00 << 16) | (0x88d4 >> 2),
848 0x00000000,
849 (0x0e00 << 16) | (0x88d8 >> 2),
850 0x00000000,
851 (0x0e00 << 16) | (0x8980 >> 2),
852 0x00000000,
853 (0x0e00 << 16) | (0x30938 >> 2),
854 0x00000000,
855 (0x0e00 << 16) | (0x3093c >> 2),
856 0x00000000,
857 (0x0e00 << 16) | (0x30940 >> 2),
858 0x00000000,
859 (0x0e00 << 16) | (0x89a0 >> 2),
860 0x00000000,
861 (0x0e00 << 16) | (0x30900 >> 2),
862 0x00000000,
863 (0x0e00 << 16) | (0x30904 >> 2),
864 0x00000000,
865 (0x0e00 << 16) | (0x89b4 >> 2),
866 0x00000000,
867 (0x0e00 << 16) | (0x3e1fc >> 2),
868 0x00000000,
869 (0x0e00 << 16) | (0x3c210 >> 2),
870 0x00000000,
871 (0x0e00 << 16) | (0x3c214 >> 2),
872 0x00000000,
873 (0x0e00 << 16) | (0x3c218 >> 2),
874 0x00000000,
875 (0x0e00 << 16) | (0x8904 >> 2),
876 0x00000000,
877 0x5,
878 (0x0e00 << 16) | (0x8c28 >> 2),
879 (0x0e00 << 16) | (0x8c2c >> 2),
880 (0x0e00 << 16) | (0x8c30 >> 2),
881 (0x0e00 << 16) | (0x8c34 >> 2),
882 (0x0e00 << 16) | (0x9600 >> 2),
883 };
884
885 static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
886 static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
887 static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
888 static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
889
890
891
892
893
894
895
896
897
898
899
900
901
902 static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
903 {
904 const char *chip_name;
905 char fw_name[30];
906 int err;
907
908 DRM_DEBUG("\n");
909
910 switch (adev->asic_type) {
911 case CHIP_BONAIRE:
912 chip_name = "bonaire";
913 break;
914 case CHIP_HAWAII:
915 chip_name = "hawaii";
916 break;
917 case CHIP_KAVERI:
918 chip_name = "kaveri";
919 break;
920 case CHIP_KABINI:
921 chip_name = "kabini";
922 break;
923 case CHIP_MULLINS:
924 chip_name = "mullins";
925 break;
926 default: BUG();
927 }
928
929 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
930 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
931 if (err)
932 goto out;
933 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
934 if (err)
935 goto out;
936
937 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
938 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
939 if (err)
940 goto out;
941 err = amdgpu_ucode_validate(adev->gfx.me_fw);
942 if (err)
943 goto out;
944
945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
946 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
947 if (err)
948 goto out;
949 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
950 if (err)
951 goto out;
952
953 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
954 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
955 if (err)
956 goto out;
957 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
958 if (err)
959 goto out;
960
961 if (adev->asic_type == CHIP_KAVERI) {
962 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
963 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
964 if (err)
965 goto out;
966 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
967 if (err)
968 goto out;
969 }
970
971 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
972 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
973 if (err)
974 goto out;
975 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
976
977 out:
978 if (err) {
979 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
980 release_firmware(adev->gfx.pfp_fw);
981 adev->gfx.pfp_fw = NULL;
982 release_firmware(adev->gfx.me_fw);
983 adev->gfx.me_fw = NULL;
984 release_firmware(adev->gfx.ce_fw);
985 adev->gfx.ce_fw = NULL;
986 release_firmware(adev->gfx.mec_fw);
987 adev->gfx.mec_fw = NULL;
988 release_firmware(adev->gfx.mec2_fw);
989 adev->gfx.mec2_fw = NULL;
990 release_firmware(adev->gfx.rlc_fw);
991 adev->gfx.rlc_fw = NULL;
992 }
993 return err;
994 }
995
996 static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
997 {
998 release_firmware(adev->gfx.pfp_fw);
999 adev->gfx.pfp_fw = NULL;
1000 release_firmware(adev->gfx.me_fw);
1001 adev->gfx.me_fw = NULL;
1002 release_firmware(adev->gfx.ce_fw);
1003 adev->gfx.ce_fw = NULL;
1004 release_firmware(adev->gfx.mec_fw);
1005 adev->gfx.mec_fw = NULL;
1006 release_firmware(adev->gfx.mec2_fw);
1007 adev->gfx.mec2_fw = NULL;
1008 release_firmware(adev->gfx.rlc_fw);
1009 adev->gfx.rlc_fw = NULL;
1010 }
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023 static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1024 {
1025 const u32 num_tile_mode_states =
1026 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1027 const u32 num_secondary_tile_mode_states =
1028 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1029 u32 reg_offset, split_equal_to_row_size;
1030 uint32_t *tile, *macrotile;
1031
1032 tile = adev->gfx.config.tile_mode_array;
1033 macrotile = adev->gfx.config.macrotile_mode_array;
1034
1035 switch (adev->gfx.config.mem_row_size_in_kb) {
1036 case 1:
1037 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1038 break;
1039 case 2:
1040 default:
1041 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1042 break;
1043 case 4:
1044 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1045 break;
1046 }
1047
1048 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1049 tile[reg_offset] = 0;
1050 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1051 macrotile[reg_offset] = 0;
1052
1053 switch (adev->asic_type) {
1054 case CHIP_BONAIRE:
1055 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1059 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1060 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1063 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1067 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1071 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1073 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1074 TILE_SPLIT(split_equal_to_row_size));
1075 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1076 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1078 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1079 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1081 TILE_SPLIT(split_equal_to_row_size));
1082 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1083 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1084 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1085 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1088 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1089 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1092 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1094 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1096 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1097 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1098 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1100 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1101 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1104 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1105 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1108 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1110 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1112 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1113 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1114 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1115 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1117 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1118 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1120 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1121 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1122 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1124 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1125 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1126 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1128 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1129 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1132 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1133 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1134 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1137 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1138 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1141 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1142 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1145 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1146 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1148 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1149 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1150 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1152 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1156 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1157
1158 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1161 NUM_BANKS(ADDR_SURF_16_BANK));
1162 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1165 NUM_BANKS(ADDR_SURF_16_BANK));
1166 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1169 NUM_BANKS(ADDR_SURF_16_BANK));
1170 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1173 NUM_BANKS(ADDR_SURF_16_BANK));
1174 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1177 NUM_BANKS(ADDR_SURF_16_BANK));
1178 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1181 NUM_BANKS(ADDR_SURF_8_BANK));
1182 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1185 NUM_BANKS(ADDR_SURF_4_BANK));
1186 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1189 NUM_BANKS(ADDR_SURF_16_BANK));
1190 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1193 NUM_BANKS(ADDR_SURF_16_BANK));
1194 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1201 NUM_BANKS(ADDR_SURF_16_BANK));
1202 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1205 NUM_BANKS(ADDR_SURF_16_BANK));
1206 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1209 NUM_BANKS(ADDR_SURF_8_BANK));
1210 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1213 NUM_BANKS(ADDR_SURF_4_BANK));
1214
1215 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1216 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1217 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1218 if (reg_offset != 7)
1219 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1220 break;
1221 case CHIP_HAWAII:
1222 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1226 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1227 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1230 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1231 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1234 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1235 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1238 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1241 TILE_SPLIT(split_equal_to_row_size));
1242 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1245 TILE_SPLIT(split_equal_to_row_size));
1246 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1247 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1249 TILE_SPLIT(split_equal_to_row_size));
1250 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1253 TILE_SPLIT(split_equal_to_row_size));
1254 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1256 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1259 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1260 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1263 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1264 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1267 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1268 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1269 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1271 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1272 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1274 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1278 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1282 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1286 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1287 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1290 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1294 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1297 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1298 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1301 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1302 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1305 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1309 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1310 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1311 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1313 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1317 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1321 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1325 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1328 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1336 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1337 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1340
1341 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1344 NUM_BANKS(ADDR_SURF_16_BANK));
1345 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1348 NUM_BANKS(ADDR_SURF_16_BANK));
1349 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1352 NUM_BANKS(ADDR_SURF_16_BANK));
1353 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1356 NUM_BANKS(ADDR_SURF_16_BANK));
1357 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1360 NUM_BANKS(ADDR_SURF_8_BANK));
1361 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1364 NUM_BANKS(ADDR_SURF_4_BANK));
1365 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1368 NUM_BANKS(ADDR_SURF_4_BANK));
1369 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1372 NUM_BANKS(ADDR_SURF_16_BANK));
1373 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1376 NUM_BANKS(ADDR_SURF_16_BANK));
1377 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1380 NUM_BANKS(ADDR_SURF_16_BANK));
1381 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1384 NUM_BANKS(ADDR_SURF_8_BANK));
1385 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1388 NUM_BANKS(ADDR_SURF_16_BANK));
1389 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1392 NUM_BANKS(ADDR_SURF_8_BANK));
1393 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1396 NUM_BANKS(ADDR_SURF_4_BANK));
1397
1398 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1399 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1401 if (reg_offset != 7)
1402 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1403 break;
1404 case CHIP_KABINI:
1405 case CHIP_KAVERI:
1406 case CHIP_MULLINS:
1407 default:
1408 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1409 PIPE_CONFIG(ADDR_SURF_P2) |
1410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1412 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1413 PIPE_CONFIG(ADDR_SURF_P2) |
1414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1416 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1417 PIPE_CONFIG(ADDR_SURF_P2) |
1418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1420 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1421 PIPE_CONFIG(ADDR_SURF_P2) |
1422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1424 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1425 PIPE_CONFIG(ADDR_SURF_P2) |
1426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1427 TILE_SPLIT(split_equal_to_row_size));
1428 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1429 PIPE_CONFIG(ADDR_SURF_P2) |
1430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1431 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1432 PIPE_CONFIG(ADDR_SURF_P2) |
1433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1434 TILE_SPLIT(split_equal_to_row_size));
1435 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1436 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1437 PIPE_CONFIG(ADDR_SURF_P2));
1438 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1439 PIPE_CONFIG(ADDR_SURF_P2) |
1440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1441 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1442 PIPE_CONFIG(ADDR_SURF_P2) |
1443 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1445 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1446 PIPE_CONFIG(ADDR_SURF_P2) |
1447 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1449 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1450 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1451 PIPE_CONFIG(ADDR_SURF_P2) |
1452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1453 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1454 PIPE_CONFIG(ADDR_SURF_P2) |
1455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1457 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1458 PIPE_CONFIG(ADDR_SURF_P2) |
1459 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1461 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1462 PIPE_CONFIG(ADDR_SURF_P2) |
1463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1465 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1466 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1467 PIPE_CONFIG(ADDR_SURF_P2) |
1468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1470 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1471 PIPE_CONFIG(ADDR_SURF_P2) |
1472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1473 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1474 PIPE_CONFIG(ADDR_SURF_P2) |
1475 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1477 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1478 PIPE_CONFIG(ADDR_SURF_P2) |
1479 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1481 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1482 PIPE_CONFIG(ADDR_SURF_P2) |
1483 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1485 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1486 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1487 PIPE_CONFIG(ADDR_SURF_P2) |
1488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1490 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1491 PIPE_CONFIG(ADDR_SURF_P2) |
1492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1494 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1495 PIPE_CONFIG(ADDR_SURF_P2) |
1496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1498 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1501 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1502 PIPE_CONFIG(ADDR_SURF_P2) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1505 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1506 PIPE_CONFIG(ADDR_SURF_P2) |
1507 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1509 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1510
1511 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1514 NUM_BANKS(ADDR_SURF_8_BANK));
1515 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1518 NUM_BANKS(ADDR_SURF_8_BANK));
1519 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1522 NUM_BANKS(ADDR_SURF_8_BANK));
1523 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1526 NUM_BANKS(ADDR_SURF_8_BANK));
1527 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1530 NUM_BANKS(ADDR_SURF_8_BANK));
1531 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1534 NUM_BANKS(ADDR_SURF_8_BANK));
1535 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1538 NUM_BANKS(ADDR_SURF_8_BANK));
1539 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1542 NUM_BANKS(ADDR_SURF_16_BANK));
1543 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1546 NUM_BANKS(ADDR_SURF_16_BANK));
1547 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1550 NUM_BANKS(ADDR_SURF_16_BANK));
1551 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1554 NUM_BANKS(ADDR_SURF_16_BANK));
1555 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1558 NUM_BANKS(ADDR_SURF_16_BANK));
1559 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1562 NUM_BANKS(ADDR_SURF_16_BANK));
1563 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1566 NUM_BANKS(ADDR_SURF_8_BANK));
1567
1568 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1569 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1570 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1571 if (reg_offset != 7)
1572 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1573 break;
1574 }
1575 }
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588 static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1589 u32 se_num, u32 sh_num, u32 instance)
1590 {
1591 u32 data;
1592
1593 if (instance == 0xffffffff)
1594 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1595 else
1596 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1597
1598 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1599 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1600 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1601 else if (se_num == 0xffffffff)
1602 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1603 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1604 else if (sh_num == 0xffffffff)
1605 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1606 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1607 else
1608 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1609 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1610 WREG32(mmGRBM_GFX_INDEX, data);
1611 }
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621 static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1622 {
1623 u32 data, mask;
1624
1625 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1626 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1627
1628 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1629 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1630
1631 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1632 adev->gfx.config.max_sh_per_se);
1633
1634 return (~data) & mask;
1635 }
1636
1637 static void
1638 gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1639 {
1640 switch (adev->asic_type) {
1641 case CHIP_BONAIRE:
1642 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1643 SE_XSEL(1) | SE_YSEL(1);
1644 *rconf1 |= 0x0;
1645 break;
1646 case CHIP_HAWAII:
1647 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1648 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1649 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1650 SE_YSEL(3);
1651 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1652 SE_PAIR_YSEL(2);
1653 break;
1654 case CHIP_KAVERI:
1655 *rconf |= RB_MAP_PKR0(2);
1656 *rconf1 |= 0x0;
1657 break;
1658 case CHIP_KABINI:
1659 case CHIP_MULLINS:
1660 *rconf |= 0x0;
1661 *rconf1 |= 0x0;
1662 break;
1663 default:
1664 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1665 break;
1666 }
1667 }
1668
1669 static void
1670 gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1671 u32 raster_config, u32 raster_config_1,
1672 unsigned rb_mask, unsigned num_rb)
1673 {
1674 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
1675 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
1676 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
1677 unsigned rb_per_se = num_rb / num_se;
1678 unsigned se_mask[4];
1679 unsigned se;
1680
1681 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
1682 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
1683 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
1684 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
1685
1686 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
1687 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
1688 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
1689
1690 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
1691 (!se_mask[2] && !se_mask[3]))) {
1692 raster_config_1 &= ~SE_PAIR_MAP_MASK;
1693
1694 if (!se_mask[0] && !se_mask[1]) {
1695 raster_config_1 |=
1696 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
1697 } else {
1698 raster_config_1 |=
1699 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
1700 }
1701 }
1702
1703 for (se = 0; se < num_se; se++) {
1704 unsigned raster_config_se = raster_config;
1705 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
1706 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
1707 int idx = (se / 2) * 2;
1708
1709 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
1710 raster_config_se &= ~SE_MAP_MASK;
1711
1712 if (!se_mask[idx]) {
1713 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
1714 } else {
1715 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
1716 }
1717 }
1718
1719 pkr0_mask &= rb_mask;
1720 pkr1_mask &= rb_mask;
1721 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
1722 raster_config_se &= ~PKR_MAP_MASK;
1723
1724 if (!pkr0_mask) {
1725 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
1726 } else {
1727 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
1728 }
1729 }
1730
1731 if (rb_per_se >= 2) {
1732 unsigned rb0_mask = 1 << (se * rb_per_se);
1733 unsigned rb1_mask = rb0_mask << 1;
1734
1735 rb0_mask &= rb_mask;
1736 rb1_mask &= rb_mask;
1737 if (!rb0_mask || !rb1_mask) {
1738 raster_config_se &= ~RB_MAP_PKR0_MASK;
1739
1740 if (!rb0_mask) {
1741 raster_config_se |=
1742 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
1743 } else {
1744 raster_config_se |=
1745 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
1746 }
1747 }
1748
1749 if (rb_per_se > 2) {
1750 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
1751 rb1_mask = rb0_mask << 1;
1752 rb0_mask &= rb_mask;
1753 rb1_mask &= rb_mask;
1754 if (!rb0_mask || !rb1_mask) {
1755 raster_config_se &= ~RB_MAP_PKR1_MASK;
1756
1757 if (!rb0_mask) {
1758 raster_config_se |=
1759 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
1760 } else {
1761 raster_config_se |=
1762 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
1763 }
1764 }
1765 }
1766 }
1767
1768
1769 gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
1770 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
1771 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1772 }
1773
1774
1775 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1776 }
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787 static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1788 {
1789 int i, j;
1790 u32 data;
1791 u32 raster_config = 0, raster_config_1 = 0;
1792 u32 active_rbs = 0;
1793 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1794 adev->gfx.config.max_sh_per_se;
1795 unsigned num_rb_pipes;
1796
1797 mutex_lock(&adev->grbm_idx_mutex);
1798 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1799 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1800 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1801 data = gfx_v7_0_get_rb_active_bitmap(adev);
1802 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1803 rb_bitmap_width_per_sh);
1804 }
1805 }
1806 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1807
1808 adev->gfx.config.backend_enable_mask = active_rbs;
1809 adev->gfx.config.num_rbs = hweight32(active_rbs);
1810
1811 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1812 adev->gfx.config.max_shader_engines, 16);
1813
1814 gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
1815
1816 if (!adev->gfx.config.backend_enable_mask ||
1817 adev->gfx.config.num_rbs >= num_rb_pipes) {
1818 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1819 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1820 } else {
1821 gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
1822 adev->gfx.config.backend_enable_mask,
1823 num_rb_pipes);
1824 }
1825
1826
1827 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1828 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1829 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1830 adev->gfx.config.rb_config[i][j].rb_backend_disable =
1831 RREG32(mmCC_RB_BACKEND_DISABLE);
1832 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
1833 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1834 adev->gfx.config.rb_config[i][j].raster_config =
1835 RREG32(mmPA_SC_RASTER_CONFIG);
1836 adev->gfx.config.rb_config[i][j].raster_config_1 =
1837 RREG32(mmPA_SC_RASTER_CONFIG_1);
1838 }
1839 }
1840 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1841 mutex_unlock(&adev->grbm_idx_mutex);
1842 }
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852 #define DEFAULT_SH_MEM_BASES (0x6000)
1853 #define FIRST_COMPUTE_VMID (8)
1854 #define LAST_COMPUTE_VMID (16)
1855 static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
1856 {
1857 int i;
1858 uint32_t sh_mem_config;
1859 uint32_t sh_mem_bases;
1860
1861
1862
1863
1864
1865
1866
1867 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1868 sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1869 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1870 sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
1871 mutex_lock(&adev->srbm_mutex);
1872 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1873 cik_srbm_select(adev, 0, 0, 0, i);
1874
1875 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
1876 WREG32(mmSH_MEM_APE1_BASE, 1);
1877 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1878 WREG32(mmSH_MEM_BASES, sh_mem_bases);
1879 }
1880 cik_srbm_select(adev, 0, 0, 0, 0);
1881 mutex_unlock(&adev->srbm_mutex);
1882
1883
1884
1885 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1886 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
1887 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
1888 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
1889 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
1890 }
1891 }
1892
1893 static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
1894 {
1895 int vmid;
1896
1897
1898
1899
1900
1901
1902
1903 for (vmid = 1; vmid < 16; vmid++) {
1904 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
1905 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
1906 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
1907 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
1908 }
1909 }
1910
1911 static void gfx_v7_0_config_init(struct amdgpu_device *adev)
1912 {
1913 adev->gfx.config.double_offchip_lds_buf = 1;
1914 }
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924 static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
1925 {
1926 u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
1927 u32 tmp;
1928 int i;
1929
1930 WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
1931
1932 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1933 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1934 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
1935
1936 gfx_v7_0_tiling_mode_table_init(adev);
1937
1938 gfx_v7_0_setup_rb(adev);
1939 gfx_v7_0_get_cu_info(adev);
1940 gfx_v7_0_config_init(adev);
1941
1942
1943 WREG32(mmCP_MEQ_THRESHOLDS,
1944 (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
1945 (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
1946
1947 mutex_lock(&adev->grbm_idx_mutex);
1948
1949
1950
1951
1952 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1953
1954
1955
1956 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1957 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1958 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
1959 MTYPE_NC);
1960 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
1961 MTYPE_UC);
1962 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
1963
1964 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
1965 SWIZZLE_ENABLE, 1);
1966 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1967 ELEMENT_SIZE, 1);
1968 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1969 INDEX_STRIDE, 3);
1970 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
1971
1972 mutex_lock(&adev->srbm_mutex);
1973 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
1974 if (i == 0)
1975 sh_mem_base = 0;
1976 else
1977 sh_mem_base = adev->gmc.shared_aperture_start >> 48;
1978 cik_srbm_select(adev, 0, 0, 0, i);
1979
1980 WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
1981 WREG32(mmSH_MEM_APE1_BASE, 1);
1982 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1983 WREG32(mmSH_MEM_BASES, sh_mem_base);
1984 }
1985 cik_srbm_select(adev, 0, 0, 0, 0);
1986 mutex_unlock(&adev->srbm_mutex);
1987
1988 gfx_v7_0_init_compute_vmid(adev);
1989 gfx_v7_0_init_gds_vmid(adev);
1990
1991 WREG32(mmSX_DEBUG_1, 0x20);
1992
1993 WREG32(mmTA_CNTL_AUX, 0x00010000);
1994
1995 tmp = RREG32(mmSPI_CONFIG_CNTL);
1996 tmp |= 0x03000000;
1997 WREG32(mmSPI_CONFIG_CNTL, tmp);
1998
1999 WREG32(mmSQ_CONFIG, 1);
2000
2001 WREG32(mmDB_DEBUG, 0);
2002
2003 tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
2004 tmp |= 0x00000400;
2005 WREG32(mmDB_DEBUG2, tmp);
2006
2007 tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
2008 tmp |= 0x00020200;
2009 WREG32(mmDB_DEBUG3, tmp);
2010
2011 tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
2012 tmp |= 0x00018208;
2013 WREG32(mmCB_HW_CONTROL, tmp);
2014
2015 WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
2016
2017 WREG32(mmPA_SC_FIFO_SIZE,
2018 ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2019 (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2020 (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2021 (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
2022
2023 WREG32(mmVGT_NUM_INSTANCES, 1);
2024
2025 WREG32(mmCP_PERFMON_CNTL, 0);
2026
2027 WREG32(mmSQ_CONFIG, 0);
2028
2029 WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
2030 ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
2031 (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
2032
2033 WREG32(mmVGT_CACHE_INVALIDATION,
2034 (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
2035 (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
2036
2037 WREG32(mmVGT_GS_VERTEX_REUSE, 16);
2038 WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
2039
2040 WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
2041 (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
2042 WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
2043
2044 tmp = RREG32(mmSPI_ARB_PRIORITY);
2045 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
2046 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
2047 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
2048 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
2049 WREG32(mmSPI_ARB_PRIORITY, tmp);
2050
2051 mutex_unlock(&adev->grbm_idx_mutex);
2052
2053 udelay(50);
2054 }
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069 static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2070 {
2071 adev->gfx.scratch.num_reg = 8;
2072 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2073 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
2074 }
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087 static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2088 {
2089 struct amdgpu_device *adev = ring->adev;
2090 uint32_t scratch;
2091 uint32_t tmp = 0;
2092 unsigned i;
2093 int r;
2094
2095 r = amdgpu_gfx_scratch_get(adev, &scratch);
2096 if (r)
2097 return r;
2098
2099 WREG32(scratch, 0xCAFEDEAD);
2100 r = amdgpu_ring_alloc(ring, 3);
2101 if (r)
2102 goto error_free_scratch;
2103
2104 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2105 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2106 amdgpu_ring_write(ring, 0xDEADBEEF);
2107 amdgpu_ring_commit(ring);
2108
2109 for (i = 0; i < adev->usec_timeout; i++) {
2110 tmp = RREG32(scratch);
2111 if (tmp == 0xDEADBEEF)
2112 break;
2113 udelay(1);
2114 }
2115 if (i >= adev->usec_timeout)
2116 r = -ETIMEDOUT;
2117
2118 error_free_scratch:
2119 amdgpu_gfx_scratch_free(adev, scratch);
2120 return r;
2121 }
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131 static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2132 {
2133 u32 ref_and_mask;
2134 int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2135
2136 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
2137 switch (ring->me) {
2138 case 1:
2139 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2140 break;
2141 case 2:
2142 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2143 break;
2144 default:
2145 return;
2146 }
2147 } else {
2148 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2149 }
2150
2151 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2152 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) |
2153 WAIT_REG_MEM_FUNCTION(3) |
2154 WAIT_REG_MEM_ENGINE(usepfp)));
2155 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2156 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2157 amdgpu_ring_write(ring, ref_and_mask);
2158 amdgpu_ring_write(ring, ref_and_mask);
2159 amdgpu_ring_write(ring, 0x20);
2160 }
2161
2162 static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
2163 {
2164 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2165 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
2166 EVENT_INDEX(4));
2167
2168 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2169 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
2170 EVENT_INDEX(0));
2171 }
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182 static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2183 u64 seq, unsigned flags)
2184 {
2185 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2186 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2187
2188
2189
2190 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2191 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2192 EOP_TC_ACTION_EN |
2193 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2194 EVENT_INDEX(5)));
2195 amdgpu_ring_write(ring, addr & 0xfffffffc);
2196 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2197 DATA_SEL(1) | INT_SEL(0));
2198 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2199 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2200
2201
2202 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2203 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2204 EOP_TC_ACTION_EN |
2205 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2206 EVENT_INDEX(5)));
2207 amdgpu_ring_write(ring, addr & 0xfffffffc);
2208 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2209 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2210 amdgpu_ring_write(ring, lower_32_bits(seq));
2211 amdgpu_ring_write(ring, upper_32_bits(seq));
2212 }
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223 static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2224 u64 addr, u64 seq,
2225 unsigned flags)
2226 {
2227 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2228 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2229
2230
2231 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2232 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2233 EOP_TC_ACTION_EN |
2234 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2235 EVENT_INDEX(5)));
2236 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2237 amdgpu_ring_write(ring, addr & 0xfffffffc);
2238 amdgpu_ring_write(ring, upper_32_bits(addr));
2239 amdgpu_ring_write(ring, lower_32_bits(seq));
2240 amdgpu_ring_write(ring, upper_32_bits(seq));
2241 }
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258 static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2259 struct amdgpu_job *job,
2260 struct amdgpu_ib *ib,
2261 uint32_t flags)
2262 {
2263 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2264 u32 header, control = 0;
2265
2266
2267 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2268 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2269 amdgpu_ring_write(ring, 0);
2270 }
2271
2272 if (ib->flags & AMDGPU_IB_FLAG_CE)
2273 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2274 else
2275 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2276
2277 control |= ib->length_dw | (vmid << 24);
2278
2279 amdgpu_ring_write(ring, header);
2280 amdgpu_ring_write(ring,
2281 #ifdef __BIG_ENDIAN
2282 (2 << 0) |
2283 #endif
2284 (ib->gpu_addr & 0xFFFFFFFC));
2285 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2286 amdgpu_ring_write(ring, control);
2287 }
2288
2289 static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2290 struct amdgpu_job *job,
2291 struct amdgpu_ib *ib,
2292 uint32_t flags)
2293 {
2294 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2295 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
2308 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2309 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
2310 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
2311 }
2312
2313 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
2314 amdgpu_ring_write(ring,
2315 #ifdef __BIG_ENDIAN
2316 (2 << 0) |
2317 #endif
2318 (ib->gpu_addr & 0xFFFFFFFC));
2319 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2320 amdgpu_ring_write(ring, control);
2321 }
2322
2323 static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2324 {
2325 uint32_t dw2 = 0;
2326
2327 dw2 |= 0x80000000;
2328 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2329 gfx_v7_0_ring_emit_vgt_flush(ring);
2330
2331 dw2 |= 0x8001;
2332
2333 dw2 |= 0x01000000;
2334
2335 dw2 |= 0x10002;
2336 }
2337
2338 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2339 amdgpu_ring_write(ring, dw2);
2340 amdgpu_ring_write(ring, 0);
2341 }
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352 static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2353 {
2354 struct amdgpu_device *adev = ring->adev;
2355 struct amdgpu_ib ib;
2356 struct dma_fence *f = NULL;
2357 uint32_t scratch;
2358 uint32_t tmp = 0;
2359 long r;
2360
2361 r = amdgpu_gfx_scratch_get(adev, &scratch);
2362 if (r)
2363 return r;
2364
2365 WREG32(scratch, 0xCAFEDEAD);
2366 memset(&ib, 0, sizeof(ib));
2367 r = amdgpu_ib_get(adev, NULL, 256, &ib);
2368 if (r)
2369 goto err1;
2370
2371 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2372 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2373 ib.ptr[2] = 0xDEADBEEF;
2374 ib.length_dw = 3;
2375
2376 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
2377 if (r)
2378 goto err2;
2379
2380 r = dma_fence_wait_timeout(f, false, timeout);
2381 if (r == 0) {
2382 r = -ETIMEDOUT;
2383 goto err2;
2384 } else if (r < 0) {
2385 goto err2;
2386 }
2387 tmp = RREG32(scratch);
2388 if (tmp == 0xDEADBEEF)
2389 r = 0;
2390 else
2391 r = -EINVAL;
2392
2393 err2:
2394 amdgpu_ib_free(adev, &ib, NULL);
2395 dma_fence_put(f);
2396 err1:
2397 amdgpu_gfx_scratch_free(adev, scratch);
2398 return r;
2399 }
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432 static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2433 {
2434 int i;
2435
2436 if (enable) {
2437 WREG32(mmCP_ME_CNTL, 0);
2438 } else {
2439 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2440 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2441 adev->gfx.gfx_ring[i].sched.ready = false;
2442 }
2443 udelay(50);
2444 }
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454 static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2455 {
2456 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2457 const struct gfx_firmware_header_v1_0 *ce_hdr;
2458 const struct gfx_firmware_header_v1_0 *me_hdr;
2459 const __le32 *fw_data;
2460 unsigned i, fw_size;
2461
2462 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2463 return -EINVAL;
2464
2465 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2466 ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2467 me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2468
2469 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2470 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2471 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2472 adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2473 adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2474 adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2475 adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2476 adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2477 adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2478
2479 gfx_v7_0_cp_gfx_enable(adev, false);
2480
2481
2482 fw_data = (const __le32 *)
2483 (adev->gfx.pfp_fw->data +
2484 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2485 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2486 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2487 for (i = 0; i < fw_size; i++)
2488 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2489 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2490
2491
2492 fw_data = (const __le32 *)
2493 (adev->gfx.ce_fw->data +
2494 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2495 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2496 WREG32(mmCP_CE_UCODE_ADDR, 0);
2497 for (i = 0; i < fw_size; i++)
2498 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2499 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2500
2501
2502 fw_data = (const __le32 *)
2503 (adev->gfx.me_fw->data +
2504 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2505 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2506 WREG32(mmCP_ME_RAM_WADDR, 0);
2507 for (i = 0; i < fw_size; i++)
2508 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2509 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2510
2511 return 0;
2512 }
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523 static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2524 {
2525 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2526 const struct cs_section_def *sect = NULL;
2527 const struct cs_extent_def *ext = NULL;
2528 int r, i;
2529
2530
2531 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2532 WREG32(mmCP_ENDIAN_SWAP, 0);
2533 WREG32(mmCP_DEVICE_ID, 1);
2534
2535 gfx_v7_0_cp_gfx_enable(adev, true);
2536
2537 r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
2538 if (r) {
2539 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2540 return r;
2541 }
2542
2543
2544 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2545 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2546 amdgpu_ring_write(ring, 0x8000);
2547 amdgpu_ring_write(ring, 0x8000);
2548
2549
2550 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2551 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2552
2553 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2554 amdgpu_ring_write(ring, 0x80000000);
2555 amdgpu_ring_write(ring, 0x80000000);
2556
2557 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2558 for (ext = sect->section; ext->extent != NULL; ++ext) {
2559 if (sect->id == SECT_CONTEXT) {
2560 amdgpu_ring_write(ring,
2561 PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2562 amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2563 for (i = 0; i < ext->reg_count; i++)
2564 amdgpu_ring_write(ring, ext->extent[i]);
2565 }
2566 }
2567 }
2568
2569 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2570 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2571 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
2572 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
2573
2574 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2575 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2576
2577 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2578 amdgpu_ring_write(ring, 0);
2579
2580 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2581 amdgpu_ring_write(ring, 0x00000316);
2582 amdgpu_ring_write(ring, 0x0000000e);
2583 amdgpu_ring_write(ring, 0x00000010);
2584
2585 amdgpu_ring_commit(ring);
2586
2587 return 0;
2588 }
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599 static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2600 {
2601 struct amdgpu_ring *ring;
2602 u32 tmp;
2603 u32 rb_bufsz;
2604 u64 rb_addr, rptr_addr;
2605 int r;
2606
2607 WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2608 if (adev->asic_type != CHIP_HAWAII)
2609 WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2610
2611
2612 WREG32(mmCP_RB_WPTR_DELAY, 0);
2613
2614
2615 WREG32(mmCP_RB_VMID, 0);
2616
2617 WREG32(mmSCRATCH_ADDR, 0);
2618
2619
2620
2621 ring = &adev->gfx.gfx_ring[0];
2622 rb_bufsz = order_base_2(ring->ring_size / 8);
2623 tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2624 #ifdef __BIG_ENDIAN
2625 tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2626 #endif
2627 WREG32(mmCP_RB0_CNTL, tmp);
2628
2629
2630 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2631 ring->wptr = 0;
2632 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2633
2634
2635 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2636 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2637 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2638
2639
2640 WREG32(mmSCRATCH_UMSK, 0);
2641
2642 mdelay(1);
2643 WREG32(mmCP_RB0_CNTL, tmp);
2644
2645 rb_addr = ring->gpu_addr >> 8;
2646 WREG32(mmCP_RB0_BASE, rb_addr);
2647 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2648
2649
2650 gfx_v7_0_cp_gfx_start(adev);
2651 r = amdgpu_ring_test_helper(ring);
2652 if (r)
2653 return r;
2654
2655 return 0;
2656 }
2657
2658 static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
2659 {
2660 return ring->adev->wb.wb[ring->rptr_offs];
2661 }
2662
2663 static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
2664 {
2665 struct amdgpu_device *adev = ring->adev;
2666
2667 return RREG32(mmCP_RB0_WPTR);
2668 }
2669
2670 static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
2671 {
2672 struct amdgpu_device *adev = ring->adev;
2673
2674 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2675 (void)RREG32(mmCP_RB0_WPTR);
2676 }
2677
2678 static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
2679 {
2680
2681 return ring->adev->wb.wb[ring->wptr_offs];
2682 }
2683
2684 static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
2685 {
2686 struct amdgpu_device *adev = ring->adev;
2687
2688
2689 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2690 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2691 }
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701 static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2702 {
2703 int i;
2704
2705 if (enable) {
2706 WREG32(mmCP_MEC_CNTL, 0);
2707 } else {
2708 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2709 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2710 adev->gfx.compute_ring[i].sched.ready = false;
2711 }
2712 udelay(50);
2713 }
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723 static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2724 {
2725 const struct gfx_firmware_header_v1_0 *mec_hdr;
2726 const __le32 *fw_data;
2727 unsigned i, fw_size;
2728
2729 if (!adev->gfx.mec_fw)
2730 return -EINVAL;
2731
2732 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2733 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2734 adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
2735 adev->gfx.mec_feature_version = le32_to_cpu(
2736 mec_hdr->ucode_feature_version);
2737
2738 gfx_v7_0_cp_compute_enable(adev, false);
2739
2740
2741 fw_data = (const __le32 *)
2742 (adev->gfx.mec_fw->data +
2743 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2744 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
2745 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2746 for (i = 0; i < fw_size; i++)
2747 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
2748 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2749
2750 if (adev->asic_type == CHIP_KAVERI) {
2751 const struct gfx_firmware_header_v1_0 *mec2_hdr;
2752
2753 if (!adev->gfx.mec2_fw)
2754 return -EINVAL;
2755
2756 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
2757 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
2758 adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
2759 adev->gfx.mec2_feature_version = le32_to_cpu(
2760 mec2_hdr->ucode_feature_version);
2761
2762
2763 fw_data = (const __le32 *)
2764 (adev->gfx.mec2_fw->data +
2765 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
2766 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
2767 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2768 for (i = 0; i < fw_size; i++)
2769 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
2770 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2771 }
2772
2773 return 0;
2774 }
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784 static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2785 {
2786 int i;
2787
2788 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2789 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2790
2791 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2792 }
2793 }
2794
2795 static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
2796 {
2797 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
2798 }
2799
2800 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2801 {
2802 int r;
2803 u32 *hpd;
2804 size_t mec_hpd_size;
2805
2806 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2807
2808
2809 amdgpu_gfx_compute_queue_acquire(adev);
2810
2811
2812 mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
2813 * GFX7_MEC_HPD_SIZE * 2;
2814
2815 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2816 AMDGPU_GEM_DOMAIN_VRAM,
2817 &adev->gfx.mec.hpd_eop_obj,
2818 &adev->gfx.mec.hpd_eop_gpu_addr,
2819 (void **)&hpd);
2820 if (r) {
2821 dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
2822 gfx_v7_0_mec_fini(adev);
2823 return r;
2824 }
2825
2826
2827 memset(hpd, 0, mec_hpd_size);
2828
2829 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2830 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2831
2832 return 0;
2833 }
2834
2835 struct hqd_registers
2836 {
2837 u32 cp_mqd_base_addr;
2838 u32 cp_mqd_base_addr_hi;
2839 u32 cp_hqd_active;
2840 u32 cp_hqd_vmid;
2841 u32 cp_hqd_persistent_state;
2842 u32 cp_hqd_pipe_priority;
2843 u32 cp_hqd_queue_priority;
2844 u32 cp_hqd_quantum;
2845 u32 cp_hqd_pq_base;
2846 u32 cp_hqd_pq_base_hi;
2847 u32 cp_hqd_pq_rptr;
2848 u32 cp_hqd_pq_rptr_report_addr;
2849 u32 cp_hqd_pq_rptr_report_addr_hi;
2850 u32 cp_hqd_pq_wptr_poll_addr;
2851 u32 cp_hqd_pq_wptr_poll_addr_hi;
2852 u32 cp_hqd_pq_doorbell_control;
2853 u32 cp_hqd_pq_wptr;
2854 u32 cp_hqd_pq_control;
2855 u32 cp_hqd_ib_base_addr;
2856 u32 cp_hqd_ib_base_addr_hi;
2857 u32 cp_hqd_ib_rptr;
2858 u32 cp_hqd_ib_control;
2859 u32 cp_hqd_iq_timer;
2860 u32 cp_hqd_iq_rptr;
2861 u32 cp_hqd_dequeue_request;
2862 u32 cp_hqd_dma_offload;
2863 u32 cp_hqd_sema_cmd;
2864 u32 cp_hqd_msg_type;
2865 u32 cp_hqd_atomic0_preop_lo;
2866 u32 cp_hqd_atomic0_preop_hi;
2867 u32 cp_hqd_atomic1_preop_lo;
2868 u32 cp_hqd_atomic1_preop_hi;
2869 u32 cp_hqd_hq_scheduler0;
2870 u32 cp_hqd_hq_scheduler1;
2871 u32 cp_mqd_control;
2872 };
2873
2874 static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
2875 int mec, int pipe)
2876 {
2877 u64 eop_gpu_addr;
2878 u32 tmp;
2879 size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
2880 * GFX7_MEC_HPD_SIZE * 2;
2881
2882 mutex_lock(&adev->srbm_mutex);
2883 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
2884
2885 cik_srbm_select(adev, mec + 1, pipe, 0, 0);
2886
2887
2888 WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2889 WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2890
2891
2892 WREG32(mmCP_HPD_EOP_VMID, 0);
2893
2894
2895 tmp = RREG32(mmCP_HPD_EOP_CONTROL);
2896 tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
2897 tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
2898 WREG32(mmCP_HPD_EOP_CONTROL, tmp);
2899
2900 cik_srbm_select(adev, 0, 0, 0, 0);
2901 mutex_unlock(&adev->srbm_mutex);
2902 }
2903
2904 static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
2905 {
2906 int i;
2907
2908
2909 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
2910 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
2911 for (i = 0; i < adev->usec_timeout; i++) {
2912 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
2913 break;
2914 udelay(1);
2915 }
2916
2917 if (i == adev->usec_timeout)
2918 return -ETIMEDOUT;
2919
2920 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
2921 WREG32(mmCP_HQD_PQ_RPTR, 0);
2922 WREG32(mmCP_HQD_PQ_WPTR, 0);
2923 }
2924
2925 return 0;
2926 }
2927
2928 static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
2929 struct cik_mqd *mqd,
2930 uint64_t mqd_gpu_addr,
2931 struct amdgpu_ring *ring)
2932 {
2933 u64 hqd_gpu_addr;
2934 u64 wb_gpu_addr;
2935
2936
2937 memset(mqd, 0, sizeof(struct cik_mqd));
2938
2939 mqd->header = 0xC0310800;
2940 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2941 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2942 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2943 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2944
2945
2946 mqd->cp_hqd_pq_doorbell_control =
2947 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
2948 if (ring->use_doorbell)
2949 mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2950 else
2951 mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2952
2953
2954 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
2955 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2956
2957
2958 mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
2959 mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
2960
2961
2962 hqd_gpu_addr = ring->gpu_addr >> 8;
2963 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2964 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2965
2966
2967 mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
2968 mqd->cp_hqd_pq_control &=
2969 ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
2970 CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
2971
2972 mqd->cp_hqd_pq_control |=
2973 order_base_2(ring->ring_size / 8);
2974 mqd->cp_hqd_pq_control |=
2975 (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
2976 #ifdef __BIG_ENDIAN
2977 mqd->cp_hqd_pq_control |=
2978 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
2979 #endif
2980 mqd->cp_hqd_pq_control &=
2981 ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
2982 CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
2983 CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
2984 mqd->cp_hqd_pq_control |=
2985 CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
2986 CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK;
2987
2988
2989 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2990 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2991 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2992
2993
2994 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2995 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2996 mqd->cp_hqd_pq_rptr_report_addr_hi =
2997 upper_32_bits(wb_gpu_addr) & 0xffff;
2998
2999
3000 if (ring->use_doorbell) {
3001 mqd->cp_hqd_pq_doorbell_control =
3002 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3003 mqd->cp_hqd_pq_doorbell_control &=
3004 ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
3005 mqd->cp_hqd_pq_doorbell_control |=
3006 (ring->doorbell_index <<
3007 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
3008 mqd->cp_hqd_pq_doorbell_control |=
3009 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3010 mqd->cp_hqd_pq_doorbell_control &=
3011 ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
3012 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
3013
3014 } else {
3015 mqd->cp_hqd_pq_doorbell_control = 0;
3016 }
3017
3018
3019 ring->wptr = 0;
3020 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
3021 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3022
3023
3024 mqd->cp_hqd_vmid = 0;
3025
3026
3027 mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
3028 mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
3029 mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
3030 mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
3031 mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
3032 mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
3033 mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
3034 mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
3035 mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
3036 mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
3037 mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
3038 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3039 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3040 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
3041 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
3042 mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
3043
3044
3045 mqd->cp_hqd_active = 1;
3046 }
3047
3048 int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
3049 {
3050 uint32_t tmp;
3051 uint32_t mqd_reg;
3052 uint32_t *mqd_data;
3053
3054
3055 mqd_data = &mqd->cp_mqd_base_addr_lo;
3056
3057
3058 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3059 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3060 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3061
3062
3063 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
3064 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3065
3066
3067 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
3068 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3069
3070 return 0;
3071 }
3072
3073 static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
3074 {
3075 int r;
3076 u64 mqd_gpu_addr;
3077 struct cik_mqd *mqd;
3078 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
3079
3080 r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
3081 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
3082 &mqd_gpu_addr, (void **)&mqd);
3083 if (r) {
3084 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3085 return r;
3086 }
3087
3088 mutex_lock(&adev->srbm_mutex);
3089 cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3090
3091 gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
3092 gfx_v7_0_mqd_deactivate(adev);
3093 gfx_v7_0_mqd_commit(adev, mqd);
3094
3095 cik_srbm_select(adev, 0, 0, 0, 0);
3096 mutex_unlock(&adev->srbm_mutex);
3097
3098 amdgpu_bo_kunmap(ring->mqd_obj);
3099 amdgpu_bo_unreserve(ring->mqd_obj);
3100 return 0;
3101 }
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112 static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3113 {
3114 int r, i, j;
3115 u32 tmp;
3116 struct amdgpu_ring *ring;
3117
3118
3119 tmp = RREG32(mmCP_CPF_DEBUG);
3120 tmp |= (1 << 23);
3121 WREG32(mmCP_CPF_DEBUG, tmp);
3122
3123
3124 for (i = 0; i < adev->gfx.mec.num_mec; i++)
3125 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
3126 gfx_v7_0_compute_pipe_init(adev, i, j);
3127
3128
3129 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3130 r = gfx_v7_0_compute_queue_init(adev, i);
3131 if (r) {
3132 gfx_v7_0_cp_compute_fini(adev);
3133 return r;
3134 }
3135 }
3136
3137 gfx_v7_0_cp_compute_enable(adev, true);
3138
3139 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3140 ring = &adev->gfx.compute_ring[i];
3141 amdgpu_ring_test_helper(ring);
3142 }
3143
3144 return 0;
3145 }
3146
3147 static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
3148 {
3149 gfx_v7_0_cp_gfx_enable(adev, enable);
3150 gfx_v7_0_cp_compute_enable(adev, enable);
3151 }
3152
/**
 * gfx_v7_0_cp_load_microcode - load the gfx and compute CP microcode
 * @adev: amdgpu device pointer
 *
 * Loads the gfx microcode and, only if that succeeded, the compute
 * microcode.
 *
 * Returns 0 on success or the first non-zero error encountered.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int ret = gfx_v7_0_cp_gfx_load_microcode(adev);

	/* compute microcode is only attempted once gfx loaded cleanly */
	if (!ret)
		ret = gfx_v7_0_cp_compute_load_microcode(adev);

	return ret;
}
3166
3167 static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3168 bool enable)
3169 {
3170 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3171
3172 if (enable)
3173 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3174 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3175 else
3176 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3177 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3178 WREG32(mmCP_INT_CNTL_RING0, tmp);
3179 }
3180
3181 static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3182 {
3183 int r;
3184
3185 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3186
3187 r = gfx_v7_0_cp_load_microcode(adev);
3188 if (r)
3189 return r;
3190
3191 r = gfx_v7_0_cp_gfx_resume(adev);
3192 if (r)
3193 return r;
3194 r = gfx_v7_0_cp_compute_resume(adev);
3195 if (r)
3196 return r;
3197
3198 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3199
3200 return 0;
3201 }
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211 static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3212 {
3213 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3214 uint32_t seq = ring->fence_drv.sync_seq;
3215 uint64_t addr = ring->fence_drv.gpu_addr;
3216
3217 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3218 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) |
3219 WAIT_REG_MEM_FUNCTION(3) |
3220 WAIT_REG_MEM_ENGINE(usepfp)));
3221 amdgpu_ring_write(ring, addr & 0xfffffffc);
3222 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3223 amdgpu_ring_write(ring, seq);
3224 amdgpu_ring_write(ring, 0xffffffff);
3225 amdgpu_ring_write(ring, 4);
3226
3227 if (usepfp) {
3228
3229 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3230 amdgpu_ring_write(ring, 0);
3231 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3232 amdgpu_ring_write(ring, 0);
3233 }
3234 }
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250 static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3251 unsigned vmid, uint64_t pd_addr)
3252 {
3253 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3254
3255 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3256
3257
3258 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3259 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |
3260 WAIT_REG_MEM_FUNCTION(0) |
3261 WAIT_REG_MEM_ENGINE(0)));
3262 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3263 amdgpu_ring_write(ring, 0);
3264 amdgpu_ring_write(ring, 0);
3265 amdgpu_ring_write(ring, 0);
3266 amdgpu_ring_write(ring, 0x20);
3267
3268
3269 if (usepfp) {
3270
3271 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3272 amdgpu_ring_write(ring, 0x0);
3273
3274
3275 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3276 amdgpu_ring_write(ring, 0);
3277 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3278 amdgpu_ring_write(ring, 0);
3279 }
3280 }
3281
3282 static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
3283 uint32_t reg, uint32_t val)
3284 {
3285 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3286
3287 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3288 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3289 WRITE_DATA_DST_SEL(0)));
3290 amdgpu_ring_write(ring, reg);
3291 amdgpu_ring_write(ring, 0);
3292 amdgpu_ring_write(ring, val);
3293 }
3294
3295
3296
3297
3298
3299
3300 static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3301 {
3302 const u32 *src_ptr;
3303 u32 dws;
3304 const struct cs_section_def *cs_data;
3305 int r;
3306
3307
3308 if (adev->flags & AMD_IS_APU) {
3309 if (adev->asic_type == CHIP_KAVERI) {
3310 adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3311 adev->gfx.rlc.reg_list_size =
3312 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3313 } else {
3314 adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3315 adev->gfx.rlc.reg_list_size =
3316 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3317 }
3318 }
3319 adev->gfx.rlc.cs_data = ci_cs_data;
3320 adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
3321 adev->gfx.rlc.cp_table_size += 64 * 1024;
3322
3323 src_ptr = adev->gfx.rlc.reg_list;
3324 dws = adev->gfx.rlc.reg_list_size;
3325 dws += (5 * 16) + 48 + 48 + 64;
3326
3327 cs_data = adev->gfx.rlc.cs_data;
3328
3329 if (src_ptr) {
3330
3331 r = amdgpu_gfx_rlc_init_sr(adev, dws);
3332 if (r)
3333 return r;
3334 }
3335
3336 if (cs_data) {
3337
3338 r = amdgpu_gfx_rlc_init_csb(adev);
3339 if (r)
3340 return r;
3341 }
3342
3343 if (adev->gfx.rlc.cp_table_size) {
3344 r = amdgpu_gfx_rlc_init_cpt(adev);
3345 if (r)
3346 return r;
3347 }
3348
3349 return 0;
3350 }
3351
3352 static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3353 {
3354 u32 tmp;
3355
3356 tmp = RREG32(mmRLC_LB_CNTL);
3357 if (enable)
3358 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3359 else
3360 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3361 WREG32(mmRLC_LB_CNTL, tmp);
3362 }
3363
3364 static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3365 {
3366 u32 i, j, k;
3367 u32 mask;
3368
3369 mutex_lock(&adev->grbm_idx_mutex);
3370 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3371 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3372 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
3373 for (k = 0; k < adev->usec_timeout; k++) {
3374 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3375 break;
3376 udelay(1);
3377 }
3378 }
3379 }
3380 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3381 mutex_unlock(&adev->grbm_idx_mutex);
3382
3383 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3384 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3385 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3386 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3387 for (k = 0; k < adev->usec_timeout; k++) {
3388 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3389 break;
3390 udelay(1);
3391 }
3392 }
3393
3394 static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3395 {
3396 u32 tmp;
3397
3398 tmp = RREG32(mmRLC_CNTL);
3399 if (tmp != rlc)
3400 WREG32(mmRLC_CNTL, rlc);
3401 }
3402
3403 static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3404 {
3405 u32 data, orig;
3406
3407 orig = data = RREG32(mmRLC_CNTL);
3408
3409 if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3410 u32 i;
3411
3412 data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3413 WREG32(mmRLC_CNTL, data);
3414
3415 for (i = 0; i < adev->usec_timeout; i++) {
3416 if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3417 break;
3418 udelay(1);
3419 }
3420
3421 gfx_v7_0_wait_for_rlc_serdes(adev);
3422 }
3423
3424 return orig;
3425 }
3426
3427 static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
3428 {
3429 return true;
3430 }
3431
3432 static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
3433 {
3434 u32 tmp, i, mask;
3435
3436 tmp = 0x1 | (1 << 1);
3437 WREG32(mmRLC_GPR_REG2, tmp);
3438
3439 mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3440 RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3441 for (i = 0; i < adev->usec_timeout; i++) {
3442 if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3443 break;
3444 udelay(1);
3445 }
3446
3447 for (i = 0; i < adev->usec_timeout; i++) {
3448 if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3449 break;
3450 udelay(1);
3451 }
3452 }
3453
3454 static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
3455 {
3456 u32 tmp;
3457
3458 tmp = 0x1 | (0 << 1);
3459 WREG32(mmRLC_GPR_REG2, tmp);
3460 }
3461
3462
3463
3464
3465
3466
3467
3468
3469 static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
3470 {
3471 WREG32(mmRLC_CNTL, 0);
3472
3473 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3474
3475 gfx_v7_0_wait_for_rlc_serdes(adev);
3476 }
3477
3478
3479
3480
3481
3482
3483
3484
3485 static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
3486 {
3487 WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
3488
3489 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3490
3491 udelay(50);
3492 }
3493
3494 static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
3495 {
3496 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3497
3498 tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3499 WREG32(mmGRBM_SOFT_RESET, tmp);
3500 udelay(50);
3501 tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3502 WREG32(mmGRBM_SOFT_RESET, tmp);
3503 udelay(50);
3504 }
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515 static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
3516 {
3517 const struct rlc_firmware_header_v1_0 *hdr;
3518 const __le32 *fw_data;
3519 unsigned i, fw_size;
3520 u32 tmp;
3521
3522 if (!adev->gfx.rlc_fw)
3523 return -EINVAL;
3524
3525 hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
3526 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3527 adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
3528 adev->gfx.rlc_feature_version = le32_to_cpu(
3529 hdr->ucode_feature_version);
3530
3531 adev->gfx.rlc.funcs->stop(adev);
3532
3533
3534 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3535 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
3536
3537 adev->gfx.rlc.funcs->reset(adev);
3538
3539 gfx_v7_0_init_pg(adev);
3540
3541 WREG32(mmRLC_LB_CNTR_INIT, 0);
3542 WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
3543
3544 mutex_lock(&adev->grbm_idx_mutex);
3545 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3546 WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
3547 WREG32(mmRLC_LB_PARAMS, 0x00600408);
3548 WREG32(mmRLC_LB_CNTL, 0x80000004);
3549 mutex_unlock(&adev->grbm_idx_mutex);
3550
3551 WREG32(mmRLC_MC_CNTL, 0);
3552 WREG32(mmRLC_UCODE_CNTL, 0);
3553
3554 fw_data = (const __le32 *)
3555 (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3556 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3557 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3558 for (i = 0; i < fw_size; i++)
3559 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3560 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3561
3562
3563 gfx_v7_0_enable_lbpw(adev, false);
3564
3565 if (adev->asic_type == CHIP_BONAIRE)
3566 WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
3567
3568 adev->gfx.rlc.funcs->start(adev);
3569
3570 return 0;
3571 }
3572
3573 static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
3574 {
3575 u32 data, orig, tmp, tmp2;
3576
3577 orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
3578
3579 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3580 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3581
3582 tmp = gfx_v7_0_halt_rlc(adev);
3583
3584 mutex_lock(&adev->grbm_idx_mutex);
3585 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3586 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3587 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3588 tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3589 RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
3590 RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
3591 WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
3592 mutex_unlock(&adev->grbm_idx_mutex);
3593
3594 gfx_v7_0_update_rlc(adev, tmp);
3595
3596 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3597 if (orig != data)
3598 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3599
3600 } else {
3601 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3602
3603 RREG32(mmCB_CGTT_SCLK_CTRL);
3604 RREG32(mmCB_CGTT_SCLK_CTRL);
3605 RREG32(mmCB_CGTT_SCLK_CTRL);
3606 RREG32(mmCB_CGTT_SCLK_CTRL);
3607
3608 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3609 if (orig != data)
3610 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3611
3612 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3613 }
3614 }
3615
3616 static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
3617 {
3618 u32 data, orig, tmp = 0;
3619
3620 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3621 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3622 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3623 orig = data = RREG32(mmCP_MEM_SLP_CNTL);
3624 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3625 if (orig != data)
3626 WREG32(mmCP_MEM_SLP_CNTL, data);
3627 }
3628 }
3629
3630 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3631 data |= 0x00000001;
3632 data &= 0xfffffffd;
3633 if (orig != data)
3634 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3635
3636 tmp = gfx_v7_0_halt_rlc(adev);
3637
3638 mutex_lock(&adev->grbm_idx_mutex);
3639 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3640 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3641 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3642 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3643 RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
3644 WREG32(mmRLC_SERDES_WR_CTRL, data);
3645 mutex_unlock(&adev->grbm_idx_mutex);
3646
3647 gfx_v7_0_update_rlc(adev, tmp);
3648
3649 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
3650 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3651 data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
3652 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
3653 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
3654 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
3655 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
3656 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
3657 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3658 data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
3659 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
3660 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
3661 if (orig != data)
3662 WREG32(mmCGTS_SM_CTRL_REG, data);
3663 }
3664 } else {
3665 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3666 data |= 0x00000003;
3667 if (orig != data)
3668 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3669
3670 data = RREG32(mmRLC_MEM_SLP_CNTL);
3671 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3672 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3673 WREG32(mmRLC_MEM_SLP_CNTL, data);
3674 }
3675
3676 data = RREG32(mmCP_MEM_SLP_CNTL);
3677 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3678 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3679 WREG32(mmCP_MEM_SLP_CNTL, data);
3680 }
3681
3682 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3683 data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3684 if (orig != data)
3685 WREG32(mmCGTS_SM_CTRL_REG, data);
3686
3687 tmp = gfx_v7_0_halt_rlc(adev);
3688
3689 mutex_lock(&adev->grbm_idx_mutex);
3690 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3691 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3692 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3693 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
3694 WREG32(mmRLC_SERDES_WR_CTRL, data);
3695 mutex_unlock(&adev->grbm_idx_mutex);
3696
3697 gfx_v7_0_update_rlc(adev, tmp);
3698 }
3699 }
3700
3701 static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
3702 bool enable)
3703 {
3704 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3705
3706 if (enable) {
3707 gfx_v7_0_enable_mgcg(adev, true);
3708 gfx_v7_0_enable_cgcg(adev, true);
3709 } else {
3710 gfx_v7_0_enable_cgcg(adev, false);
3711 gfx_v7_0_enable_mgcg(adev, false);
3712 }
3713 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3714 }
3715
3716 static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3717 bool enable)
3718 {
3719 u32 data, orig;
3720
3721 orig = data = RREG32(mmRLC_PG_CNTL);
3722 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3723 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3724 else
3725 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3726 if (orig != data)
3727 WREG32(mmRLC_PG_CNTL, data);
3728 }
3729
3730 static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3731 bool enable)
3732 {
3733 u32 data, orig;
3734
3735 orig = data = RREG32(mmRLC_PG_CNTL);
3736 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3737 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3738 else
3739 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3740 if (orig != data)
3741 WREG32(mmRLC_PG_CNTL, data);
3742 }
3743
3744 static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3745 {
3746 u32 data, orig;
3747
3748 orig = data = RREG32(mmRLC_PG_CNTL);
3749 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3750 data &= ~0x8000;
3751 else
3752 data |= 0x8000;
3753 if (orig != data)
3754 WREG32(mmRLC_PG_CNTL, data);
3755 }
3756
3757 static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3758 {
3759 u32 data, orig;
3760
3761 orig = data = RREG32(mmRLC_PG_CNTL);
3762 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3763 data &= ~0x2000;
3764 else
3765 data |= 0x2000;
3766 if (orig != data)
3767 WREG32(mmRLC_PG_CNTL, data);
3768 }
3769
3770 static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
3771 {
3772 if (adev->asic_type == CHIP_KAVERI)
3773 return 5;
3774 else
3775 return 4;
3776 }
3777
3778 static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
3779 bool enable)
3780 {
3781 u32 data, orig;
3782
3783 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
3784 orig = data = RREG32(mmRLC_PG_CNTL);
3785 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3786 if (orig != data)
3787 WREG32(mmRLC_PG_CNTL, data);
3788
3789 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3790 data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3791 if (orig != data)
3792 WREG32(mmRLC_AUTO_PG_CTRL, data);
3793 } else {
3794 orig = data = RREG32(mmRLC_PG_CNTL);
3795 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3796 if (orig != data)
3797 WREG32(mmRLC_PG_CNTL, data);
3798
3799 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3800 data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3801 if (orig != data)
3802 WREG32(mmRLC_AUTO_PG_CTRL, data);
3803
3804 data = RREG32(mmDB_RENDER_CONTROL);
3805 }
3806 }
3807
3808 static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3809 u32 bitmap)
3810 {
3811 u32 data;
3812
3813 if (!bitmap)
3814 return;
3815
3816 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3817 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3818
3819 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3820 }
3821
3822 static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3823 {
3824 u32 data, mask;
3825
3826 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3827 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3828
3829 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3830 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3831
3832 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3833
3834 return (~data) & mask;
3835 }
3836
3837 static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
3838 {
3839 u32 tmp;
3840
3841 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3842
3843 tmp = RREG32(mmRLC_MAX_PG_CU);
3844 tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
3845 tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
3846 WREG32(mmRLC_MAX_PG_CU, tmp);
3847 }
3848
3849 static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3850 bool enable)
3851 {
3852 u32 data, orig;
3853
3854 orig = data = RREG32(mmRLC_PG_CNTL);
3855 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3856 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3857 else
3858 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3859 if (orig != data)
3860 WREG32(mmRLC_PG_CNTL, data);
3861 }
3862
3863 static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3864 bool enable)
3865 {
3866 u32 data, orig;
3867
3868 orig = data = RREG32(mmRLC_PG_CNTL);
3869 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3870 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3871 else
3872 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3873 if (orig != data)
3874 WREG32(mmRLC_PG_CNTL, data);
3875 }
3876
3877 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
3878 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3879
3880 static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
3881 {
3882 u32 data, orig;
3883 u32 i;
3884
3885 if (adev->gfx.rlc.cs_data) {
3886 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3887 WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3888 WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3889 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
3890 } else {
3891 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3892 for (i = 0; i < 3; i++)
3893 WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
3894 }
3895 if (adev->gfx.rlc.reg_list) {
3896 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
3897 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3898 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
3899 }
3900
3901 orig = data = RREG32(mmRLC_PG_CNTL);
3902 data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
3903 if (orig != data)
3904 WREG32(mmRLC_PG_CNTL, data);
3905
3906 WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
3907 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3908
3909 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3910 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3911 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3912 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3913
3914 data = 0x10101010;
3915 WREG32(mmRLC_PG_DELAY, data);
3916
3917 data = RREG32(mmRLC_PG_DELAY_2);
3918 data &= ~0xff;
3919 data |= 0x3;
3920 WREG32(mmRLC_PG_DELAY_2, data);
3921
3922 data = RREG32(mmRLC_AUTO_PG_CTRL);
3923 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3924 data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3925 WREG32(mmRLC_AUTO_PG_CTRL, data);
3926
3927 }
3928
3929 static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
3930 {
3931 gfx_v7_0_enable_gfx_cgpg(adev, enable);
3932 gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
3933 gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
3934 }
3935
3936 static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
3937 {
3938 u32 count = 0;
3939 const struct cs_section_def *sect = NULL;
3940 const struct cs_extent_def *ext = NULL;
3941
3942 if (adev->gfx.rlc.cs_data == NULL)
3943 return 0;
3944
3945
3946 count += 2;
3947
3948 count += 3;
3949
3950 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
3951 for (ext = sect->section; ext->extent != NULL; ++ext) {
3952 if (sect->id == SECT_CONTEXT)
3953 count += 2 + ext->reg_count;
3954 else
3955 return 0;
3956 }
3957 }
3958
3959 count += 4;
3960
3961 count += 2;
3962
3963 count += 2;
3964
3965 return count;
3966 }
3967
3968 static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
3969 volatile u32 *buffer)
3970 {
3971 u32 count = 0, i;
3972 const struct cs_section_def *sect = NULL;
3973 const struct cs_extent_def *ext = NULL;
3974
3975 if (adev->gfx.rlc.cs_data == NULL)
3976 return;
3977 if (buffer == NULL)
3978 return;
3979
3980 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3981 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3982
3983 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3984 buffer[count++] = cpu_to_le32(0x80000000);
3985 buffer[count++] = cpu_to_le32(0x80000000);
3986
3987 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
3988 for (ext = sect->section; ext->extent != NULL; ++ext) {
3989 if (sect->id == SECT_CONTEXT) {
3990 buffer[count++] =
3991 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
3992 buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3993 for (i = 0; i < ext->reg_count; i++)
3994 buffer[count++] = cpu_to_le32(ext->extent[i]);
3995 } else {
3996 return;
3997 }
3998 }
3999 }
4000
4001 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4002 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4003 switch (adev->asic_type) {
4004 case CHIP_BONAIRE:
4005 buffer[count++] = cpu_to_le32(0x16000012);
4006 buffer[count++] = cpu_to_le32(0x00000000);
4007 break;
4008 case CHIP_KAVERI:
4009 buffer[count++] = cpu_to_le32(0x00000000);
4010 buffer[count++] = cpu_to_le32(0x00000000);
4011 break;
4012 case CHIP_KABINI:
4013 case CHIP_MULLINS:
4014 buffer[count++] = cpu_to_le32(0x00000000);
4015 buffer[count++] = cpu_to_le32(0x00000000);
4016 break;
4017 case CHIP_HAWAII:
4018 buffer[count++] = cpu_to_le32(0x3a00161a);
4019 buffer[count++] = cpu_to_le32(0x0000002e);
4020 break;
4021 default:
4022 buffer[count++] = cpu_to_le32(0x00000000);
4023 buffer[count++] = cpu_to_le32(0x00000000);
4024 break;
4025 }
4026
4027 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4028 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4029
4030 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4031 buffer[count++] = cpu_to_le32(0);
4032 }
4033
4034 static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4035 {
4036 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4037 AMD_PG_SUPPORT_GFX_SMG |
4038 AMD_PG_SUPPORT_GFX_DMG |
4039 AMD_PG_SUPPORT_CP |
4040 AMD_PG_SUPPORT_GDS |
4041 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4042 gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4043 gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4044 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4045 gfx_v7_0_init_gfx_cgpg(adev);
4046 gfx_v7_0_enable_cp_pg(adev, true);
4047 gfx_v7_0_enable_gds_pg(adev, true);
4048 }
4049 gfx_v7_0_init_ao_cu_mask(adev);
4050 gfx_v7_0_update_gfx_pg(adev, true);
4051 }
4052 }
4053
4054 static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4055 {
4056 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4057 AMD_PG_SUPPORT_GFX_SMG |
4058 AMD_PG_SUPPORT_GFX_DMG |
4059 AMD_PG_SUPPORT_CP |
4060 AMD_PG_SUPPORT_GDS |
4061 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4062 gfx_v7_0_update_gfx_pg(adev, false);
4063 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4064 gfx_v7_0_enable_cp_pg(adev, false);
4065 gfx_v7_0_enable_gds_pg(adev, false);
4066 }
4067 }
4068 }
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078 static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4079 {
4080 uint64_t clock;
4081
4082 mutex_lock(&adev->gfx.gpu_clock_mutex);
4083 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4084 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4085 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4086 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4087 return clock;
4088 }
4089
4090 static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4091 uint32_t vmid,
4092 uint32_t gds_base, uint32_t gds_size,
4093 uint32_t gws_base, uint32_t gws_size,
4094 uint32_t oa_base, uint32_t oa_size)
4095 {
4096
4097 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4098 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4099 WRITE_DATA_DST_SEL(0)));
4100 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4101 amdgpu_ring_write(ring, 0);
4102 amdgpu_ring_write(ring, gds_base);
4103
4104
4105 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4106 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4107 WRITE_DATA_DST_SEL(0)));
4108 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4109 amdgpu_ring_write(ring, 0);
4110 amdgpu_ring_write(ring, gds_size);
4111
4112
4113 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4114 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4115 WRITE_DATA_DST_SEL(0)));
4116 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4117 amdgpu_ring_write(ring, 0);
4118 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4119
4120
4121 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4122 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4123 WRITE_DATA_DST_SEL(0)));
4124 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4125 amdgpu_ring_write(ring, 0);
4126 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4127 }
4128
4129 static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4130 {
4131 struct amdgpu_device *adev = ring->adev;
4132 uint32_t value = 0;
4133
4134 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4135 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4136 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4137 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4138 WREG32(mmSQ_CMD, value);
4139 }
4140
4141 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4142 {
4143 WREG32(mmSQ_IND_INDEX,
4144 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4145 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4146 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4147 (SQ_IND_INDEX__FORCE_READ_MASK));
4148 return RREG32(mmSQ_IND_DATA);
4149 }
4150
4151 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4152 uint32_t wave, uint32_t thread,
4153 uint32_t regno, uint32_t num, uint32_t *out)
4154 {
4155 WREG32(mmSQ_IND_INDEX,
4156 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4157 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4158 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4159 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4160 (SQ_IND_INDEX__FORCE_READ_MASK) |
4161 (SQ_IND_INDEX__AUTO_INCR_MASK));
4162 while (num--)
4163 *(out++) = RREG32(mmSQ_IND_DATA);
4164 }
4165
4166 static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
4167 {
4168
4169 dst[(*no_fields)++] = 0;
4170 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
4171 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
4172 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
4173 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
4174 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
4175 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
4176 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
4177 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
4178 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
4179 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
4180 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
4181 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
4182 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
4183 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
4184 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
4185 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
4186 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
4187 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
4188 }
4189
4190 static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
4191 uint32_t wave, uint32_t start,
4192 uint32_t size, uint32_t *dst)
4193 {
4194 wave_read_regs(
4195 adev, simd, wave, 0,
4196 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
4197 }
4198
4199 static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
4200 u32 me, u32 pipe, u32 q, u32 vm)
4201 {
4202 cik_srbm_select(adev, me, pipe, q, vm);
4203 }
4204
4205 static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
4206 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
4207 .select_se_sh = &gfx_v7_0_select_se_sh,
4208 .read_wave_data = &gfx_v7_0_read_wave_data,
4209 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
4210 .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
4211 };
4212
4213 static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
4214 .is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
4215 .set_safe_mode = gfx_v7_0_set_safe_mode,
4216 .unset_safe_mode = gfx_v7_0_unset_safe_mode,
4217 .init = gfx_v7_0_rlc_init,
4218 .get_csb_size = gfx_v7_0_get_csb_size,
4219 .get_csb_buffer = gfx_v7_0_get_csb_buffer,
4220 .get_cp_table_num = gfx_v7_0_cp_pg_table_num,
4221 .resume = gfx_v7_0_rlc_resume,
4222 .stop = gfx_v7_0_rlc_stop,
4223 .reset = gfx_v7_0_rlc_reset,
4224 .start = gfx_v7_0_rlc_start
4225 };
4226
4227 static int gfx_v7_0_early_init(void *handle)
4228 {
4229 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4230
4231 adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4232 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4233 adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
4234 adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
4235 gfx_v7_0_set_ring_funcs(adev);
4236 gfx_v7_0_set_irq_funcs(adev);
4237 gfx_v7_0_set_gds_init(adev);
4238
4239 return 0;
4240 }
4241
4242 static int gfx_v7_0_late_init(void *handle)
4243 {
4244 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4245 int r;
4246
4247 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4248 if (r)
4249 return r;
4250
4251 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4252 if (r)
4253 return r;
4254
4255 return 0;
4256 }
4257
4258 static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4259 {
4260 u32 gb_addr_config;
4261 u32 mc_shared_chmap, mc_arb_ramcfg;
4262 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4263 u32 tmp;
4264
4265 switch (adev->asic_type) {
4266 case CHIP_BONAIRE:
4267 adev->gfx.config.max_shader_engines = 2;
4268 adev->gfx.config.max_tile_pipes = 4;
4269 adev->gfx.config.max_cu_per_sh = 7;
4270 adev->gfx.config.max_sh_per_se = 1;
4271 adev->gfx.config.max_backends_per_se = 2;
4272 adev->gfx.config.max_texture_channel_caches = 4;
4273 adev->gfx.config.max_gprs = 256;
4274 adev->gfx.config.max_gs_threads = 32;
4275 adev->gfx.config.max_hw_contexts = 8;
4276
4277 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4278 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4279 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4280 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4281 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4282 break;
4283 case CHIP_HAWAII:
4284 adev->gfx.config.max_shader_engines = 4;
4285 adev->gfx.config.max_tile_pipes = 16;
4286 adev->gfx.config.max_cu_per_sh = 11;
4287 adev->gfx.config.max_sh_per_se = 1;
4288 adev->gfx.config.max_backends_per_se = 4;
4289 adev->gfx.config.max_texture_channel_caches = 16;
4290 adev->gfx.config.max_gprs = 256;
4291 adev->gfx.config.max_gs_threads = 32;
4292 adev->gfx.config.max_hw_contexts = 8;
4293
4294 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4295 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4296 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4297 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4298 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4299 break;
4300 case CHIP_KAVERI:
4301 adev->gfx.config.max_shader_engines = 1;
4302 adev->gfx.config.max_tile_pipes = 4;
4303 adev->gfx.config.max_cu_per_sh = 8;
4304 adev->gfx.config.max_backends_per_se = 2;
4305 adev->gfx.config.max_sh_per_se = 1;
4306 adev->gfx.config.max_texture_channel_caches = 4;
4307 adev->gfx.config.max_gprs = 256;
4308 adev->gfx.config.max_gs_threads = 16;
4309 adev->gfx.config.max_hw_contexts = 8;
4310
4311 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4312 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4313 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4314 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4315 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4316 break;
4317 case CHIP_KABINI:
4318 case CHIP_MULLINS:
4319 default:
4320 adev->gfx.config.max_shader_engines = 1;
4321 adev->gfx.config.max_tile_pipes = 2;
4322 adev->gfx.config.max_cu_per_sh = 2;
4323 adev->gfx.config.max_sh_per_se = 1;
4324 adev->gfx.config.max_backends_per_se = 1;
4325 adev->gfx.config.max_texture_channel_caches = 2;
4326 adev->gfx.config.max_gprs = 256;
4327 adev->gfx.config.max_gs_threads = 16;
4328 adev->gfx.config.max_hw_contexts = 8;
4329
4330 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4331 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4332 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4333 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4334 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4335 break;
4336 }
4337
4338 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
4339 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4340 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4341
4342 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4343 adev->gfx.config.mem_max_burst_length_bytes = 256;
4344 if (adev->flags & AMD_IS_APU) {
4345
4346 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4347 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4348 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4349
4350 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4351 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4352 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4353
4354
4355 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4356 dimm00_addr_map = 0;
4357 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4358 dimm01_addr_map = 0;
4359 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4360 dimm10_addr_map = 0;
4361 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4362 dimm11_addr_map = 0;
4363
4364
4365
4366 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4367 adev->gfx.config.mem_row_size_in_kb = 2;
4368 else
4369 adev->gfx.config.mem_row_size_in_kb = 1;
4370 } else {
4371 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4372 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4373 if (adev->gfx.config.mem_row_size_in_kb > 4)
4374 adev->gfx.config.mem_row_size_in_kb = 4;
4375 }
4376
4377 adev->gfx.config.shader_engine_tile_size = 32;
4378 adev->gfx.config.num_gpus = 1;
4379 adev->gfx.config.multi_gpu_tile_size = 64;
4380
4381
4382 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4383 switch (adev->gfx.config.mem_row_size_in_kb) {
4384 case 1:
4385 default:
4386 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4387 break;
4388 case 2:
4389 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4390 break;
4391 case 4:
4392 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4393 break;
4394 }
4395 adev->gfx.config.gb_addr_config = gb_addr_config;
4396 }
4397
4398 static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4399 int mec, int pipe, int queue)
4400 {
4401 int r;
4402 unsigned irq_type;
4403 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4404
4405
4406 ring->me = mec + 1;
4407 ring->pipe = pipe;
4408 ring->queue = queue;
4409
4410 ring->ring_obj = NULL;
4411 ring->use_doorbell = true;
4412 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
4413 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4414
4415 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4416 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4417 + ring->pipe;
4418
4419
4420 r = amdgpu_ring_init(adev, ring, 1024,
4421 &adev->gfx.eop_irq, irq_type);
4422 if (r)
4423 return r;
4424
4425
4426 return 0;
4427 }
4428
4429 static int gfx_v7_0_sw_init(void *handle)
4430 {
4431 struct amdgpu_ring *ring;
4432 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4433 int i, j, k, r, ring_id;
4434
4435 switch (adev->asic_type) {
4436 case CHIP_KAVERI:
4437 adev->gfx.mec.num_mec = 2;
4438 break;
4439 case CHIP_BONAIRE:
4440 case CHIP_HAWAII:
4441 case CHIP_KABINI:
4442 case CHIP_MULLINS:
4443 default:
4444 adev->gfx.mec.num_mec = 1;
4445 break;
4446 }
4447 adev->gfx.mec.num_pipe_per_mec = 4;
4448 adev->gfx.mec.num_queue_per_pipe = 8;
4449
4450
4451 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
4452 if (r)
4453 return r;
4454
4455
4456 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
4457 &adev->gfx.priv_reg_irq);
4458 if (r)
4459 return r;
4460
4461
4462 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
4463 &adev->gfx.priv_inst_irq);
4464 if (r)
4465 return r;
4466
4467 gfx_v7_0_scratch_init(adev);
4468
4469 r = gfx_v7_0_init_microcode(adev);
4470 if (r) {
4471 DRM_ERROR("Failed to load gfx firmware!\n");
4472 return r;
4473 }
4474
4475 r = adev->gfx.rlc.funcs->init(adev);
4476 if (r) {
4477 DRM_ERROR("Failed to init rlc BOs!\n");
4478 return r;
4479 }
4480
4481
4482 r = gfx_v7_0_mec_init(adev);
4483 if (r) {
4484 DRM_ERROR("Failed to init MEC BOs!\n");
4485 return r;
4486 }
4487
4488 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4489 ring = &adev->gfx.gfx_ring[i];
4490 ring->ring_obj = NULL;
4491 sprintf(ring->name, "gfx");
4492 r = amdgpu_ring_init(adev, ring, 1024,
4493 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
4494 if (r)
4495 return r;
4496 }
4497
4498
4499 ring_id = 0;
4500 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4501 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4502 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4503 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
4504 continue;
4505
4506 r = gfx_v7_0_compute_ring_init(adev,
4507 ring_id,
4508 i, k, j);
4509 if (r)
4510 return r;
4511
4512 ring_id++;
4513 }
4514 }
4515 }
4516
4517 adev->gfx.ce_ram_size = 0x8000;
4518
4519 gfx_v7_0_gpu_early_init(adev);
4520
4521 return r;
4522 }
4523
4524 static int gfx_v7_0_sw_fini(void *handle)
4525 {
4526 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4527 int i;
4528
4529 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4530 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4531 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4532 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4533
4534 gfx_v7_0_cp_compute_fini(adev);
4535 amdgpu_gfx_rlc_fini(adev);
4536 gfx_v7_0_mec_fini(adev);
4537 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
4538 &adev->gfx.rlc.clear_state_gpu_addr,
4539 (void **)&adev->gfx.rlc.cs_ptr);
4540 if (adev->gfx.rlc.cp_table_size) {
4541 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
4542 &adev->gfx.rlc.cp_table_gpu_addr,
4543 (void **)&adev->gfx.rlc.cp_table_ptr);
4544 }
4545 gfx_v7_0_free_microcode(adev);
4546
4547 return 0;
4548 }
4549
4550 static int gfx_v7_0_hw_init(void *handle)
4551 {
4552 int r;
4553 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4554
4555 gfx_v7_0_constants_init(adev);
4556
4557
4558 r = adev->gfx.rlc.funcs->resume(adev);
4559 if (r)
4560 return r;
4561
4562 r = gfx_v7_0_cp_resume(adev);
4563 if (r)
4564 return r;
4565
4566 return r;
4567 }
4568
4569 static int gfx_v7_0_hw_fini(void *handle)
4570 {
4571 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4572
4573 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4574 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4575 gfx_v7_0_cp_enable(adev, false);
4576 adev->gfx.rlc.funcs->stop(adev);
4577 gfx_v7_0_fini_pg(adev);
4578
4579 return 0;
4580 }
4581
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v7_0_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v7_0_hw_fini(adev);
}
4588
/* Resume is implemented as a plain hardware bring-up. */
static int gfx_v7_0_resume(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v7_0_hw_init(adev);
}
4595
4596 static bool gfx_v7_0_is_idle(void *handle)
4597 {
4598 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4599
4600 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4601 return false;
4602 else
4603 return true;
4604 }
4605
4606 static int gfx_v7_0_wait_for_idle(void *handle)
4607 {
4608 unsigned i;
4609 u32 tmp;
4610 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4611
4612 for (i = 0; i < adev->usec_timeout; i++) {
4613
4614 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4615
4616 if (!tmp)
4617 return 0;
4618 udelay(1);
4619 }
4620 return -ETIMEDOUT;
4621 }
4622
4623 static int gfx_v7_0_soft_reset(void *handle)
4624 {
4625 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4626 u32 tmp;
4627 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4628
4629
4630 tmp = RREG32(mmGRBM_STATUS);
4631 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4632 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4633 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4634 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4635 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4636 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
4637 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
4638 GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
4639
4640 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4641 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
4642 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4643 }
4644
4645
4646 tmp = RREG32(mmGRBM_STATUS2);
4647 if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
4648 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4649
4650
4651 tmp = RREG32(mmSRBM_STATUS);
4652 if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
4653 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4654
4655 if (grbm_soft_reset || srbm_soft_reset) {
4656
4657 gfx_v7_0_fini_pg(adev);
4658 gfx_v7_0_update_cg(adev, false);
4659
4660
4661 adev->gfx.rlc.funcs->stop(adev);
4662
4663
4664 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
4665
4666
4667 WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
4668
4669 if (grbm_soft_reset) {
4670 tmp = RREG32(mmGRBM_SOFT_RESET);
4671 tmp |= grbm_soft_reset;
4672 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4673 WREG32(mmGRBM_SOFT_RESET, tmp);
4674 tmp = RREG32(mmGRBM_SOFT_RESET);
4675
4676 udelay(50);
4677
4678 tmp &= ~grbm_soft_reset;
4679 WREG32(mmGRBM_SOFT_RESET, tmp);
4680 tmp = RREG32(mmGRBM_SOFT_RESET);
4681 }
4682
4683 if (srbm_soft_reset) {
4684 tmp = RREG32(mmSRBM_SOFT_RESET);
4685 tmp |= srbm_soft_reset;
4686 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4687 WREG32(mmSRBM_SOFT_RESET, tmp);
4688 tmp = RREG32(mmSRBM_SOFT_RESET);
4689
4690 udelay(50);
4691
4692 tmp &= ~srbm_soft_reset;
4693 WREG32(mmSRBM_SOFT_RESET, tmp);
4694 tmp = RREG32(mmSRBM_SOFT_RESET);
4695 }
4696
4697 udelay(50);
4698 }
4699 return 0;
4700 }
4701
4702 static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4703 enum amdgpu_interrupt_state state)
4704 {
4705 u32 cp_int_cntl;
4706
4707 switch (state) {
4708 case AMDGPU_IRQ_STATE_DISABLE:
4709 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4710 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4711 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4712 break;
4713 case AMDGPU_IRQ_STATE_ENABLE:
4714 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4715 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4716 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4717 break;
4718 default:
4719 break;
4720 }
4721 }
4722
4723 static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4724 int me, int pipe,
4725 enum amdgpu_interrupt_state state)
4726 {
4727 u32 mec_int_cntl, mec_int_cntl_reg;
4728
4729
4730
4731
4732
4733
4734
4735 if (me == 1) {
4736 switch (pipe) {
4737 case 0:
4738 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4739 break;
4740 case 1:
4741 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
4742 break;
4743 case 2:
4744 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
4745 break;
4746 case 3:
4747 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
4748 break;
4749 default:
4750 DRM_DEBUG("invalid pipe %d\n", pipe);
4751 return;
4752 }
4753 } else {
4754 DRM_DEBUG("invalid me %d\n", me);
4755 return;
4756 }
4757
4758 switch (state) {
4759 case AMDGPU_IRQ_STATE_DISABLE:
4760 mec_int_cntl = RREG32(mec_int_cntl_reg);
4761 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4762 WREG32(mec_int_cntl_reg, mec_int_cntl);
4763 break;
4764 case AMDGPU_IRQ_STATE_ENABLE:
4765 mec_int_cntl = RREG32(mec_int_cntl_reg);
4766 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4767 WREG32(mec_int_cntl_reg, mec_int_cntl);
4768 break;
4769 default:
4770 break;
4771 }
4772 }
4773
4774 static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4775 struct amdgpu_irq_src *src,
4776 unsigned type,
4777 enum amdgpu_interrupt_state state)
4778 {
4779 u32 cp_int_cntl;
4780
4781 switch (state) {
4782 case AMDGPU_IRQ_STATE_DISABLE:
4783 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4784 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4785 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4786 break;
4787 case AMDGPU_IRQ_STATE_ENABLE:
4788 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4789 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4790 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4791 break;
4792 default:
4793 break;
4794 }
4795
4796 return 0;
4797 }
4798
4799 static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4800 struct amdgpu_irq_src *src,
4801 unsigned type,
4802 enum amdgpu_interrupt_state state)
4803 {
4804 u32 cp_int_cntl;
4805
4806 switch (state) {
4807 case AMDGPU_IRQ_STATE_DISABLE:
4808 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4809 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4810 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4811 break;
4812 case AMDGPU_IRQ_STATE_ENABLE:
4813 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4814 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4815 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4816 break;
4817 default:
4818 break;
4819 }
4820
4821 return 0;
4822 }
4823
4824 static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4825 struct amdgpu_irq_src *src,
4826 unsigned type,
4827 enum amdgpu_interrupt_state state)
4828 {
4829 switch (type) {
4830 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4831 gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
4832 break;
4833 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4834 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4835 break;
4836 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4837 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4838 break;
4839 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4840 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4841 break;
4842 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4843 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4844 break;
4845 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4846 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4847 break;
4848 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4849 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4850 break;
4851 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4852 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4853 break;
4854 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4855 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4856 break;
4857 default:
4858 break;
4859 }
4860 return 0;
4861 }
4862
4863 static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
4864 struct amdgpu_irq_src *source,
4865 struct amdgpu_iv_entry *entry)
4866 {
4867 u8 me_id, pipe_id;
4868 struct amdgpu_ring *ring;
4869 int i;
4870
4871 DRM_DEBUG("IH: CP EOP\n");
4872 me_id = (entry->ring_id & 0x0c) >> 2;
4873 pipe_id = (entry->ring_id & 0x03) >> 0;
4874 switch (me_id) {
4875 case 0:
4876 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4877 break;
4878 case 1:
4879 case 2:
4880 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4881 ring = &adev->gfx.compute_ring[i];
4882 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4883 amdgpu_fence_process(ring);
4884 }
4885 break;
4886 }
4887 return 0;
4888 }
4889
4890 static void gfx_v7_0_fault(struct amdgpu_device *adev,
4891 struct amdgpu_iv_entry *entry)
4892 {
4893 struct amdgpu_ring *ring;
4894 u8 me_id, pipe_id;
4895 int i;
4896
4897 me_id = (entry->ring_id & 0x0c) >> 2;
4898 pipe_id = (entry->ring_id & 0x03) >> 0;
4899 switch (me_id) {
4900 case 0:
4901 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
4902 break;
4903 case 1:
4904 case 2:
4905 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4906 ring = &adev->gfx.compute_ring[i];
4907 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4908 drm_sched_fault(&ring->sched);
4909 }
4910 break;
4911 }
4912 }
4913
/*
 * Privileged-register fault handler: logs the error and forwards the entry
 * to gfx_v7_0_fault().  @source is not used.  Always returns 0.
 */
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v7_0_fault(adev, entry);

	return 0;
}
4922
/*
 * Privileged-instruction fault handler: logs the error and forwards the
 * entry to gfx_v7_0_fault().  @source is not used.  Always returns 0.
 */
static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v7_0_fault(adev, entry);

	return 0;
}
4932
4933 static int gfx_v7_0_set_clockgating_state(void *handle,
4934 enum amd_clockgating_state state)
4935 {
4936 bool gate = false;
4937 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4938
4939 if (state == AMD_CG_STATE_GATE)
4940 gate = true;
4941
4942 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4943
4944 if (gate) {
4945 gfx_v7_0_enable_mgcg(adev, true);
4946 gfx_v7_0_enable_cgcg(adev, true);
4947 } else {
4948 gfx_v7_0_enable_cgcg(adev, false);
4949 gfx_v7_0_enable_mgcg(adev, false);
4950 }
4951 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4952
4953 return 0;
4954 }
4955
4956 static int gfx_v7_0_set_powergating_state(void *handle,
4957 enum amd_powergating_state state)
4958 {
4959 bool gate = false;
4960 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4961
4962 if (state == AMD_PG_STATE_GATE)
4963 gate = true;
4964
4965 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4966 AMD_PG_SUPPORT_GFX_SMG |
4967 AMD_PG_SUPPORT_GFX_DMG |
4968 AMD_PG_SUPPORT_CP |
4969 AMD_PG_SUPPORT_GDS |
4970 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4971 gfx_v7_0_update_gfx_pg(adev, gate);
4972 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4973 gfx_v7_0_enable_cp_pg(adev, gate);
4974 gfx_v7_0_enable_gds_pg(adev, gate);
4975 }
4976 }
4977
4978 return 0;
4979 }
4980
4981 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
4982 .name = "gfx_v7_0",
4983 .early_init = gfx_v7_0_early_init,
4984 .late_init = gfx_v7_0_late_init,
4985 .sw_init = gfx_v7_0_sw_init,
4986 .sw_fini = gfx_v7_0_sw_fini,
4987 .hw_init = gfx_v7_0_hw_init,
4988 .hw_fini = gfx_v7_0_hw_fini,
4989 .suspend = gfx_v7_0_suspend,
4990 .resume = gfx_v7_0_resume,
4991 .is_idle = gfx_v7_0_is_idle,
4992 .wait_for_idle = gfx_v7_0_wait_for_idle,
4993 .soft_reset = gfx_v7_0_soft_reset,
4994 .set_clockgating_state = gfx_v7_0_set_clockgating_state,
4995 .set_powergating_state = gfx_v7_0_set_powergating_state,
4996 };
4997
4998 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
4999 .type = AMDGPU_RING_TYPE_GFX,
5000 .align_mask = 0xff,
5001 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5002 .support_64bit_ptrs = false,
5003 .get_rptr = gfx_v7_0_ring_get_rptr,
5004 .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
5005 .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
5006 .emit_frame_size =
5007 20 +
5008 7 +
5009 5 +
5010 12 + 12 + 12 +
5011 7 + 4 +
5012 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 +
5013 3 + 4,
5014 .emit_ib_size = 4,
5015 .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
5016 .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
5017 .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
5018 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5019 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5020 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5021 .test_ring = gfx_v7_0_ring_test_ring,
5022 .test_ib = gfx_v7_0_ring_test_ib,
5023 .insert_nop = amdgpu_ring_insert_nop,
5024 .pad_ib = amdgpu_ring_generic_pad_ib,
5025 .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
5026 .emit_wreg = gfx_v7_0_ring_emit_wreg,
5027 .soft_recovery = gfx_v7_0_ring_soft_recovery,
5028 };
5029
5030 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5031 .type = AMDGPU_RING_TYPE_COMPUTE,
5032 .align_mask = 0xff,
5033 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5034 .support_64bit_ptrs = false,
5035 .get_rptr = gfx_v7_0_ring_get_rptr,
5036 .get_wptr = gfx_v7_0_ring_get_wptr_compute,
5037 .set_wptr = gfx_v7_0_ring_set_wptr_compute,
5038 .emit_frame_size =
5039 20 +
5040 7 +
5041 5 +
5042 7 +
5043 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 +
5044 7 + 7 + 7,
5045 .emit_ib_size = 7,
5046 .emit_ib = gfx_v7_0_ring_emit_ib_compute,
5047 .emit_fence = gfx_v7_0_ring_emit_fence_compute,
5048 .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
5049 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5050 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5051 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5052 .test_ring = gfx_v7_0_ring_test_ring,
5053 .test_ib = gfx_v7_0_ring_test_ib,
5054 .insert_nop = amdgpu_ring_insert_nop,
5055 .pad_ib = amdgpu_ring_generic_pad_ib,
5056 .emit_wreg = gfx_v7_0_ring_emit_wreg,
5057 };
5058
5059 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5060 {
5061 int i;
5062
5063 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5064 adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5065 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5066 adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5067 }
5068
5069 static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
5070 .set = gfx_v7_0_set_eop_interrupt_state,
5071 .process = gfx_v7_0_eop_irq,
5072 };
5073
5074 static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
5075 .set = gfx_v7_0_set_priv_reg_fault_state,
5076 .process = gfx_v7_0_priv_reg_irq,
5077 };
5078
5079 static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
5080 .set = gfx_v7_0_set_priv_inst_fault_state,
5081 .process = gfx_v7_0_priv_inst_irq,
5082 };
5083
5084 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5085 {
5086 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5087 adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5088
5089 adev->gfx.priv_reg_irq.num_types = 1;
5090 adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5091
5092 adev->gfx.priv_inst_irq.num_types = 1;
5093 adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5094 }
5095
5096 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5097 {
5098
5099 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
5100 adev->gds.gws_size = 64;
5101 adev->gds.oa_size = 16;
5102 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
5103 }
5104
5105
5106 static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
5107 {
5108 int i, j, k, counter, active_cu_number = 0;
5109 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5110 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
5111 unsigned disable_masks[4 * 2];
5112 u32 ao_cu_num;
5113
5114 if (adev->flags & AMD_IS_APU)
5115 ao_cu_num = 2;
5116 else
5117 ao_cu_num = adev->gfx.config.max_cu_per_sh;
5118
5119 memset(cu_info, 0, sizeof(*cu_info));
5120
5121 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5122
5123 mutex_lock(&adev->grbm_idx_mutex);
5124 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5125 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5126 mask = 1;
5127 ao_bitmap = 0;
5128 counter = 0;
5129 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
5130 if (i < 4 && j < 2)
5131 gfx_v7_0_set_user_cu_inactive_bitmap(
5132 adev, disable_masks[i * 2 + j]);
5133 bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5134 cu_info->bitmap[i][j] = bitmap;
5135
5136 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5137 if (bitmap & mask) {
5138 if (counter < ao_cu_num)
5139 ao_bitmap |= mask;
5140 counter ++;
5141 }
5142 mask <<= 1;
5143 }
5144 active_cu_number += counter;
5145 if (i < 2 && j < 2)
5146 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5147 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5148 }
5149 }
5150 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5151 mutex_unlock(&adev->grbm_idx_mutex);
5152
5153 cu_info->number = active_cu_number;
5154 cu_info->ao_cu_mask = ao_cu_mask;
5155 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5156 cu_info->max_waves_per_simd = 10;
5157 cu_info->max_scratch_slots_per_cu = 32;
5158 cu_info->wave_front_size = 64;
5159 cu_info->lds_size = 64;
5160 }
5161
5162 const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
5163 {
5164 .type = AMD_IP_BLOCK_TYPE_GFX,
5165 .major = 7,
5166 .minor = 0,
5167 .rev = 0,
5168 .funcs = &gfx_v7_0_ip_funcs,
5169 };
5170
5171 const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
5172 {
5173 .type = AMD_IP_BLOCK_TYPE_GFX,
5174 .major = 7,
5175 .minor = 1,
5176 .rev = 0,
5177 .funcs = &gfx_v7_0_ip_funcs,
5178 };
5179
5180 const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
5181 {
5182 .type = AMD_IP_BLOCK_TYPE_GFX,
5183 .major = 7,
5184 .minor = 2,
5185 .rev = 0,
5186 .funcs = &gfx_v7_0_ip_funcs,
5187 };
5188
5189 const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
5190 {
5191 .type = AMD_IP_BLOCK_TYPE_GFX,
5192 .major = 7,
5193 .minor = 3,
5194 .rev = 0,
5195 .funcs = &gfx_v7_0_ip_funcs,
5196 };