root/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. gfx_v8_0_init_golden_registers
  2. gfx_v8_0_scratch_init
  3. gfx_v8_0_ring_test_ring
  4. gfx_v8_0_ring_test_ib
  5. gfx_v8_0_free_microcode
  6. gfx_v8_0_init_microcode
  7. gfx_v8_0_get_csb_buffer
  8. gfx_v8_0_cp_jump_table_num
  9. gfx_v8_0_rlc_init
  10. gfx_v8_0_csb_vram_pin
  11. gfx_v8_0_csb_vram_unpin
  12. gfx_v8_0_mec_fini
  13. gfx_v8_0_mec_init
  14. gfx_v8_0_do_edc_gpr_workarounds
  15. gfx_v8_0_gpu_early_init
  16. gfx_v8_0_compute_ring_init
  17. gfx_v8_0_sw_init
  18. gfx_v8_0_sw_fini
  19. gfx_v8_0_tiling_mode_table_init
  20. gfx_v8_0_select_se_sh
  21. gfx_v8_0_select_me_pipe_q
  22. gfx_v8_0_get_rb_active_bitmap
  23. gfx_v8_0_raster_config
  24. gfx_v8_0_write_harvested_raster_configs
  25. gfx_v8_0_setup_rb
  26. gfx_v8_0_init_compute_vmid
  27. gfx_v8_0_init_gds_vmid
  28. gfx_v8_0_config_init
  29. gfx_v8_0_constants_init
  30. gfx_v8_0_wait_for_rlc_serdes
  31. gfx_v8_0_enable_gui_idle_interrupt
  32. gfx_v8_0_init_csb
  33. gfx_v8_0_parse_ind_reg_list
  34. gfx_v8_0_init_save_restore_list
  35. gfx_v8_0_enable_save_restore_machine
  36. gfx_v8_0_init_power_gating
  37. cz_enable_sck_slow_down_on_power_up
  38. cz_enable_sck_slow_down_on_power_down
  39. cz_enable_cp_power_gating
  40. gfx_v8_0_init_pg
  41. gfx_v8_0_rlc_stop
  42. gfx_v8_0_rlc_reset
  43. gfx_v8_0_rlc_start
  44. gfx_v8_0_rlc_resume
  45. gfx_v8_0_cp_gfx_enable
  46. gfx_v8_0_get_csb_size
  47. gfx_v8_0_cp_gfx_start
  48. gfx_v8_0_set_cpg_door_bell
  49. gfx_v8_0_cp_gfx_resume
  50. gfx_v8_0_cp_compute_enable
  51. gfx_v8_0_kiq_setting
  52. gfx_v8_0_kiq_kcq_enable
  53. gfx_v8_0_deactivate_hqd
  54. gfx_v8_0_mqd_init
  55. gfx_v8_0_mqd_commit
  56. gfx_v8_0_kiq_init_queue
  57. gfx_v8_0_kcq_init_queue
  58. gfx_v8_0_set_mec_doorbell_range
  59. gfx_v8_0_kiq_resume
  60. gfx_v8_0_kcq_resume
  61. gfx_v8_0_cp_test_all_rings
  62. gfx_v8_0_cp_resume
  63. gfx_v8_0_cp_enable
  64. gfx_v8_0_hw_init
  65. gfx_v8_0_kcq_disable
  66. gfx_v8_0_is_idle
  67. gfx_v8_0_rlc_is_idle
  68. gfx_v8_0_wait_for_rlc_idle
  69. gfx_v8_0_wait_for_idle
  70. gfx_v8_0_hw_fini
  71. gfx_v8_0_suspend
  72. gfx_v8_0_resume
  73. gfx_v8_0_check_soft_reset
  74. gfx_v8_0_pre_soft_reset
  75. gfx_v8_0_soft_reset
  76. gfx_v8_0_post_soft_reset
  77. gfx_v8_0_get_gpu_clock_counter
  78. gfx_v8_0_ring_emit_gds_switch
  79. wave_read_ind
  80. wave_read_regs
  81. gfx_v8_0_read_wave_data
  82. gfx_v8_0_read_wave_sgprs
  83. gfx_v8_0_early_init
  84. gfx_v8_0_late_init
  85. gfx_v8_0_enable_gfx_static_mg_power_gating
  86. gfx_v8_0_enable_gfx_dynamic_mg_power_gating
  87. polaris11_enable_gfx_quick_mg_power_gating
  88. cz_enable_gfx_cg_power_gating
  89. cz_enable_gfx_pipeline_power_gating
  90. cz_update_gfx_cg_power_gating
  91. gfx_v8_0_set_powergating_state
  92. gfx_v8_0_get_clockgating_state
  93. gfx_v8_0_send_serdes_cmd
  94. gfx_v8_0_is_rlc_enabled
  95. gfx_v8_0_set_safe_mode
  96. gfx_v8_0_unset_safe_mode
  97. gfx_v8_0_update_medium_grain_clock_gating
  98. gfx_v8_0_update_coarse_grain_clock_gating
  99. gfx_v8_0_update_gfx_clock_gating
  100. gfx_v8_0_tonga_update_gfx_clock_gating
  101. gfx_v8_0_polaris_update_gfx_clock_gating
  102. gfx_v8_0_set_clockgating_state
  103. gfx_v8_0_ring_get_rptr
  104. gfx_v8_0_ring_get_wptr_gfx
  105. gfx_v8_0_ring_set_wptr_gfx
  106. gfx_v8_0_ring_emit_hdp_flush
  107. gfx_v8_0_ring_emit_vgt_flush
  108. gfx_v8_0_ring_emit_ib_gfx
  109. gfx_v8_0_ring_emit_ib_compute
  110. gfx_v8_0_ring_emit_fence_gfx
  111. gfx_v8_0_ring_emit_pipeline_sync
  112. gfx_v8_0_ring_emit_vm_flush
  113. gfx_v8_0_ring_get_wptr_compute
  114. gfx_v8_0_ring_set_wptr_compute
  115. gfx_v8_0_ring_set_pipe_percent
  116. gfx_v8_0_pipe_reserve_resources
  117. gfx_v8_0_hqd_set_priority
  118. gfx_v8_0_ring_set_priority_compute
  119. gfx_v8_0_ring_emit_fence_compute
  120. gfx_v8_0_ring_emit_fence_kiq
  121. gfx_v8_ring_emit_sb
  122. gfx_v8_ring_emit_cntxcntl
  123. gfx_v8_0_ring_emit_init_cond_exec
  124. gfx_v8_0_ring_emit_patch_cond_exec
  125. gfx_v8_0_ring_emit_rreg
  126. gfx_v8_0_ring_emit_wreg
  127. gfx_v8_0_ring_soft_recovery
  128. gfx_v8_0_set_gfx_eop_interrupt_state
  129. gfx_v8_0_set_compute_eop_interrupt_state
  130. gfx_v8_0_set_priv_reg_fault_state
  131. gfx_v8_0_set_priv_inst_fault_state
  132. gfx_v8_0_set_eop_interrupt_state
  133. gfx_v8_0_set_cp_ecc_int_state
  134. gfx_v8_0_set_sq_int_state
  135. gfx_v8_0_eop_irq
  136. gfx_v8_0_fault
  137. gfx_v8_0_priv_reg_irq
  138. gfx_v8_0_priv_inst_irq
  139. gfx_v8_0_cp_ecc_error_irq
  140. gfx_v8_0_parse_sq_irq
  141. gfx_v8_0_sq_irq_work_func
  142. gfx_v8_0_sq_irq
  143. gfx_v8_0_set_ring_funcs
  144. gfx_v8_0_set_irq_funcs
  145. gfx_v8_0_set_rlc_funcs
  146. gfx_v8_0_set_gds_init
  147. gfx_v8_0_set_user_cu_inactive_bitmap
  148. gfx_v8_0_get_cu_active_bitmap
  149. gfx_v8_0_get_cu_info
  150. gfx_v8_0_ring_emit_ce_meta
  151. gfx_v8_0_ring_emit_de_meta

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  */
  23 
  24 #include <linux/delay.h>
  25 #include <linux/kernel.h>
  26 #include <linux/firmware.h>
  27 #include <linux/module.h>
  28 #include <linux/pci.h>
  29 
  30 #include "amdgpu.h"
  31 #include "amdgpu_gfx.h"
  32 #include "vi.h"
  33 #include "vi_structs.h"
  34 #include "vid.h"
  35 #include "amdgpu_ucode.h"
  36 #include "amdgpu_atombios.h"
  37 #include "atombios_i2c.h"
  38 #include "clearstate_vi.h"
  39 
  40 #include "gmc/gmc_8_2_d.h"
  41 #include "gmc/gmc_8_2_sh_mask.h"
  42 
  43 #include "oss/oss_3_0_d.h"
  44 #include "oss/oss_3_0_sh_mask.h"
  45 
  46 #include "bif/bif_5_0_d.h"
  47 #include "bif/bif_5_0_sh_mask.h"
  48 #include "gca/gfx_8_0_d.h"
  49 #include "gca/gfx_8_0_enum.h"
  50 #include "gca/gfx_8_0_sh_mask.h"
  51 
  52 #include "dce/dce_10_0_d.h"
  53 #include "dce/dce_10_0_sh_mask.h"
  54 
  55 #include "smu/smu_7_1_3_d.h"
  56 
  57 #include "ivsrcid/ivsrcid_vislands30.h"
  58 
  59 #define GFX8_NUM_GFX_RINGS     1
  60 #define GFX8_MEC_HPD_SIZE 4096
  61 
  62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  66 
  67 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  68 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  69 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  70 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  71 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  72 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  73 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  74 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  75 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  76 
  77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  83 
  84 /* BPM SERDES CMD */
  85 #define SET_BPM_SERDES_CMD    1
  86 #define CLE_BPM_SERDES_CMD    0
  87 
   88 /* BPM register addresses — small sequential indices, NOTE(review):
      * presumably consumed by gfx_v8_0_send_serdes_cmd() together with the
      * SET/CLE_BPM_SERDES_CMD values above; confirm against that function. */
   89 enum {
   90         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
   91         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
   92         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
   93         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
   94         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
   95         BPM_REG_FGCG_MAX            /* sentinel: one past the last BPM register */
   96 };
  97 
  98 #define RLC_FormatDirectRegListLength        14
  99 
 100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 106 
 107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 112 
 113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 119 
 120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 125 
 126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 132 
 133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 144 
 145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 156 
 157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 168 
 169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 175 
/* Per-VMID GDS register offsets; one row per VMID (0-15), each row holding
 * the {BASE, SIZE, GWS, OA} register offsets for that VMID. */
  176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
  177 {
  178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
  179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
  180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
  181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
  182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
  183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
  184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
  185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
  186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
  187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
  188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
  189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
  190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
  191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
  192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
  193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
  194 };
 195 
/* Tonga A11 golden register settings, {reg, and-mask, or-value} triples.
 * NOTE(review): triple format assumed from the amdgpu golden-register
 * convention; confirm against gfx_v8_0_init_golden_registers(). */
  196 static const u32 golden_settings_tonga_a11[] =
  197 {
  198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
  199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
  202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
  204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
  211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
  212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
  213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  214 };
 215 
/* Tonga common golden settings (raster config, GB_ADDR_CONFIG, SPI CU
 * reservations), {reg, and-mask, or-value} triples — NOTE(review): format
 * assumed; confirm against gfx_v8_0_init_golden_registers(). */
  216 static const u32 tonga_golden_common_all[] =
  217 {
  218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  226 };
 227 
/* Tonga MGCG/CGCG clock-gating init sequence, {reg, and-mask, or-value}
 * triples. Note mmGRBM_GFX_INDEX is written twice (start and after the CGTT
 * block) to broadcast to all SEs/SHs. NOTE(review): triple format assumed;
 * confirm against gfx_v8_0_init_golden_registers(). */
  228 static const u32 tonga_mgcg_cgcg_init[] =
  229 {
  230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
  294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
  299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  305 };
 306 
/* VegaM A11 golden register settings, {reg, and-mask, or-value} triples.
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  307 static const u32 golden_settings_vegam_a11[] =
  308 {
  309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
  310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
  311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
  316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
  317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
  320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
  325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  326 };
 327 
/* VegaM common golden settings, {reg, and-mask, or-value} triples —
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  328 static const u32 vegam_golden_common_all[] =
  329 {
  330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  336 };
 337 
/* Polaris11 A11 golden register settings, {reg, and-mask, or-value} triples.
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  338 static const u32 golden_settings_polaris11_a11[] =
  339 {
  340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
  341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
  342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
  351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
  354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
  356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  357 };
 358 
/* Polaris11 common golden settings (note GB_ADDR_CONFIG 0x22011002 matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN above), {reg, and-mask, or-value} triples —
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  359 static const u32 polaris11_golden_common_all[] =
  360 {
  361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
  363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  367 };
 368 
/* Polaris10 A11 golden register settings, {reg, and-mask, or-value} triples.
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  369 static const u32 golden_settings_polaris10_a11[] =
  370 {
  371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
  372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
  373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
  374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
  380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
  383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  388 };
 389 
/* Polaris10 common golden settings, {reg, and-mask, or-value} triples —
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  390 static const u32 polaris10_golden_common_all[] =
  391 {
  392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  400 };
 401 
/* Fiji common golden settings, {reg, and-mask, or-value} triples; GRBM index
 * is re-broadcast before the SPI_CONFIG_CNTL_1 write — NOTE(review): format
 * assumed; confirm against gfx_v8_0_init_golden_registers(). */
  402 static const u32 fiji_golden_common_all[] =
  403 {
  404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
  406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
  407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
  414 };
 415 
/* Fiji A10 golden register settings, {reg, and-mask, or-value} triples.
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  416 static const u32 golden_settings_fiji_a10[] =
  417 {
  418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
  428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  429 };
 430 
/* Fiji MGCG/CGCG clock-gating init sequence, {reg, and-mask, or-value}
 * triples; unlike the Tonga table there are no per-CU CGTS entries.
 * NOTE(review): triple format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  431 static const u32 fiji_mgcg_cgcg_init[] =
  432 {
  433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  468 };
 469 
/* Iceland (Topaz) A11 golden register settings, {reg, and-mask, or-value}
 * triples. NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  470 static const u32 golden_settings_iceland_a11[] =
  471 {
  472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
  475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
  476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
  479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
  486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
  488 };
 489 
/* Iceland (Topaz) common golden settings (GB_ADDR_CONFIG 0x22010001 matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN above), {reg, and-mask, or-value} triples —
 * NOTE(review): format assumed; confirm against
 * gfx_v8_0_init_golden_registers(). */
  490 static const u32 iceland_golden_common_all[] =
  491 {
  492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  500 };
 501 
/* Iceland (Topaz) MGCG/CGCG clock-gating init sequence, {reg, and-mask,
 * or-value} triples; per-CU CGTS entries cover CU0-CU5 only. NOTE(review):
 * triple format assumed; confirm against gfx_v8_0_init_golden_registers(). */
  502 static const u32 iceland_mgcg_cgcg_init[] =
  503 {
  504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
  509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
  510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
  511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
  527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  568 };
 569 
/*
 * Carrizo (a11) ASIC-specific golden tuning values; triplets for
 * amdgpu_device_program_register_sequence() (presumably {offset, mask, value}).
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
 585 
/*
 * Carrizo common golden values (raster config, GB address config, SPI CU
 * reservations); applied after cz_golden_settings_a11 during golden init.
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
 597 
/*
 * Carrizo MGCG/CGCG (clock gating) golden table; triplets for
 * amdgpu_device_program_register_sequence() (presumably {offset, mask, value}).
 * Unlike the Iceland table this covers CU0..CU7 and also enables CP memory
 * light sleep (mmCP_MEM_SLP_CNTL at the end).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU CGTS settings for all eight CUs. */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
 676 
/*
 * Stoney (a11) ASIC-specific golden tuning values; triplets for
 * amdgpu_device_program_register_sequence() (presumably {offset, mask, value}).
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
 690 
/*
 * Stoney common golden values (raster config, GB address config, SPI CU
 * reservations); applied after stoney_golden_settings_a11 during golden init.
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 702 
/*
 * Stoney MGCG/CGCG (clock gating) golden table — much shorter than the
 * Carrizo/Iceland equivalents; includes CP and RLC memory light-sleep setup.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
 711 
 712 
/*
 * Human-readable descriptions of SQ EDC (error detection/correction) error
 * sources. Array index presumably mirrors the hardware SQ_EDC_INFO SOURCE
 * field encoding (0 = invalid .. 6 = TA) — confirm against the register spec.
 */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
 722 
 723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 731 
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device handle
 *
 * Applies the register tables for the detected ASIC, always in the same
 * order: clock-gating (mgcg_cgcg) table first where one exists, then the
 * ASIC-specific tuning (golden settings) table, then the common table.
 * Unknown ASIC types are left untouched. The call order within each case
 * is part of the hardware init sequence — do not reorder.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        /* VEGAM and Polaris parts have no separate mgcg_cgcg table here. */
        case CHIP_VEGAM:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* Board-specific quirk: revision-0xc7 Polaris10 boards from the
                 * listed subsystem vendor/device IDs get two extra I2C register
                 * writes. Exact purpose is not evident from this file — looks
                 * like a board/VBIOS workaround; confirm via amdgpu git history.
                 */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
 828 
 829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 830 {
 831         adev->gfx.scratch.num_reg = 8;
 832         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 833         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 834 }
 835 
 836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 837 {
 838         struct amdgpu_device *adev = ring->adev;
 839         uint32_t scratch;
 840         uint32_t tmp = 0;
 841         unsigned i;
 842         int r;
 843 
 844         r = amdgpu_gfx_scratch_get(adev, &scratch);
 845         if (r)
 846                 return r;
 847 
 848         WREG32(scratch, 0xCAFEDEAD);
 849         r = amdgpu_ring_alloc(ring, 3);
 850         if (r)
 851                 goto error_free_scratch;
 852 
 853         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 854         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 855         amdgpu_ring_write(ring, 0xDEADBEEF);
 856         amdgpu_ring_commit(ring);
 857 
 858         for (i = 0; i < adev->usec_timeout; i++) {
 859                 tmp = RREG32(scratch);
 860                 if (tmp == 0xDEADBEEF)
 861                         break;
 862                 udelay(1);
 863         }
 864 
 865         if (i >= adev->usec_timeout)
 866                 r = -ETIMEDOUT;
 867 
 868 error_free_scratch:
 869         amdgpu_gfx_scratch_free(adev, scratch);
 870         return r;
 871 }
 872 
 873 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 874 {
 875         struct amdgpu_device *adev = ring->adev;
 876         struct amdgpu_ib ib;
 877         struct dma_fence *f = NULL;
 878 
 879         unsigned int index;
 880         uint64_t gpu_addr;
 881         uint32_t tmp;
 882         long r;
 883 
 884         r = amdgpu_device_wb_get(adev, &index);
 885         if (r)
 886                 return r;
 887 
 888         gpu_addr = adev->wb.gpu_addr + (index * 4);
 889         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 890         memset(&ib, 0, sizeof(ib));
 891         r = amdgpu_ib_get(adev, NULL, 16, &ib);
 892         if (r)
 893                 goto err1;
 894 
 895         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 896         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 897         ib.ptr[2] = lower_32_bits(gpu_addr);
 898         ib.ptr[3] = upper_32_bits(gpu_addr);
 899         ib.ptr[4] = 0xDEADBEEF;
 900         ib.length_dw = 5;
 901 
 902         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 903         if (r)
 904                 goto err2;
 905 
 906         r = dma_fence_wait_timeout(f, false, timeout);
 907         if (r == 0) {
 908                 r = -ETIMEDOUT;
 909                 goto err2;
 910         } else if (r < 0) {
 911                 goto err2;
 912         }
 913 
 914         tmp = adev->wb.wb[index];
 915         if (tmp == 0xDEADBEEF)
 916                 r = 0;
 917         else
 918                 r = -EINVAL;
 919 
 920 err2:
 921         amdgpu_ib_free(adev, &ib, NULL);
 922         dma_fence_put(f);
 923 err1:
 924         amdgpu_device_wb_free(adev, index);
 925         return r;
 926 }
 927 
 928 
 929 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 930 {
 931         release_firmware(adev->gfx.pfp_fw);
 932         adev->gfx.pfp_fw = NULL;
 933         release_firmware(adev->gfx.me_fw);
 934         adev->gfx.me_fw = NULL;
 935         release_firmware(adev->gfx.ce_fw);
 936         adev->gfx.ce_fw = NULL;
 937         release_firmware(adev->gfx.rlc_fw);
 938         adev->gfx.rlc_fw = NULL;
 939         release_firmware(adev->gfx.mec_fw);
 940         adev->gfx.mec_fw = NULL;
 941         if ((adev->asic_type != CHIP_STONEY) &&
 942             (adev->asic_type != CHIP_TOPAZ))
 943                 release_firmware(adev->gfx.mec2_fw);
 944         adev->gfx.mec2_fw = NULL;
 945 
 946         kfree(adev->gfx.rlc.register_list_format);
 947 }
 948 
 949 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 950 {
 951         const char *chip_name;
 952         char fw_name[30];
 953         int err;
 954         struct amdgpu_firmware_info *info = NULL;
 955         const struct common_firmware_header *header = NULL;
 956         const struct gfx_firmware_header_v1_0 *cp_hdr;
 957         const struct rlc_firmware_header_v2_0 *rlc_hdr;
 958         unsigned int *tmp = NULL, i;
 959 
 960         DRM_DEBUG("\n");
 961 
 962         switch (adev->asic_type) {
 963         case CHIP_TOPAZ:
 964                 chip_name = "topaz";
 965                 break;
 966         case CHIP_TONGA:
 967                 chip_name = "tonga";
 968                 break;
 969         case CHIP_CARRIZO:
 970                 chip_name = "carrizo";
 971                 break;
 972         case CHIP_FIJI:
 973                 chip_name = "fiji";
 974                 break;
 975         case CHIP_STONEY:
 976                 chip_name = "stoney";
 977                 break;
 978         case CHIP_POLARIS10:
 979                 chip_name = "polaris10";
 980                 break;
 981         case CHIP_POLARIS11:
 982                 chip_name = "polaris11";
 983                 break;
 984         case CHIP_POLARIS12:
 985                 chip_name = "polaris12";
 986                 break;
 987         case CHIP_VEGAM:
 988                 chip_name = "vegam";
 989                 break;
 990         default:
 991                 BUG();
 992         }
 993 
 994         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 995                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 996                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 997                 if (err == -ENOENT) {
 998                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 999                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000                 }
1001         } else {
1002                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1003                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004         }
1005         if (err)
1006                 goto out;
1007         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1008         if (err)
1009                 goto out;
1010         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1011         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1012         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1013 
1014         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1015                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1016                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017                 if (err == -ENOENT) {
1018                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020                 }
1021         } else {
1022                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1023                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024         }
1025         if (err)
1026                 goto out;
1027         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1028         if (err)
1029                 goto out;
1030         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1031         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1032 
1033         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034 
1035         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1036                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1037                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038                 if (err == -ENOENT) {
1039                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041                 }
1042         } else {
1043                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1044                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045         }
1046         if (err)
1047                 goto out;
1048         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1049         if (err)
1050                 goto out;
1051         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1052         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054 
1055         /*
1056          * Support for MCBP/Virtualization in combination with chained IBs is
1057          * formal released on feature version #46
1058          */
1059         if (adev->gfx.ce_feature_version >= 46 &&
1060             adev->gfx.pfp_feature_version >= 46) {
1061                 adev->virt.chained_ib_support = true;
1062                 DRM_INFO("Chained IB support enabled!\n");
1063         } else
1064                 adev->virt.chained_ib_support = false;
1065 
1066         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1067         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1068         if (err)
1069                 goto out;
1070         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1071         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1072         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1073         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1074 
1075         adev->gfx.rlc.save_and_restore_offset =
1076                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1077         adev->gfx.rlc.clear_state_descriptor_offset =
1078                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1079         adev->gfx.rlc.avail_scratch_ram_locations =
1080                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1081         adev->gfx.rlc.reg_restore_list_size =
1082                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1083         adev->gfx.rlc.reg_list_format_start =
1084                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1085         adev->gfx.rlc.reg_list_format_separate_start =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1087         adev->gfx.rlc.starting_offsets_start =
1088                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1089         adev->gfx.rlc.reg_list_format_size_bytes =
1090                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1091         adev->gfx.rlc.reg_list_size_bytes =
1092                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1093 
1094         adev->gfx.rlc.register_list_format =
1095                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1096                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1097 
1098         if (!adev->gfx.rlc.register_list_format) {
1099                 err = -ENOMEM;
1100                 goto out;
1101         }
1102 
1103         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1104                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1105         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1106                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1107 
1108         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1109 
1110         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1111                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1112         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1113                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1114 
1115         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1116                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1117                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118                 if (err == -ENOENT) {
1119                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121                 }
1122         } else {
1123                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1124                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125         }
1126         if (err)
1127                 goto out;
1128         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1129         if (err)
1130                 goto out;
1131         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1132         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1133         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1134 
1135         if ((adev->asic_type != CHIP_STONEY) &&
1136             (adev->asic_type != CHIP_TOPAZ)) {
1137                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1138                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1139                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140                         if (err == -ENOENT) {
1141                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                         }
1144                 } else {
1145                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1146                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1147                 }
1148                 if (!err) {
1149                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1150                         if (err)
1151                                 goto out;
1152                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1153                                 adev->gfx.mec2_fw->data;
1154                         adev->gfx.mec2_fw_version =
1155                                 le32_to_cpu(cp_hdr->header.ucode_version);
1156                         adev->gfx.mec2_feature_version =
1157                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1158                 } else {
1159                         err = 0;
1160                         adev->gfx.mec2_fw = NULL;
1161                 }
1162         }
1163 
1164         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166         info->fw = adev->gfx.pfp_fw;
1167         header = (const struct common_firmware_header *)info->fw->data;
1168         adev->firmware.fw_size +=
1169                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170 
1171         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173         info->fw = adev->gfx.me_fw;
1174         header = (const struct common_firmware_header *)info->fw->data;
1175         adev->firmware.fw_size +=
1176                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177 
1178         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180         info->fw = adev->gfx.ce_fw;
1181         header = (const struct common_firmware_header *)info->fw->data;
1182         adev->firmware.fw_size +=
1183                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184 
1185         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187         info->fw = adev->gfx.rlc_fw;
1188         header = (const struct common_firmware_header *)info->fw->data;
1189         adev->firmware.fw_size +=
1190                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191 
1192         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194         info->fw = adev->gfx.mec_fw;
1195         header = (const struct common_firmware_header *)info->fw->data;
1196         adev->firmware.fw_size +=
1197                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198 
1199         /* we need account JT in */
1200         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203 
1204         if (amdgpu_sriov_vf(adev)) {
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207                 info->fw = adev->gfx.mec_fw;
1208                 adev->firmware.fw_size +=
1209                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210         }
1211 
1212         if (adev->gfx.mec2_fw) {
1213                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215                 info->fw = adev->gfx.mec2_fw;
1216                 header = (const struct common_firmware_header *)info->fw->data;
1217                 adev->firmware.fw_size +=
1218                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219         }
1220 
1221 out:
1222         if (err) {
1223                 dev_err(adev->dev,
1224                         "gfx8: Failed to load firmware \"%s\"\n",
1225                         fw_name);
1226                 release_firmware(adev->gfx.pfp_fw);
1227                 adev->gfx.pfp_fw = NULL;
1228                 release_firmware(adev->gfx.me_fw);
1229                 adev->gfx.me_fw = NULL;
1230                 release_firmware(adev->gfx.ce_fw);
1231                 adev->gfx.ce_fw = NULL;
1232                 release_firmware(adev->gfx.rlc_fw);
1233                 adev->gfx.rlc_fw = NULL;
1234                 release_firmware(adev->gfx.mec_fw);
1235                 adev->gfx.mec_fw = NULL;
1236                 release_firmware(adev->gfx.mec2_fw);
1237                 adev->gfx.mec2_fw = NULL;
1238         }
1239         return err;
1240 }
1241 
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243                                     volatile u32 *buffer)
1244 {
1245         u32 count = 0, i;
1246         const struct cs_section_def *sect = NULL;
1247         const struct cs_extent_def *ext = NULL;
1248 
1249         if (adev->gfx.rlc.cs_data == NULL)
1250                 return;
1251         if (buffer == NULL)
1252                 return;
1253 
1254         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256 
1257         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258         buffer[count++] = cpu_to_le32(0x80000000);
1259         buffer[count++] = cpu_to_le32(0x80000000);
1260 
1261         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1263                         if (sect->id == SECT_CONTEXT) {
1264                                 buffer[count++] =
1265                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1267                                                 PACKET3_SET_CONTEXT_REG_START);
1268                                 for (i = 0; i < ext->reg_count; i++)
1269                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1270                         } else {
1271                                 return;
1272                         }
1273                 }
1274         }
1275 
1276         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278                         PACKET3_SET_CONTEXT_REG_START);
1279         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281 
1282         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284 
1285         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286         buffer[count++] = cpu_to_le32(0);
1287 }
1288 
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291         if (adev->asic_type == CHIP_CARRIZO)
1292                 return 5;
1293         else
1294                 return 4;
1295 }
1296 
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299         const struct cs_section_def *cs_data;
1300         int r;
1301 
1302         adev->gfx.rlc.cs_data = vi_cs_data;
1303 
1304         cs_data = adev->gfx.rlc.cs_data;
1305 
1306         if (cs_data) {
1307                 /* init clear state block */
1308                 r = amdgpu_gfx_rlc_init_csb(adev);
1309                 if (r)
1310                         return r;
1311         }
1312 
1313         if ((adev->asic_type == CHIP_CARRIZO) ||
1314             (adev->asic_type == CHIP_STONEY)) {
1315                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316                 r = amdgpu_gfx_rlc_init_cpt(adev);
1317                 if (r)
1318                         return r;
1319         }
1320 
1321         return 0;
1322 }
1323 
1324 static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
1325 {
1326         int r;
1327 
1328         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1329         if (unlikely(r != 0))
1330                 return r;
1331 
1332         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1333                         AMDGPU_GEM_DOMAIN_VRAM);
1334         if (!r)
1335                 adev->gfx.rlc.clear_state_gpu_addr =
1336                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1337 
1338         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1339 
1340         return r;
1341 }
1342 
1343 static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
1344 {
1345         int r;
1346 
1347         if (!adev->gfx.rlc.clear_state_obj)
1348                 return;
1349 
1350         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1351         if (likely(r == 0)) {
1352                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1354         }
1355 }
1356 
/*
 * Free the MEC HPD EOP buffer allocated by gfx_v8_0_mec_init().  The GPU
 * and CPU address out-parameters are NULL here since no cached pointers
 * need clearing (mec_init unmapped the BO after zeroing it).
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1361 
1362 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1363 {
1364         int r;
1365         u32 *hpd;
1366         size_t mec_hpd_size;
1367 
1368         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1369 
1370         /* take ownership of the relevant compute queues */
1371         amdgpu_gfx_compute_queue_acquire(adev);
1372 
1373         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1374 
1375         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1376                                       AMDGPU_GEM_DOMAIN_VRAM,
1377                                       &adev->gfx.mec.hpd_eop_obj,
1378                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1379                                       (void **)&hpd);
1380         if (r) {
1381                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382                 return r;
1383         }
1384 
1385         memset(hpd, 0, mec_hpd_size);
1386 
1387         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1388         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1389 
1390         return 0;
1391 }
1392 
/*
 * Raw GCN machine code copied into the EDC workaround IB by
 * gfx_v8_0_do_edc_gpr_workarounds(); judging by the name it writes every
 * VGPR once so the parity state is initialized.
 * NOTE(review): decoded by name/usage only -- confirm opcodes against the
 * GCN3 ISA manual before modifying.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1429 
/*
 * Raw GCN machine code for the SGPR half of the EDC workaround; shared by
 * both SGPR dispatches in gfx_v8_0_do_edc_gpr_workarounds() (the two
 * dispatches differ only in their CU masks, not in the shader).
 * NOTE(review): decoded by name/usage only -- confirm opcodes against the
 * GCN3 ISA manual before modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1454 
/*
 * (register, value) pairs programmed via SET_SH_REG for the VGPR init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds(); consumed two entries at
 * a time.  SE0 thread management is fully enabled (0xffffffff) so the
 * dispatch reaches all CUs on the shader engine.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1475 
/*
 * (register, value) pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  The SE0 thread-management mask
 * (0x0f) targets the lower CUs; sgpr2_init_regs covers the rest (0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1496 
/*
 * (register, value) pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Identical to sgpr1_init_regs
 * except the SE0 thread-management mask (0xf0) selects the upper CUs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1517 
/*
 * EDC SEC/DED error-counter registers read back (to clear them) at the
 * end of gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1546 
1547 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1548 {
1549         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1550         struct amdgpu_ib ib;
1551         struct dma_fence *f = NULL;
1552         int r, i;
1553         u32 tmp;
1554         unsigned total_size, vgpr_offset, sgpr_offset;
1555         u64 gpu_addr;
1556 
1557         /* only supported on CZ */
1558         if (adev->asic_type != CHIP_CARRIZO)
1559                 return 0;
1560 
1561         /* bail if the compute ring is not ready */
1562         if (!ring->sched.ready)
1563                 return 0;
1564 
1565         tmp = RREG32(mmGB_EDC_MODE);
1566         WREG32(mmGB_EDC_MODE, 0);
1567 
1568         total_size =
1569                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1570         total_size +=
1571                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1572         total_size +=
1573                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574         total_size = ALIGN(total_size, 256);
1575         vgpr_offset = total_size;
1576         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1577         sgpr_offset = total_size;
1578         total_size += sizeof(sgpr_init_compute_shader);
1579 
1580         /* allocate an indirect buffer to put the commands in */
1581         memset(&ib, 0, sizeof(ib));
1582         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1583         if (r) {
1584                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1585                 return r;
1586         }
1587 
1588         /* load the compute shaders */
1589         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1590                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1591 
1592         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1593                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1594 
1595         /* init the ib length to 0 */
1596         ib.length_dw = 0;
1597 
1598         /* VGPR */
1599         /* write the register state for the compute dispatch */
1600         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1601                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1602                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1603                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1604         }
1605         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1606         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1607         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1608         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1609         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1610         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1611 
1612         /* write dispatch packet */
1613         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1614         ib.ptr[ib.length_dw++] = 8; /* x */
1615         ib.ptr[ib.length_dw++] = 1; /* y */
1616         ib.ptr[ib.length_dw++] = 1; /* z */
1617         ib.ptr[ib.length_dw++] =
1618                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1619 
1620         /* write CS partial flush packet */
1621         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1622         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1623 
1624         /* SGPR1 */
1625         /* write the register state for the compute dispatch */
1626         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1627                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1628                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1629                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1630         }
1631         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1632         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1633         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1634         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1635         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1636         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637 
1638         /* write dispatch packet */
1639         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1640         ib.ptr[ib.length_dw++] = 8; /* x */
1641         ib.ptr[ib.length_dw++] = 1; /* y */
1642         ib.ptr[ib.length_dw++] = 1; /* z */
1643         ib.ptr[ib.length_dw++] =
1644                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645 
1646         /* write CS partial flush packet */
1647         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1648         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1649 
1650         /* SGPR2 */
1651         /* write the register state for the compute dispatch */
1652         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1653                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1654                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1655                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1656         }
1657         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1658         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1659         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1660         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1661         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1662         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663 
1664         /* write dispatch packet */
1665         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1666         ib.ptr[ib.length_dw++] = 8; /* x */
1667         ib.ptr[ib.length_dw++] = 1; /* y */
1668         ib.ptr[ib.length_dw++] = 1; /* z */
1669         ib.ptr[ib.length_dw++] =
1670                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671 
1672         /* write CS partial flush packet */
1673         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1674         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675 
1676         /* shedule the ib on the ring */
1677         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1678         if (r) {
1679                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1680                 goto fail;
1681         }
1682 
1683         /* wait for the GPU to finish processing the IB */
1684         r = dma_fence_wait(f, false);
1685         if (r) {
1686                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1687                 goto fail;
1688         }
1689 
1690         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1691         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1692         WREG32(mmGB_EDC_MODE, tmp);
1693 
1694         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1695         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1696         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1697 
1698 
1699         /* read back registers to clear the counters */
1700         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1701                 RREG32(sec_ded_counter_registers[i]);
1702 
1703 fail:
1704         amdgpu_ib_free(adev, &ib, NULL);
1705         dma_fence_put(f);
1706 
1707         return r;
1708 }
1709 
1710 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1711 {
1712         u32 gb_addr_config;
1713         u32 mc_shared_chmap, mc_arb_ramcfg;
1714         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1715         u32 tmp;
1716         int ret;
1717 
1718         switch (adev->asic_type) {
1719         case CHIP_TOPAZ:
1720                 adev->gfx.config.max_shader_engines = 1;
1721                 adev->gfx.config.max_tile_pipes = 2;
1722                 adev->gfx.config.max_cu_per_sh = 6;
1723                 adev->gfx.config.max_sh_per_se = 1;
1724                 adev->gfx.config.max_backends_per_se = 2;
1725                 adev->gfx.config.max_texture_channel_caches = 2;
1726                 adev->gfx.config.max_gprs = 256;
1727                 adev->gfx.config.max_gs_threads = 32;
1728                 adev->gfx.config.max_hw_contexts = 8;
1729 
1730                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1735                 break;
1736         case CHIP_FIJI:
1737                 adev->gfx.config.max_shader_engines = 4;
1738                 adev->gfx.config.max_tile_pipes = 16;
1739                 adev->gfx.config.max_cu_per_sh = 16;
1740                 adev->gfx.config.max_sh_per_se = 1;
1741                 adev->gfx.config.max_backends_per_se = 4;
1742                 adev->gfx.config.max_texture_channel_caches = 16;
1743                 adev->gfx.config.max_gprs = 256;
1744                 adev->gfx.config.max_gs_threads = 32;
1745                 adev->gfx.config.max_hw_contexts = 8;
1746 
1747                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752                 break;
1753         case CHIP_POLARIS11:
1754         case CHIP_POLARIS12:
1755                 ret = amdgpu_atombios_get_gfx_info(adev);
1756                 if (ret)
1757                         return ret;
1758                 adev->gfx.config.max_gprs = 256;
1759                 adev->gfx.config.max_gs_threads = 32;
1760                 adev->gfx.config.max_hw_contexts = 8;
1761 
1762                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1767                 break;
1768         case CHIP_POLARIS10:
1769         case CHIP_VEGAM:
1770                 ret = amdgpu_atombios_get_gfx_info(adev);
1771                 if (ret)
1772                         return ret;
1773                 adev->gfx.config.max_gprs = 256;
1774                 adev->gfx.config.max_gs_threads = 32;
1775                 adev->gfx.config.max_hw_contexts = 8;
1776 
1777                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782                 break;
1783         case CHIP_TONGA:
1784                 adev->gfx.config.max_shader_engines = 4;
1785                 adev->gfx.config.max_tile_pipes = 8;
1786                 adev->gfx.config.max_cu_per_sh = 8;
1787                 adev->gfx.config.max_sh_per_se = 1;
1788                 adev->gfx.config.max_backends_per_se = 2;
1789                 adev->gfx.config.max_texture_channel_caches = 8;
1790                 adev->gfx.config.max_gprs = 256;
1791                 adev->gfx.config.max_gs_threads = 32;
1792                 adev->gfx.config.max_hw_contexts = 8;
1793 
1794                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799                 break;
1800         case CHIP_CARRIZO:
1801                 adev->gfx.config.max_shader_engines = 1;
1802                 adev->gfx.config.max_tile_pipes = 2;
1803                 adev->gfx.config.max_sh_per_se = 1;
1804                 adev->gfx.config.max_backends_per_se = 2;
1805                 adev->gfx.config.max_cu_per_sh = 8;
1806                 adev->gfx.config.max_texture_channel_caches = 2;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 32;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810 
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         case CHIP_STONEY:
1818                 adev->gfx.config.max_shader_engines = 1;
1819                 adev->gfx.config.max_tile_pipes = 2;
1820                 adev->gfx.config.max_sh_per_se = 1;
1821                 adev->gfx.config.max_backends_per_se = 1;
1822                 adev->gfx.config.max_cu_per_sh = 3;
1823                 adev->gfx.config.max_texture_channel_caches = 2;
1824                 adev->gfx.config.max_gprs = 256;
1825                 adev->gfx.config.max_gs_threads = 16;
1826                 adev->gfx.config.max_hw_contexts = 8;
1827 
1828                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833                 break;
1834         default:
1835                 adev->gfx.config.max_shader_engines = 2;
1836                 adev->gfx.config.max_tile_pipes = 4;
1837                 adev->gfx.config.max_cu_per_sh = 2;
1838                 adev->gfx.config.max_sh_per_se = 1;
1839                 adev->gfx.config.max_backends_per_se = 2;
1840                 adev->gfx.config.max_texture_channel_caches = 4;
1841                 adev->gfx.config.max_gprs = 256;
1842                 adev->gfx.config.max_gs_threads = 32;
1843                 adev->gfx.config.max_hw_contexts = 8;
1844 
1845                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850                 break;
1851         }
1852 
1853         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1856 
1857         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858         adev->gfx.config.mem_max_burst_length_bytes = 256;
1859         if (adev->flags & AMD_IS_APU) {
1860                 /* Get memory bank mapping mode. */
1861                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1864 
1865                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1868 
1869                 /* Validate settings in case only one DIMM installed. */
1870                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871                         dimm00_addr_map = 0;
1872                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873                         dimm01_addr_map = 0;
1874                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875                         dimm10_addr_map = 0;
1876                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877                         dimm11_addr_map = 0;
1878 
1879                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1880                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1881                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882                         adev->gfx.config.mem_row_size_in_kb = 2;
1883                 else
1884                         adev->gfx.config.mem_row_size_in_kb = 1;
1885         } else {
1886                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1889                         adev->gfx.config.mem_row_size_in_kb = 4;
1890         }
1891 
1892         adev->gfx.config.shader_engine_tile_size = 32;
1893         adev->gfx.config.num_gpus = 1;
1894         adev->gfx.config.multi_gpu_tile_size = 64;
1895 
1896         /* fix up row size */
1897         switch (adev->gfx.config.mem_row_size_in_kb) {
1898         case 1:
1899         default:
1900                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901                 break;
1902         case 2:
1903                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904                 break;
1905         case 4:
1906                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907                 break;
1908         }
1909         adev->gfx.config.gb_addr_config = gb_addr_config;
1910 
1911         return 0;
1912 }
1913 
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915                                         int mec, int pipe, int queue)
1916 {
1917         int r;
1918         unsigned irq_type;
1919         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1920 
1921         ring = &adev->gfx.compute_ring[ring_id];
1922 
1923         /* mec0 is me1 */
1924         ring->me = mec + 1;
1925         ring->pipe = pipe;
1926         ring->queue = queue;
1927 
1928         ring->ring_obj = NULL;
1929         ring->use_doorbell = true;
1930         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1931         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1933         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934 
1935         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937                 + ring->pipe;
1938 
1939         /* type-2 packets are deprecated on MEC, use type-3 instead */
1940         r = amdgpu_ring_init(adev, ring, 1024,
1941                         &adev->gfx.eop_irq, irq_type);
1942         if (r)
1943                 return r;
1944 
1945 
1946         return 0;
1947 }
1948 
1949 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1950 
/*
 * gfx_v8_0_sw_init - software-side GFX v8 initialization
 *
 * @handle: amdgpu_device pointer cast to void *
 *
 * Determines the MEC topology for the ASIC, registers the CP/SQ interrupt
 * sources, loads microcode, allocates RLC/MEC/KIQ buffer objects, and
 * initializes the gfx and compute rings.  The ordering below matters:
 * IRQ sources must exist before rings are initialized against them, and
 * KIQ/MQD setup follows compute-ring creation.
 *
 * Returns 0 on success or a negative error code on the first failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Number of MEC blocks varies per ASIC; the rest of the compute
	 * topology (pipes/queues) is uniform across GFX v8 parts. */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt messages are handled from process context. */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues the scheduler policy disabled */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2094 
/*
 * gfx_v8_0_sw_fini - software-side GFX v8 teardown
 *
 * @handle: amdgpu_device pointer cast to void *
 *
 * Releases everything gfx_v8_0_sw_init() allocated, in roughly reverse
 * order: rings first, then MQDs/KIQ, MEC and RLC buffer objects, and
 * finally the firmware images.  Do not reorder these calls.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* only Carrizo/Stoney allocate a CP jump table BO in rlc init */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2124 
2125 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2126 {
2127         uint32_t *modearray, *mod2array;
2128         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2129         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2130         u32 reg_offset;
2131 
2132         modearray = adev->gfx.config.tile_mode_array;
2133         mod2array = adev->gfx.config.macrotile_mode_array;
2134 
2135         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2136                 modearray[reg_offset] = 0;
2137 
2138         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2139                 mod2array[reg_offset] = 0;
2140 
2141         switch (adev->asic_type) {
2142         case CHIP_TOPAZ:
2143                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144                                 PIPE_CONFIG(ADDR_SURF_P2) |
2145                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148                                 PIPE_CONFIG(ADDR_SURF_P2) |
2149                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152                                 PIPE_CONFIG(ADDR_SURF_P2) |
2153                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2) |
2157                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                 PIPE_CONFIG(ADDR_SURF_P2) |
2161                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2164                                 PIPE_CONFIG(ADDR_SURF_P2) |
2165                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168                                 PIPE_CONFIG(ADDR_SURF_P2) |
2169                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2172                                 PIPE_CONFIG(ADDR_SURF_P2));
2173                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174                                 PIPE_CONFIG(ADDR_SURF_P2) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2185                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2201                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222                                  PIPE_CONFIG(ADDR_SURF_P2) |
2223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2226                                  PIPE_CONFIG(ADDR_SURF_P2) |
2227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2230                                  PIPE_CONFIG(ADDR_SURF_P2) |
2231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2234                                  PIPE_CONFIG(ADDR_SURF_P2) |
2235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2237                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238                                  PIPE_CONFIG(ADDR_SURF_P2) |
2239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2242                                  PIPE_CONFIG(ADDR_SURF_P2) |
2243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2245 
2246                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249                                 NUM_BANKS(ADDR_SURF_8_BANK));
2250                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253                                 NUM_BANKS(ADDR_SURF_8_BANK));
2254                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                 NUM_BANKS(ADDR_SURF_8_BANK));
2258                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261                                 NUM_BANKS(ADDR_SURF_8_BANK));
2262                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265                                 NUM_BANKS(ADDR_SURF_8_BANK));
2266                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269                                 NUM_BANKS(ADDR_SURF_8_BANK));
2270                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273                                 NUM_BANKS(ADDR_SURF_8_BANK));
2274                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2275                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2276                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277                                 NUM_BANKS(ADDR_SURF_16_BANK));
2278                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281                                 NUM_BANKS(ADDR_SURF_16_BANK));
2282                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285                                  NUM_BANKS(ADDR_SURF_16_BANK));
2286                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2288                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289                                  NUM_BANKS(ADDR_SURF_16_BANK));
2290                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293                                  NUM_BANKS(ADDR_SURF_16_BANK));
2294                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2296                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297                                  NUM_BANKS(ADDR_SURF_16_BANK));
2298                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2301                                  NUM_BANKS(ADDR_SURF_8_BANK));
2302 
2303                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2304                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2305                             reg_offset != 23)
2306                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2307 
2308                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2309                         if (reg_offset != 7)
2310                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2311 
2312                 break;
2313         case CHIP_FIJI:
2314         case CHIP_VEGAM:
2315                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2349                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2381                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2433                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437 
2438                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441                                 NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                  NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481                                  NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485                                  NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493                                  NUM_BANKS(ADDR_SURF_4_BANK));
2494 
2495                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2497 
2498                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2499                         if (reg_offset != 7)
2500                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2501 
2502                 break;
2503         case CHIP_TONGA:
                 /*
                  * Tonga tiling table: modearray[0..30] describe the tiling
                  * (array mode, pipe config, tile split / sample split, micro
                  * tile mode) for each GB_TILE_MODE register.  Most entries
                  * use the 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; the
                  * PRT "duplicate" entries (7, 12, 17, 23, 30) use P4_16x16.
                  */
2504                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2507                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2511                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2537                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2538                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2539                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2547                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2550                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2570                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2603                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2607                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2619                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2622                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2624                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2626 
                 /*
                  * Macrotile (bank-layout) table for GB_MACROTILE_MODE:
                  * bank width/height, macro tile aspect ratio and bank count
                  * per macrotile mode index.  mod2array[7] is intentionally
                  * never initialized and the write loop below skips offset 7
                  * (same pattern as the other ASICs in this switch) —
                  * presumably that register index is unused/reserved; confirm
                  * against the VI register documentation before relying on it.
                  */
2627                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2658                                 NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662                                 NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2665                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2666                                  NUM_BANKS(ADDR_SURF_16_BANK));
2667                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670                                  NUM_BANKS(ADDR_SURF_16_BANK));
2671                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674                                  NUM_BANKS(ADDR_SURF_8_BANK));
2675                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678                                  NUM_BANKS(ADDR_SURF_4_BANK));
2679                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682                                  NUM_BANKS(ADDR_SURF_4_BANK));
2683 
                 /* Commit both tables to the hardware registers. */
2684                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2685                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2686 
                 /* Offset 7 is skipped — mod2array[7] was never set above. */
2687                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2688                         if (reg_offset != 7)
2689                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2690 
2691                 break;
2692         case CHIP_POLARIS11:
2693         case CHIP_POLARIS12:
2694                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2698                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2728                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2731                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2740                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2747                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2753                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2757                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2760                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2765                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2768                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2771                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2791                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2795                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2803                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2809                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2812                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816 
2817                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821 
2822                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826 
2827                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831 
2832                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836 
2837                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841 
2842                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846 
2847                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851 
2852                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856 
2857                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861 
2862                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865                                 NUM_BANKS(ADDR_SURF_16_BANK));
2866 
2867                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2869                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870                                 NUM_BANKS(ADDR_SURF_16_BANK));
2871 
2872                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876 
2877                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880                                 NUM_BANKS(ADDR_SURF_8_BANK));
2881 
2882                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2885                                 NUM_BANKS(ADDR_SURF_4_BANK));
2886 
2887                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2888                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2889 
2890                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2891                         if (reg_offset != 7)
2892                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2893 
2894                 break;
2895         case CHIP_POLARIS10:
2896                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2930                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2933                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2962                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2967                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2991                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2997                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3005                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3006                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3014                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018 
3019                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023 
3024                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028 
3029                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033 
3034                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038 
3039                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043 
3044                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048 
3049                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_16_BANK));
3053 
3054                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057                                 NUM_BANKS(ADDR_SURF_16_BANK));
3058 
3059                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062                                 NUM_BANKS(ADDR_SURF_16_BANK));
3063 
3064                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3067                                 NUM_BANKS(ADDR_SURF_16_BANK));
3068 
3069                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072                                 NUM_BANKS(ADDR_SURF_16_BANK));
3073 
3074                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3077                                 NUM_BANKS(ADDR_SURF_8_BANK));
3078 
3079                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082                                 NUM_BANKS(ADDR_SURF_4_BANK));
3083 
3084                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087                                 NUM_BANKS(ADDR_SURF_4_BANK));
3088 
3089                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3090                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3091 
3092                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3093                         if (reg_offset != 7)
3094                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3095 
3096                 break;
3097         case CHIP_STONEY:
3098                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3099                                 PIPE_CONFIG(ADDR_SURF_P2) |
3100                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                 PIPE_CONFIG(ADDR_SURF_P2) |
3104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                 PIPE_CONFIG(ADDR_SURF_P2) |
3108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P2) |
3116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P2) |
3124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3127                                 PIPE_CONFIG(ADDR_SURF_P2));
3128                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3129                                 PIPE_CONFIG(ADDR_SURF_P2) |
3130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3131                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3132                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3133                                  PIPE_CONFIG(ADDR_SURF_P2) |
3134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3140                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3156                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3200 
3201                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3204                                 NUM_BANKS(ADDR_SURF_8_BANK));
3205                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                 NUM_BANKS(ADDR_SURF_8_BANK));
3209                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3212                                 NUM_BANKS(ADDR_SURF_8_BANK));
3213                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216                                 NUM_BANKS(ADDR_SURF_8_BANK));
3217                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220                                 NUM_BANKS(ADDR_SURF_8_BANK));
3221                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                 NUM_BANKS(ADDR_SURF_8_BANK));
3225                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228                                 NUM_BANKS(ADDR_SURF_8_BANK));
3229                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3232                                 NUM_BANKS(ADDR_SURF_16_BANK));
3233                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236                                 NUM_BANKS(ADDR_SURF_16_BANK));
3237                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3238                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                  NUM_BANKS(ADDR_SURF_16_BANK));
3241                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3243                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                  NUM_BANKS(ADDR_SURF_16_BANK));
3245                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256                                  NUM_BANKS(ADDR_SURF_8_BANK));
3257 
3258                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3259                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3260                             reg_offset != 23)
3261                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3262 
3263                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3264                         if (reg_offset != 7)
3265                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3266 
3267                 break;
3268         default:
3269                 dev_warn(adev->dev,
3270                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3271                          adev->asic_type);
3272                 /* fall through */
3273 
3274         case CHIP_CARRIZO:
3275                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276                                 PIPE_CONFIG(ADDR_SURF_P2) |
3277                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3279                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280                                 PIPE_CONFIG(ADDR_SURF_P2) |
3281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284                                 PIPE_CONFIG(ADDR_SURF_P2) |
3285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                 PIPE_CONFIG(ADDR_SURF_P2) |
3293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3300                                 PIPE_CONFIG(ADDR_SURF_P2) |
3301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3304                                 PIPE_CONFIG(ADDR_SURF_P2));
3305                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306                                 PIPE_CONFIG(ADDR_SURF_P2) |
3307                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3308                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3309                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310                                  PIPE_CONFIG(ADDR_SURF_P2) |
3311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3314                                  PIPE_CONFIG(ADDR_SURF_P2) |
3315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3317                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3318                                  PIPE_CONFIG(ADDR_SURF_P2) |
3319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3322                                  PIPE_CONFIG(ADDR_SURF_P2) |
3323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3326                                  PIPE_CONFIG(ADDR_SURF_P2) |
3327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3330                                  PIPE_CONFIG(ADDR_SURF_P2) |
3331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3333                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3334                                  PIPE_CONFIG(ADDR_SURF_P2) |
3335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3337                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338                                  PIPE_CONFIG(ADDR_SURF_P2) |
3339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3354                                  PIPE_CONFIG(ADDR_SURF_P2) |
3355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3358                                  PIPE_CONFIG(ADDR_SURF_P2) |
3359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3362                                  PIPE_CONFIG(ADDR_SURF_P2) |
3363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3366                                  PIPE_CONFIG(ADDR_SURF_P2) |
3367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3369                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3370                                  PIPE_CONFIG(ADDR_SURF_P2) |
3371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3374                                  PIPE_CONFIG(ADDR_SURF_P2) |
3375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3377 
3378                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3380                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3381                                 NUM_BANKS(ADDR_SURF_8_BANK));
3382                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385                                 NUM_BANKS(ADDR_SURF_8_BANK));
3386                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3389                                 NUM_BANKS(ADDR_SURF_8_BANK));
3390                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393                                 NUM_BANKS(ADDR_SURF_8_BANK));
3394                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397                                 NUM_BANKS(ADDR_SURF_8_BANK));
3398                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401                                 NUM_BANKS(ADDR_SURF_8_BANK));
3402                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405                                 NUM_BANKS(ADDR_SURF_8_BANK));
3406                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3409                                 NUM_BANKS(ADDR_SURF_16_BANK));
3410                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413                                 NUM_BANKS(ADDR_SURF_16_BANK));
3414                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3415                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                  NUM_BANKS(ADDR_SURF_16_BANK));
3418                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3420                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421                                  NUM_BANKS(ADDR_SURF_16_BANK));
3422                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425                                  NUM_BANKS(ADDR_SURF_16_BANK));
3426                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429                                  NUM_BANKS(ADDR_SURF_16_BANK));
3430                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433                                  NUM_BANKS(ADDR_SURF_8_BANK));
3434 
3435                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3436                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3437                             reg_offset != 23)
3438                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3439 
3440                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3441                         if (reg_offset != 7)
3442                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3443 
3444                 break;
3445         }
3446 }
3447 
3448 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3449                                   u32 se_num, u32 sh_num, u32 instance)
3450 {
3451         u32 data;
3452 
3453         if (instance == 0xffffffff)
3454                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3455         else
3456                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3457 
3458         if (se_num == 0xffffffff)
3459                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3460         else
3461                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3462 
3463         if (sh_num == 0xffffffff)
3464                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3465         else
3466                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3467 
3468         WREG32(mmGRBM_GFX_INDEX, data);
3469 }
3470 
/*
 * Route SRBM-indexed register accesses to the given ME/pipe/queue/VMID.
 * Thin wrapper around vi_srbm_select() so the gfx IP block can expose a
 * uniform select_me_pipe_q callback.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3476 
3477 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3478 {
3479         u32 data, mask;
3480 
3481         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3482                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3483 
3484         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3485 
3486         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3487                                          adev->gfx.config.max_sh_per_se);
3488 
3489         return (~data) & mask;
3490 }
3491 
/*
 * Fill in the default PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * values for the current ASIC.  The constants encode how packers,
 * render backends and shader engines are mapped for each chip; they
 * are OR-ed into the caller-provided values rather than assigned.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single-SE APU: defaults of 0 are correct */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3532 
/*
 * Write per-SE raster configs for a chip with harvested (fused-off)
 * render backends.  The generic raster_config values assume all RBs
 * are present; when some are disabled, each SE needs the PKR/RB/SE
 * map fields rewritten so work is only routed to live backends.
 *
 * @raster_config:   base PA_SC_RASTER_CONFIG value
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask:         bitmap of enabled render backends (chip-wide)
 * @num_rb:          total RB count assuming none were harvested
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slices of the global RB enable mask. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is dead, remap SE_PAIR to the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of the pair is empty, map all work to the other. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Likewise for a fully-harvested packer within this SE. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Fix up the RB mapping inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3641 
/*
 * Discover which render backends survived harvesting, program the
 * raster configuration accordingly, and cache the per-SE/SH register
 * values so they can be reported to userspace.  Must run with the GPU
 * quiescent; takes grbm_idx_mutex while steering GRBM_GFX_INDEX.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Build a chip-wide bitmap of active RBs, one SH at a time. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Full complement of RBs: the generic config is fine (broadcast
	 * write); otherwise write harvest-corrected values per SE. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3698 
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES    (0x6000)
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Program the compute VMIDs (8..15) one at a time via SRBM. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   acccess. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3752 
3753 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3754 {
3755         int vmid;
3756 
3757         /*
3758          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3759          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3760          * the driver can enable them for graphics. VMID0 should maintain
3761          * access so that HWS firmware can save/restore entries.
3762          */
3763         for (vmid = 1; vmid < 16; vmid++) {
3764                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3765                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3766                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3767                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3768         }
3769 }
3770 
3771 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3772 {
3773         switch (adev->asic_type) {
3774         default:
3775                 adev->gfx.config.double_offchip_lds_buf = 1;
3776                 break;
3777         case CHIP_CARRIZO:
3778         case CHIP_STONEY:
3779                 adev->gfx.config.double_offchip_lds_buf = 0;
3780                 break;
3781         }
3782 }
3783 
/*
 * One-time programming of gfx constants: address config, tiling
 * tables, RB setup, CU info, SH_MEM apertures for every VMID, and a
 * handful of broadcast shader-core registers.  Called during hw init.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM registers for every VMID; VMID0 (kernel) gets
	 * UC default mtype and base 0, the rest get NC with the shared
	 * aperture base. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3866 
/*
 * Poll until the RLC serdes masters report idle: first the per-CU
 * master for every SE/SH (steered via GRBM_GFX_INDEX), then the
 * non-CU masters (SE/GC/TC0/TC1).  Each poll is bounded by
 * adev->usec_timeout; on timeout the SE/SH selection is restored and
 * a message is logged before returning early.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast before bailing out */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3904 
3905 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3906                                                bool enable)
3907 {
3908         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3909 
3910         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3911         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3912         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3913         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3914 
3915         WREG32(mmCP_INT_CNTL_RING0, tmp);
3916 }
3917 
3918 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3919 {
3920         /* csib */
3921         WREG32(mmRLC_CSIB_ADDR_HI,
3922                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3923         WREG32(mmRLC_CSIB_ADDR_LO,
3924                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3925         WREG32(mmRLC_CSIB_LENGTH,
3926                         adev->gfx.rlc.clear_state_size);
3927 }
3928 
3929 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3930                                 int ind_offset,
3931                                 int list_size,
3932                                 int *unique_indices,
3933                                 int *indices_count,
3934                                 int max_indices,
3935                                 int *ind_start_offsets,
3936                                 int *offset_count,
3937                                 int max_offset)
3938 {
3939         int indices;
3940         bool new_entry = true;
3941 
3942         for (; ind_offset < list_size; ind_offset++) {
3943 
3944                 if (new_entry) {
3945                         new_entry = false;
3946                         ind_start_offsets[*offset_count] = ind_offset;
3947                         *offset_count = *offset_count + 1;
3948                         BUG_ON(*offset_count >= max_offset);
3949                 }
3950 
3951                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3952                         new_entry = true;
3953                         continue;
3954                 }
3955 
3956                 ind_offset += 2;
3957 
3958                 /* look for the matching indice */
3959                 for (indices = 0;
3960                         indices < *indices_count;
3961                         indices++) {
3962                         if (unique_indices[indices] ==
3963                                 register_list_format[ind_offset])
3964                                 break;
3965                 }
3966 
3967                 if (indices >= *indices_count) {
3968                         unique_indices[*indices_count] =
3969                                 register_list_format[ind_offset];
3970                         indices = *indices_count;
3971                         *indices_count = *indices_count + 1;
3972                         BUG_ON(*indices_count >= max_indices);
3973                 }
3974 
3975                 register_list_format[ind_offset] = indices;
3976         }
3977 }
3978 
/*
 * Program the RLC save/restore machine (SRM) lists:
 *  - upload the direct register restore list into SRM ARAM,
 *  - upload the (index-compacted) indirect register list format into
 *    GPM scratch, together with its size and per-entry start offsets,
 *  - program the unique index address/data register pairs collected
 *    while parsing the list.
 *
 * Returns 0 on success or -ENOMEM if the temporary copy of the
 * register list format could not be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites the index dwords in place */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* ARAM address auto-increments after each data write */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is programmed in units of dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits -> addr register, high bits -> data */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4041 
/* Turn on the RLC save/restore machine (lists must be programmed first). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4046 
/*
 * Program the powergating timing parameters: the RB WPTR idle poll
 * count, the RLC power up/down/propagate/mem-sleep delays, the serdes
 * command delay and the GFX-idle threshold for GRBM register save.
 * Magic values are hardware-tuning constants for VI parts.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4063 
/* Enable/disable SMU clock slow-down while a block is powering up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4069 
/* Enable/disable SMU clock slow-down while a block is powering down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4075 
/*
 * Enable/disable CP powergating.  Note the register field has inverted
 * sense (CP_PG_DISABLE), so enable==true clears the bit.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4080 
4081 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4082 {
4083         if ((adev->asic_type == CHIP_CARRIZO) ||
4084             (adev->asic_type == CHIP_STONEY)) {
4085                 gfx_v8_0_init_csb(adev);
4086                 gfx_v8_0_init_save_restore_list(adev);
4087                 gfx_v8_0_enable_save_restore_machine(adev);
4088                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4089                 gfx_v8_0_init_power_gating(adev);
4090                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4091         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4092                    (adev->asic_type == CHIP_POLARIS12) ||
4093                    (adev->asic_type == CHIP_VEGAM)) {
4094                 gfx_v8_0_init_csb(adev);
4095                 gfx_v8_0_init_save_restore_list(adev);
4096                 gfx_v8_0_enable_save_restore_machine(adev);
4097                 gfx_v8_0_init_power_gating(adev);
4098         }
4099 
4100 }
4101 
/*
 * Stop the RLC: disable its F32 core, mask the GUI-idle interrupts
 * and wait for the serdes units to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4109 
/* Pulse the GRBM soft reset line for the RLC (50us assert, 50us settle). */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4118 
/*
 * Start the RLC F32 core.  On dGPUs the GUI-idle interrupts are
 * re-enabled here; on APUs (e.g. Carrizo) that is deferred until
 * after the CP has been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4129 
/*
 * Bring the RLC back up: stop, reset, reprogram PG state and restart.
 * Under SR-IOV the host owns the RLC, so only the clear-state buffer
 * address is (re)programmed.  Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4144 
4145 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4146 {
4147         int i;
4148         u32 tmp = RREG32(mmCP_ME_CNTL);
4149 
4150         if (enable) {
4151                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4152                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4153                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4154         } else {
4155                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4156                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4157                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4158                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4159                         adev->gfx.gfx_ring[i].sched.ready = false;
4160         }
4161         WREG32(mmCP_ME_CNTL, tmp);
4162         udelay(50);
4163 }
4164 
4165 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4166 {
4167         u32 count = 0;
4168         const struct cs_section_def *sect = NULL;
4169         const struct cs_extent_def *ext = NULL;
4170 
4171         /* begin clear state */
4172         count += 2;
4173         /* context control state */
4174         count += 3;
4175 
4176         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4177                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4178                         if (sect->id == SECT_CONTEXT)
4179                                 count += 2 + ext->reg_count;
4180                         else
4181                                 return 0;
4182                 }
4183         }
4184         /* pa_sc_raster_config/pa_sc_raster_config1 */
4185         count += 4;
4186         /* end clear state */
4187         count += 2;
4188         /* clear state */
4189         count += 2;
4190 
4191         return count;
4192 }
4193 
/*
 * Initialize the gfx CP and emit the clear-state sequence on gfx
 * ring 0: preamble begin, context control, the context register
 * extents from vi_cs_data, the raster configs, preamble end,
 * CLEAR_STATE, and the CE partition bases.  The ring allocation size
 * must match gfx_v8_0_get_csb_size() (+4 for the SET_BASE packet).
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 from SE0/SH0 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the CP gfx ring doorbell: enable it at the ring's doorbell
 * index (or disable it if the ring doesn't use doorbells) and, on
 * dGPUs, program the doorbell address range.  Topaz has no gfx
 * doorbells at all; APUs skip the range registers.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* doorbell range registers only exist/apply on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4290 
/*
 * Bring up gfx ring 0: program the ring buffer size/control, reset
 * the read/write pointers, set the rptr/wptr writeback addresses,
 * program the ring base, configure the doorbell and kick off the
 * clear-state sequence via gfx_v8_0_cp_gfx_start().
 * Always returns 0.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* settle before clearing RB_RPTR_WR_ENA again */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4344 
4345 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4346 {
4347         int i;
4348 
4349         if (enable) {
4350                 WREG32(mmCP_MEC_CNTL, 0);
4351         } else {
4352                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4353                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4354                         adev->gfx.compute_ring[i].sched.ready = false;
4355                 adev->gfx.kiq.ring.sched.ready = false;
4356         }
4357         udelay(50);
4358 }
4359 
4360 /* KIQ functions */
4361 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4362 {
4363         uint32_t tmp;
4364         struct amdgpu_device *adev = ring->adev;
4365 
4366         /* tell RLC which is KIQ queue */
4367         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4368         tmp &= 0xffffff00;
4369         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4370         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4371         tmp |= 0x80;
4372         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4373 }
4374 
/*
 * Use the KIQ to map all kernel compute queues (KCQs): emit one
 * SET_RESOURCES packet with the mask of MEC queues owned by the
 * kernel, then a MAP_QUEUES packet per compute ring with its MQD
 * and wptr-writeback addresses.
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of kernel-owned MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords for SET_RESOURCES + 8 per MAP_QUEUES packet */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4435 
/*
 * Deactivate the currently SRBM-selected hardware queue descriptor:
 * if it is active, issue a dequeue request of type @req and poll for
 * the HQD to go inactive, then zero the dequeue request and PQ
 * read/write pointers.  The caller must hold the SRBM selection.
 *
 * Returns 0 on success or -ETIMEDOUT if the queue stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear the request and pointers even on timeout */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4456 
/*
 * Populate the memory queue descriptor (MQD) for a compute/KIQ ring.
 * The MQD mirrors the CP_HQD_* register block: ring base/size, EOP
 * buffer, doorbell setup, rptr/wptr writeback addresses and various
 * control defaults read back from the currently selected HQD.  The
 * caller must have selected the target me/pipe/queue via SRBM.
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation as the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot current HQD register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4599 
/*
 * Write a populated MQD into the HQD registers of the currently
 * SRBM-selected queue.  The MQD fields are laid out to mirror the
 * register block starting at mmCP_MQD_BASE_ADDR, so ranges can be
 * copied by register offset.  CP_HQD_ACTIVE is written last so the
 * queue only activates once fully programmed.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4636 
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore) the KIQ ring's MQD.
 *
 * On a GPU reset the MQD is restored from the CPU-side backup and committed
 * to the hardware; on first init a fresh MQD is built with gfx_v8_0_mqd_init()
 * and then backed up for future resets.  The KIQ backup lives at index
 * AMDGPU_MAX_COMPUTE_RINGS, just past the compute-ring backups.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		/* commit the restored MQD with the KIQ queue selected */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		/* first-time init: build the MQD from scratch */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4675 
/*
 * gfx_v8_0_kcq_init_queue - initialize a compute (KCQ) ring's MQD.
 *
 * Unlike the KIQ, the MQD is only *built* here; it is mapped onto hardware
 * later via the KIQ (gfx_v8_0_kiq_kcq_enable()), so no mqd_commit happens
 * in this function.  Three cases:
 *   - first init: build a fresh MQD and back it up;
 *   - GPU reset:  restore the MQD from backup and reset the ring buffer;
 *   - resume from suspend: just clear the ring buffer (MQD kept).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position of this ring in compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume path: MQD already valid, just clear the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4706 
/*
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture and
 * enable doorbell processing in the CP.
 *
 * NOTE(review): the range registers are only written on ASICs newer than
 * Tonga; presumably older parts lack/ignore them — confirm against the
 * register spec.  The << 2 converts a doorbell index to a byte offset.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4716 
4717 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4718 {
4719         struct amdgpu_ring *ring;
4720         int r;
4721 
4722         ring = &adev->gfx.kiq.ring;
4723 
4724         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4725         if (unlikely(r != 0))
4726                 return r;
4727 
4728         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4729         if (unlikely(r != 0))
4730                 return r;
4731 
4732         gfx_v8_0_kiq_init_queue(ring);
4733         amdgpu_bo_kunmap(ring->mqd_obj);
4734         ring->mqd_ptr = NULL;
4735         amdgpu_bo_unreserve(ring->mqd_obj);
4736         ring->sched.ready = true;
4737         return 0;
4738 }
4739 
4740 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4741 {
4742         struct amdgpu_ring *ring = NULL;
4743         int r = 0, i;
4744 
4745         gfx_v8_0_cp_compute_enable(adev, true);
4746 
4747         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748                 ring = &adev->gfx.compute_ring[i];
4749 
4750                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4751                 if (unlikely(r != 0))
4752                         goto done;
4753                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4754                 if (!r) {
4755                         r = gfx_v8_0_kcq_init_queue(ring);
4756                         amdgpu_bo_kunmap(ring->mqd_obj);
4757                         ring->mqd_ptr = NULL;
4758                 }
4759                 amdgpu_bo_unreserve(ring->mqd_obj);
4760                 if (r)
4761                         goto done;
4762         }
4763 
4764         gfx_v8_0_set_mec_doorbell_range(adev);
4765 
4766         r = gfx_v8_0_kiq_kcq_enable(adev);
4767         if (r)
4768                 goto done;
4769 
4770 done:
4771         return r;
4772 }
4773 
/*
 * gfx_v8_0_cp_test_all_rings - ring-test GFX, KIQ, and all compute rings.
 *
 * A GFX or KIQ test failure aborts and is returned to the caller.  Compute
 * ring results are deliberately not propagated: amdgpu_ring_test_helper()
 * marks a failing ring not-ready, and the driver continues without it.
 */
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		/* result intentionally ignored; helper flags the ring itself */
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
4797 
/*
 * gfx_v8_0_cp_resume - full command-processor bring-up.
 *
 * Order matters: KIQ first (it maps the other compute queues), then the
 * GFX ring, then the KCQs, then a ring test of everything.  GUI-idle
 * interrupts are masked on dGPUs during bring-up and re-enabled at the end.
 *
 * Returns 0 on success or the first failing step's error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4825 
/* Enable or disable both CP engines (graphics ME/PFP/CE and compute MEC). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4831 
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback.
 *
 * Programs golden registers and GFX constants, pins the clear-state buffer
 * in VRAM, resumes the RLC microcontroller, and finally brings up the CP.
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 * Returns 0 on success or a negative error code from any step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = gfx_v8_0_csb_vram_pin(adev);
	if (r)
		return r;

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
4852 
/*
 * gfx_v8_0_kcq_disable - unmap all compute queues via the KIQ.
 *
 * Emits one PACKET3_UNMAP_QUEUES (RESET_QUEUES action, addressed by
 * doorbell offset) per compute ring on the KIQ ring — 6 dwords each —
 * then ring-tests the KIQ to wait for completion.
 *
 * NOTE(review): if amdgpu_ring_alloc() fails we log but still emit the
 * packets; presumably amdgpu_ring_write tolerates an unallocated ring —
 * verify before relying on this path.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;

	/* 6 dwords of UNMAP_QUEUES per compute ring */
	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
	/* submit and wait for the KIQ to process the unmaps */
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");

	return r;
}
4882 
4883 static bool gfx_v8_0_is_idle(void *handle)
4884 {
4885         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4886 
4887         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4888                 || RREG32(mmGRBM_STATUS2) != 0x8)
4889                 return false;
4890         else
4891                 return true;
4892 }
4893 
4894 static bool gfx_v8_0_rlc_is_idle(void *handle)
4895 {
4896         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4897 
4898         if (RREG32(mmGRBM_STATUS2) != 0x8)
4899                 return false;
4900         else
4901                 return true;
4902 }
4903 
4904 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4905 {
4906         unsigned int i;
4907         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4908 
4909         for (i = 0; i < adev->usec_timeout; i++) {
4910                 if (gfx_v8_0_rlc_is_idle(handle))
4911                         return 0;
4912 
4913                 udelay(1);
4914         }
4915         return -ETIMEDOUT;
4916 }
4917 
4918 static int gfx_v8_0_wait_for_idle(void *handle)
4919 {
4920         unsigned int i;
4921         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4922 
4923         for (i = 0; i < adev->usec_timeout; i++) {
4924                 if (gfx_v8_0_is_idle(handle))
4925                         return 0;
4926 
4927                 udelay(1);
4928         }
4929         return -ETIMEDOUT;
4930 }
4931 
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback (teardown).
 *
 * Drops the GFX interrupt references, unmaps the compute queues, then —
 * bare-metal only — halts the CP and RLC under RLC safe mode and unpins
 * the clear-state buffer.  SR-IOV guests stop after the KCQ disable since
 * the host owns the engines.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* halt CP then RLC, but only once each is actually idle */
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	gfx_v8_0_csb_vram_unpin(adev);

	return 0;
}
4965 
/* Suspend is just a full hw teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4970 
/* Resume is just a full hw re-init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4975 
/*
 * gfx_v8_0_check_soft_reset - decide whether the GFX block needs a soft reset.
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for busy units and
 * accumulates the corresponding SOFT_RESET_* field masks.  The computed
 * masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the later
 * pre/soft/post reset callbacks to consume.
 *
 * Returns true if any reset bit was requested.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any busy pipeline unit -> reset CP + GFX and the GRBM itself */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro-engine (fetcher/compute/gfx) -> reset all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* stash results for {pre,post,}soft_reset and report */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5037 
5038 static int gfx_v8_0_pre_soft_reset(void *handle)
5039 {
5040         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5041         u32 grbm_soft_reset = 0;
5042 
5043         if ((!adev->gfx.grbm_soft_reset) &&
5044             (!adev->gfx.srbm_soft_reset))
5045                 return 0;
5046 
5047         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5048 
5049         /* stop the rlc */
5050         adev->gfx.rlc.funcs->stop(adev);
5051 
5052         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5053             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5054                 /* Disable GFX parsing/prefetching */
5055                 gfx_v8_0_cp_gfx_enable(adev, false);
5056 
5057         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5058             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5059             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5060             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5061                 int i;
5062 
5063                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5064                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5065 
5066                         mutex_lock(&adev->srbm_mutex);
5067                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5068                         gfx_v8_0_deactivate_hqd(adev, 2);
5069                         vi_srbm_select(adev, 0, 0, 0, 0);
5070                         mutex_unlock(&adev->srbm_mutex);
5071                 }
5072                 /* Disable MEC parsing/prefetching */
5073                 gfx_v8_0_cp_compute_enable(adev, false);
5074         }
5075 
5076        return 0;
5077 }
5078 
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits.
 *
 * Wraps the reset in a GMCON_DEBUG GFX stall/clear window, asserts the
 * cached GRBM then SRBM reset masks for ~50us each, and deasserts them.
 * The reads back of *_SOFT_RESET after each write post the write to the
 * hardware before the delay.  No-op when no reset was requested.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall the memory-controller side of GFX while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear window */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5140 
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset.
 *
 * Mirrors gfx_v8_0_pre_soft_reset(): if a CP-engine reset happened, the
 * compute HQDs are deactivated again and KIQ/KCQ are resumed; if a CP/GFX
 * reset happened, the graphics ring is resumed.  All rings are then
 * ring-tested and the RLC restarted.  No-op when no reset was requested.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every compute HQD is down before resuming */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5181 
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* serialize capture+readout so two callers can't interleave */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* latch the running counter into the LSB/MSB registers */
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5201 
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the per-VMID GDS/GWS/OA setup.
 *
 * Emits four WRITE_DATA packets that program the VMID's GDS base and size,
 * its GWS base/size, and its OA allocation mask.  Packet layout (header,
 * control, register, 0, value) is fixed by the CP's WRITE_DATA format.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the SIZE field, base in the low bits */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5240 
/*
 * wave_read_ind - read one indirect SQ register of a given wave.
 *
 * Programs SQ_IND_INDEX with the wave/simd/register address (FORCE_READ so
 * the access works regardless of wave state) and returns SQ_IND_DATA.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5250 
5251 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5252                            uint32_t wave, uint32_t thread,
5253                            uint32_t regno, uint32_t num, uint32_t *out)
5254 {
5255         WREG32(mmSQ_IND_INDEX,
5256                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5257                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5258                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5259                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5260                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5261                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5262         while (num--)
5263                 *(out++) = RREG32(mmSQ_IND_DATA);
5264 }
5265 
/*
 * gfx_v8_0_read_wave_data - snapshot a wave's state registers for debugfs.
 *
 * Fills @dst (advancing *@no_fields) with a fixed sequence of SQ wave
 * registers.  The order defines the "type 0 wave data" layout consumed by
 * the debug tooling — do not reorder.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5289 
/*
 * gfx_v8_0_read_wave_sgprs - read @size SGPRs of a wave starting at @start
 * into @dst, via the SQ indirect-register window (thread id 0).
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5298 
5299 
/* GFX IP callback table installed on adev->gfx.funcs in early_init. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5307 
/*
 * gfx_v8_0_early_init - IP-block early_init callback.
 *
 * Sets the ring counts and installs the gfx/ring/irq/gds/rlc function
 * tables.  No hardware is touched at this stage.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5322 
/*
 * gfx_v8_0_late_init - IP-block late_init callback.
 *
 * Takes references on the privileged-register/instruction, ECC error and
 * SQ interrupt sources, and runs the EDC GPR workarounds (which submit
 * IBs, hence late_init — the IB pool exists by now).
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5357 
/*
 * Toggle static per-CU medium-grain power gating.  On Polaris11/12 and
 * VegaM the SMU is notified first (it coordinates the gating); the RLC
 * STATIC_PER_CU_PG_ENABLE bit is written unconditionally on all ASICs.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM)) &&
	    adev->powerplay.pp_funcs->set_powergating_by_smu)
		/* Send msg to SMU via Powerplay */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5370 
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5376 
5377 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5378                 bool enable)
5379 {
5380         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5381 }
5382 
5383 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5384                                           bool enable)
5385 {
5386         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5387 }
5388 
5389 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5390                                                 bool enable)
5391 {
5392         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5393 
5394         /* Read any GFX register to wake up GFX. */
5395         if (!enable)
5396                 RREG32(mmDB_RENDER_CONTROL);
5397 }
5398 
5399 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5400                                           bool enable)
5401 {
5402         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5403                 cz_enable_gfx_cg_power_gating(adev, true);
5404                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5405                         cz_enable_gfx_pipeline_power_gating(adev, true);
5406         } else {
5407                 cz_enable_gfx_cg_power_gating(adev, false);
5408                 cz_enable_gfx_pipeline_power_gating(adev, false);
5409         }
5410 }
5411 
/* IP-block callback: configure GFX power gating for the requested state.
 *
 * All register programming is bracketed by RLC safe mode when any of the
 * SMG/RLC_SMU_HS/CP/DMG features are supported, so the RLC cannot race
 * the transitions.  SR-IOV VFs do not own power gating and return early.
 *
 * Returns 0 (no failure paths).
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down and CP power gating follow their support
		 * flags unconditionally, independent of @enable. */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static/dynamic MG gating engage only when both supported
		 * and gating was requested. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5482 
/* IP-block callback: report the currently active GFX clockgating features
 * by inspecting the hardware registers and OR-ing AMD_CG_SUPPORT_* bits
 * into @flags.  Note @flags is only zeroed for SR-IOV VFs; for bare metal
 * the caller is expected to have initialized it.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): for VFs *flags is cleared but the register reads
	 * below still run and may set bits — confirm this is intended. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the SM override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS implies MGLS as well */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS implies MGLS as well */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5524 
/* Broadcast a BPM serdes command to every CU/non-CU master on all
 * shader engines.  @reg_addr selects the BPM register, @cmd the value to
 * program; both are merged into RLC_SERDES_WR_CTRL after the command and
 * address fields from the previous request are cleared (Stoney lacks the
 * BPM_DATA/REG_ADDR fields, hence the smaller clear mask).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* select all SEs/SHs so the write reaches every serdes master */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5565 
5566 #define MSG_ENTER_RLC_SAFE_MODE     1
5567 #define MSG_EXIT_RLC_SAFE_MODE      0
5568 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5569 #define RLC_GPR_REG2__REQ__SHIFT 0
5570 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5571 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5572 
5573 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5574 {
5575         uint32_t rlc_setting;
5576 
5577         rlc_setting = RREG32(mmRLC_CNTL);
5578         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5579                 return false;
5580 
5581         return true;
5582 }
5583 
/* Request RLC safe mode (CMD=1, MESSAGE=1), then poll until GFX reports
 * both clock and power up, and until the RLC acknowledges the command by
 * clearing the CMD field.  Each poll is bounded by adev->usec_timeout.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): the request is seeded from RLC_CNTL but written to
	 * RLC_SAFE_MODE — confirm against the register spec that this is
	 * intentional rather than a copy-paste of the register offset. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* wait for the RLC to consume the command (CMD auto-clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5610 
/* Leave RLC safe mode (CMD=1, MESSAGE=0) and wait for the RLC to consume
 * the command (CMD field auto-clears), bounded by adev->usec_timeout.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	/* NOTE(review): seeded from RLC_CNTL but written to RLC_SAFE_MODE,
	 * matching gfx_v8_0_set_safe_mode() — verify against the spec. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5627 
/* RLC callback table for the GFX v8 family, plugged into the common
 * amdgpu_gfx_rlc_* helpers (safe-mode entry/exit, CSB handling, firmware
 * start/stop/reset sequencing).
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5641 
/* Enable or disable medium-grain clock gating (MGCG) plus the related
 * light-sleep (MGLS) and tree-shade (CGTS) features.  The numbered steps
 * follow a fixed hardware sequence and must not be reordered; everything
 * runs inside RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the override bits so
		 * MGCG can engage; APUs keep the GRBM override set. */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* CGTS light sleep only when both MGLS and CGTS_LS
			 * are advertised */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5745 
/* Enable or disable coarse-grain clock gating (CGCG) and, when
 * supported, coarse-grain light sleep (CGLS).  The serdes commands and
 * register writes follow a fixed hardware sequence inside RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5838 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5839                                             bool enable)
5840 {
5841         if (enable) {
5842                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5843                  * ===  MGCG + MGLS + TS(CG/LS) ===
5844                  */
5845                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5846                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5847         } else {
5848                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5849                  * ===  CGCG + CGLS ===
5850                  */
5851                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5852                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5853         }
5854         return 0;
5855 }
5856 
5857 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5858                                           enum amd_clockgating_state state)
5859 {
5860         uint32_t msg_id, pp_state = 0;
5861         uint32_t pp_support_state = 0;
5862 
5863         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5864                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5865                         pp_support_state = PP_STATE_SUPPORT_LS;
5866                         pp_state = PP_STATE_LS;
5867                 }
5868                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5869                         pp_support_state |= PP_STATE_SUPPORT_CG;
5870                         pp_state |= PP_STATE_CG;
5871                 }
5872                 if (state == AMD_CG_STATE_UNGATE)
5873                         pp_state = 0;
5874 
5875                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5876                                 PP_BLOCK_GFX_CG,
5877                                 pp_support_state,
5878                                 pp_state);
5879                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5880                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5881         }
5882 
5883         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5884                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5885                         pp_support_state = PP_STATE_SUPPORT_LS;
5886                         pp_state = PP_STATE_LS;
5887                 }
5888 
5889                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5890                         pp_support_state |= PP_STATE_SUPPORT_CG;
5891                         pp_state |= PP_STATE_CG;
5892                 }
5893 
5894                 if (state == AMD_CG_STATE_UNGATE)
5895                         pp_state = 0;
5896 
5897                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5898                                 PP_BLOCK_GFX_MG,
5899                                 pp_support_state,
5900                                 pp_state);
5901                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5902                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903         }
5904 
5905         return 0;
5906 }
5907 
5908 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5909                                           enum amd_clockgating_state state)
5910 {
5911 
5912         uint32_t msg_id, pp_state = 0;
5913         uint32_t pp_support_state = 0;
5914 
5915         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5916                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5917                         pp_support_state = PP_STATE_SUPPORT_LS;
5918                         pp_state = PP_STATE_LS;
5919                 }
5920                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5921                         pp_support_state |= PP_STATE_SUPPORT_CG;
5922                         pp_state |= PP_STATE_CG;
5923                 }
5924                 if (state == AMD_CG_STATE_UNGATE)
5925                         pp_state = 0;
5926 
5927                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5928                                 PP_BLOCK_GFX_CG,
5929                                 pp_support_state,
5930                                 pp_state);
5931                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5932                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5933         }
5934 
5935         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5936                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5937                         pp_support_state = PP_STATE_SUPPORT_LS;
5938                         pp_state = PP_STATE_LS;
5939                 }
5940                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5941                         pp_support_state |= PP_STATE_SUPPORT_CG;
5942                         pp_state |= PP_STATE_CG;
5943                 }
5944                 if (state == AMD_CG_STATE_UNGATE)
5945                         pp_state = 0;
5946 
5947                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5948                                 PP_BLOCK_GFX_3D,
5949                                 pp_support_state,
5950                                 pp_state);
5951                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5952                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953         }
5954 
5955         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5956                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5957                         pp_support_state = PP_STATE_SUPPORT_LS;
5958                         pp_state = PP_STATE_LS;
5959                 }
5960 
5961                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5962                         pp_support_state |= PP_STATE_SUPPORT_CG;
5963                         pp_state |= PP_STATE_CG;
5964                 }
5965 
5966                 if (state == AMD_CG_STATE_UNGATE)
5967                         pp_state = 0;
5968 
5969                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5970                                 PP_BLOCK_GFX_MG,
5971                                 pp_support_state,
5972                                 pp_state);
5973                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5974                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5975         }
5976 
5977         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5978                 pp_support_state = PP_STATE_SUPPORT_LS;
5979 
5980                 if (state == AMD_CG_STATE_UNGATE)
5981                         pp_state = 0;
5982                 else
5983                         pp_state = PP_STATE_LS;
5984 
5985                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986                                 PP_BLOCK_GFX_RLC,
5987                                 pp_support_state,
5988                                 pp_state);
5989                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5990                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5991         }
5992 
5993         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5994                 pp_support_state = PP_STATE_SUPPORT_LS;
5995 
5996                 if (state == AMD_CG_STATE_UNGATE)
5997                         pp_state = 0;
5998                 else
5999                         pp_state = PP_STATE_LS;
6000                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6001                         PP_BLOCK_GFX_CP,
6002                         pp_support_state,
6003                         pp_state);
6004                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6005                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6006         }
6007 
6008         return 0;
6009 }
6010 
/* IP-block callback: dispatch clock-gating configuration per ASIC.
 * Fiji/Carrizo/Stoney program registers directly; Tonga and the Polaris
 * family go through SMU messages.  SR-IOV VFs do not own clock gating.
 *
 * Returns 0 (no failure paths).
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6040 
6041 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6042 {
6043         return ring->adev->wb.wb[ring->rptr_offs];
6044 }
6045 
6046 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6047 {
6048         struct amdgpu_device *adev = ring->adev;
6049 
6050         if (ring->use_doorbell)
6051                 /* XXX check if swapping is necessary on BE */
6052                 return ring->adev->wb.wb[ring->wptr_offs];
6053         else
6054                 return RREG32(mmCP_RB0_WPTR);
6055 }
6056 
6057 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6058 {
6059         struct amdgpu_device *adev = ring->adev;
6060 
6061         if (ring->use_doorbell) {
6062                 /* XXX check if swapping is necessary on BE */
6063                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6064                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6065         } else {
6066                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6067                 (void)RREG32(mmCP_RB0_WPTR);
6068         }
6069 }
6070 
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * the matching GPU_HDP_FLUSH_DONE bit.  Compute/KIQ rings pick a CP2/CP6
 * bit based on ME/pipe (and bail out on an unexpected ME); the GFX ring
 * uses CP0 with the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* unknown ME: emit nothing rather than a bogus wait */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6103 
/*
 * Emit a VS_PARTIAL_FLUSH event followed by a VGT_FLUSH event.
 * The two EVENT_WRITE packets must be emitted in this order.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6114 
6115 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6116                                         struct amdgpu_job *job,
6117                                         struct amdgpu_ib *ib,
6118                                         uint32_t flags)
6119 {
6120         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6121         u32 header, control = 0;
6122 
6123         if (ib->flags & AMDGPU_IB_FLAG_CE)
6124                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6125         else
6126                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6127 
6128         control |= ib->length_dw | (vmid << 24);
6129 
6130         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6131                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6132 
6133                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6134                         gfx_v8_0_ring_emit_de_meta(ring);
6135         }
6136 
6137         amdgpu_ring_write(ring, header);
6138         amdgpu_ring_write(ring,
6139 #ifdef __BIG_ENDIAN
6140                           (2 << 0) |
6141 #endif
6142                           (ib->gpu_addr & 0xFFFFFFFC));
6143         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6144         amdgpu_ring_write(ring, control);
6145 }
6146 
/*
 * Emit an indirect buffer on a compute ring.  Optionally resets the
 * ME/GDS wave ID counters first (see the workaround comment below),
 * then emits the INDIRECT_BUFFER packet pointing at @ib.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_job *job,
                                          struct amdgpu_ib *ib,
                                          uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

        /* Currently, there is a high possibility to get wave ID mismatch
         * between ME and GDS, leading to a hw deadlock, because ME generates
         * different wave IDs than the GDS expects. This situation happens
         * randomly when at least 5 compute pipes use GDS ordered append.
         * The wave IDs generated by ME are also wrong after suspend/resume.
         * Those are probably bugs somewhere else in the kernel driver.
         *
         * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
         * GDS to 0 for this ring (me/pipe).
         */
        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
                amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) |
#endif
                                (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6180 
/*
 * Emit a fence on the GFX ring: writes @seq to @addr via EVENT_WRITE_EOP
 * and optionally raises an interrupt.  A dummy EOP carrying seq-1 is
 * emitted first as a cache-flush workaround; the packet order below is
 * load-bearing and must not change.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* Workaround for cache flush problems. First send a dummy EOP
         * event down the pipe with seq one below.
         */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        /* dummy write: 32-bit data, no interrupt */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                                DATA_SEL(1) | INT_SEL(0));
        amdgpu_ring_write(ring, lower_32_bits(seq - 1));
        amdgpu_ring_write(ring, upper_32_bits(seq - 1));

        /* Then send the real EOP event down the pipe:
         * EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        /* real write: 64- or 32-bit data, interrupt if requested */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6217 
6218 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6219 {
6220         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6221         uint32_t seq = ring->fence_drv.sync_seq;
6222         uint64_t addr = ring->fence_drv.gpu_addr;
6223 
6224         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6225         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6226                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6227                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6228         amdgpu_ring_write(ring, addr & 0xfffffffc);
6229         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6230         amdgpu_ring_write(ring, seq);
6231         amdgpu_ring_write(ring, 0xffffffff);
6232         amdgpu_ring_write(ring, 4); /* poll interval */
6233 }
6234 
/*
 * Emit a VM TLB flush for @vmid/@pd_addr, wait for the invalidate to
 * land, and (on GFX rings only) re-synchronize the PFP with the ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6260 
6261 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6262 {
6263         return ring->adev->wb.wb[ring->wptr_offs];
6264 }
6265 
6266 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6267 {
6268         struct amdgpu_device *adev = ring->adev;
6269 
6270         /* XXX check if swapping is necessary on BE */
6271         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6272         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6273 }
6274 
6275 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6276                                            bool acquire)
6277 {
6278         struct amdgpu_device *adev = ring->adev;
6279         int pipe_num, tmp, reg;
6280         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6281 
6282         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6283 
6284         /* first me only has 2 entries, GFX and HP3D */
6285         if (ring->me > 0)
6286                 pipe_num -= 2;
6287 
6288         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6289         tmp = RREG32(reg);
6290         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6291         WREG32(reg, tmp);
6292 }
6293 
6294 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6295                                             struct amdgpu_ring *ring,
6296                                             bool acquire)
6297 {
6298         int i, pipe;
6299         bool reserve;
6300         struct amdgpu_ring *iring;
6301 
6302         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6303         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
6304         if (acquire)
6305                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6306         else
6307                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6308 
6309         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6310                 /* Clear all reservations - everyone reacquires all resources */
6311                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6312                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6313                                                        true);
6314 
6315                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6316                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6317                                                        true);
6318         } else {
6319                 /* Lower all pipes without a current reservation */
6320                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6321                         iring = &adev->gfx.gfx_ring[i];
6322                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6323                                                            iring->me,
6324                                                            iring->pipe,
6325                                                            0);
6326                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6327                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6328                 }
6329 
6330                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6331                         iring = &adev->gfx.compute_ring[i];
6332                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6333                                                            iring->me,
6334                                                            iring->pipe,
6335                                                            0);
6336                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6337                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6338                 }
6339         }
6340 
6341         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6342 }
6343 
6344 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6345                                       struct amdgpu_ring *ring,
6346                                       bool acquire)
6347 {
6348         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6349         uint32_t queue_priority = acquire ? 0xf : 0x0;
6350 
6351         mutex_lock(&adev->srbm_mutex);
6352         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6353 
6354         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6355         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6356 
6357         vi_srbm_select(adev, 0, 0, 0, 0);
6358         mutex_unlock(&adev->srbm_mutex);
6359 }
6360 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6361                                                enum drm_sched_priority priority)
6362 {
6363         struct amdgpu_device *adev = ring->adev;
6364         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6365 
6366         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6367                 return;
6368 
6369         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6370         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6371 }
6372 
/*
 * Emit a fence on a compute ring via RELEASE_MEM: flush caches, write
 * @seq to @addr and optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* 64- or 32-bit data write, interrupt if requested */
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6393 
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to @addr,
 * optionally followed by a write to CPC_INT_STATUS to trigger the
 * interrupt.  64-bit sequence numbers are not supported here.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6418 
/* Emit a SWITCH_BUFFER packet (payload dword is zero). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6424 
6425 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6426 {
6427         uint32_t dw2 = 0;
6428 
6429         if (amdgpu_sriov_vf(ring->adev))
6430                 gfx_v8_0_ring_emit_ce_meta(ring);
6431 
6432         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6433         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6434                 gfx_v8_0_ring_emit_vgt_flush(ring);
6435                 /* set load_global_config & load_global_uconfig */
6436                 dw2 |= 0x8001;
6437                 /* set load_cs_sh_regs */
6438                 dw2 |= 0x01000000;
6439                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6440                 dw2 |= 0x10002;
6441 
6442                 /* set load_ce_ram if preamble presented */
6443                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6444                         dw2 |= 0x10000000;
6445         } else {
6446                 /* still load_ce_ram if this is the first time preamble presented
6447                  * although there is no context switch happens.
6448                  */
6449                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6450                         dw2 |= 0x10000000;
6451         }
6452 
6453         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6454         amdgpu_ring_write(ring, dw2);
6455         amdgpu_ring_write(ring, 0);
6456 }
6457 
/*
 * Emit a COND_EXEC packet whose dword count is a placeholder
 * (0x55aa55aa) to be fixed up later by
 * gfx_v8_0_ring_emit_patch_cond_exec().  Returns the ring offset of
 * that placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        /* capture the offset BEFORE writing the placeholder */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6470 
6471 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6472 {
6473         unsigned cur;
6474 
6475         BUG_ON(offset > ring->buf_mask);
6476         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6477 
6478         cur = (ring->wptr & ring->buf_mask) - 1;
6479         if (likely(cur > offset))
6480                 ring->ring[offset] = cur - offset;
6481         else
6482                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6483 }
6484 
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot at adev->virt.reg_val_offs, with write confirmation.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6500 
6501 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6502                                   uint32_t val)
6503 {
6504         uint32_t cmd;
6505 
6506         switch (ring->funcs->type) {
6507         case AMDGPU_RING_TYPE_GFX:
6508                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6509                 break;
6510         case AMDGPU_RING_TYPE_KIQ:
6511                 cmd = 1 << 16; /* no inc addr */
6512                 break;
6513         default:
6514                 cmd = WR_CONFIRM;
6515                 break;
6516         }
6517 
6518         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6519         amdgpu_ring_write(ring, cmd);
6520         amdgpu_ring_write(ring, reg);
6521         amdgpu_ring_write(ring, 0);
6522         amdgpu_ring_write(ring, val);
6523 }
6524 
6525 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6526 {
6527         struct amdgpu_device *adev = ring->adev;
6528         uint32_t value = 0;
6529 
6530         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6531         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6532         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6533         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6534         WREG32(mmSQ_CMD, value);
6535 }
6536 
6537 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6538                                                  enum amdgpu_interrupt_state state)
6539 {
6540         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6541                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6542 }
6543 
6544 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6545                                                      int me, int pipe,
6546                                                      enum amdgpu_interrupt_state state)
6547 {
6548         u32 mec_int_cntl, mec_int_cntl_reg;
6549 
6550         /*
6551          * amdgpu controls only the first MEC. That's why this function only
6552          * handles the setting of interrupts for this specific MEC. All other
6553          * pipes' interrupts are set by amdkfd.
6554          */
6555 
6556         if (me == 1) {
6557                 switch (pipe) {
6558                 case 0:
6559                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6560                         break;
6561                 case 1:
6562                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6563                         break;
6564                 case 2:
6565                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6566                         break;
6567                 case 3:
6568                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6569                         break;
6570                 default:
6571                         DRM_DEBUG("invalid pipe %d\n", pipe);
6572                         return;
6573                 }
6574         } else {
6575                 DRM_DEBUG("invalid me %d\n", me);
6576                 return;
6577         }
6578 
6579         switch (state) {
6580         case AMDGPU_IRQ_STATE_DISABLE:
6581                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6582                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6583                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6584                 break;
6585         case AMDGPU_IRQ_STATE_ENABLE:
6586                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6587                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6588                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6589                 break;
6590         default:
6591                 break;
6592         }
6593 }
6594 
6595 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6596                                              struct amdgpu_irq_src *source,
6597                                              unsigned type,
6598                                              enum amdgpu_interrupt_state state)
6599 {
6600         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6601                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6602 
6603         return 0;
6604 }
6605 
6606 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6607                                               struct amdgpu_irq_src *source,
6608                                               unsigned type,
6609                                               enum amdgpu_interrupt_state state)
6610 {
6611         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6612                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6613 
6614         return 0;
6615 }
6616 
6617 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6618                                             struct amdgpu_irq_src *src,
6619                                             unsigned type,
6620                                             enum amdgpu_interrupt_state state)
6621 {
6622         switch (type) {
6623         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6624                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6625                 break;
6626         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6627                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6628                 break;
6629         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6630                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6631                 break;
6632         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6633                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6634                 break;
6635         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6636                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6637                 break;
6638         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6639                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6640                 break;
6641         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6642                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6643                 break;
6644         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6645                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6646                 break;
6647         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6648                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6649                 break;
6650         default:
6651                 break;
6652         }
6653         return 0;
6654 }
6655 
6656 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6657                                          struct amdgpu_irq_src *source,
6658                                          unsigned int type,
6659                                          enum amdgpu_interrupt_state state)
6660 {
6661         int enable_flag;
6662 
6663         switch (state) {
6664         case AMDGPU_IRQ_STATE_DISABLE:
6665                 enable_flag = 0;
6666                 break;
6667 
6668         case AMDGPU_IRQ_STATE_ENABLE:
6669                 enable_flag = 1;
6670                 break;
6671 
6672         default:
6673                 return -EINVAL;
6674         }
6675 
6676         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6677         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6678         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6679         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6680         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6681         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6682                      enable_flag);
6683         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6684                      enable_flag);
6685         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6686                      enable_flag);
6687         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6688                      enable_flag);
6689         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6690                      enable_flag);
6691         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6692                      enable_flag);
6693         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6694                      enable_flag);
6695         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6696                      enable_flag);
6697 
6698         return 0;
6699 }
6700 
6701 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6702                                      struct amdgpu_irq_src *source,
6703                                      unsigned int type,
6704                                      enum amdgpu_interrupt_state state)
6705 {
6706         int enable_flag;
6707 
6708         switch (state) {
6709         case AMDGPU_IRQ_STATE_DISABLE:
6710                 enable_flag = 1;
6711                 break;
6712 
6713         case AMDGPU_IRQ_STATE_ENABLE:
6714                 enable_flag = 0;
6715                 break;
6716 
6717         default:
6718                 return -EINVAL;
6719         }
6720 
6721         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6722                      enable_flag);
6723 
6724         return 0;
6725 }
6726 
6727 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6728                             struct amdgpu_irq_src *source,
6729                             struct amdgpu_iv_entry *entry)
6730 {
6731         int i;
6732         u8 me_id, pipe_id, queue_id;
6733         struct amdgpu_ring *ring;
6734 
6735         DRM_DEBUG("IH: CP EOP\n");
6736         me_id = (entry->ring_id & 0x0c) >> 2;
6737         pipe_id = (entry->ring_id & 0x03) >> 0;
6738         queue_id = (entry->ring_id & 0x70) >> 4;
6739 
6740         switch (me_id) {
6741         case 0:
6742                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6743                 break;
6744         case 1:
6745         case 2:
6746                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6747                         ring = &adev->gfx.compute_ring[i];
6748                         /* Per-queue interrupt is supported for MEC starting from VI.
6749                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6750                           */
6751                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6752                                 amdgpu_fence_process(ring);
6753                 }
6754                 break;
6755         }
6756         return 0;
6757 }
6758 
6759 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6760                            struct amdgpu_iv_entry *entry)
6761 {
6762         u8 me_id, pipe_id, queue_id;
6763         struct amdgpu_ring *ring;
6764         int i;
6765 
6766         me_id = (entry->ring_id & 0x0c) >> 2;
6767         pipe_id = (entry->ring_id & 0x03) >> 0;
6768         queue_id = (entry->ring_id & 0x70) >> 4;
6769 
6770         switch (me_id) {
6771         case 0:
6772                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6773                 break;
6774         case 1:
6775         case 2:
6776                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6777                         ring = &adev->gfx.compute_ring[i];
6778                         if (ring->me == me_id && ring->pipe == pipe_id &&
6779                             ring->queue == queue_id)
6780                                 drm_sched_fault(&ring->sched);
6781                 }
6782                 break;
6783         }
6784 }
6785 
/*
 * gfx_v8_0_priv_reg_irq - handle a privileged register violation interrupt
 *
 * Logs the violation and routes the IV entry to gfx_v8_0_fault() so the
 * scheduler of the offending ring is notified.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6794 
/*
 * gfx_v8_0_priv_inst_irq - handle a privileged instruction interrupt
 *
 * Logs the event and routes the IV entry to gfx_v8_0_fault() so the
 * scheduler of the offending ring is notified.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6803 
/*
 * gfx_v8_0_cp_ecc_error_irq - handle a CP EDC/ECC error interrupt
 *
 * Log-only handler; no recovery is attempted here.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Fix: message was missing its terminating newline, which lets the
	 * next printk run on into the same log line. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6811 
6812 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6813 {
6814         u32 enc, se_id, sh_id, cu_id;
6815         char type[20];
6816         int sq_edc_source = -1;
6817 
6818         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6819         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6820 
6821         switch (enc) {
6822                 case 0:
6823                         DRM_INFO("SQ general purpose intr detected:"
6824                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6825                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6826                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6827                                         "wlt %d, thread_trace %d.\n",
6828                                         se_id,
6829                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6830                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6831                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6832                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6833                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6834                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6835                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6836                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6837                                         );
6838                         break;
6839                 case 1:
6840                 case 2:
6841 
6842                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6843                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6844 
6845                         /*
6846                          * This function can be called either directly from ISR
6847                          * or from BH in which case we can access SQ_EDC_INFO
6848                          * instance
6849                          */
6850                         if (in_task()) {
6851                                 mutex_lock(&adev->grbm_idx_mutex);
6852                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6853 
6854                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6855 
6856                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6857                                 mutex_unlock(&adev->grbm_idx_mutex);
6858                         }
6859 
6860                         if (enc == 1)
6861                                 sprintf(type, "instruction intr");
6862                         else
6863                                 sprintf(type, "EDC/ECC error");
6864 
6865                         DRM_INFO(
6866                                 "SQ %s detected: "
6867                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6868                                         "trap %s, sq_ed_info.source %s.\n",
6869                                         type, se_id, sh_id, cu_id,
6870                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6871                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6872                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6873                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6874                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6875                                 );
6876                         break;
6877                 default:
6878                         DRM_ERROR("SQ invalid encoding type\n.");
6879         }
6880 }
6881 
6882 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6883 {
6884 
6885         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6886         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6887 
6888         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6889 }
6890 
6891 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6892                            struct amdgpu_irq_src *source,
6893                            struct amdgpu_iv_entry *entry)
6894 {
6895         unsigned ih_data = entry->src_data[0];
6896 
6897         /*
6898          * Try to submit work so SQ_EDC_INFO can be accessed from
6899          * BH. If previous work submission hasn't finished yet
6900          * just print whatever info is possible directly from the ISR.
6901          */
6902         if (work_pending(&adev->gfx.sq_work.work)) {
6903                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6904         } else {
6905                 adev->gfx.sq_work.ih_data = ih_data;
6906                 schedule_work(&adev->gfx.sq_work.work);
6907         }
6908 
6909         return 0;
6910 }
6911 
/* IP-block level lifecycle callbacks for GFX v8: init/fini, suspend/resume,
 * soft reset phases and clock/power gating control. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6932 
/* Ring callbacks for the graphics (GFX) ring.  emit_frame_size is the
 * worst-case dword count reserved per frame; the per-packet comments below
 * account for each contribution. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6977 
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7007 
/* Ring callbacks for the Kernel Interface Queue (KIQ); note it exposes
 * register read/write emission (emit_rreg/emit_wreg) but no emit_ib. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7031 
7032 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7033 {
7034         int i;
7035 
7036         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7037 
7038         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7039                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7040 
7041         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7042                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7043 }
7044 
/* Interrupt source dispatch tables: each pairs a state-setter (enable /
 * disable at the hardware) with a .process handler for incoming IV entries. */

/* CP end-of-pipe (fence) interrupts */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* privileged register access violations */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* privileged instruction violations */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* CP EDC/ECC errors */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

/* SQ (shader sequencer) interrupts */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7069 
/* Wire up the GFX v8 interrupt sources (handler tables and type counts). */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* EOP has one type per CP queue; the rest are single-type sources */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7087 
/* Install the RLC (run list controller) callback table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7092 
/* Initialize ASIC GDS (global data share) info from hardware registers. */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* GDS size comes from hardware; GWS/OA counts are fixed for GFX v8 */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7101 
7102 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7103                                                  u32 bitmap)
7104 {
7105         u32 data;
7106 
7107         if (!bitmap)
7108                 return;
7109 
7110         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7111         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7112 
7113         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7114 }
7115 
7116 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7117 {
7118         u32 data, mask;
7119 
7120         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7121                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7122 
7123         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7124 
7125         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7126 }
7127 
/*
 * gfx_v8_0_get_cu_info - populate adev->gfx.cu_info from hardware
 *
 * Walks every SE/SH, applies any module-parameter CU disable masks,
 * records the per-SH active-CU bitmap, and builds the always-on (AO)
 * CU mask.  Takes grbm_idx_mutex around the SE/SH register banking.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* disable masks cover at most 4 SEs x 2 SHs */
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs keep only 2 CUs per SH always-on; dGPUs keep them all */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* bank registers to SE i / SH j, all CUs */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num become AO */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* pack AO bitmaps as 8 bits per SH, 16 per SE */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast banking */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7183 
/* Exported IP block descriptors; v8.0 and v8.1 share the same callback
 * table and differ only in the advertised minor version. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7201 
/*
 * gfx_v8_0_ring_emit_ce_meta - emit a WRITE_DATA packet zeroing the CE
 * metadata in the per-ring CSA (used for SR-IOV world switch).
 *
 * The payload layout depends on whether chained IBs are supported; the
 * packet count is sized accordingly (+4 header dwords, -2 per PKT3 count
 * convention).
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	/* ENGINE_SEL(2) = CE; DST_SEL(8) = memory with write-confirm */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7230 
/*
 * gfx_v8_0_ring_emit_de_meta - emit a WRITE_DATA packet initializing the
 * DE metadata in the per-ring CSA, including the GDS backup address
 * (CSA base + 4096).
 *
 * Mirrors gfx_v8_0_ring_emit_ce_meta but targets the DE engine and fills
 * in the GDS backup address fields of the payload.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area lives one page past the CSA base */
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	/* ENGINE_SEL(1) = DE; DST_SEL(8) = memory with write-confirm */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}

/* [<][>][^][v][top][bottom][index][help] */