root/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c


DEFINITIONS

This source file includes the following definitions:
  1. gfx_v9_0_init_golden_registers
  2. gfx_v9_0_scratch_init
  3. gfx_v9_0_write_data_to_reg
  4. gfx_v9_0_wait_reg_mem
  5. gfx_v9_0_ring_test_ring
  6. gfx_v9_0_ring_test_ib
  7. gfx_v9_0_free_microcode
  8. gfx_v9_0_init_rlc_ext_microcode
  9. gfx_v9_0_check_fw_write_wait
  10. gfx_v9_0_check_if_need_gfxoff
  11. gfx_v9_0_init_cp_gfx_microcode
  12. gfx_v9_0_init_rlc_microcode
  13. gfx_v9_0_init_cp_compute_microcode
  14. gfx_v9_0_init_microcode
  15. gfx_v9_0_get_csb_size
  16. gfx_v9_0_get_csb_buffer
  17. gfx_v9_0_init_always_on_cu_mask
  18. gfx_v9_0_init_lbpw
  19. gfx_v9_4_init_lbpw
  20. gfx_v9_0_enable_lbpw
  21. gfx_v9_0_cp_jump_table_num
  22. gfx_v9_0_rlc_init
  23. gfx_v9_0_csb_vram_pin
  24. gfx_v9_0_csb_vram_unpin
  25. gfx_v9_0_mec_fini
  26. gfx_v9_0_mec_init
  27. wave_read_ind
  28. wave_read_regs
  29. gfx_v9_0_read_wave_data
  30. gfx_v9_0_read_wave_sgprs
  31. gfx_v9_0_read_wave_vgprs
  32. gfx_v9_0_select_me_pipe_q
  33. gfx_v9_0_gpu_early_init
  34. gfx_v9_0_ngg_create_buf
  35. gfx_v9_0_ngg_fini
  36. gfx_v9_0_ngg_init
  37. gfx_v9_0_ngg_en
  38. gfx_v9_0_compute_ring_init
  39. gfx_v9_0_sw_init
  40. gfx_v9_0_sw_fini
  41. gfx_v9_0_tiling_mode_table_init
  42. gfx_v9_0_select_se_sh
  43. gfx_v9_0_get_rb_active_bitmap
  44. gfx_v9_0_setup_rb
  45. gfx_v9_0_init_compute_vmid
  46. gfx_v9_0_init_gds_vmid
  47. gfx_v9_0_constants_init
  48. gfx_v9_0_wait_for_rlc_serdes
  49. gfx_v9_0_enable_gui_idle_interrupt
  50. gfx_v9_0_init_csb
  51. gfx_v9_1_parse_ind_reg_list
  52. gfx_v9_1_init_rlc_save_restore_list
  53. gfx_v9_0_enable_save_restore_machine
  54. pwr_10_0_gfxip_control_over_cgpg
  55. gfx_v9_0_init_gfx_power_gating
  56. gfx_v9_0_enable_sck_slow_down_on_power_up
  57. gfx_v9_0_enable_sck_slow_down_on_power_down
  58. gfx_v9_0_enable_cp_power_gating
  59. gfx_v9_0_enable_gfx_cg_power_gating
  60. gfx_v9_0_enable_gfx_pipeline_powergating
  61. gfx_v9_0_enable_gfx_static_mg_power_gating
  62. gfx_v9_0_enable_gfx_dynamic_mg_power_gating
  63. gfx_v9_0_init_pg
  64. gfx_v9_0_rlc_stop
  65. gfx_v9_0_rlc_reset
  66. gfx_v9_0_rlc_start
  67. gfx_v9_0_rlc_load_microcode
  68. gfx_v9_0_rlc_resume
  69. gfx_v9_0_cp_gfx_enable
  70. gfx_v9_0_cp_gfx_load_microcode
  71. gfx_v9_0_cp_gfx_start
  72. gfx_v9_0_cp_gfx_resume
  73. gfx_v9_0_cp_compute_enable
  74. gfx_v9_0_cp_compute_load_microcode
  75. gfx_v9_0_kiq_setting
  76. gfx_v9_0_kiq_kcq_enable
  77. gfx_v9_0_mqd_init
  78. gfx_v9_0_kiq_init_register
  79. gfx_v9_0_kiq_fini_register
  80. gfx_v9_0_kiq_init_queue
  81. gfx_v9_0_kcq_init_queue
  82. gfx_v9_0_kiq_resume
  83. gfx_v9_0_kcq_resume
  84. gfx_v9_0_cp_resume
  85. gfx_v9_0_cp_enable
  86. gfx_v9_0_hw_init
  87. gfx_v9_0_kcq_disable
  88. gfx_v9_0_hw_fini
  89. gfx_v9_0_suspend
  90. gfx_v9_0_resume
  91. gfx_v9_0_is_idle
  92. gfx_v9_0_wait_for_idle
  93. gfx_v9_0_soft_reset
  94. gfx_v9_0_get_gpu_clock_counter
  95. gfx_v9_0_ring_emit_gds_switch
  96. gfx_v9_0_do_edc_gds_workarounds
  97. gfx_v9_0_do_edc_gpr_workarounds
  98. gfx_v9_0_early_init
  99. gfx_v9_0_ecc_late_init
  100. gfx_v9_0_late_init
  101. gfx_v9_0_is_rlc_enabled
  102. gfx_v9_0_set_safe_mode
  103. gfx_v9_0_unset_safe_mode
  104. gfx_v9_0_update_gfx_cg_power_gating
  105. gfx_v9_0_update_gfx_mg_power_gating
  106. gfx_v9_0_update_medium_grain_clock_gating
  107. gfx_v9_0_update_3d_clock_gating
  108. gfx_v9_0_update_coarse_grain_clock_gating
  109. gfx_v9_0_update_gfx_clock_gating
  110. gfx_v9_0_set_powergating_state
  111. gfx_v9_0_set_clockgating_state
  112. gfx_v9_0_get_clockgating_state
  113. gfx_v9_0_ring_get_rptr_gfx
  114. gfx_v9_0_ring_get_wptr_gfx
  115. gfx_v9_0_ring_set_wptr_gfx
  116. gfx_v9_0_ring_emit_hdp_flush
  117. gfx_v9_0_ring_emit_ib_gfx
  118. gfx_v9_0_ring_emit_ib_compute
  119. gfx_v9_0_ring_emit_fence
  120. gfx_v9_0_ring_emit_pipeline_sync
  121. gfx_v9_0_ring_emit_vm_flush
  122. gfx_v9_0_ring_get_rptr_compute
  123. gfx_v9_0_ring_get_wptr_compute
  124. gfx_v9_0_ring_set_pipe_percent
  125. gfx_v9_0_pipe_reserve_resources
  126. gfx_v9_0_hqd_set_priority
  127. gfx_v9_0_ring_set_priority_compute
  128. gfx_v9_0_ring_set_wptr_compute
  129. gfx_v9_0_ring_emit_fence_kiq
  130. gfx_v9_ring_emit_sb
  131. gfx_v9_0_ring_emit_ce_meta
  132. gfx_v9_0_ring_emit_de_meta
  133. gfx_v9_0_ring_emit_tmz
  134. gfx_v9_ring_emit_cntxcntl
  135. gfx_v9_0_ring_emit_init_cond_exec
  136. gfx_v9_0_ring_emit_patch_cond_exec
  137. gfx_v9_0_ring_emit_rreg
  138. gfx_v9_0_ring_emit_wreg
  139. gfx_v9_0_ring_emit_reg_wait
  140. gfx_v9_0_ring_emit_reg_write_reg_wait
  141. gfx_v9_0_ring_soft_recovery
  142. gfx_v9_0_set_gfx_eop_interrupt_state
  143. gfx_v9_0_set_compute_eop_interrupt_state
  144. gfx_v9_0_set_priv_reg_fault_state
  145. gfx_v9_0_set_priv_inst_fault_state
  146. gfx_v9_0_set_cp_ecc_error_state
  147. gfx_v9_0_set_eop_interrupt_state
  148. gfx_v9_0_eop_irq
  149. gfx_v9_0_fault
  150. gfx_v9_0_priv_reg_irq
  151. gfx_v9_0_priv_inst_irq
  152. gfx_v9_0_process_ras_data_cb
  153. gfx_v9_0_ras_error_inject
  154. gfx_v9_0_query_ras_error_count
  155. gfx_v9_0_cp_ecc_error_irq
  156. gfx_v9_0_set_ring_funcs
  157. gfx_v9_0_set_irq_funcs
  158. gfx_v9_0_set_rlc_funcs
  159. gfx_v9_0_set_gds_init
  160. gfx_v9_0_set_user_cu_inactive_bitmap
  161. gfx_v9_0_get_cu_active_bitmap
  162. gfx_v9_0_get_cu_info

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  */
  23 
  24 #include <linux/delay.h>
  25 #include <linux/kernel.h>
  26 #include <linux/firmware.h>
  27 #include <linux/module.h>
  28 #include <linux/pci.h>
  29 
  30 #include "amdgpu.h"
  31 #include "amdgpu_gfx.h"
  32 #include "soc15.h"
  33 #include "soc15d.h"
  34 #include "amdgpu_atomfirmware.h"
  35 #include "amdgpu_pm.h"
  36 
  37 #include "gc/gc_9_0_offset.h"
  38 #include "gc/gc_9_0_sh_mask.h"
  39 
  40 #include "vega10_enum.h"
  41 #include "hdp/hdp_4_0_offset.h"
  42 
  43 #include "soc15_common.h"
  44 #include "clearstate_gfx9.h"
  45 #include "v9_structs.h"
  46 
  47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
  48 
  49 #include "amdgpu_ras.h"
  50 
  51 #define GFX9_NUM_GFX_RINGS     1
  52 #define GFX9_MEC_HPD_SIZE 4096
  53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
  55 
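/*
 * PWR_MISC_CNTL_STATUS lives in the power block rather than GC, so its
 * offset and field masks are defined locally here (presumably because they
 * are not exported by the gc_9_0 headers included above).  The CGPG enable
 * bit and the GFXOFF status field are used by the power-gating helpers
 * later in this file (see pwr_10_0_gfxip_control_over_cgpg in the
 * definitions list).
 */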
  56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
  57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
  58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
  59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
  60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
  61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
  62 
  63 #define mmGCEA_PROBE_MAP                        0x070c
  64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
  65 
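/*
 * MODULE_FIRMWARE() declares every firmware image this driver may request
 * with request_firmware(): the CP (CE/PFP/ME/MEC/MEC2) and RLC blobs for
 * each supported ASIC, so userspace tooling (modinfo, initramfs generators)
 * can stage the files from /lib/firmware ahead of time.
 */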
  66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
  67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
  68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
  69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
  70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
  71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
  72 
  73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
  74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
  75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
  76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
  77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
  78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
  79 
  80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
  81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
  82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
  83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
  84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
  85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
  86 
  87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
  88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
  89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
  90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
  91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
  92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
  93 
  94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
  95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
  96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
  97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
  98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
  99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
 101 
 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
 109 
 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
 111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
 112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
 113 
 114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
 115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
 116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
 117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
 118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
 120 
 121 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
 122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
 123 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
 124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
 125 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
 126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
 127 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
 128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
 129 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
 130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
 131 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
 132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
 133 
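/*
 * RAS sub-block indices understood by the PSP RAS trusted application (TA).
 * Each GFX hardware sub-block (CP, GDS, SPI, SQ/SQC, TA, TCA/TCC/TCP, TD,
 * EA, UTC caches) gets its own index, and the *_INDEX_START/_INDEX_END
 * aliases bracket the multi-entry ranges.
 */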
 134 enum ta_ras_gfx_subblock {
 135         /*CPC*/
 136         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
 137         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
 138         TA_RAS_BLOCK__GFX_CPC_UCODE,
 139         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
 140         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
 141         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
 142         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
 143         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
 144         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 145         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 146         /* CPF*/
 147         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 148         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 149         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
 150         TA_RAS_BLOCK__GFX_CPF_TAG,
 151         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
 152         /* CPG*/
 153         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 154         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 155         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
 156         TA_RAS_BLOCK__GFX_CPG_TAG,
 157         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
 158         /* GDS*/
 159         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 160         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 161         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
 162         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
 163         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
 164         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 165         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 166         /* SPI*/
 167         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
 168         /* SQ*/
 169         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 170         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 171         TA_RAS_BLOCK__GFX_SQ_LDS_D,
 172         TA_RAS_BLOCK__GFX_SQ_LDS_I,
 173         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
 174         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
 175         /* SQC (3 ranges)*/
 176         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 177         /* SQC range 0*/
 178         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 179         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
 180                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
 181         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
 182         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
 183         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
 184         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
 185         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
 186         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 187         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
 188                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 189         /* SQC range 1*/
 190         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 191         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
 192                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 193         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
 194         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
 195         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
 196         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
 197         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
 198         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
 199         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
 200         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 201         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
 202                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 203         /* SQC range 2*/
 204         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 205         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
 206                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 207         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
 208         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
 209         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
 210         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
 211         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
 212         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
 213         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
 214         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 215         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
 216                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 217         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
 218         /* TA*/
 219         TA_RAS_BLOCK__GFX_TA_INDEX_START,
 220         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
 221         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
 222         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
 223         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
 224         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 225         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 226         /* TCA*/
 227         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 228         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 229         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 230         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 231         /* TCC (5 sub-ranges)*/
 232         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 233         /* TCC range 0*/
 234         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 235         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
 236         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
 237         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
 238         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
 239         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
 240         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
 241         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
 242         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 243         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 244         /* TCC range 1*/
 245         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 246         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 247         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 248         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
 249                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 250         /* TCC range 2*/
 251         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 252         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 253         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
 254         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
 255         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
 256         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
 257         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
 258         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
 259         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 260         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
 261                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 262         /* TCC range 3*/
 263         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 264         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 265         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 266         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
 267                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 268         /* TCC range 4*/
 269         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 270         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
 271                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 272         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 273         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
 274                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 275         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
 276         /* TCI*/
 277         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
 278         /* TCP*/
 279         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 280         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 281         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
 282         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
 283         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
 284         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
 285         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
 286         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 287         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 288         /* TD*/
 289         TA_RAS_BLOCK__GFX_TD_INDEX_START,
 290         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
 291         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
 292         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 293         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 294         /* EA (3 sub-ranges)*/
 295         TA_RAS_BLOCK__GFX_EA_INDEX_START,
 296         /* EA range 0*/
 297         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
 298         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
 299         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
 300         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
 301         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
 302         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
 303         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
 304         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
 305         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 306         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 307         /* EA range 1*/
 308         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 309         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 310         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
 311         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
 312         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
 313         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
 314         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
 315         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 316         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 317         /* EA range 2*/
 318         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 319         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 320         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
 321         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
 322         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 323         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 324         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
 325         /* UTC VM L2 bank*/
 326         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
 327         /* UTC VM walker*/
 328         TA_RAS_BLOCK__UTC_VML2_WALKER,
 329         /* UTC ATC L2 2MB cache*/
 330         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
 331         /* UTC ATC L2 4KB cache*/
 332         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
 333         TA_RAS_BLOCK__GFX_MAX
 334 };
 335 
 336 struct ras_gfx_subblock {
 337         unsigned char *name;
 338         int ta_subblock;
 339         int hw_supported_error_type;
 340         int sw_supported_error_type;
 341 };
 342 
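/*
 * AMDGPU_RAS_SUB_BLOCK(name, a..d, e..h) builds one ras_gfx_subblocks[]
 * entry: a..d are packed into hw_supported_error_type and e..h into
 * sw_supported_error_type.  The shift positions appear to mirror the
 * AMDGPU_RAS_ERROR__* flag bits (parity, single-correctable,
 * multi-uncorrectable, poison), which is why the e..h shifts are not in
 * sequential order.
 */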
 343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
 344         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
 345                 #subblock,                                                     \
 346                 TA_RAS_BLOCK__##subblock,                                      \
 347                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
 348                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
 349         }
 350 
 351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
 352         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
 353         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
 354         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 355         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 356         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 357         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 358         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 359         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 360         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 361         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 362         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
 363         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
 364         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
 365         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
 366         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 367         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
 368         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
 369                              0),
 370         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
 371                              0),
 372         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 373         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 374         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 375         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
 376         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
 377         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 378         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
 379         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 380                              0, 0),
 381         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 382                              0),
 383         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 384                              0, 0),
 385         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
 386                              0),
 387         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 388                              0, 0),
 389         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 390                              0),
 391         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 392                              1),
 393         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 394                              0, 0, 0),
 395         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 396                              0),
 397         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 398                              0),
 399         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 400                              0),
 401         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 402                              0),
 403         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 404                              0),
 405         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 406                              0, 0),
 407         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 408                              0),
 409         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 410                              0),
 411         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 412                              0, 0, 0),
 413         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 414                              0),
 415         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 416                              0),
 417         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 418                              0),
 419         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 420                              0),
 421         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 422                              0),
 423         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 424                              0, 0),
 425         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 426                              0),
 427         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
 428         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 429         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 430         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 431         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 432         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
 433         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 434         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
 435         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
 436                              1),
 437         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
 438                              1),
 439         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
 440                              1),
 441         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
 442                              0),
 443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
 444                              0),
 445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 446         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 447         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
 448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
 449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
 450         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
 451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 452         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
 453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
 454         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 455         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
 456         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
 457                              0),
 458         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 459         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
 460                              0),
 461         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
 462                              0, 0),
 463         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
 464                              0),
 465         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 466         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
 467         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
 468         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 469         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 470         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 471         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
 472         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
 473         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
 474         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
 475         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 476         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
 477         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 485         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 486         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 487         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 488         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 489         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 490         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 491         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 492         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 493         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 494         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 495         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
 496         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
 497         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
 498         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
 499 };
 500 
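/*
 * Per-ASIC "golden" register settings.  Each SOC15_REG_GOLDEN_VALUE(ip, inst,
 * reg, and_mask, or_value) entry is applied by soc15_program_register_sequence()
 * from gfx_v9_0_init_golden_registers() below, essentially as a
 * read-modify-write: and_mask selects the bits allowed to change and or_value
 * supplies their new contents (an all-ones mask overwrites the whole register).
 */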
 501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 502 {
 503         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 504         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
 505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 513         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 514         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 515         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 516         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
 520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
 521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
 522         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 523 };
 524 
 525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 526 {
 527         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
 528         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
 534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 535         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
 536         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 537         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 538         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
 543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
 544         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 545 };
 546 
 547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
 548 {
 549         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
 550         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 551         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 552         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
 553         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
 554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
 555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
 556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
 557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
 558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
 559         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
 560 };
 561 
 562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 563 {
 564         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 565         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 578         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 579         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 580         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 581         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
 585         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
 586         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
 587         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 588 };
 589 
 590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 591 {
 592         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 593         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
 594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
 595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
 596         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
 597         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 598         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 599 };
 600 
 601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
 602 {
 603         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
 604         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
 607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
 608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
 609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
 610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
 611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 612         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 613         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
 614         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
 615         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
 616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 619         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
 620         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 621         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 622 };
 623 
 624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
 625 {
 626         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 627         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 628         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 629         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
 630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
 631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 637         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
 638 };
 639 
 640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 641 {
 642         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
 643         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
 644         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 645 };
 646 
 647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
 648 {
 649         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 650         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 665 };
 666 
 667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 668 {
 669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
 670         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 671         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 672         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
 673         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
 674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
 676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
 680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
 681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 682 };
 683 
 684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
 685 {
 686         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 687         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
 688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
 689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
 690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
 691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
 692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
 693         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
 694 };
 695 
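/*
 * Register offsets of the RLC save/restore-machine (SRM) INDEX_CNTL
 * ADDR/DATA pairs, expressed relative to entry 0 so slot i can be reached
 * as mmRLC_SRM_INDEX_CNTL_{ADDR,DATA}_0 + offset[i] when the save/restore
 * list is programmed (see gfx_v9_1_init_rlc_save_restore_list in the
 * definitions list).
 */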
 696 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
 697 {
 698         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 699         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 700         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 701         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 702         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 703         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 704         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 705         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 706 };
 707 
 708 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 709 {
 710         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 711         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 712         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 713         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 714         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 715         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 716         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 717         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 718 };
 719 
 720 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 721 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 722 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 723 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
 724 
 725 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 726 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
 728 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 729 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 730                                  struct amdgpu_cu_info *cu_info);
 731 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 732 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
 733 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 734 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 735 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 736                                           void *ras_error_status);
 737 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 738                                      void *inject_if);
 739 
 740 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 741 {
 742         switch (adev->asic_type) {
 743         case CHIP_VEGA10:
 744                 soc15_program_register_sequence(adev,
 745                                                 golden_settings_gc_9_0,
 746                                                 ARRAY_SIZE(golden_settings_gc_9_0));
 747                 soc15_program_register_sequence(adev,
 748                                                 golden_settings_gc_9_0_vg10,
 749                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 750                 break;
 751         case CHIP_VEGA12:
 752                 soc15_program_register_sequence(adev,
 753                                                 golden_settings_gc_9_2_1,
 754                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
 755                 soc15_program_register_sequence(adev,
 756                                                 golden_settings_gc_9_2_1_vg12,
 757                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 758                 break;
 759         case CHIP_VEGA20:
 760                 soc15_program_register_sequence(adev,
 761                                                 golden_settings_gc_9_0,
 762                                                 ARRAY_SIZE(golden_settings_gc_9_0));
 763                 soc15_program_register_sequence(adev,
 764                                                 golden_settings_gc_9_0_vg20,
 765                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
 766                 break;
 767         case CHIP_ARCTURUS:
 768                 soc15_program_register_sequence(adev,
 769                                                 golden_settings_gc_9_4_1_arct,
 770                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
 771                 break;
 772         case CHIP_RAVEN:
 773                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
 774                                                 ARRAY_SIZE(golden_settings_gc_9_1));
 775                 if (adev->rev_id >= 8)
 776                         soc15_program_register_sequence(adev,
 777                                                         golden_settings_gc_9_1_rv2,
 778                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
 779                 else
 780                         soc15_program_register_sequence(adev,
 781                                                         golden_settings_gc_9_1_rv1,
 782                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 783                 break;
  784         case CHIP_RENOIR:
 785                 soc15_program_register_sequence(adev,
 786                                                 golden_settings_gc_9_1_rn,
 787                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
  788                 return; /* Renoir does not need the common golden settings */
 789         default:
 790                 break;
 791         }
 792 
 793         if (adev->asic_type != CHIP_ARCTURUS)
 794                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
 795                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
 796 }
 797 
 798 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
 799 {
 800         adev->gfx.scratch.num_reg = 8;
 801         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
 802         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 803 }
 804 
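/*
 * Small PM4 helpers: gfx_v9_0_write_data_to_reg() emits a WRITE_DATA packet
 * that writes one dword to a register (DST_SEL 0), optionally waiting for
 * write confirmation, and gfx_v9_0_wait_reg_mem() emits a WAIT_REG_MEM
 * packet that polls a register or memory location until
 * (value & mask) == ref (FUNCTION 3, "equal").
 */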
 805 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
 806                                        bool wc, uint32_t reg, uint32_t val)
 807 {
 808         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 809         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
 810                                 WRITE_DATA_DST_SEL(0) |
 811                                 (wc ? WR_CONFIRM : 0));
 812         amdgpu_ring_write(ring, reg);
 813         amdgpu_ring_write(ring, 0);
 814         amdgpu_ring_write(ring, val);
 815 }
 816 
 817 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
 818                                   int mem_space, int opt, uint32_t addr0,
 819                                   uint32_t addr1, uint32_t ref, uint32_t mask,
 820                                   uint32_t inv)
 821 {
 822         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 823         amdgpu_ring_write(ring,
 824                                  /* memory (1) or register (0) */
 825                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
 826                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
 827                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
 828                                  WAIT_REG_MEM_ENGINE(eng_sel)));
 829 
 830         if (mem_space)
 831                 BUG_ON(addr0 & 0x3); /* Dword align */
 832         amdgpu_ring_write(ring, addr0);
 833         amdgpu_ring_write(ring, addr1);
 834         amdgpu_ring_write(ring, ref);
 835         amdgpu_ring_write(ring, mask);
 836         amdgpu_ring_write(ring, inv); /* poll interval */
 837 }
 838 
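/*
 * Basic ring liveness test: the CPU seeds a scratch register with 0xCAFEDEAD,
 * submits a SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, and then
 * polls for up to adev->usec_timeout microseconds.  Seeing the new value
 * proves the CP is fetching and executing packets from the ring.
 */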
 839 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 840 {
 841         struct amdgpu_device *adev = ring->adev;
 842         uint32_t scratch;
 843         uint32_t tmp = 0;
 844         unsigned i;
 845         int r;
 846 
 847         r = amdgpu_gfx_scratch_get(adev, &scratch);
 848         if (r)
 849                 return r;
 850 
 851         WREG32(scratch, 0xCAFEDEAD);
 852         r = amdgpu_ring_alloc(ring, 3);
 853         if (r)
 854                 goto error_free_scratch;
 855 
 856         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 857         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 858         amdgpu_ring_write(ring, 0xDEADBEEF);
 859         amdgpu_ring_commit(ring);
 860 
 861         for (i = 0; i < adev->usec_timeout; i++) {
 862                 tmp = RREG32(scratch);
 863                 if (tmp == 0xDEADBEEF)
 864                         break;
 865                 udelay(1);
 866         }
 867 
 868         if (i >= adev->usec_timeout)
 869                 r = -ETIMEDOUT;
 870 
 871 error_free_scratch:
 872         amdgpu_gfx_scratch_free(adev, scratch);
 873         return r;
 874 }
 875 
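      /*
       * IB test: build a small indirect buffer whose WRITE_DATA packet stores
       * 0xDEADBEEF into a writeback slot, schedule it on @ring, wait for the
       * fence and then verify the slot contents.
       */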
 876 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 877 {
 878         struct amdgpu_device *adev = ring->adev;
 879         struct amdgpu_ib ib;
 880         struct dma_fence *f = NULL;
 881 
 882         unsigned index;
 883         uint64_t gpu_addr;
 884         uint32_t tmp;
 885         long r;
 886 
 887         r = amdgpu_device_wb_get(adev, &index);
 888         if (r)
 889                 return r;
 890 
 891         gpu_addr = adev->wb.gpu_addr + (index * 4);
 892         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 893         memset(&ib, 0, sizeof(ib));
 894         r = amdgpu_ib_get(adev, NULL, 16, &ib);
 895         if (r)
 896                 goto err1;
 897 
 898         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 899         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 900         ib.ptr[2] = lower_32_bits(gpu_addr);
 901         ib.ptr[3] = upper_32_bits(gpu_addr);
 902         ib.ptr[4] = 0xDEADBEEF;
 903         ib.length_dw = 5;
 904 
 905         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 906         if (r)
 907                 goto err2;
 908 
 909         r = dma_fence_wait_timeout(f, false, timeout);
 910         if (r == 0) {
 911                 r = -ETIMEDOUT;
 912                 goto err2;
 913         } else if (r < 0) {
 914                 goto err2;
 915         }
 916 
 917         tmp = adev->wb.wb[index];
 918         if (tmp == 0xDEADBEEF)
 919                 r = 0;
 920         else
 921                 r = -EINVAL;
 922 
 923 err2:
 924         amdgpu_ib_free(adev, &ib, NULL);
 925         dma_fence_put(f);
 926 err1:
 927         amdgpu_device_wb_free(adev, index);
 928         return r;
 929 }
 930 
 931 
 932 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
 933 {
 934         release_firmware(adev->gfx.pfp_fw);
 935         adev->gfx.pfp_fw = NULL;
 936         release_firmware(adev->gfx.me_fw);
 937         adev->gfx.me_fw = NULL;
 938         release_firmware(adev->gfx.ce_fw);
 939         adev->gfx.ce_fw = NULL;
 940         release_firmware(adev->gfx.rlc_fw);
 941         adev->gfx.rlc_fw = NULL;
 942         release_firmware(adev->gfx.mec_fw);
 943         adev->gfx.mec_fw = NULL;
 944         release_firmware(adev->gfx.mec2_fw);
 945         adev->gfx.mec2_fw = NULL;
 946 
 947         kfree(adev->gfx.rlc.register_list_format);
 948 }
 949 
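      /*
       * Parse the v2.1 RLC firmware header and record the versions, sizes and
       * offsets of the save/restore list CNTL, GPM and SRM ucodes.
       */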
 950 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 951 {
 952         const struct rlc_firmware_header_v2_1 *rlc_hdr;
 953 
 954         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
 955         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
 956         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
 957         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
 958         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
 959         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
 960         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
 961         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
 962         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
 963         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
 964         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
 965         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
 966         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
 967         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
 968                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
 969 }
 970 
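      /*
       * Decide, per ASIC and CP firmware version, whether the ME/MEC firmware
       * is new enough for the firmware-assisted register write-wait path
       * (me_fw_write_wait / mec_fw_write_wait).
       */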
 971 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
 972 {
 973         adev->gfx.me_fw_write_wait = false;
 974         adev->gfx.mec_fw_write_wait = false;
 975 
 976         if ((adev->gfx.mec_fw_version < 0x000001a5) ||
 977             (adev->gfx.mec_feature_version < 46) ||
 978             (adev->gfx.pfp_fw_version < 0x000000b7) ||
 979             (adev->gfx.pfp_feature_version < 46))
 980                 DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
 981                               "GRBM requires 1-cycle delay in cp firmware\n");
 982 
 983         switch (adev->asic_type) {
 984         case CHIP_VEGA10:
 985                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
 986                     (adev->gfx.me_feature_version >= 42) &&
 987                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
 988                     (adev->gfx.pfp_feature_version >= 42))
 989                         adev->gfx.me_fw_write_wait = true;
 990 
 991                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
 992                     (adev->gfx.mec_feature_version >= 42))
 993                         adev->gfx.mec_fw_write_wait = true;
 994                 break;
 995         case CHIP_VEGA12:
 996                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
 997                     (adev->gfx.me_feature_version >= 44) &&
 998                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
 999                     (adev->gfx.pfp_feature_version >= 44))
1000                         adev->gfx.me_fw_write_wait = true;
1001 
1002                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1003                     (adev->gfx.mec_feature_version >= 44))
1004                         adev->gfx.mec_fw_write_wait = true;
1005                 break;
1006         case CHIP_VEGA20:
1007                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1008                     (adev->gfx.me_feature_version >= 44) &&
1009                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1010                     (adev->gfx.pfp_feature_version >= 44))
1011                         adev->gfx.me_fw_write_wait = true;
1012 
1013                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1014                     (adev->gfx.mec_feature_version >= 44))
1015                         adev->gfx.mec_fw_write_wait = true;
1016                 break;
1017         case CHIP_RAVEN:
1018                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1019                     (adev->gfx.me_feature_version >= 42) &&
1020                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1021                     (adev->gfx.pfp_feature_version >= 42))
1022                         adev->gfx.me_fw_write_wait = true;
1023 
1024                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1025                     (adev->gfx.mec_feature_version >= 42))
1026                         adev->gfx.mec_fw_write_wait = true;
1027                 break;
1028         default:
1029                 adev->gfx.me_fw_write_wait = true;
1030                 adev->gfx.mec_fw_write_wait = true;
1031                 break;
1032         }
1033 }
1034 
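      /*
       * Disable GFXOFF on early Raven parts that lack suitable SMU firmware or
       * RLC save/restore ucode; where GFXOFF stays enabled, also turn on the
       * related GFX power-gating flags (Raven and Renoir).
       */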
1035 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1036 {
1037         switch (adev->asic_type) {
1038         case CHIP_VEGA10:
1039         case CHIP_VEGA12:
1040         case CHIP_VEGA20:
1041                 break;
1042         case CHIP_RAVEN:
1043                 if (!(adev->rev_id >= 0x8 ||
1044                       adev->pdev->device == 0x15d8) &&
1045                     (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
1046                      !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
1047                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1048 
1049                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1050                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1051                                 AMD_PG_SUPPORT_CP |
1052                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1053                 break;
1054         case CHIP_RENOIR:
1055                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1056                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1057                                 AMD_PG_SUPPORT_CP |
1058                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1059                 break;
1060         default:
1061                 break;
1062         }
1063 }
1064 
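      /*
       * Request and validate the PFP, ME and CE firmware images for
       * @chip_name, record their versions and, when the PSP front-door loads
       * the firmware, register them in adev->firmware.ucode[].
       */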
1065 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1066                                           const char *chip_name)
1067 {
1068         char fw_name[30];
1069         int err;
1070         struct amdgpu_firmware_info *info = NULL;
1071         const struct common_firmware_header *header = NULL;
1072         const struct gfx_firmware_header_v1_0 *cp_hdr;
1073 
1074         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1075         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1076         if (err)
1077                 goto out;
1078         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1079         if (err)
1080                 goto out;
1081         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1082         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1083         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1084 
1085         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1086         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1087         if (err)
1088                 goto out;
1089         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1090         if (err)
1091                 goto out;
1092         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1093         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1094         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1095 
1096         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1097         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1098         if (err)
1099                 goto out;
1100         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1101         if (err)
1102                 goto out;
1103         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1104         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1105         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1106 
1107         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1108                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110                 info->fw = adev->gfx.pfp_fw;
1111                 header = (const struct common_firmware_header *)info->fw->data;
1112                 adev->firmware.fw_size +=
1113                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114 
1115                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117                 info->fw = adev->gfx.me_fw;
1118                 header = (const struct common_firmware_header *)info->fw->data;
1119                 adev->firmware.fw_size +=
1120                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121 
1122                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124                 info->fw = adev->gfx.ce_fw;
1125                 header = (const struct common_firmware_header *)info->fw->data;
1126                 adev->firmware.fw_size +=
1127                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128         }
1129 
1130 out:
1131         if (err) {
1132                 dev_err(adev->dev,
1133                         "gfx9: Failed to load firmware \"%s\"\n",
1134                         fw_name);
1135                 release_firmware(adev->gfx.pfp_fw);
1136                 adev->gfx.pfp_fw = NULL;
1137                 release_firmware(adev->gfx.me_fw);
1138                 adev->gfx.me_fw = NULL;
1139                 release_firmware(adev->gfx.ce_fw);
1140                 adev->gfx.ce_fw = NULL;
1141         }
1142         return err;
1143 }
1144 
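      /*
       * Request the RLC firmware (the AM4 or kicker variant where applicable),
       * copy the register save/restore lists out of the image, parse the v2.1
       * extensions and register the RLC ucodes for PSP loading.
       */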
1145 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1146                                           const char *chip_name)
1147 {
1148         char fw_name[30];
1149         int err;
1150         struct amdgpu_firmware_info *info = NULL;
1151         const struct common_firmware_header *header = NULL;
1152         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1153         unsigned int *tmp = NULL;
1154         unsigned int i = 0;
1155         uint16_t version_major;
1156         uint16_t version_minor;
1157         uint32_t smu_version;
1158 
 1159         /*
 1160          * For Picasso on an AM4 SOCKET board, we use picasso_rlc_am4.bin
 1161          * instead of picasso_rlc.bin.
 1162          * Judgment method:
 1163          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
 1164          *          or revision >= 0xD8 && revision <= 0xDF
 1165          * otherwise it is PCO FP5
 1166          */
1167         if (!strcmp(chip_name, "picasso") &&
1168                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1169                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1170                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1171         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1172                 (smu_version >= 0x41e2b))
 1173                 /*
 1174                  * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
 1175                  */
1176                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1177         else
1178                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1179         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1180         if (err)
1181                 goto out;
 1182         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
              if (err)
                      goto out;
1183         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1184 
1185         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1186         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1187         if (version_major == 2 && version_minor == 1)
1188                 adev->gfx.rlc.is_rlc_v2_1 = true;
1189 
1190         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1191         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1192         adev->gfx.rlc.save_and_restore_offset =
1193                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1194         adev->gfx.rlc.clear_state_descriptor_offset =
1195                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1196         adev->gfx.rlc.avail_scratch_ram_locations =
1197                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1198         adev->gfx.rlc.reg_restore_list_size =
1199                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1200         adev->gfx.rlc.reg_list_format_start =
1201                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1202         adev->gfx.rlc.reg_list_format_separate_start =
1203                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1204         adev->gfx.rlc.starting_offsets_start =
1205                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1206         adev->gfx.rlc.reg_list_format_size_bytes =
1207                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1208         adev->gfx.rlc.reg_list_size_bytes =
1209                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1210         adev->gfx.rlc.register_list_format =
1211                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1212                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1213         if (!adev->gfx.rlc.register_list_format) {
1214                 err = -ENOMEM;
1215                 goto out;
1216         }
1217 
1218         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1219                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1220         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1221                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1222 
1223         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1224 
1225         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1226                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1227         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1228                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1229 
1230         if (adev->gfx.rlc.is_rlc_v2_1)
1231                 gfx_v9_0_init_rlc_ext_microcode(adev);
1232 
1233         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1234                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1235                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1236                 info->fw = adev->gfx.rlc_fw;
1237                 header = (const struct common_firmware_header *)info->fw->data;
1238                 adev->firmware.fw_size +=
1239                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1240 
1241                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1242                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1243                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1244                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1245                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1246                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1247                         info->fw = adev->gfx.rlc_fw;
1248                         adev->firmware.fw_size +=
1249                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1250 
1251                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1252                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1253                         info->fw = adev->gfx.rlc_fw;
1254                         adev->firmware.fw_size +=
1255                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1256 
1257                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1258                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1259                         info->fw = adev->gfx.rlc_fw;
1260                         adev->firmware.fw_size +=
1261                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1262                 }
1263         }
1264 
1265 out:
1266         if (err) {
1267                 dev_err(adev->dev,
1268                         "gfx9: Failed to load firmware \"%s\"\n",
1269                         fw_name);
1270                 release_firmware(adev->gfx.rlc_fw);
1271                 adev->gfx.rlc_fw = NULL;
1272         }
1273         return err;
1274 }
1275 
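      /*
       * Request and validate the MEC firmware (MEC2 is optional) and register
       * the ucode and jump-table entries for PSP loading; also re-evaluates
       * GFXOFF and the fw write-wait capability once the versions are known.
       */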
1276 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1277                                           const char *chip_name)
1278 {
1279         char fw_name[30];
1280         int err;
1281         struct amdgpu_firmware_info *info = NULL;
1282         const struct common_firmware_header *header = NULL;
1283         const struct gfx_firmware_header_v1_0 *cp_hdr;
1284 
1285         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1286         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1287         if (err)
1288                 goto out;
1289         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1290         if (err)
1291                 goto out;
1292         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1293         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1294         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1295 
1296 
1297         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1298         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1299         if (!err) {
1300                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1301                 if (err)
1302                         goto out;
 1303                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
 1304                                 adev->gfx.mec2_fw->data;
 1305                 adev->gfx.mec2_fw_version =
 1306                                 le32_to_cpu(cp_hdr->header.ucode_version);
 1307                 adev->gfx.mec2_feature_version =
 1308                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1309         } else {
1310                 err = 0;
1311                 adev->gfx.mec2_fw = NULL;
1312         }
1313 
1314         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1315                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1316                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1317                 info->fw = adev->gfx.mec_fw;
1318                 header = (const struct common_firmware_header *)info->fw->data;
1319                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1320                 adev->firmware.fw_size +=
1321                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1322 
1323                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1324                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1325                 info->fw = adev->gfx.mec_fw;
1326                 adev->firmware.fw_size +=
1327                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1328 
1329                 if (adev->gfx.mec2_fw) {
1330                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1331                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1332                         info->fw = adev->gfx.mec2_fw;
1333                         header = (const struct common_firmware_header *)info->fw->data;
1334                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1335                         adev->firmware.fw_size +=
1336                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1337 
 1338                         /* TODO: Determine if MEC2 JT FW loading can be
 1339                          * removed for all GFX V9 asic and above */
1340                         if (adev->asic_type != CHIP_ARCTURUS) {
1341                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1342                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1343                                 info->fw = adev->gfx.mec2_fw;
1344                                 adev->firmware.fw_size +=
1345                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1346                                         PAGE_SIZE);
1347                         }
1348                 }
1349         }
1350 
1351 out:
1352         gfx_v9_0_check_if_need_gfxoff(adev);
1353         gfx_v9_0_check_fw_write_wait(adev);
1354         if (err) {
1355                 dev_err(adev->dev,
1356                         "gfx9: Failed to load firmware \"%s\"\n",
1357                         fw_name);
1358                 release_firmware(adev->gfx.mec_fw);
1359                 adev->gfx.mec_fw = NULL;
1360                 release_firmware(adev->gfx.mec2_fw);
1361                 adev->gfx.mec2_fw = NULL;
1362         }
1363         return err;
1364 }
1365 
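      /*
       * Top-level firmware loader: map asic_type to a firmware name prefix,
       * then load the CP gfx (skipped on Arcturus), RLC and CP compute
       * microcode.
       */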
1366 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1367 {
1368         const char *chip_name;
1369         int r;
1370 
1371         DRM_DEBUG("\n");
1372 
1373         switch (adev->asic_type) {
1374         case CHIP_VEGA10:
1375                 chip_name = "vega10";
1376                 break;
1377         case CHIP_VEGA12:
1378                 chip_name = "vega12";
1379                 break;
1380         case CHIP_VEGA20:
1381                 chip_name = "vega20";
1382                 break;
1383         case CHIP_RAVEN:
1384                 if (adev->rev_id >= 8)
1385                         chip_name = "raven2";
1386                 else if (adev->pdev->device == 0x15d8)
1387                         chip_name = "picasso";
1388                 else
1389                         chip_name = "raven";
1390                 break;
1391         case CHIP_ARCTURUS:
1392                 chip_name = "arcturus";
1393                 break;
1394         case CHIP_RENOIR:
1395                 chip_name = "renoir";
1396                 break;
1397         default:
1398                 BUG();
1399         }
1400 
1401         /* No CPG in Arcturus */
1402         if (adev->asic_type != CHIP_ARCTURUS) {
1403                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1404                 if (r)
1405                         return r;
1406         }
1407 
1408         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1409         if (r)
1410                 return r;
1411 
1412         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1413         if (r)
1414                 return r;
1415 
1416         return r;
1417 }
1418 
1419 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1420 {
1421         u32 count = 0;
1422         const struct cs_section_def *sect = NULL;
1423         const struct cs_extent_def *ext = NULL;
1424 
1425         /* begin clear state */
1426         count += 2;
1427         /* context control state */
1428         count += 3;
1429 
1430         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1431                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1432                         if (sect->id == SECT_CONTEXT)
1433                                 count += 2 + ext->reg_count;
1434                         else
1435                                 return 0;
1436                 }
1437         }
1438 
1439         /* end clear state */
1440         count += 2;
1441         /* clear state */
1442         count += 2;
1443 
1444         return count;
1445 }
1446 
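      /*
       * Fill @buffer with the clear-state indirect buffer: preamble begin,
       * context control, the SECT_CONTEXT register extents from cs_data,
       * preamble end and a final CLEAR_STATE packet.
       */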
1447 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1448                                     volatile u32 *buffer)
1449 {
1450         u32 count = 0, i;
1451         const struct cs_section_def *sect = NULL;
1452         const struct cs_extent_def *ext = NULL;
1453 
1454         if (adev->gfx.rlc.cs_data == NULL)
1455                 return;
1456         if (buffer == NULL)
1457                 return;
1458 
1459         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1460         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1461 
1462         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1463         buffer[count++] = cpu_to_le32(0x80000000);
1464         buffer[count++] = cpu_to_le32(0x80000000);
1465 
1466         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1467                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1468                         if (sect->id == SECT_CONTEXT) {
1469                                 buffer[count++] =
1470                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1471                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1472                                                 PACKET3_SET_CONTEXT_REG_START);
1473                                 for (i = 0; i < ext->reg_count; i++)
1474                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1475                         } else {
1476                                 return;
1477                         }
1478                 }
1479         }
1480 
1481         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1482         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1483 
1484         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1485         buffer[count++] = cpu_to_le32(0);
1486 }
1487 
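      /*
       * Build the per-SE/SH always-on CU bitmaps used by RLC power gating:
       * 4 CUs on APUs, 8 on Vega12, 12 otherwise; the first two CUs are also
       * written to RLC_PG_ALWAYS_ON_CU_MASK.
       */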
1488 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1489 {
1490         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1491         uint32_t pg_always_on_cu_num = 2;
1492         uint32_t always_on_cu_num;
1493         uint32_t i, j, k;
1494         uint32_t mask, cu_bitmap, counter;
1495 
1496         if (adev->flags & AMD_IS_APU)
1497                 always_on_cu_num = 4;
1498         else if (adev->asic_type == CHIP_VEGA12)
1499                 always_on_cu_num = 8;
1500         else
1501                 always_on_cu_num = 12;
1502 
1503         mutex_lock(&adev->grbm_idx_mutex);
1504         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1505                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1506                         mask = 1;
1507                         cu_bitmap = 0;
1508                         counter = 0;
1509                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1510 
 1511                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1512                                 if (cu_info->bitmap[i][j] & mask) {
1513                                         if (counter == pg_always_on_cu_num)
1514                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1515                                         if (counter < always_on_cu_num)
1516                                                 cu_bitmap |= mask;
1517                                         else
1518                                                 break;
1519                                         counter++;
1520                                 }
1521                                 mask <<= 1;
1522                         }
1523 
1524                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1525                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1526                 }
1527         }
1528         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1529         mutex_unlock(&adev->grbm_idx_mutex);
1530 }
1531 
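      /*
       * Program the RLC load-balancing (LBPW) thresholds, counters and CU
       * masks used on Raven; Vega20 uses the gfx_v9_4_init_lbpw() variant
       * below with different threshold values.
       */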
1532 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1533 {
1534         uint32_t data;
1535 
1536         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1537         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1538         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1539         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1540         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1541 
1542         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1543         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1544 
1545         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1546         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1547 
1548         mutex_lock(&adev->grbm_idx_mutex);
1549         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1550         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1551         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1552 
1553         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1554         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1555         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1556         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1557         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1558 
1559         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1560         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1561         data &= 0x0000FFFF;
1562         data |= 0x00C00000;
1563         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1564 
1565         /*
1566          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1567          * programmed in gfx_v9_0_init_always_on_cu_mask()
1568          */
1569 
 1570         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
 1571          * but used for RLC_LB_CNTL configuration */
1572         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1573         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1574         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1575         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1576         mutex_unlock(&adev->grbm_idx_mutex);
1577 
1578         gfx_v9_0_init_always_on_cu_mask(adev);
1579 }
1580 
1581 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1582 {
1583         uint32_t data;
1584 
1585         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1586         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1587         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1588         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1589         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1590 
1591         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1592         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1593 
1594         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1595         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1596 
1597         mutex_lock(&adev->grbm_idx_mutex);
1598         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1599         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1600         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1601 
1602         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1603         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1604         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1605         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1606         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1607 
1608         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1609         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1610         data &= 0x0000FFFF;
1611         data |= 0x00C00000;
1612         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1613 
1614         /*
1615          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1616          * programmed in gfx_v9_0_init_always_on_cu_mask()
1617          */
1618 
 1619         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
 1620          * but used for RLC_LB_CNTL configuration */
1621         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1622         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1623         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1624         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1625         mutex_unlock(&adev->grbm_idx_mutex);
1626 
1627         gfx_v9_0_init_always_on_cu_mask(adev);
1628 }
1629 
1630 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1631 {
1632         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1633 }
1634 
1635 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1636 {
1637         return 5;
1638 }
1639 
1640 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1641 {
1642         const struct cs_section_def *cs_data;
1643         int r;
1644 
1645         adev->gfx.rlc.cs_data = gfx9_cs_data;
1646 
1647         cs_data = adev->gfx.rlc.cs_data;
1648 
1649         if (cs_data) {
1650                 /* init clear state block */
1651                 r = amdgpu_gfx_rlc_init_csb(adev);
1652                 if (r)
1653                         return r;
1654         }
1655 
1656         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1657                 /* TODO: double check the cp_table_size for RV */
1658                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1659                 r = amdgpu_gfx_rlc_init_cpt(adev);
1660                 if (r)
1661                         return r;
1662         }
1663 
1664         switch (adev->asic_type) {
1665         case CHIP_RAVEN:
1666                 gfx_v9_0_init_lbpw(adev);
1667                 break;
1668         case CHIP_VEGA20:
1669                 gfx_v9_4_init_lbpw(adev);
1670                 break;
1671         default:
1672                 break;
1673         }
1674 
1675         return 0;
1676 }
1677 
1678 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1679 {
1680         int r;
1681 
1682         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1683         if (unlikely(r != 0))
1684                 return r;
1685 
1686         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1687                         AMDGPU_GEM_DOMAIN_VRAM);
1688         if (!r)
1689                 adev->gfx.rlc.clear_state_gpu_addr =
1690                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1691 
1692         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1693 
1694         return r;
1695 }
1696 
1697 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1698 {
1699         int r;
1700 
1701         if (!adev->gfx.rlc.clear_state_obj)
1702                 return;
1703 
1704         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1705         if (likely(r == 0)) {
1706                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1707                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1708         }
1709 }
1710 
1711 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1712 {
1713         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1714         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1715 }
1716 
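      /*
       * Allocate the MEC HPD/EOP buffer in VRAM (one GFX9_MEC_HPD_SIZE slot
       * per acquired compute ring), clear it, and copy the MEC microcode into
       * a GTT buffer object (mec_fw_obj).
       */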
1717 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1718 {
1719         int r;
1720         u32 *hpd;
1721         const __le32 *fw_data;
1722         unsigned fw_size;
1723         u32 *fw;
1724         size_t mec_hpd_size;
1725 
1726         const struct gfx_firmware_header_v1_0 *mec_hdr;
1727 
1728         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1729 
1730         /* take ownership of the relevant compute queues */
1731         amdgpu_gfx_compute_queue_acquire(adev);
1732         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1733 
1734         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1735                                       AMDGPU_GEM_DOMAIN_VRAM,
1736                                       &adev->gfx.mec.hpd_eop_obj,
1737                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1738                                       (void **)&hpd);
1739         if (r) {
 1740                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1741                 gfx_v9_0_mec_fini(adev);
1742                 return r;
1743         }
1744 
1745         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1746 
1747         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1748         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1749 
1750         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1751 
1752         fw_data = (const __le32 *)
1753                 (adev->gfx.mec_fw->data +
1754                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1755         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1756 
1757         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1758                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1759                                       &adev->gfx.mec.mec_fw_obj,
1760                                       &adev->gfx.mec.mec_fw_gpu_addr,
1761                                       (void **)&fw);
1762         if (r) {
1763                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1764                 gfx_v9_0_mec_fini(adev);
1765                 return r;
1766         }
1767 
1768         memcpy(fw, fw_data, fw_size);
1769 
1770         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1771         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1772 
1773         return 0;
1774 }
1775 
1776 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1777 {
1778         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1779                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1780                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1781                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1782                 (SQ_IND_INDEX__FORCE_READ_MASK));
1783         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1784 }
1785 
1786 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1787                            uint32_t wave, uint32_t thread,
1788                            uint32_t regno, uint32_t num, uint32_t *out)
1789 {
1790         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1791                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1792                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1793                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1794                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1795                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1796                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1797         while (num--)
1798                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1799 }
1800 
1801 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1802 {
1803         /* type 1 wave data */
1804         dst[(*no_fields)++] = 1;
1805         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1806         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1807         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1808         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1809         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1810         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1811         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1812         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1813         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1814         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1815         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1816         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1817         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1818         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1819 }
1820 
1821 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1822                                      uint32_t wave, uint32_t start,
1823                                      uint32_t size, uint32_t *dst)
1824 {
1825         wave_read_regs(
1826                 adev, simd, wave, 0,
1827                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1828 }
1829 
1830 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1831                                      uint32_t wave, uint32_t thread,
1832                                      uint32_t start, uint32_t size,
1833                                      uint32_t *dst)
1834 {
1835         wave_read_regs(
1836                 adev, simd, wave, thread,
1837                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1838 }
1839 
1840 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1841                                   u32 me, u32 pipe, u32 q, u32 vm)
1842 {
1843         soc15_grbm_select(adev, me, pipe, q, vm);
1844 }
1845 
1846 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1847         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1848         .select_se_sh = &gfx_v9_0_select_se_sh,
1849         .read_wave_data = &gfx_v9_0_read_wave_data,
1850         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1851         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1852         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1853         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1854         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1855 };
1856 
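      /*
       * Set the per-ASIC gfx.config defaults and gb_addr_config (golden value
       * or the GB_ADDR_CONFIG register with fixups), then decode it into the
       * individual gb_addr_config_fields.
       */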
1857 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1858 {
1859         u32 gb_addr_config;
1860         int err;
1861 
1862         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1863 
1864         switch (adev->asic_type) {
1865         case CHIP_VEGA10:
1866                 adev->gfx.config.max_hw_contexts = 8;
1867                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1871                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1872                 break;
1873         case CHIP_VEGA12:
1874                 adev->gfx.config.max_hw_contexts = 8;
1875                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1879                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1880                 DRM_INFO("fix gfx.config for vega12\n");
1881                 break;
1882         case CHIP_VEGA20:
1883                 adev->gfx.config.max_hw_contexts = 8;
1884                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1888                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1889                 gb_addr_config &= ~0xf3e777ff;
1890                 gb_addr_config |= 0x22014042;
1891                 /* check vbios table if gpu info is not available */
1892                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1893                 if (err)
1894                         return err;
1895                 break;
1896         case CHIP_RAVEN:
1897                 adev->gfx.config.max_hw_contexts = 8;
1898                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1902                 if (adev->rev_id >= 8)
1903                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1904                 else
1905                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1906                 break;
1907         case CHIP_ARCTURUS:
1908                 adev->gfx.config.max_hw_contexts = 8;
1909                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1910                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1911                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1912                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1913                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1914                 gb_addr_config &= ~0xf3e777ff;
1915                 gb_addr_config |= 0x22014042;
1916                 break;
1917         case CHIP_RENOIR:
1918                 adev->gfx.config.max_hw_contexts = 8;
1919                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1920                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1921                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1922                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1923                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1924                 gb_addr_config &= ~0xf3e777ff;
1925                 gb_addr_config |= 0x22010042;
1926                 break;
1927         default:
1928                 BUG();
1929                 break;
1930         }
1931 
1932         adev->gfx.config.gb_addr_config = gb_addr_config;
1933 
1934         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1935                         REG_GET_FIELD(
1936                                         adev->gfx.config.gb_addr_config,
1937                                         GB_ADDR_CONFIG,
1938                                         NUM_PIPES);
1939 
1940         adev->gfx.config.max_tile_pipes =
1941                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1942 
1943         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1944                         REG_GET_FIELD(
1945                                         adev->gfx.config.gb_addr_config,
1946                                         GB_ADDR_CONFIG,
1947                                         NUM_BANKS);
1948         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1949                         REG_GET_FIELD(
1950                                         adev->gfx.config.gb_addr_config,
1951                                         GB_ADDR_CONFIG,
1952                                         MAX_COMPRESSED_FRAGS);
1953         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1954                         REG_GET_FIELD(
1955                                         adev->gfx.config.gb_addr_config,
1956                                         GB_ADDR_CONFIG,
1957                                         NUM_RB_PER_SE);
1958         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1959                         REG_GET_FIELD(
1960                                         adev->gfx.config.gb_addr_config,
1961                                         GB_ADDR_CONFIG,
1962                                         NUM_SHADER_ENGINES);
1963         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1964                         REG_GET_FIELD(
1965                                         adev->gfx.config.gb_addr_config,
1966                                         GB_ADDR_CONFIG,
1967                                         PIPE_INTERLEAVE_SIZE));
1968 
1969         return 0;
1970 }
1971 
1972 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1973                                    struct amdgpu_ngg_buf *ngg_buf,
1974                                    int size_se,
1975                                    int default_size_se)
1976 {
1977         int r;
1978 
1979         if (size_se < 0) {
1980                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1981                 return -EINVAL;
1982         }
1983         size_se = size_se ? size_se : default_size_se;
1984 
1985         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1986         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1987                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1988                                     &ngg_buf->bo,
1989                                     &ngg_buf->gpu_addr,
1990                                     NULL);
1991         if (r) {
1992                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1993                 return r;
1994         }
1995         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1996 
1997         return r;
1998 }
1999 
2000 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
2001 {
2002         int i;
2003 
2004         for (i = 0; i < NGG_BUF_MAX; i++)
2005                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
2006                                       &adev->gfx.ngg.buf[i].gpu_addr,
2007                                       NULL);
2008 
2009         memset(&adev->gfx.ngg.buf[0], 0,
2010                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
2011 
2012         adev->gfx.ngg.init = false;
2013 
2014         return 0;
2015 }
2016 
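      /*
       * Reserve GDS space for NGG and allocate the primitive, position and
       * control-sideband buffers (plus the optional parameter cache), sized by
       * module parameters or the per-SE defaults.
       */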
2017 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2018 {
2019         int r;
2020 
 2021         if (!amdgpu_ngg || adev->gfx.ngg.init)
2022                 return 0;
2023 
2024         /* GDS reserve memory: 64 bytes alignment */
2025         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2026         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2027         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2028         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2029 
2030         /* Primitive Buffer */
2031         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2032                                     amdgpu_prim_buf_per_se,
2033                                     64 * 1024);
2034         if (r) {
2035                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2036                 goto err;
2037         }
2038 
2039         /* Position Buffer */
2040         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2041                                     amdgpu_pos_buf_per_se,
2042                                     256 * 1024);
2043         if (r) {
2044                 dev_err(adev->dev, "Failed to create Position Buffer\n");
2045                 goto err;
2046         }
2047 
2048         /* Control Sideband */
2049         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2050                                     amdgpu_cntl_sb_buf_per_se,
2051                                     256);
2052         if (r) {
2053                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2054                 goto err;
2055         }
2056 
2057         /* Parameter Cache, not created by default */
2058         if (amdgpu_param_buf_per_se <= 0)
2059                 goto out;
2060 
2061         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2062                                     amdgpu_param_buf_per_se,
2063                                     512 * 1024);
2064         if (r) {
2065                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2066                 goto err;
2067         }
2068 
2069 out:
2070         adev->gfx.ngg.init = true;
2071         return 0;
2072 err:
2073         gfx_v9_0_ngg_fini(adev);
2074         return r;
2075 }
2076 
2077 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2078 {
2079         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2080         int r;
2081         u32 data, base;
2082 
2083         if (!amdgpu_ngg)
2084                 return 0;
2085 
2086         /* Program buffer size */
2087         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2088                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2089         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2090                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
2091         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2092 
2093         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2094                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2095         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2096                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2097         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2098 
2099         /* Program buffer base address */
2100         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2101         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2102         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2103 
2104         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2105         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2106         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2107 
2108         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2109         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2110         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2111 
2112         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2113         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2114         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2115 
2116         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2117         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2118         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2119 
2120         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2121         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2122         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2123 
2124         /* Clear GDS reserved memory */
2125         r = amdgpu_ring_alloc(ring, 17);
2126         if (r) {
2127                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2128                           ring->name, r);
2129                 return r;
2130         }
2131 
2132         gfx_v9_0_write_data_to_reg(ring, 0, false,
2133                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2134                                    (adev->gds.gds_size +
2135                                     adev->gfx.ngg.gds_reserve_size));
2136 
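             /*
              * Zero-fill the reserved GDS range: in the DMA_DATA packet below,
              * DST_SEL(1) should select GDS as the destination and SRC_SEL(2)
              * the inline data word (0 here), so gds_reserve_size bytes at
              * gds_reserve_addr are cleared while GDS_VMID0_SIZE temporarily
              * covers the reserved range.
              */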
2137         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2138         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2139                                 PACKET3_DMA_DATA_DST_SEL(1) |
2140                                 PACKET3_DMA_DATA_SRC_SEL(2)));
2141         amdgpu_ring_write(ring, 0);
2142         amdgpu_ring_write(ring, 0);
2143         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2144         amdgpu_ring_write(ring, 0);
2145         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2146                                 adev->gfx.ngg.gds_reserve_size);
2147 
2148         gfx_v9_0_write_data_to_reg(ring, 0, false,
2149                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2150 
2151         amdgpu_ring_commit(ring);
2152 
2153         return 0;
2154 }
2155 
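             /*
              * gfx_v9_0_compute_ring_init - set up one MEC compute ring.
              *
              * Maps the (mec, pipe, queue) triple onto a compute ring: MEC0 is
              * exposed to the CP as ME1, the ring's EOP buffer is carved out of
              * the shared HPD/EOP BO at ring_id * GFX9_MEC_HPD_SIZE, and the
              * doorbell index is derived from doorbell_index.mec_ring0 (the
              * << 1 presumably converts from 64-bit doorbell slots to the
              * 32-bit index the ring code expects).
              */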
2156 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2157                                       int mec, int pipe, int queue)
2158 {
2159         int r;
2160         unsigned irq_type;
2161         struct amdgpu_ring *ring;
2162 
2163         ring = &adev->gfx.compute_ring[ring_id];
2164 
2165         /* mec0 is me1 */
2166         ring->me = mec + 1;
2167         ring->pipe = pipe;
2168         ring->queue = queue;
2169 
2170         ring->ring_obj = NULL;
2171         ring->use_doorbell = true;
2172         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2173         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2174                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2175         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2176 
2177         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2178                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2179                 + ring->pipe;
2180 
2181         /* type-2 packets are deprecated on MEC, use type-3 instead */
2182         r = amdgpu_ring_init(adev, ring, 1024,
2183                              &adev->gfx.eop_irq, irq_type);
2184         if (r)
2185                 return r;
2186 
2187 
2188         return 0;
2189 }
2190 
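             /*
              * gfx_v9_0_sw_init - software init: register the CP interrupt
              * sources, load microcode, allocate the RLC and MEC objects,
              * create the gfx ring(s) and the compute rings (queues allocated
              * horizontally across pipes), then set up the KIQ ring and the
              * per-queue MQD backing store.
              */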
2191 static int gfx_v9_0_sw_init(void *handle)
2192 {
2193         int i, j, k, r, ring_id;
2194         struct amdgpu_ring *ring;
2195         struct amdgpu_kiq *kiq;
2196         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2197 
2198         switch (adev->asic_type) {
2199         case CHIP_VEGA10:
2200         case CHIP_VEGA12:
2201         case CHIP_VEGA20:
2202         case CHIP_RAVEN:
2203         case CHIP_ARCTURUS:
2204         case CHIP_RENOIR:
2205                 adev->gfx.mec.num_mec = 2;
2206                 break;
2207         default:
2208                 adev->gfx.mec.num_mec = 1;
2209                 break;
2210         }
2211 
2212         adev->gfx.mec.num_pipe_per_mec = 4;
2213         adev->gfx.mec.num_queue_per_pipe = 8;
2214 
2215         /* EOP Event */
2216         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2217         if (r)
2218                 return r;
2219 
2220         /* Privileged reg */
2221         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2222                               &adev->gfx.priv_reg_irq);
2223         if (r)
2224                 return r;
2225 
2226         /* Privileged inst */
2227         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2228                               &adev->gfx.priv_inst_irq);
2229         if (r)
2230                 return r;
2231 
2232         /* ECC error */
2233         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2234                               &adev->gfx.cp_ecc_error_irq);
2235         if (r)
2236                 return r;
2237 
2238         /* FUE error */
2239         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2240                               &adev->gfx.cp_ecc_error_irq);
2241         if (r)
2242                 return r;
2243 
2244         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2245 
2246         gfx_v9_0_scratch_init(adev);
2247 
2248         r = gfx_v9_0_init_microcode(adev);
2249         if (r) {
2250                 DRM_ERROR("Failed to load gfx firmware!\n");
2251                 return r;
2252         }
2253 
2254         r = adev->gfx.rlc.funcs->init(adev);
2255         if (r) {
2256                 DRM_ERROR("Failed to init rlc BOs!\n");
2257                 return r;
2258         }
2259 
2260         r = gfx_v9_0_mec_init(adev);
2261         if (r) {
2262                 DRM_ERROR("Failed to init MEC BOs!\n");
2263                 return r;
2264         }
2265 
2266         /* set up the gfx ring */
2267         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2268                 ring = &adev->gfx.gfx_ring[i];
2269                 ring->ring_obj = NULL;
2270                 if (!i)
2271                         sprintf(ring->name, "gfx");
2272                 else
2273                         sprintf(ring->name, "gfx_%d", i);
2274                 ring->use_doorbell = true;
2275                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2276                 r = amdgpu_ring_init(adev, ring, 1024,
2277                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2278                 if (r)
2279                         return r;
2280         }
2281 
2282         /* set up the compute queues - allocate horizontally across pipes */
2283         ring_id = 0;
2284         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2285                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2286                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2287                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2288                                         continue;
2289 
2290                                 r = gfx_v9_0_compute_ring_init(adev,
2291                                                                ring_id,
2292                                                                i, k, j);
2293                                 if (r)
2294                                         return r;
2295 
2296                                 ring_id++;
2297                         }
2298                 }
2299         }
2300 
2301         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2302         if (r) {
2303                 DRM_ERROR("Failed to init KIQ BOs!\n");
2304                 return r;
2305         }
2306 
2307         kiq = &adev->gfx.kiq;
2308         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2309         if (r)
2310                 return r;
2311 
2312         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2313         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2314         if (r)
2315                 return r;
2316 
2317         adev->gfx.ce_ram_size = 0x8000;
2318 
2319         r = gfx_v9_0_gpu_early_init(adev);
2320         if (r)
2321                 return r;
2322 
2323         r = gfx_v9_0_ngg_init(adev);
2324         if (r)
2325                 return r;
2326 
2327         return 0;
2328 }
2329 
2330 
2331 static int gfx_v9_0_sw_fini(void *handle)
2332 {
2333         int i;
2334         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2335 
2336         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2337                         adev->gfx.ras_if) {
2338                 struct ras_common_if *ras_if = adev->gfx.ras_if;
2339                 struct ras_ih_if ih_info = {
2340                         .head = *ras_if,
2341                 };
2342 
2343                 amdgpu_ras_debugfs_remove(adev, ras_if);
2344                 amdgpu_ras_sysfs_remove(adev, ras_if);
2345                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2346                 amdgpu_ras_feature_enable(adev, ras_if, 0);
2347                 kfree(ras_if);
2348         }
2349 
2350         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2351                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2352         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2353                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2354 
2355         amdgpu_gfx_mqd_sw_fini(adev);
2356         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2357         amdgpu_gfx_kiq_fini(adev);
2358 
2359         gfx_v9_0_mec_fini(adev);
2360         gfx_v9_0_ngg_fini(adev);
2361         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2362         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2363                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2364                                 &adev->gfx.rlc.cp_table_gpu_addr,
2365                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2366         }
2367         gfx_v9_0_free_microcode(adev);
2368 
2369         return 0;
2370 }
2371 
2372 
2373 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2374 {
2375         /* TODO */
2376 }
2377 
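             /*
              * gfx_v9_0_select_se_sh - steer subsequent register accesses via
              * GRBM_GFX_INDEX to a specific shader engine / shader array /
              * instance; passing 0xffffffff for a field selects broadcast
              * writes for that level. Callers are expected to hold
              * grbm_idx_mutex around the select/access/restore sequence.
              */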
2378 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2379 {
2380         u32 data;
2381 
2382         if (instance == 0xffffffff)
2383                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2384         else
2385                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2386 
2387         if (se_num == 0xffffffff)
2388                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2389         else
2390                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2391 
2392         if (sh_num == 0xffffffff)
2393                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2394         else
2395                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2396 
2397         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2398 }
2399 
2400 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2401 {
2402         u32 data, mask;
2403 
2404         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2405         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2406 
2407         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2408         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2409 
2410         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2411                                          adev->gfx.config.max_sh_per_se);
2412 
2413         return (~data) & mask;
2414 }
2415 
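             /*
              * gfx_v9_0_setup_rb - walk every SE/SH, read the per-SH render
              * backend disable state and accumulate the active-RB bitmap into
              * gfx.config.backend_enable_mask / num_rbs.
              */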
2416 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2417 {
2418         int i, j;
2419         u32 data;
2420         u32 active_rbs = 0;
2421         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2422                                         adev->gfx.config.max_sh_per_se;
2423 
2424         mutex_lock(&adev->grbm_idx_mutex);
2425         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2426                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2427                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2428                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2429                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2430                                                rb_bitmap_width_per_sh);
2431                 }
2432         }
2433         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2434         mutex_unlock(&adev->grbm_idx_mutex);
2435 
2436         adev->gfx.config.backend_enable_mask = active_rbs;
2437         adev->gfx.config.num_rbs = hweight32(active_rbs);
2438 }
2439 
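             /*
              * DEFAULT_SH_MEM_BASES is replicated into both 16-bit fields of
              * SH_MEM_BASES (private/scratch base in the low half, shared base
              * in the high half); each field appears to hold bits 63:48 of the
              * aperture base, so 0x6000 corresponds to the
              * 0x60000000'00000000 apertures described in
              * gfx_v9_0_init_compute_vmid() below.
              */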
2440 #define DEFAULT_SH_MEM_BASES    (0x6000)
2441 #define FIRST_COMPUTE_VMID      (8)
2442 #define LAST_COMPUTE_VMID       (16)
2443 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2444 {
2445         int i;
2446         uint32_t sh_mem_config;
2447         uint32_t sh_mem_bases;
2448 
2449         /*
2450          * Configure apertures:
2451          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2452          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2453          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2454          */
2455         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2456 
2457         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2458                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2459                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2460 
2461         mutex_lock(&adev->srbm_mutex);
2462         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2463                 soc15_grbm_select(adev, 0, 0, 0, i);
2464                 /* CP and shaders */
2465                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2466                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2467         }
2468         soc15_grbm_select(adev, 0, 0, 0, 0);
2469         mutex_unlock(&adev->srbm_mutex);
2470 
2471         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2472            access. These should be enabled by FW for target VMIDs. */
2473         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2474                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2475                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2476                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2477                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2478         }
2479 }
2480 
2481 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2482 {
2483         int vmid;
2484 
2485         /*
2486          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2487          * access. Compute VMIDs should be enabled by FW for target VMIDs;
2488          * the driver can enable them for graphics. VMID0 should maintain
2489          * access so that HWS firmware can save/restore entries.
2490          */
2491         for (vmid = 1; vmid < 16; vmid++) {
2492                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2493                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2494                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2495                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2496         }
2497 }
2498 
2499 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2500 {
2501         u32 tmp;
2502         int i;
2503 
2504         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2505 
2506         gfx_v9_0_tiling_mode_table_init(adev);
2507 
2508         gfx_v9_0_setup_rb(adev);
2509         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2510         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2511 
2512         /* XXX SH_MEM regs */
2513         /* where to put LDS, scratch, GPUVM in FSA64 space */
2514         mutex_lock(&adev->srbm_mutex);
2515         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2516                 soc15_grbm_select(adev, 0, 0, 0, i);
2517                 /* CP and shaders */
2518                 if (i == 0) {
2519                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2520                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2521                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2522                                             !!amdgpu_noretry);
2523                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2524                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2525                 } else {
2526                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2527                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2528                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2529                                             !!amdgpu_noretry);
2530                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2531                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2532                                 (adev->gmc.private_aperture_start >> 48));
2533                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2534                                 (adev->gmc.shared_aperture_start >> 48));
2535                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2536                 }
2537         }
2538         soc15_grbm_select(adev, 0, 0, 0, 0);
2539 
2540         mutex_unlock(&adev->srbm_mutex);
2541 
2542         gfx_v9_0_init_compute_vmid(adev);
2543         gfx_v9_0_init_gds_vmid(adev);
2544 }
2545 
2546 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2547 {
2548         u32 i, j, k;
2549         u32 mask;
2550 
2551         mutex_lock(&adev->grbm_idx_mutex);
2552         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2553                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2554                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2555                         for (k = 0; k < adev->usec_timeout; k++) {
2556                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2557                                         break;
2558                                 udelay(1);
2559                         }
2560                         if (k == adev->usec_timeout) {
2561                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2562                                                       0xffffffff, 0xffffffff);
2563                                 mutex_unlock(&adev->grbm_idx_mutex);
2564                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2565                                          i, j);
2566                                 return;
2567                         }
2568                 }
2569         }
2570         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2571         mutex_unlock(&adev->grbm_idx_mutex);
2572 
2573         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2574                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2575                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2576                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2577         for (k = 0; k < adev->usec_timeout; k++) {
2578                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2579                         break;
2580                 udelay(1);
2581         }
2582 }
2583 
2584 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2585                                                bool enable)
2586 {
2587         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2588 
2589         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2590         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2591         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2592         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2593 
2594         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2595 }
2596 
2597 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2598 {
2599         /* csib */
2600         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2601                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2602         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2603                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2604         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2605                         adev->gfx.rlc.clear_state_size);
2606 }
2607 
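             /*
              * gfx_v9_1_parse_ind_reg_list - scan the indirect part of the RLC
              * register_list_format blob: record the offset at which each
              * indirect block starts and collect the distinct indirect register
              * offsets it references (each block is terminated by a 0xFFFFFFFF
              * marker). The results feed gfx_v9_1_init_rlc_save_restore_list().
              */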
2608 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2609                                 int indirect_offset,
2610                                 int list_size,
2611                                 int *unique_indirect_regs,
2612                                 int unique_indirect_reg_count,
2613                                 int *indirect_start_offsets,
2614                                 int *indirect_start_offsets_count,
2615                                 int max_start_offsets_count)
2616 {
2617         int idx;
2618 
2619         for (; indirect_offset < list_size; indirect_offset++) {
2620                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2621                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2622                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2623 
2624                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2625                         indirect_offset += 2;
2626 
2627                         /* look for the matching index */
2628                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2629                                 if (unique_indirect_regs[idx] ==
2630                                         register_list_format[indirect_offset] ||
2631                                         !unique_indirect_regs[idx])
2632                                         break;
2633                         }
2634 
2635                         BUG_ON(idx >= unique_indirect_reg_count);
2636 
2637                         if (!unique_indirect_regs[idx])
2638                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2639 
2640                         indirect_offset++;
2641                 }
2642         }
2643 }
2644 
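             /*
              * gfx_v9_1_init_rlc_save_restore_list - program the RLC
              * save/restore machine: copy the register_restore table into SRM
              * ARAM, stream the parsed register list format into RLC GPM
              * scratch with each indirect register replaced by its index into
              * the unique-register table, then program the list size, the
              * per-block start offsets and the unique indirect register
              * address/data index registers.
              */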
2645 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2646 {
2647         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2648         int unique_indirect_reg_count = 0;
2649 
2650         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2651         int indirect_start_offsets_count = 0;
2652 
2653         int list_size = 0;
2654         int i = 0, j = 0;
2655         u32 tmp = 0;
2656 
2657         u32 *register_list_format =
2658                 kmemdup(adev->gfx.rlc.register_list_format,
2659                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2660         if (!register_list_format)
2661                 return -ENOMEM;
2662 
2663         /* setup unique_indirect_regs array and indirect_start_offsets array */
2664         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2665         gfx_v9_1_parse_ind_reg_list(register_list_format,
2666                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2667                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2668                                     unique_indirect_regs,
2669                                     unique_indirect_reg_count,
2670                                     indirect_start_offsets,
2671                                     &indirect_start_offsets_count,
2672                                     ARRAY_SIZE(indirect_start_offsets));
2673 
2674         /* enable auto inc in case it is disabled */
2675         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2676         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2677         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2678 
2679         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2680         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2681                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2682         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2683                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2684                         adev->gfx.rlc.register_restore[i]);
2685 
2686         /* load indirect register */
2687         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2688                 adev->gfx.rlc.reg_list_format_start);
2689 
2690         /* direct register portion */
2691         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2692                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2693                         register_list_format[i]);
2694 
2695         /* indirect register portion */
2696         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2697                 if (register_list_format[i] == 0xFFFFFFFF) {
2698                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2699                         continue;
2700                 }
2701 
2702                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2703                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2704 
2705                 for (j = 0; j < unique_indirect_reg_count; j++) {
2706                         if (register_list_format[i] == unique_indirect_regs[j]) {
2707                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2708                                 break;
2709                         }
2710                 }
2711 
2712                 BUG_ON(j >= unique_indirect_reg_count);
2713 
2714                 i++;
2715         }
2716 
2717         /* set save/restore list size */
2718         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2719         list_size = list_size >> 1;
2720         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2721                 adev->gfx.rlc.reg_restore_list_size);
2722         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2723 
2724         /* write the starting offsets to RLC scratch ram */
2725         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2726                 adev->gfx.rlc.starting_offsets_start);
2727         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2728                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2729                        indirect_start_offsets[i]);
2730 
2731         /* load unique indirect regs */
2732         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2733                 if (unique_indirect_regs[i] != 0) {
2734                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2735                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2736                                unique_indirect_regs[i] & 0x3FFFF);
2737 
2738                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2739                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2740                                unique_indirect_regs[i] >> 20);
2741                 }
2742         }
2743 
2744         kfree(register_list_format);
2745         return 0;
2746 }
2747 
2748 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2749 {
2750         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2751 }
2752 
2753 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2754                                              bool enable)
2755 {
2756         uint32_t data = 0;
2757         uint32_t default_data = 0;
2758 
2759         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2760         if (enable) {
2761                 /* enable GFXIP control over CGPG */
2762                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2763                 if (default_data != data)
2764                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2765 
2766                 /* update status */
2767                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2768                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2769                 if (default_data != data)
2770                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2771         } else {
2772                 /* restore GFXIP control over CGPG */
2773                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2774                 if (default_data != data)
2775                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2776         }
2777 }
2778 
2779 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2780 {
2781         uint32_t data = 0;
2782 
2783         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2784                               AMD_PG_SUPPORT_GFX_SMG |
2785                               AMD_PG_SUPPORT_GFX_DMG)) {
2786                 /* init IDLE_POLL_COUNT = 60 */
2787                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2788                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2789                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2790                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2791 
2792                 /* init RLC PG Delay */
2793                 data = 0;
2794                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2795                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2796                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2797                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2798                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2799 
2800                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2801                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2802                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2803                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2804 
2805                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2806                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2807                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2808                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2809 
2810                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2811                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2812 
2813                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2814                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2815                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2816 
2817                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2818         }
2819 }
2820 
2821 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2822                                                 bool enable)
2823 {
2824         uint32_t data = 0;
2825         uint32_t default_data = 0;
2826 
2827         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2830                              enable ? 1 : 0);
2831         if (default_data != data)
2832                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2833 }
2834 
2835 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2836                                                 bool enable)
2837 {
2838         uint32_t data = 0;
2839         uint32_t default_data = 0;
2840 
2841         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2842         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2843                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2844                              enable ? 1 : 0);
2845         if (default_data != data)
2846                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2847 }
2848 
2849 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2850                                         bool enable)
2851 {
2852         uint32_t data = 0;
2853         uint32_t default_data = 0;
2854 
2855         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2856         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2857                              CP_PG_DISABLE,
2858                              enable ? 0 : 1);
2859         if (default_data != data)
2860                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2861 }
2862 
2863 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2864                                                 bool enable)
2865 {
2866         uint32_t data, default_data;
2867 
2868         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870                              GFX_POWER_GATING_ENABLE,
2871                              enable ? 1 : 0);
2872         if (default_data != data)
2873                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874 }
2875 
2876 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2877                                                 bool enable)
2878 {
2879         uint32_t data, default_data;
2880 
2881         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2882         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2883                              GFX_PIPELINE_PG_ENABLE,
2884                              enable ? 1 : 0);
2885         if (default_data != data)
2886                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2887 
2888         if (!enable)
2889                 /* read any GFX register to wake up GFX */
2890                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2891 }
2892 
2893 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2894                                                        bool enable)
2895 {
2896         uint32_t data, default_data;
2897 
2898         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2899         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2900                              STATIC_PER_CU_PG_ENABLE,
2901                              enable ? 1 : 0);
2902         if (default_data != data)
2903                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2904 }
2905 
2906 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2907                                                 bool enable)
2908 {
2909         uint32_t data, default_data;
2910 
2911         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2912         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2913                              DYN_PER_CU_PG_ENABLE,
2914                              enable ? 1 : 0);
2915         if (default_data != data)
2916                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2917 }
2918 
2919 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2920 {
2921         gfx_v9_0_init_csb(adev);
2922 
2923         /*
2924          * The RLC save/restore list is only available since RLC v2_1,
2925          * and it is required by the gfxoff feature.
2926          */
2927         if (adev->gfx.rlc.is_rlc_v2_1) {
2928                 if (adev->asic_type == CHIP_VEGA12 ||
2929                     (adev->asic_type == CHIP_RAVEN &&
2930                      adev->rev_id >= 8))
2931                         gfx_v9_1_init_rlc_save_restore_list(adev);
2932                 gfx_v9_0_enable_save_restore_machine(adev);
2933         }
2934 
2935         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2936                               AMD_PG_SUPPORT_GFX_SMG |
2937                               AMD_PG_SUPPORT_GFX_DMG |
2938                               AMD_PG_SUPPORT_CP |
2939                               AMD_PG_SUPPORT_GDS |
2940                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2941                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2942                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2943                 gfx_v9_0_init_gfx_power_gating(adev);
2944         }
2945 }
2946 
2947 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2948 {
2949         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2950         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2951         gfx_v9_0_wait_for_rlc_serdes(adev);
2952 }
2953 
2954 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2955 {
2956         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2957         udelay(50);
2958         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2959         udelay(50);
2960 }
2961 
2962 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2963 {
2964 #ifdef AMDGPU_RLC_DEBUG_RETRY
2965         u32 rlc_ucode_ver;
2966 #endif
2967 
2968         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2969         udelay(50);
2970 
2971         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP has been initialized */
2972         if (!(adev->flags & AMD_IS_APU)) {
2973                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2974                 udelay(50);
2975         }
2976 
2977 #ifdef AMDGPU_RLC_DEBUG_RETRY
2978         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2979         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2980         if (rlc_ucode_ver == 0x108) {
2981                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2982                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2983                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2984                  * default is 0x9C4 to create a 100us interval */
2985                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2986                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2987                  * to disable the page fault retry interrupts, default is
2988                  * 0x100 (256) */
2989                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2990         }
2991 #endif
2992 }
2993 
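             /*
              * gfx_v9_0_rlc_load_microcode - legacy (non-PSP) RLC ucode load:
              * stream the ucode dwords through RLC_GPM_UCODE_ADDR/DATA and
              * finish by writing the firmware version to the ADDR register,
              * which appears to be the convention for this legacy load path.
              */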
2994 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2995 {
2996         const struct rlc_firmware_header_v2_0 *hdr;
2997         const __le32 *fw_data;
2998         unsigned i, fw_size;
2999 
3000         if (!adev->gfx.rlc_fw)
3001                 return -EINVAL;
3002 
3003         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3004         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3005 
3006         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3007                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3008         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3009 
3010         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3011                         RLCG_UCODE_LOADING_START_ADDRESS);
3012         for (i = 0; i < fw_size; i++)
3013                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3014         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3015 
3016         return 0;
3017 }
3018 
3019 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3020 {
3021         int r;
3022 
3023         if (amdgpu_sriov_vf(adev)) {
3024                 gfx_v9_0_init_csb(adev);
3025                 return 0;
3026         }
3027 
3028         adev->gfx.rlc.funcs->stop(adev);
3029 
3030         /* disable CG */
3031         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3032 
3033         gfx_v9_0_init_pg(adev);
3034 
3035         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3036                 /* legacy rlc firmware loading */
3037                 r = gfx_v9_0_rlc_load_microcode(adev);
3038                 if (r)
3039                         return r;
3040         }
3041 
3042         switch (adev->asic_type) {
3043         case CHIP_RAVEN:
3044                 if (amdgpu_lbpw == 0)
3045                         gfx_v9_0_enable_lbpw(adev, false);
3046                 else
3047                         gfx_v9_0_enable_lbpw(adev, true);
3048                 break;
3049         case CHIP_VEGA20:
3050                 if (amdgpu_lbpw > 0)
3051                         gfx_v9_0_enable_lbpw(adev, true);
3052                 else
3053                         gfx_v9_0_enable_lbpw(adev, false);
3054                 break;
3055         default:
3056                 break;
3057         }
3058 
3059         adev->gfx.rlc.funcs->start(adev);
3060 
3061         return 0;
3062 }
3063 
3064 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3065 {
3066         int i;
3067         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3068 
3069         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3070         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3071         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3072         if (!enable) {
3073                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3074                         adev->gfx.gfx_ring[i].sched.ready = false;
3075         }
3076         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3077         udelay(50);
3078 }
3079 
3080 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3081 {
3082         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3083         const struct gfx_firmware_header_v1_0 *ce_hdr;
3084         const struct gfx_firmware_header_v1_0 *me_hdr;
3085         const __le32 *fw_data;
3086         unsigned i, fw_size;
3087 
3088         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3089                 return -EINVAL;
3090 
3091         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3092                 adev->gfx.pfp_fw->data;
3093         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3094                 adev->gfx.ce_fw->data;
3095         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3096                 adev->gfx.me_fw->data;
3097 
3098         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3099         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3100         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3101 
3102         gfx_v9_0_cp_gfx_enable(adev, false);
3103 
3104         /* PFP */
3105         fw_data = (const __le32 *)
3106                 (adev->gfx.pfp_fw->data +
3107                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3108         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3109         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3110         for (i = 0; i < fw_size; i++)
3111                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3112         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3113 
3114         /* CE */
3115         fw_data = (const __le32 *)
3116                 (adev->gfx.ce_fw->data +
3117                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3118         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3119         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3120         for (i = 0; i < fw_size; i++)
3121                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3122         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3123 
3124         /* ME */
3125         fw_data = (const __le32 *)
3126                 (adev->gfx.me_fw->data +
3127                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3128         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3129         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3130         for (i = 0; i < fw_size; i++)
3131                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3132         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3133 
3134         return 0;
3135 }
3136 
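             /*
              * gfx_v9_0_cp_gfx_start - bring up the gfx CP: program
              * CP_MAX_CONTEXT and CP_DEVICE_ID, un-halt ME/PFP/CE and emit the
              * clear-state preamble on ring 0 (context control, the golden
              * context registers from gfx9_cs_data, CLEAR_STATE, the CE
              * partition bases and VGT_INDEX_TYPE).
              */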
3137 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3138 {
3139         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3140         const struct cs_section_def *sect = NULL;
3141         const struct cs_extent_def *ext = NULL;
3142         int r, i, tmp;
3143 
3144         /* init the CP */
3145         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3146         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3147 
3148         gfx_v9_0_cp_gfx_enable(adev, true);
3149 
3150         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3151         if (r) {
3152                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3153                 return r;
3154         }
3155 
3156         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3157         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3158 
3159         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3160         amdgpu_ring_write(ring, 0x80000000);
3161         amdgpu_ring_write(ring, 0x80000000);
3162 
3163         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3164                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3165                         if (sect->id == SECT_CONTEXT) {
3166                                 amdgpu_ring_write(ring,
3167                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3168                                                ext->reg_count));
3169                                 amdgpu_ring_write(ring,
3170                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3171                                 for (i = 0; i < ext->reg_count; i++)
3172                                         amdgpu_ring_write(ring, ext->extent[i]);
3173                         }
3174                 }
3175         }
3176 
3177         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3178         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3179 
3180         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3181         amdgpu_ring_write(ring, 0);
3182 
3183         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3184         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3185         amdgpu_ring_write(ring, 0x8000);
3186         amdgpu_ring_write(ring, 0x8000);
3187 
3188         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3189         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3190                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3191         amdgpu_ring_write(ring, tmp);
3192         amdgpu_ring_write(ring, 0);
3193 
3194         amdgpu_ring_commit(ring);
3195 
3196         return 0;
3197 }
3198 
3199 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3200 {
3201         struct amdgpu_ring *ring;
3202         u32 tmp;
3203         u32 rb_bufsz;
3204         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3205 
3206         /* Set the write pointer delay */
3207         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3208 
3209         /* set the RB to use vmid 0 */
3210         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3211 
3212         /* Set ring buffer size */
3213         ring = &adev->gfx.gfx_ring[0];
3214         rb_bufsz = order_base_2(ring->ring_size / 8);
3215         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3216         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3217 #ifdef __BIG_ENDIAN
3218         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3219 #endif
3220         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3221 
3222         /* Initialize the ring buffer's write pointers */
3223         ring->wptr = 0;
3224         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3225         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3226 
3227         /* set the wb address whether it's enabled or not */
3228         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3229         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3230         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3231 
3232         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3233         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3234         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3235 
3236         mdelay(1);
3237         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3238 
3239         rb_addr = ring->gpu_addr >> 8;
3240         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3241         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3242 
3243         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3244         if (ring->use_doorbell) {
3245                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3246                                     DOORBELL_OFFSET, ring->doorbell_index);
3247                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3248                                     DOORBELL_EN, 1);
3249         } else {
3250                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3251         }
3252         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3253 
3254         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3255                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3256         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3257 
3258         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3259                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3260 
3261 
3262         /* start the ring */
3263         gfx_v9_0_cp_gfx_start(adev);
3264         ring->sched.ready = true;
3265 
3266         return 0;
3267 }
3268 
3269 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3270 {
3271         int i;
3272 
3273         if (enable) {
3274                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3275         } else {
3276                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3277                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3278                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3279                         adev->gfx.compute_ring[i].sched.ready = false;
3280                 adev->gfx.kiq.ring.sched.ready = false;
3281         }
3282         udelay(50);
3283 }
3284 
3285 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3286 {
3287         const struct gfx_firmware_header_v1_0 *mec_hdr;
3288         const __le32 *fw_data;
3289         unsigned i;
3290         u32 tmp;
3291 
3292         if (!adev->gfx.mec_fw)
3293                 return -EINVAL;
3294 
3295         gfx_v9_0_cp_compute_enable(adev, false);
3296 
3297         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3298         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3299 
3300         fw_data = (const __le32 *)
3301                 (adev->gfx.mec_fw->data +
3302                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3303         tmp = 0;
3304         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3305         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3306         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3307 
3308         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3309                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3310         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3311                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3312 
3313         /* MEC1 */
3314         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3315                          mec_hdr->jt_offset);
3316         for (i = 0; i < mec_hdr->jt_size; i++)
3317                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3318                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3319 
3320         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3321                         adev->gfx.mec_fw_version);
3322         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3323 
3324         return 0;
3325 }
3326 
3327 /* KIQ functions */
3328 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3329 {
3330         uint32_t tmp;
3331         struct amdgpu_device *adev = ring->adev;
3332 
3333         /* tell RLC which queue is the KIQ */
3334         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3335         tmp &= 0xffffff00;
3336         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3337         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3338         tmp |= 0x80;
3339         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3340 }
3341 
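             /*
              * gfx_v9_0_kiq_kcq_enable - hand the compute queues to the HW
              * scheduler: build a bitmask of the MEC queues enabled in
              * mec.queue_bitmap, send one SET_RESOURCES packet with that mask
              * through the KIQ, then one MAP_QUEUES packet per compute ring
              * carrying its doorbell, MQD and wptr addresses.
              */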
3342 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3343 {
3344         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3345         uint64_t queue_mask = 0;
3346         int r, i;
3347 
3348         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3349                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3350                         continue;
3351 
3352                 /* This situation may be hit in the future if a new HW
3353                  * generation exposes more than 64 queues. If so, the
3354                  * definition of queue_mask needs updating */
3355                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3356                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3357                         break;
3358                 }
3359 
3360                 queue_mask |= (1ull << i);
3361         }
3362 
3363         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3364         if (r) {
3365                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3366                 return r;
3367         }
3368 
3369         /* set resources */
3370         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3371         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3372                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3373         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3374         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3375         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3376         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3377         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3378         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3379         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3380                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3381                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3382                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3383 
3384                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3385                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3386                 amdgpu_ring_write(kiq_ring,
3387                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3388                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3389                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3390                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3391                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3392                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3393                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3394                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3395                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3396                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3397                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3398                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3399                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3400                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3401         }
3402 
3403         r = amdgpu_ring_test_helper(kiq_ring);
3404         if (r)
3405                 DRM_ERROR("KCQ enable failed\n");
3406 
3407         return r;
3408 }
3409 
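     /*
      * Fill in the Memory Queue Descriptor (MQD) for a compute ring.  The
      * values stored here mirror the CP_HQD_* registers: for KCQs the CP
      * firmware loads the HQD from this structure when the queue is mapped
      * via the KIQ, while for the KIQ itself the same values are written to
      * the registers directly in gfx_v9_0_kiq_init_register().
      */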
3410 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3411 {
3412         struct amdgpu_device *adev = ring->adev;
3413         struct v9_mqd *mqd = ring->mqd_ptr;
3414         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3415         uint32_t tmp;
3416 
3417         mqd->header = 0xC0310800;
3418         mqd->compute_pipelinestat_enable = 0x00000001;
3419         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3420         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3421         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3422         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3423         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3424         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3425         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3426         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3427         mqd->compute_misc_reserved = 0x00000003;
3428 
3429         mqd->dynamic_cu_mask_addr_lo =
3430                 lower_32_bits(ring->mqd_gpu_addr
3431                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3432         mqd->dynamic_cu_mask_addr_hi =
3433                 upper_32_bits(ring->mqd_gpu_addr
3434                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3435 
3436         eop_base_addr = ring->eop_gpu_addr >> 8;
3437         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3438         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3439 
3440         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3441         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3442         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3443                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3444 
3445         mqd->cp_hqd_eop_control = tmp;
3446 
3447         /* enable doorbell? */
3448         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3449 
3450         if (ring->use_doorbell) {
3451                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3452                                     DOORBELL_OFFSET, ring->doorbell_index);
3453                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3454                                     DOORBELL_EN, 1);
3455                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456                                     DOORBELL_SOURCE, 0);
3457                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458                                     DOORBELL_HIT, 0);
3459         } else {
3460                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3461                                          DOORBELL_EN, 0);
3462         }
3463 
3464         mqd->cp_hqd_pq_doorbell_control = tmp;
3465 
3466         /* disable the queue if it's active */
3467         ring->wptr = 0;
3468         mqd->cp_hqd_dequeue_request = 0;
3469         mqd->cp_hqd_pq_rptr = 0;
3470         mqd->cp_hqd_pq_wptr_lo = 0;
3471         mqd->cp_hqd_pq_wptr_hi = 0;
3472 
3473         /* set the pointer to the MQD */
3474         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3475         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3476 
3477         /* set MQD vmid to 0 */
3478         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3479         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3480         mqd->cp_mqd_control = tmp;
3481 
3482         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3483         hqd_gpu_addr = ring->gpu_addr >> 8;
3484         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3485         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3486 
3487         /* set up the HQD, this is similar to CP_RB0_CNTL */
3488         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3489         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3490                             (order_base_2(ring->ring_size / 4) - 1));
3491         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3492                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3493 #ifdef __BIG_ENDIAN
3494         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3495 #endif
3496         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3497         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3498         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3499         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3500         mqd->cp_hqd_pq_control = tmp;
3501 
3502         /* set the wb address whether it's enabled or not */
3503         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3504         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3505         mqd->cp_hqd_pq_rptr_report_addr_hi =
3506                 upper_32_bits(wb_gpu_addr) & 0xffff;
3507 
3508         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3509         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3510         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3511         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3512 
3513         tmp = 0;
3514         /* enable the doorbell if requested */
3515         if (ring->use_doorbell) {
3516                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3517                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518                                 DOORBELL_OFFSET, ring->doorbell_index);
3519 
3520                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3521                                          DOORBELL_EN, 1);
3522                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523                                          DOORBELL_SOURCE, 0);
3524                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3525                                          DOORBELL_HIT, 0);
3526         }
3527 
3528         mqd->cp_hqd_pq_doorbell_control = tmp;
3529 
3530         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3531         ring->wptr = 0;
3532         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3533 
3534         /* set the vmid for the queue */
3535         mqd->cp_hqd_vmid = 0;
3536 
3537         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3538         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3539         mqd->cp_hqd_persistent_state = tmp;
3540 
3541         /* set MIN_IB_AVAIL_SIZE */
3542         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3543         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3544         mqd->cp_hqd_ib_control = tmp;
3545 
3546         /* activate the queue */
3547         mqd->cp_hqd_active = 1;
3548 
3549         return 0;
3550 }
3551 
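     /* Program the KIQ's HQD registers directly from its MQD contents. */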
3552 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3553 {
3554         struct amdgpu_device *adev = ring->adev;
3555         struct v9_mqd *mqd = ring->mqd_ptr;
3556         int j;
3557 
3558         /* disable wptr polling */
3559         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3560 
3561         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3562                mqd->cp_hqd_eop_base_addr_lo);
3563         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3564                mqd->cp_hqd_eop_base_addr_hi);
3565 
3566         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3567         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3568                mqd->cp_hqd_eop_control);
3569 
3570         /* enable doorbell? */
3571         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3572                mqd->cp_hqd_pq_doorbell_control);
3573 
3574         /* disable the queue if it's active */
3575         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3576                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3577                 for (j = 0; j < adev->usec_timeout; j++) {
3578                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3579                                 break;
3580                         udelay(1);
3581                 }
3582                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3583                        mqd->cp_hqd_dequeue_request);
3584                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3585                        mqd->cp_hqd_pq_rptr);
3586                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3587                        mqd->cp_hqd_pq_wptr_lo);
3588                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3589                        mqd->cp_hqd_pq_wptr_hi);
3590         }
3591 
3592         /* set the pointer to the MQD */
3593         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3594                mqd->cp_mqd_base_addr_lo);
3595         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3596                mqd->cp_mqd_base_addr_hi);
3597 
3598         /* set MQD vmid to 0 */
3599         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3600                mqd->cp_mqd_control);
3601 
3602         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3603         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3604                mqd->cp_hqd_pq_base_lo);
3605         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3606                mqd->cp_hqd_pq_base_hi);
3607 
3608         /* set up the HQD, this is similar to CP_RB0_CNTL */
3609         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3610                mqd->cp_hqd_pq_control);
3611 
3612         /* set the wb address whether it's enabled or not */
3613         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3614                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3615         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3616                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3617 
3618         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3619         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3620                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3621         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3622                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3623 
3624         /* enable the doorbell if requested */
3625         if (ring->use_doorbell) {
3626                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3627                                         (adev->doorbell_index.kiq * 2) << 2);
3628                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3629                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3630         }
3631 
3632         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3633                mqd->cp_hqd_pq_doorbell_control);
3634 
3635         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3636         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3637                mqd->cp_hqd_pq_wptr_lo);
3638         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3639                mqd->cp_hqd_pq_wptr_hi);
3640 
3641         /* set the vmid for the queue */
3642         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3643 
3644         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3645                mqd->cp_hqd_persistent_state);
3646 
3647         /* activate the queue */
3648         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3649                mqd->cp_hqd_active);
3650 
3651         if (ring->use_doorbell)
3652                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3653 
3654         return 0;
3655 }
3656 
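     /*
      * Deactivate the KIQ's hardware queue: request a dequeue, wait for
      * CP_HQD_ACTIVE to clear, then reset the doorbell and queue pointer
      * registers so a later re-init starts from a clean state.
      */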
3657 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3658 {
3659         struct amdgpu_device *adev = ring->adev;
3660         int j;
3661 
3662         /* disable the queue if it's active */
3663         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3664 
3665                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3666 
3667                 for (j = 0; j < adev->usec_timeout; j++) {
3668                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3669                                 break;
3670                         udelay(1);
3671                 }
3672 
3673                 if (j == adev->usec_timeout) {
3674                         DRM_DEBUG("KIQ dequeue request failed.\n");
3675 
3676                         /* manually disable the queue if the dequeue request timed out */
3677                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3678                 }
3679 
3680                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3681                       0);
3682         }
3683 
3684         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3685         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3686         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3687         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3688         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3689         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3690         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3691         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3692 
3693         return 0;
3694 }
3695 
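     /*
      * Initialize the KIQ.  On a GPU reset the MQD is restored from the
      * backup copy and only the HQD registers are reprogrammed; on first
      * init the MQD is built from scratch and a backup copy is saved for
      * later resets.
      */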
3696 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3697 {
3698         struct amdgpu_device *adev = ring->adev;
3699         struct v9_mqd *mqd = ring->mqd_ptr;
3700         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3701 
3702         gfx_v9_0_kiq_setting(ring);
3703 
3704         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3705                 /* reset MQD to a clean status */
3706                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3707                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3708 
3709                 /* reset ring buffer */
3710                 ring->wptr = 0;
3711                 amdgpu_ring_clear_ring(ring);
3712 
3713                 mutex_lock(&adev->srbm_mutex);
3714                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3715                 gfx_v9_0_kiq_init_register(ring);
3716                 soc15_grbm_select(adev, 0, 0, 0, 0);
3717                 mutex_unlock(&adev->srbm_mutex);
3718         } else {
3719                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3720                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3721                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3722                 mutex_lock(&adev->srbm_mutex);
3723                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3724                 gfx_v9_0_mqd_init(ring);
3725                 gfx_v9_0_kiq_init_register(ring);
3726                 soc15_grbm_select(adev, 0, 0, 0, 0);
3727                 mutex_unlock(&adev->srbm_mutex);
3728 
3729                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3730                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3731         }
3732 
3733         return 0;
3734 }
3735 
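     /*
      * Initialize a KCQ's MQD.  The queue itself is not activated here; it
      * is mapped later through the KIQ in gfx_v9_0_kiq_kcq_enable().
      */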
3736 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3737 {
3738         struct amdgpu_device *adev = ring->adev;
3739         struct v9_mqd *mqd = ring->mqd_ptr;
3740         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3741 
3742         if (!adev->in_gpu_reset && !adev->in_suspend) {
3743                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3744                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3745                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3746                 mutex_lock(&adev->srbm_mutex);
3747                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3748                 gfx_v9_0_mqd_init(ring);
3749                 soc15_grbm_select(adev, 0, 0, 0, 0);
3750                 mutex_unlock(&adev->srbm_mutex);
3751 
3752                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3753                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3754         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3755                 /* reset MQD to a clean status */
3756                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3757                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3758 
3759                 /* reset ring buffer */
3760                 ring->wptr = 0;
3761                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3762                 amdgpu_ring_clear_ring(ring);
3763         } else {
3764                 amdgpu_ring_clear_ring(ring);
3765         }
3766 
3767         return 0;
3768 }
3769 
3770 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3771 {
3772         struct amdgpu_ring *ring;
3773         int r;
3774 
3775         ring = &adev->gfx.kiq.ring;
3776 
3777         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3778         if (unlikely(r != 0))
3779                 return r;
3780 
3781         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3782         if (unlikely(r != 0)) {
3783                 amdgpu_bo_unreserve(ring->mqd_obj);
                     return r;
             }
3784 
3785         gfx_v9_0_kiq_init_queue(ring);
3786         amdgpu_bo_kunmap(ring->mqd_obj);
3787         ring->mqd_ptr = NULL;
3788         amdgpu_bo_unreserve(ring->mqd_obj);
3789         ring->sched.ready = true;
3790         return 0;
3791 }
3792 
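     /* Create or restore the MQDs for all KCQs, then map them via the KIQ. */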
3793 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3794 {
3795         struct amdgpu_ring *ring = NULL;
3796         int r = 0, i;
3797 
3798         gfx_v9_0_cp_compute_enable(adev, true);
3799 
3800         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3801                 ring = &adev->gfx.compute_ring[i];
3802 
3803                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3804                 if (unlikely(r != 0))
3805                         goto done;
3806                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3807                 if (!r) {
3808                         r = gfx_v9_0_kcq_init_queue(ring);
3809                         amdgpu_bo_kunmap(ring->mqd_obj);
3810                         ring->mqd_ptr = NULL;
3811                 }
3812                 amdgpu_bo_unreserve(ring->mqd_obj);
3813                 if (r)
3814                         goto done;
3815         }
3816 
3817         r = gfx_v9_0_kiq_kcq_enable(adev);
3818 done:
3819         return r;
3820 }
3821 
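     /*
      * Bring up the whole Command Processor: load the CP microcode when the
      * PSP is not doing front-door loading, resume the KIQ first, then the
      * GFX ring (skipped on Arcturus, which has no GFX rings) and the KCQs,
      * and finally ring-test everything that was brought up.
      */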
3822 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3823 {
3824         int r, i;
3825         struct amdgpu_ring *ring;
3826 
3827         if (!(adev->flags & AMD_IS_APU))
3828                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3829 
3830         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3831                 if (adev->asic_type != CHIP_ARCTURUS) {
3832                         /* legacy firmware loading */
3833                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3834                         if (r)
3835                                 return r;
3836                 }
3837 
3838                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3839                 if (r)
3840                         return r;
3841         }
3842 
3843         r = gfx_v9_0_kiq_resume(adev);
3844         if (r)
3845                 return r;
3846 
3847         if (adev->asic_type != CHIP_ARCTURUS) {
3848                 r = gfx_v9_0_cp_gfx_resume(adev);
3849                 if (r)
3850                         return r;
3851         }
3852 
3853         r = gfx_v9_0_kcq_resume(adev);
3854         if (r)
3855                 return r;
3856 
3857         if (adev->asic_type != CHIP_ARCTURUS) {
3858                 ring = &adev->gfx.gfx_ring[0];
3859                 r = amdgpu_ring_test_helper(ring);
3860                 if (r)
3861                         return r;
3862         }
3863 
3864         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3865                 ring = &adev->gfx.compute_ring[i];
3866                 amdgpu_ring_test_helper(ring);
3867         }
3868 
3869         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3870 
3871         return 0;
3872 }
3873 
3874 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3875 {
3876         if (adev->asic_type != CHIP_ARCTURUS)
3877                 gfx_v9_0_cp_gfx_enable(adev, enable);
3878         gfx_v9_0_cp_compute_enable(adev, enable);
3879 }
3880 
3881 static int gfx_v9_0_hw_init(void *handle)
3882 {
3883         int r;
3884         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3885 
3886         if (!amdgpu_sriov_vf(adev))
3887                 gfx_v9_0_init_golden_registers(adev);
3888 
3889         gfx_v9_0_constants_init(adev);
3890 
3891         r = gfx_v9_0_csb_vram_pin(adev);
3892         if (r)
3893                 return r;
3894 
3895         r = adev->gfx.rlc.funcs->resume(adev);
3896         if (r)
3897                 return r;
3898 
3899         r = gfx_v9_0_cp_resume(adev);
3900         if (r)
3901                 return r;
3902 
3903         if (adev->asic_type != CHIP_ARCTURUS) {
3904                 r = gfx_v9_0_ngg_en(adev);
3905                 if (r)
3906                         return r;
3907         }
3908 
3909         return r;
3910 }
3911 
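     /*
      * Unmap all KCQs by submitting one UNMAP_QUEUES packet (RESET_QUEUES
      * action) per compute ring on the KIQ, so the CP stops accessing their
      * MQDs and write-back buffers before hw_fini tears them down.
      */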
3912 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3913 {
3914         int r, i;
3915         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3916 
3917         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3918         if (r)
3919                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3920 
3921         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3922                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3923 
3924                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3925                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3926                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3927                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3928                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3929                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3930                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3931                 amdgpu_ring_write(kiq_ring, 0);
3932                 amdgpu_ring_write(kiq_ring, 0);
3933                 amdgpu_ring_write(kiq_ring, 0);
3934         }
3935         r = amdgpu_ring_test_helper(kiq_ring);
3936         if (r)
3937                 DRM_ERROR("KCQ disable failed\n");
3938 
3939         return r;
3940 }
3941 
3942 static int gfx_v9_0_hw_fini(void *handle)
3943 {
3944         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3945 
3946         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3947         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3948         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3949 
3950         /* disable KCQs so the CPC does not keep touching memory that is no longer valid */
3951         gfx_v9_0_kcq_disable(adev);
3952 
3953         if (amdgpu_sriov_vf(adev)) {
3954                 gfx_v9_0_cp_gfx_enable(adev, false);
3955                 /* must disable wptr polling for SRIOV when hw_fini is done, otherwise
3956                  * the CPC engine may keep fetching a WB address that is no longer
3957                  * valid after sw_fini and trigger a DMAR read error on the
3958                  * hypervisor side.
3959                  */
3960                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3961                 return 0;
3962         }
3963 
3964         /* Use the deinitialize sequence from CAIL when unbinding the device
3965          * from the driver, otherwise the KIQ hangs when binding it back.
3966          */
3967         if (!adev->in_gpu_reset && !adev->in_suspend) {
3968                 mutex_lock(&adev->srbm_mutex);
3969                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3970                                 adev->gfx.kiq.ring.pipe,
3971                                 adev->gfx.kiq.ring.queue, 0);
3972                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3973                 soc15_grbm_select(adev, 0, 0, 0, 0);
3974                 mutex_unlock(&adev->srbm_mutex);
3975         }
3976 
3977         gfx_v9_0_cp_enable(adev, false);
3978         adev->gfx.rlc.funcs->stop(adev);
3979 
3980         gfx_v9_0_csb_vram_unpin(adev);
3981 
3982         return 0;
3983 }
3984 
3985 static int gfx_v9_0_suspend(void *handle)
3986 {
3987         return gfx_v9_0_hw_fini(handle);
3988 }
3989 
3990 static int gfx_v9_0_resume(void *handle)
3991 {
3992         return gfx_v9_0_hw_init(handle);
3993 }
3994 
3995 static bool gfx_v9_0_is_idle(void *handle)
3996 {
3997         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3998 
3999         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4000                                 GRBM_STATUS, GUI_ACTIVE))
4001                 return false;
4002         else
4003                 return true;
4004 }
4005 
4006 static int gfx_v9_0_wait_for_idle(void *handle)
4007 {
4008         unsigned i;
4009         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4010 
4011         for (i = 0; i < adev->usec_timeout; i++) {
4012                 if (gfx_v9_0_is_idle(handle))
4013                         return 0;
4014                 udelay(1);
4015         }
4016         return -ETIMEDOUT;
4017 }
4018 
4019 static int gfx_v9_0_soft_reset(void *handle)
4020 {
4021         u32 grbm_soft_reset = 0;
4022         u32 tmp;
4023         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4024 
4025         /* GRBM_STATUS */
4026         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4027         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4028                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4029                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4030                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4031                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4032                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4033                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4034                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4035                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4036                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4037         }
4038 
4039         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4040                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4041                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4042         }
4043 
4044         /* GRBM_STATUS2 */
4045         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4046         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4047                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4048                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4049 
4050 
4051         if (grbm_soft_reset) {
4052                 /* stop the rlc */
4053                 adev->gfx.rlc.funcs->stop(adev);
4054 
4055                 if (adev->asic_type != CHIP_ARCTURUS)
4056                         /* Disable GFX parsing/prefetching */
4057                         gfx_v9_0_cp_gfx_enable(adev, false);
4058 
4059                 /* Disable MEC parsing/prefetching */
4060                 gfx_v9_0_cp_compute_enable(adev, false);
4061 
4063                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4064                 tmp |= grbm_soft_reset;
4065                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4066                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4067                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4068 
4069                 udelay(50);
4070 
4071                 tmp &= ~grbm_soft_reset;
4072                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4073                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4075 
4076                 /* Wait a little for things to settle down */
4077                 udelay(50);
4078         }
4079         return 0;
4080 }
4081 
4082 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4083 {
4084         uint64_t clock;
4085 
4086         amdgpu_gfx_off_ctrl(adev, false);
4087         mutex_lock(&adev->gfx.gpu_clock_mutex);
4088         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4089         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4090                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4091         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4092         amdgpu_gfx_off_ctrl(adev, true);
4093         return clock;
4094 }
4095 
4096 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4097                                           uint32_t vmid,
4098                                           uint32_t gds_base, uint32_t gds_size,
4099                                           uint32_t gws_base, uint32_t gws_size,
4100                                           uint32_t oa_base, uint32_t oa_size)
4101 {
4102         struct amdgpu_device *adev = ring->adev;
4103 
4104         /* GDS Base */
4105         gfx_v9_0_write_data_to_reg(ring, 0, false,
4106                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4107                                    gds_base);
4108 
4109         /* GDS Size */
4110         gfx_v9_0_write_data_to_reg(ring, 0, false,
4111                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4112                                    gds_size);
4113 
4114         /* GWS */
4115         gfx_v9_0_write_data_to_reg(ring, 0, false,
4116                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4117                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4118 
4119         /* OA */
4120         gfx_v9_0_write_data_to_reg(ring, 0, false,
4121                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4122                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4123 }
4124 
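     /*
      * Hand-assembled GFX9 compute shaders used by the EDC/ECC GPR workaround
      * below: the first initializes the VGPRs, the second the SGPRs, so every
      * register bank starts from a known state before RAS error counting.
      */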
4125 static const u32 vgpr_init_compute_shader[] =
4126 {
4127         0xb07c0000, 0xbe8000ff,
4128         0x000000f8, 0xbf110800,
4129         0x7e000280, 0x7e020280,
4130         0x7e040280, 0x7e060280,
4131         0x7e080280, 0x7e0a0280,
4132         0x7e0c0280, 0x7e0e0280,
4133         0x80808800, 0xbe803200,
4134         0xbf84fff5, 0xbf9c0000,
4135         0xd28c0001, 0x0001007f,
4136         0xd28d0001, 0x0002027e,
4137         0x10020288, 0xb8810904,
4138         0xb7814000, 0xd1196a01,
4139         0x00000301, 0xbe800087,
4140         0xbefc00c1, 0xd89c4000,
4141         0x00020201, 0xd89cc080,
4142         0x00040401, 0x320202ff,
4143         0x00000800, 0x80808100,
4144         0xbf84fff8, 0x7e020280,
4145         0xbf810000, 0x00000000,
4146 };
4147 
4148 static const u32 sgpr_init_compute_shader[] =
4149 {
4150         0xb07c0000, 0xbe8000ff,
4151         0x0000005f, 0xbee50080,
4152         0xbe812c65, 0xbe822c65,
4153         0xbe832c65, 0xbe842c65,
4154         0xbe852c65, 0xb77c0005,
4155         0x80808500, 0xbf84fff8,
4156         0xbe800080, 0xbf810000,
4157 };
4158 
4159 static const struct soc15_reg_entry vgpr_init_regs[] = {
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4164    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4165    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4166    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4167    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4168    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4169    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4170 };
4171 
4172 static const struct soc15_reg_entry sgpr_init_regs[] = {
4173    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4174    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4175    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4176    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4177    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4178    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4179    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4180    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4181    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4182    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4183 };
4184 
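     /*
      * SEC/DED error counter registers.  The trailing { value, se_num,
      * instance } fields are walked by the read-back loop in
      * gfx_v9_0_do_edc_gpr_workarounds() to visit every shader-engine and
      * instance copy of each counter.
      */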
4185 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4186    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4187    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4188    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4189    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4190    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4191    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4194    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4195    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4196    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4197    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4198    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4199    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4200    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4201    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4202    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4203    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4204    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4205    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4206    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4207    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4208    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4209    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4210    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4211    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4212    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4213    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4214    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4215    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4216    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4217    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4218 };
4219 
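     /*
      * GDS EDC workaround: DMA known data across the entire GDS space so its
      * ECC state is initialized before the RAS feature is enabled.
      */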
4220 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4221 {
4222         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4223         int i, r;
4224 
4225         r = amdgpu_ring_alloc(ring, 7);
4226         if (r) {
4227                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4228                         ring->name, r);
4229                 return r;
4230         }
4231 
4232         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4233         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4234 
4235         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4236         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4237                                 PACKET3_DMA_DATA_DST_SEL(1) |
4238                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4239                                 PACKET3_DMA_DATA_ENGINE(0)));
4240         amdgpu_ring_write(ring, 0);
4241         amdgpu_ring_write(ring, 0);
4242         amdgpu_ring_write(ring, 0);
4243         amdgpu_ring_write(ring, 0);
4244         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4245                                 adev->gds.gds_size);
4246 
4247         amdgpu_ring_commit(ring);
4248 
4249         for (i = 0; i < adev->usec_timeout; i++) {
4250                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4251                         break;
4252                 udelay(1);
4253         }
4254 
4255         if (i >= adev->usec_timeout)
4256                 r = -ETIMEDOUT;
4257 
4258         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4259 
4260         return r;
4261 }
4262 
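     /*
      * GPR EDC workaround: build an IB that dispatches the VGPR and SGPR init
      * shaders above across all CUs, waits for completion, then reads back
      * every SEC/DED counter once to clear any stale error counts.
      */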
4263 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4264 {
4265         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4266         struct amdgpu_ib ib;
4267         struct dma_fence *f = NULL;
4268         int r, i, j, k;
4269         unsigned total_size, vgpr_offset, sgpr_offset;
4270         u64 gpu_addr;
4271 
4272         /* only supported when RAS is enabled */
4273         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4274                 return 0;
4275 
4276         /* bail if the compute ring is not ready */
4277         if (!ring->sched.ready)
4278                 return 0;
4279 
4280         total_size =
4281                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4282         total_size +=
4283                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4284         total_size = ALIGN(total_size, 256);
4285         vgpr_offset = total_size;
4286         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4287         sgpr_offset = total_size;
4288         total_size += sizeof(sgpr_init_compute_shader);
4289 
4290         /* allocate an indirect buffer to put the commands in */
4291         memset(&ib, 0, sizeof(ib));
4292         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4293         if (r) {
4294                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4295                 return r;
4296         }
4297 
4298         /* load the compute shaders */
4299         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4300                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4301 
4302         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4303                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4304 
4305         /* init the ib length to 0 */
4306         ib.length_dw = 0;
4307 
4308         /* VGPR */
4309         /* write the register state for the compute dispatch */
4310         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4311                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4312                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4313                                                                 - PACKET3_SET_SH_REG_START;
4314                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4315         }
4316         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4317         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4318         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4319         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4320                                                         - PACKET3_SET_SH_REG_START;
4321         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4322         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4323 
4324         /* write dispatch packet */
4325         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4326         ib.ptr[ib.length_dw++] = 128; /* x */
4327         ib.ptr[ib.length_dw++] = 1; /* y */
4328         ib.ptr[ib.length_dw++] = 1; /* z */
4329         ib.ptr[ib.length_dw++] =
4330                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4331 
4332         /* write CS partial flush packet */
4333         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4334         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4335 
4336         /* SGPR */
4337         /* write the register state for the compute dispatch */
4338         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4339                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4340                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4341                                                                 - PACKET3_SET_SH_REG_START;
4342                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4343         }
4344         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4345         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4346         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4347         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4348                                                         - PACKET3_SET_SH_REG_START;
4349         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4350         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4351 
4352         /* write dispatch packet */
4353         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4354         ib.ptr[ib.length_dw++] = 128; /* x */
4355         ib.ptr[ib.length_dw++] = 1; /* y */
4356         ib.ptr[ib.length_dw++] = 1; /* z */
4357         ib.ptr[ib.length_dw++] =
4358                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4359 
4360         /* write CS partial flush packet */
4361         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4362         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4363 
4364         /* schedule the ib on the ring */
4365         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4366         if (r) {
4367                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4368                 goto fail;
4369         }
4370 
4371         /* wait for the GPU to finish processing the IB */
4372         r = dma_fence_wait(f, false);
4373         if (r) {
4374                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4375                 goto fail;
4376         }
4377 
4378         /* read back registers to clear the counters */
4379         mutex_lock(&adev->grbm_idx_mutex);
4380         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4381                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4382                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4383                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4384                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4385                         }
4386                 }
4387         }
4388         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4389         mutex_unlock(&adev->grbm_idx_mutex);
4390 
4391 fail:
4392         amdgpu_ib_free(adev, &ib, NULL);
4393         dma_fence_put(f);
4394 
4395         return r;
4396 }
4397 
4398 static int gfx_v9_0_early_init(void *handle)
4399 {
4400         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4401 
4402         if (adev->asic_type == CHIP_ARCTURUS)
4403                 adev->gfx.num_gfx_rings = 0;
4404         else
4405                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4406         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4407         gfx_v9_0_set_ring_funcs(adev);
4408         gfx_v9_0_set_irq_funcs(adev);
4409         gfx_v9_0_set_gds_init(adev);
4410         gfx_v9_0_set_rlc_funcs(adev);
4411 
4412         return 0;
4413 }
4414 
4415 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4416                 struct ras_err_data *err_data,
4417                 struct amdgpu_iv_entry *entry);
4418 
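     /*
      * Late ECC/RAS init: run the GDS and GPR EDC workarounds (they need IBs,
      * hence late init), then enable the GFX RAS block and hook up its
      * interrupt handler, debugfs and sysfs entries, or re-enable it on the
      * resume path.
      */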
4419 static int gfx_v9_0_ecc_late_init(void *handle)
4420 {
4421         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4422         struct ras_common_if **ras_if = &adev->gfx.ras_if;
4423         struct ras_ih_if ih_info = {
4424                 .cb = gfx_v9_0_process_ras_data_cb,
4425         };
4426         struct ras_fs_if fs_info = {
4427                 .sysfs_name = "gfx_err_count",
4428                 .debugfs_name = "gfx_err_inject",
4429         };
4430         struct ras_common_if ras_block = {
4431                 .block = AMDGPU_RAS_BLOCK__GFX,
4432                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4433                 .sub_block_index = 0,
4434                 .name = "gfx",
4435         };
4436         int r;
4437 
4438         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4439                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4440                 return 0;
4441         }
4442 
4443         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4444         if (r)
4445                 return r;
4446 
4447         /* requires IBs so do in late init after IB pool is initialized */
4448         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4449         if (r)
4450                 return r;
4451 
4452         /* handle resume path. */
4453         if (*ras_if) {
4454                 /* re-send the RAS TA enable cmd during resume;
4455                  * be prepared to handle failure.
4456                  */
4457                 ih_info.head = **ras_if;
4458                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4459                 if (r) {
4460                         if (r == -EAGAIN) {
4461                                 /* request a gpu reset. will run again. */
4462                                 amdgpu_ras_request_reset_on_boot(adev,
4463                                                 AMDGPU_RAS_BLOCK__GFX);
4464                                 return 0;
4465                         }
4466                         /* fail to enable ras, cleanup all. */
4467                         goto irq;
4468                 }
4469                 /* enable successfully. continue. */
4470                 goto resume;
4471         }
4472 
4473         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4474         if (!*ras_if)
4475                 return -ENOMEM;
4476 
4477         **ras_if = ras_block;
4478 
4479         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4480         if (r) {
4481                 if (r == -EAGAIN) {
4482                         amdgpu_ras_request_reset_on_boot(adev,
4483                                         AMDGPU_RAS_BLOCK__GFX);
4484                         r = 0;
4485                 }
4486                 goto feature;
4487         }
4488 
4489         ih_info.head = **ras_if;
4490         fs_info.head = **ras_if;
4491 
4492         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4493         if (r)
4494                 goto interrupt;
4495 
4496         amdgpu_ras_debugfs_create(adev, &fs_info);
4497 
4498         r = amdgpu_ras_sysfs_create(adev, &fs_info);
4499         if (r)
4500                 goto sysfs;
4501 resume:
4502         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4503         if (r)
4504                 goto irq;
4505 
4506         return 0;
4507 irq:
4508         amdgpu_ras_sysfs_remove(adev, *ras_if);
4509 sysfs:
4510         amdgpu_ras_debugfs_remove(adev, *ras_if);
4511         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4512 interrupt:
4513         amdgpu_ras_feature_enable(adev, *ras_if, 0);
4514 feature:
4515         kfree(*ras_if);
4516         *ras_if = NULL;
4517         return r;
4518 }
4519 
4520 static int gfx_v9_0_late_init(void *handle)
4521 {
4522         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4523         int r;
4524 
4525         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4526         if (r)
4527                 return r;
4528 
4529         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4530         if (r)
4531                 return r;
4532 
4533         r = gfx_v9_0_ecc_late_init(handle);
4534         if (r)
4535                 return r;
4536 
4537         return 0;
4538 }
4539 
4540 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4541 {
4542         uint32_t rlc_setting;
4543 
4544         /* if RLC is not enabled, do nothing */
4545         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4546         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4547                 return false;
4548 
4549         return true;
4550 }
4551 
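     /*
      * Ask the RLC to enter safe mode: write the CMD + MESSAGE handshake to
      * RLC_SAFE_MODE and poll until the RLC acknowledges by clearing the CMD
      * field.
      */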
4552 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4553 {
4554         uint32_t data;
4555         unsigned i;
4556 
4557         data = RLC_SAFE_MODE__CMD_MASK;
4558         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4559         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4560 
4561         /* wait for RLC_SAFE_MODE */
4562         for (i = 0; i < adev->usec_timeout; i++) {
4563                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4564                         break;
4565                 udelay(1);
4566         }
4567 }
4568 
4569 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4570 {
4571         uint32_t data;
4572 
4573         data = RLC_SAFE_MODE__CMD_MASK;
4574         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4575 }
4576 
4577 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4578                                                 bool enable)
4579 {
4580         amdgpu_gfx_rlc_enter_safe_mode(adev);
4581 
4582         if (is_support_sw_smu(adev) && !enable)
4583                 smu_set_gfx_cgpg(&adev->smu, enable);
4584 
4585         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4586                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4587                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4588                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4589         } else {
4590                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4591                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4592         }
4593 
4594         amdgpu_gfx_rlc_exit_safe_mode(adev);
4595 }
4596 
4597 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4598                                                 bool enable)
4599 {
4600         /* TODO: double check if we need to perform under safe mode */
4601         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4602 
4603         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4604                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4605         else
4606                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4607 
4608         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4609                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4610         else
4611                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4612 
4613         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4614 }
4615 
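     /*
      * Medium-grain clock gating: clear (to enable) or set (to disable) the
      * MGCG/MGLS override bits in RLC_CGTT_MGCG_OVERRIDE and toggle the RLC
      * and CP memory light-sleep controls to match.
      */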
4616 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4617                                                       bool enable)
4618 {
4619         uint32_t data, def;
4620 
4621         amdgpu_gfx_rlc_enter_safe_mode(adev);
4622 
4623         /* It is disabled by HW by default */
4624         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4625                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4626                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4627 
4628                 if (adev->asic_type != CHIP_VEGA12)
4629                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4630 
4631                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4632                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4633                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4634 
4635                 /* only for Vega10 & Raven1 */
4636                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4637 
4638                 if (def != data)
4639                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4640 
4641                 /* MGLS is a global flag to control all MGLS in GFX */
4642                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4643                         /* 2 - RLC memory Light sleep */
4644                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4645                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4646                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4647                                 if (def != data)
4648                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4649                         }
4650                         /* 3 - CP memory Light sleep */
4651                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4652                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4653                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4654                                 if (def != data)
4655                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4656                         }
4657                 }
4658         } else {
4659                 /* 1 - MGCG_OVERRIDE */
4660                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4661 
4662                 if (adev->asic_type != CHIP_VEGA12)
4663                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4664 
4665                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4666                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4667                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4668                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4669 
4670                 if (def != data)
4671                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4672 
4673                 /* 2 - disable MGLS in RLC */
4674                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4675                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4676                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4677                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4678                 }
4679 
4680                 /* 3 - disable MGLS in CP */
4681                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4682                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4683                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4684                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4685                 }
4686         }
4687 
4688         amdgpu_gfx_rlc_exit_safe_mode(adev);
4689 }
4690 
4691 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4692                                            bool enable)
4693 {
4694         uint32_t data, def;
4695 
4696         if (adev->asic_type == CHIP_ARCTURUS)
4697                 return;
4698 
4699         amdgpu_gfx_rlc_enter_safe_mode(adev);
4700 
4701         /* Enable 3D CGCG/CGLS */
4702         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4703                 /* write cmd to clear cgcg/cgls ov */
4704                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4705                 /* unset CGCG override */
4706                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4707                 /* update CGCG and CGLS override bits */
4708                 if (def != data)
4709                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4710 
4711                 /* enable 3Dcgcg FSM(0x0000363f) */
4712                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4713 
4714                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4715                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4716                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4717                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4718                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4719                 if (def != data)
4720                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4721 
4722                 /* set IDLE_POLL_COUNT(0x00900100) */
4723                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4724                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4725                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4726                 if (def != data)
4727                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4728         } else {
4729                 /* Disable CGCG/CGLS */
4730                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4731                 /* disable cgcg, cgls should be disabled */
4732                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4733                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4734                 /* disable cgcg and cgls in FSM */
4735                 if (def != data)
4736                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4737         }
4738 
4739         amdgpu_gfx_rlc_exit_safe_mode(adev);
4740 }
4741 
4742 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4743                                                       bool enable)
4744 {
4745         uint32_t def, data;
4746 
4747         amdgpu_gfx_rlc_enter_safe_mode(adev);
4748 
4749         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4750                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4751                 /* unset CGCG override */
4752                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4753                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4754                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4755                 else
4756                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4757                 /* update CGCG and CGLS override bits */
4758                 if (def != data)
4759                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4760 
4761                 /* enable cgcg FSM(0x0000363F) */
4762                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4763 
4764                 if (adev->asic_type == CHIP_ARCTURUS)
4765                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4766                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4767                 else
4768                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4769                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4770                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4771                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4772                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4773                 if (def != data)
4774                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4775 
4776                 /* set IDLE_POLL_COUNT(0x00900100) */
4777                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4778                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4779                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4780                 if (def != data)
4781                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4782         } else {
4783                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4784                 /* reset CGCG/CGLS bits */
4785                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4786                 /* disable cgcg and cgls in FSM */
4787                 if (def != data)
4788                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4789         }
4790 
4791         amdgpu_gfx_rlc_exit_safe_mode(adev);
4792 }
4793 
4794 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4795                                             bool enable)
4796 {
4797         if (enable) {
4798                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4799                  * ===  MGCG + MGLS ===
4800                  */
4801                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4802                 /* ===  CGCG /CGLS for GFX 3D Only === */
4803                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4804                 /* ===  CGCG + CGLS === */
4805                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4806         } else {
4807                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4808                  * ===  CGCG + CGLS ===
4809                  */
4810                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4811                 /* ===  CGCG /CGLS for GFX 3D Only === */
4812                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4813                 /* ===  MGCG + MGLS === */
4814                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4815         }
4816         return 0;
4817 }
4818 
4819 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4820         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4821         .set_safe_mode = gfx_v9_0_set_safe_mode,
4822         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4823         .init = gfx_v9_0_rlc_init,
4824         .get_csb_size = gfx_v9_0_get_csb_size,
4825         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4826         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4827         .resume = gfx_v9_0_rlc_resume,
4828         .stop = gfx_v9_0_rlc_stop,
4829         .reset = gfx_v9_0_rlc_reset,
4830         .start = gfx_v9_0_rlc_start
4831 };
4832 
4833 static int gfx_v9_0_set_powergating_state(void *handle,
4834                                           enum amd_powergating_state state)
4835 {
4836         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837         bool enable = (state == AMD_PG_STATE_GATE);
4838 
4839         switch (adev->asic_type) {
4840         case CHIP_RAVEN:
4841         case CHIP_RENOIR:
4842                 if (!enable)
4843                         amdgpu_gfx_off_ctrl(adev, false);
4844 
4845                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4846                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4847                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4848                 } else {
4849                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4850                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4851                 }
4852 
4853                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4854                         gfx_v9_0_enable_cp_power_gating(adev, true);
4855                 else
4856                         gfx_v9_0_enable_cp_power_gating(adev, false);
4857 
4858                 /* update gfx cgpg state */
4859                 if (is_support_sw_smu(adev) && enable)
4860                         smu_set_gfx_cgpg(&adev->smu, enable);
4861                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4862 
4863                 /* update mgcg state */
4864                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4865 
4866                 if (enable)
4867                         amdgpu_gfx_off_ctrl(adev, true);
4868                 break;
4869         case CHIP_VEGA12:
4870                 amdgpu_gfx_off_ctrl(adev, enable);
4871                 break;
4872         default:
4873                 break;
4874         }
4875 
4876         return 0;
4877 }
4878 
4879 static int gfx_v9_0_set_clockgating_state(void *handle,
4880                                           enum amd_clockgating_state state)
4881 {
4882         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4883 
4884         if (amdgpu_sriov_vf(adev))
4885                 return 0;
4886 
4887         switch (adev->asic_type) {
4888         case CHIP_VEGA10:
4889         case CHIP_VEGA12:
4890         case CHIP_VEGA20:
4891         case CHIP_RAVEN:
4892         case CHIP_ARCTURUS:
4893         case CHIP_RENOIR:
4894                 gfx_v9_0_update_gfx_clock_gating(adev,
4895                                                  state == AMD_CG_STATE_GATE);
4896                 break;
4897         default:
4898                 break;
4899         }
4900         return 0;
4901 }
4902 
4903 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4904 {
4905         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4906         int data;
4907 
4908         if (amdgpu_sriov_vf(adev))
4909                 *flags = 0;
4910 
4911         /* AMD_CG_SUPPORT_GFX_MGCG */
4912         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4913         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4914                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4915 
4916         /* AMD_CG_SUPPORT_GFX_CGCG */
4917         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4918         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4919                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4920 
4921         /* AMD_CG_SUPPORT_GFX_CGLS */
4922         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4923                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4924 
4925         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4926         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4927         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4928                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4929 
4930         /* AMD_CG_SUPPORT_GFX_CP_LS */
4931         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4932         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4933                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4934 
4935         if (adev->asic_type != CHIP_ARCTURUS) {
4936                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4937                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4938                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4939                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4940 
4941                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4942                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4943                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4944         }
4945 }
4946 
4947 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4948 {
4949         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4950 }
4951 
4952 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4953 {
4954         struct amdgpu_device *adev = ring->adev;
4955         u64 wptr;
4956 
4957         /* XXX check if swapping is necessary on BE */
4958         if (ring->use_doorbell) {
4959                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4960         } else {
4961                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4962                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4963         }
4964 
4965         return wptr;
4966 }
4967 
4968 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4969 {
4970         struct amdgpu_device *adev = ring->adev;
4971 
4972         if (ring->use_doorbell) {
4973                 /* XXX check if swapping is necessary on BE */
4974                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4975                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4976         } else {
4977                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4978                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4979         }
4980 }
4981 
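/*
 * Emit an HDP flush on the ring: request the flush through the NBIO HDP
 * flush request register and poll the done register until the bit selected
 * by ref_and_mask (one bit per CP engine/pipe) reads back set.
 */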
4982 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4983 {
4984         struct amdgpu_device *adev = ring->adev;
4985         u32 ref_and_mask, reg_mem_engine;
4986         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4987 
4988         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4989                 switch (ring->me) {
4990                 case 1:
4991                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4992                         break;
4993                 case 2:
4994                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4995                         break;
4996                 default:
4997                         return;
4998                 }
4999                 reg_mem_engine = 0;
5000         } else {
5001                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5002                 reg_mem_engine = 1; /* pfp */
5003         }
5004 
5005         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5006                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
5007                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
5008                               ref_and_mask, ref_and_mask, 0x20);
5009 }
5010 
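/*
 * Emit a gfx indirect buffer: an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST
 * for CE IBs) packet carrying the dword-aligned IB address, with the IB
 * length and the VMID (bits 31:24) packed into the control word.
 */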
5011 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5012                                         struct amdgpu_job *job,
5013                                         struct amdgpu_ib *ib,
5014                                         uint32_t flags)
5015 {
5016         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5017         u32 header, control = 0;
5018 
5019         if (ib->flags & AMDGPU_IB_FLAG_CE)
5020                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5021         else
5022                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5023 
5024         control |= ib->length_dw | (vmid << 24);
5025 
5026         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5027                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5028 
5029                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5030                         gfx_v9_0_ring_emit_de_meta(ring);
5031         }
5032 
5033         amdgpu_ring_write(ring, header);
5034         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5035         amdgpu_ring_write(ring,
5036 #ifdef __BIG_ENDIAN
5037                 (2 << 0) |
5038 #endif
5039                 lower_32_bits(ib->gpu_addr));
5040         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5041         amdgpu_ring_write(ring, control);
5042 }
5043 
5044 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5045                                           struct amdgpu_job *job,
5046                                           struct amdgpu_ib *ib,
5047                                           uint32_t flags)
5048 {
5049         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5050         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5051 
5052         /* Currently, there is a high possibility to get wave ID mismatch
5053          * between ME and GDS, leading to a hw deadlock, because ME generates
5054          * different wave IDs than the GDS expects. This situation happens
5055          * randomly when at least 5 compute pipes use GDS ordered append.
5056          * The wave IDs generated by ME are also wrong after suspend/resume.
5057          * Those are probably bugs somewhere else in the kernel driver.
5058          *
5059          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5060          * GDS to 0 for this ring (me/pipe).
5061          */
5062         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5063                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5064                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5065                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5066         }
5067 
5068         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5069         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5070         amdgpu_ring_write(ring,
5071 #ifdef __BIG_ENDIAN
5072                                 (2 << 0) |
5073 #endif
5074                                 lower_32_bits(ib->gpu_addr));
5075         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5076         amdgpu_ring_write(ring, control);
5077 }
5078 
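/*
 * Emit a fence via RELEASE_MEM: flush/invalidate the TC caches on an
 * end-of-pipe event, then write the 32- or 64-bit sequence number to
 * "addr" and optionally raise an interrupt, as selected by "flags".
 */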
5079 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5080                                      u64 seq, unsigned flags)
5081 {
5082         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5083         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5084         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5085 
5086         /* RELEASE_MEM - flush caches, send int */
5087         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5088         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5089                                                EOP_TC_NC_ACTION_EN) :
5090                                               (EOP_TCL1_ACTION_EN |
5091                                                EOP_TC_ACTION_EN |
5092                                                EOP_TC_WB_ACTION_EN |
5093                                                EOP_TC_MD_ACTION_EN)) |
5094                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5095                                  EVENT_INDEX(5)));
5096         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5097 
5098         /*
5099          * The address should be qword aligned for a 64bit write, and dword
5100          * aligned when only the low 32bit data is sent (data high is discarded).
5101          */
5102         if (write64bit)
5103                 BUG_ON(addr & 0x7);
5104         else
5105                 BUG_ON(addr & 0x3);
5106         amdgpu_ring_write(ring, lower_32_bits(addr));
5107         amdgpu_ring_write(ring, upper_32_bits(addr));
5108         amdgpu_ring_write(ring, lower_32_bits(seq));
5109         amdgpu_ring_write(ring, upper_32_bits(seq));
5110         amdgpu_ring_write(ring, 0);
5111 }
5112 
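/*
 * Emit a pipeline sync: wait (in the PFP for gfx rings, in the ME for
 * compute) until the ring's own fence memory reaches the latest emitted
 * sequence number, so previously submitted work has completed.
 */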
5113 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5114 {
5115         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5116         uint32_t seq = ring->fence_drv.sync_seq;
5117         uint64_t addr = ring->fence_drv.gpu_addr;
5118 
5119         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5120                               lower_32_bits(addr), upper_32_bits(addr),
5121                               seq, 0xffffffff, 4);
5122 }
5123 
5124 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5125                                         unsigned vmid, uint64_t pd_addr)
5126 {
5127         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5128 
5129         /* compute doesn't have PFP */
5130         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5131                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5132                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5133                 amdgpu_ring_write(ring, 0x0);
5134         }
5135 }
5136 
5137 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5138 {
5139         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5140 }
5141 
5142 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5143 {
5144         u64 wptr;
5145 
5146         /* XXX check if swapping is necessary on BE */
5147         if (ring->use_doorbell)
5148                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5149         else
5150                 BUG();
5151         return wptr;
5152 }
5153 
5154 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5155                                            bool acquire)
5156 {
5157         struct amdgpu_device *adev = ring->adev;
5158         int pipe_num, tmp, reg;
5159         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5160 
5161         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5162 
5163         /* first me only has 2 entries, GFX and HP3D */
5164         if (ring->me > 0)
5165                 pipe_num -= 2;
5166 
5167         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5168         tmp = RREG32(reg);
5169         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5170         WREG32(reg, tmp);
5171 }
5172 
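/*
 * Track per-pipe reservations in a bitmap: while any pipe holds a
 * reservation, pipes without one have their SPI_WCL_PIPE_PERCENT throttled
 * to the minimum; once the bitmap is empty, all gfx and compute pipes are
 * restored to full percent.
 */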
5173 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5174                                             struct amdgpu_ring *ring,
5175                                             bool acquire)
5176 {
5177         int i, pipe;
5178         bool reserve;
5179         struct amdgpu_ring *iring;
5180 
5181         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5182         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5183         if (acquire)
5184                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5185         else
5186                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5187 
5188         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5189                 /* Clear all reservations - everyone reacquires all resources */
5190                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5191                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5192                                                        true);
5193 
5194                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5195                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5196                                                        true);
5197         } else {
5198                 /* Lower all pipes without a current reservation */
5199                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5200                         iring = &adev->gfx.gfx_ring[i];
5201                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5202                                                            iring->me,
5203                                                            iring->pipe,
5204                                                            0);
5205                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5206                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5207                 }
5208 
5209                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5210                         iring = &adev->gfx.compute_ring[i];
5211                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5212                                                            iring->me,
5213                                                            iring->pipe,
5214                                                            0);
5215                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5216                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5217                 }
5218         }
5219 
5220         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5221 }
5222 
5223 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5224                                       struct amdgpu_ring *ring,
5225                                       bool acquire)
5226 {
5227         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5228         uint32_t queue_priority = acquire ? 0xf : 0x0;
5229 
5230         mutex_lock(&adev->srbm_mutex);
5231         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5232 
5233         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5234         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5235 
5236         soc15_grbm_select(adev, 0, 0, 0, 0);
5237         mutex_unlock(&adev->srbm_mutex);
5238 }
5239 
5240 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5241                                                enum drm_sched_priority priority)
5242 {
5243         struct amdgpu_device *adev = ring->adev;
5244         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5245 
5246         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5247                 return;
5248 
5249         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5250         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5251 }
5252 
5253 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5254 {
5255         struct amdgpu_device *adev = ring->adev;
5256 
5257         /* XXX check if swapping is necessary on BE */
5258         if (ring->use_doorbell) {
5259                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5260                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5261         } else {
5262                 BUG(); /* only DOORBELL method supported on gfx9 now */
5263         }
5264 }
5265 
5266 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5267                                          u64 seq, unsigned int flags)
5268 {
5269         struct amdgpu_device *adev = ring->adev;
5270 
5271         /* we only allocate 32bit for each seq wb address */
5272         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5273 
5274         /* write fence seq to the "addr" */
5275         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5276         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5277                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5278         amdgpu_ring_write(ring, lower_32_bits(addr));
5279         amdgpu_ring_write(ring, upper_32_bits(addr));
5280         amdgpu_ring_write(ring, lower_32_bits(seq));
5281 
5282         if (flags & AMDGPU_FENCE_FLAG_INT) {
5283                 /* set register to trigger INT */
5284                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5285                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5286                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5287                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5288                 amdgpu_ring_write(ring, 0);
5289                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5290         }
5291 }
5292 
5293 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5294 {
5295         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5296         amdgpu_ring_write(ring, 0);
5297 }
5298 
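/*
 * Write the CE (and, in gfx_v9_0_ring_emit_de_meta(), DE) metadata payloads
 * into the context save area for this ring; presumably this is what allows
 * a preempted IB to be resumed, and it is only emitted in the
 * SR-IOV/preemption paths.
 */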
5299 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5300 {
5301         struct v9_ce_ib_state ce_payload = {0};
5302         uint64_t csa_addr;
5303         int cnt;
5304 
5305         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5306         csa_addr = amdgpu_csa_vaddr(ring->adev);
5307 
5308         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5309         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5310                                  WRITE_DATA_DST_SEL(8) |
5311                                  WR_CONFIRM) |
5312                                  WRITE_DATA_CACHE_POLICY(0));
5313         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5314         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5315         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5316 }
5317 
5318 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5319 {
5320         struct v9_de_ib_state de_payload = {0};
5321         uint64_t csa_addr, gds_addr;
5322         int cnt;
5323 
5324         csa_addr = amdgpu_csa_vaddr(ring->adev);
5325         gds_addr = csa_addr + 4096;
5326         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5327         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5328 
5329         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5330         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5331         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5332                                  WRITE_DATA_DST_SEL(8) |
5333                                  WR_CONFIRM) |
5334                                  WRITE_DATA_CACHE_POLICY(0));
5335         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5336         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5337         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5338 }
5339 
5340 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5341 {
5342         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5343         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
5344 }
5345 
5346 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5347 {
5348         uint32_t dw2 = 0;
5349 
5350         if (amdgpu_sriov_vf(ring->adev))
5351                 gfx_v9_0_ring_emit_ce_meta(ring);
5352 
5353         gfx_v9_0_ring_emit_tmz(ring, true);
5354 
5355         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5356         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5357                 /* set load_global_config & load_global_uconfig */
5358                 dw2 |= 0x8001;
5359                 /* set load_cs_sh_regs */
5360                 dw2 |= 0x01000000;
5361                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5362                 dw2 |= 0x10002;
5363 
5364                 /* set load_ce_ram if a preamble is present */
5365                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5366                         dw2 |= 0x10000000;
5367         } else {
5368                 /* still load_ce_ram if this is the first time a preamble is
5369                  * presented, even though no context switch happens.
5370                  */
5371                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5372                         dw2 |= 0x10000000;
5373         }
5374 
5375         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5376         amdgpu_ring_write(ring, dw2);
5377         amdgpu_ring_write(ring, 0);
5378 }
5379 
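/*
 * COND_EXEC support: emit the packet with a placeholder skip count and
 * return its ring offset; gfx_v9_0_ring_emit_patch_cond_exec() later
 * overwrites that dword with the real number of dwords to skip, accounting
 * for ring-buffer wraparound.
 */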
5380 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5381 {
5382         unsigned ret;
5383         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5384         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5385         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5386         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5387         ret = ring->wptr & ring->buf_mask;
5388         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5389         return ret;
5390 }
5391 
5392 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5393 {
5394         unsigned cur;
5395         BUG_ON(offset > ring->buf_mask);
5396         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5397 
5398         cur = (ring->wptr & ring->buf_mask) - 1;
5399         if (likely(cur > offset))
5400                 ring->ring[offset] = cur - offset;
5401         else
5402                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5403 }
5404 
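/*
 * Read a register from the ring: COPY_DATA from the register to the
 * writeback buffer slot at adev->virt.reg_val_offs, from where the caller
 * (apparently the virtualization/KIQ register-read path) picks up the value.
 */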
5405 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5406 {
5407         struct amdgpu_device *adev = ring->adev;
5408 
5409         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5410         amdgpu_ring_write(ring, 0 |     /* src: register*/
5411                                 (5 << 8) |      /* dst: memory */
5412                                 (1 << 20));     /* write confirm */
5413         amdgpu_ring_write(ring, reg);
5414         amdgpu_ring_write(ring, 0);
5415         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5416                                 adev->virt.reg_val_offs * 4));
5417         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5418                                 adev->virt.reg_val_offs * 4));
5419 }
5420 
5421 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5422                                     uint32_t val)
5423 {
5424         uint32_t cmd = 0;
5425 
5426         switch (ring->funcs->type) {
5427         case AMDGPU_RING_TYPE_GFX:
5428                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5429                 break;
5430         case AMDGPU_RING_TYPE_KIQ:
5431                 cmd = (1 << 16); /* no inc addr */
5432                 break;
5433         default:
5434                 cmd = WR_CONFIRM;
5435                 break;
5436         }
5437         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5438         amdgpu_ring_write(ring, cmd);
5439         amdgpu_ring_write(ring, reg);
5440         amdgpu_ring_write(ring, 0);
5441         amdgpu_ring_write(ring, val);
5442 }
5443 
5444 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5445                                         uint32_t val, uint32_t mask)
5446 {
5447         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5448 }
5449 
5450 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5451                                                   uint32_t reg0, uint32_t reg1,
5452                                                   uint32_t ref, uint32_t mask)
5453 {
5454         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5455         struct amdgpu_device *adev = ring->adev;
5456         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5457                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5458 
5459         if (fw_version_ok)
5460                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5461                                       ref, mask, 0x20);
5462         else
5463                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5464                                                            ref, mask);
5465 }
5466 
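/*
 * Soft recovery: broadcast an SQ_CMD (CMD 0x03, presumably a wave kill) to
 * every wave whose VMID matches the hung job's vmid, as a lighter
 * alternative to a full GPU reset.
 */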
5467 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5468 {
5469         struct amdgpu_device *adev = ring->adev;
5470         uint32_t value = 0;
5471 
5472         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5473         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5474         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5475         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5476         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5477 }
5478 
5479 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5480                                                  enum amdgpu_interrupt_state state)
5481 {
5482         switch (state) {
5483         case AMDGPU_IRQ_STATE_DISABLE:
5484         case AMDGPU_IRQ_STATE_ENABLE:
5485                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5486                                TIME_STAMP_INT_ENABLE,
5487                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5488                 break;
5489         default:
5490                 break;
5491         }
5492 }
5493 
5494 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5495                                                      int me, int pipe,
5496                                                      enum amdgpu_interrupt_state state)
5497 {
5498         u32 mec_int_cntl, mec_int_cntl_reg;
5499 
5500         /*
5501          * amdgpu controls only the first MEC. That's why this function only
5502          * handles the setting of interrupts for this specific MEC. All other
5503          * pipes' interrupts are set by amdkfd.
5504          */
5505 
5506         if (me == 1) {
5507                 switch (pipe) {
5508                 case 0:
5509                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5510                         break;
5511                 case 1:
5512                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5513                         break;
5514                 case 2:
5515                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5516                         break;
5517                 case 3:
5518                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5519                         break;
5520                 default:
5521                         DRM_DEBUG("invalid pipe %d\n", pipe);
5522                         return;
5523                 }
5524         } else {
5525                 DRM_DEBUG("invalid me %d\n", me);
5526                 return;
5527         }
5528 
5529         switch (state) {
5530         case AMDGPU_IRQ_STATE_DISABLE:
5531                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5532                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5533                                              TIME_STAMP_INT_ENABLE, 0);
5534                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5535                 break;
5536         case AMDGPU_IRQ_STATE_ENABLE:
5537                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5538                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5539                                              TIME_STAMP_INT_ENABLE, 1);
5540                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5541                 break;
5542         default:
5543                 break;
5544         }
5545 }
5546 
5547 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5548                                              struct amdgpu_irq_src *source,
5549                                              unsigned type,
5550                                              enum amdgpu_interrupt_state state)
5551 {
5552         switch (state) {
5553         case AMDGPU_IRQ_STATE_DISABLE:
5554         case AMDGPU_IRQ_STATE_ENABLE:
5555                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5556                                PRIV_REG_INT_ENABLE,
5557                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5558                 break;
5559         default:
5560                 break;
5561         }
5562 
5563         return 0;
5564 }
5565 
5566 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5567                                               struct amdgpu_irq_src *source,
5568                                               unsigned type,
5569                                               enum amdgpu_interrupt_state state)
5570 {
5571         switch (state) {
5572         case AMDGPU_IRQ_STATE_DISABLE:
5573         case AMDGPU_IRQ_STATE_ENABLE:
5574                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5575                                PRIV_INSTR_INT_ENABLE,
5576                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5577         default:
5578                 break;
5579         }
5580 
5581         return 0;
5582 }
5583 
5584 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5585         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5586                         CP_ECC_ERROR_INT_ENABLE, 1)
5587 
5588 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5589         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5590                         CP_ECC_ERROR_INT_ENABLE, 0)
5591 
5592 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5593                                               struct amdgpu_irq_src *source,
5594                                               unsigned type,
5595                                               enum amdgpu_interrupt_state state)
5596 {
5597         switch (state) {
5598         case AMDGPU_IRQ_STATE_DISABLE:
5599                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5600                                 CP_ECC_ERROR_INT_ENABLE, 0);
5601                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5602                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5603                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5604                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5605                 break;
5606 
5607         case AMDGPU_IRQ_STATE_ENABLE:
5608                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5609                                 CP_ECC_ERROR_INT_ENABLE, 1);
5610                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5611                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5612                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5613                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5614                 break;
5615         default:
5616                 break;
5617         }
5618 
5619         return 0;
5620 }
5621 
5622 
5623 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5624                                             struct amdgpu_irq_src *src,
5625                                             unsigned type,
5626                                             enum amdgpu_interrupt_state state)
5627 {
5628         switch (type) {
5629         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5630                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5631                 break;
5632         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5633                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5634                 break;
5635         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5636                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5637                 break;
5638         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5639                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5640                 break;
5641         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5642                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5643                 break;
5644         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5645                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5646                 break;
5647         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5648                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5649                 break;
5650         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5651                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5652                 break;
5653         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5654                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5655                 break;
5656         default:
5657                 break;
5658         }
5659         return 0;
5660 }
5661 
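/*
 * EOP interrupt handler: the IV ring_id encodes me in bits [3:2], pipe in
 * bits [1:0] and queue in bits [6:4]; the decoded tuple is matched against
 * the gfx/compute rings so their fences can be processed.
 */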
5662 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5663                             struct amdgpu_irq_src *source,
5664                             struct amdgpu_iv_entry *entry)
5665 {
5666         int i;
5667         u8 me_id, pipe_id, queue_id;
5668         struct amdgpu_ring *ring;
5669 
5670         DRM_DEBUG("IH: CP EOP\n");
5671         me_id = (entry->ring_id & 0x0c) >> 2;
5672         pipe_id = (entry->ring_id & 0x03) >> 0;
5673         queue_id = (entry->ring_id & 0x70) >> 4;
5674 
5675         switch (me_id) {
5676         case 0:
5677                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5678                 break;
5679         case 1:
5680         case 2:
5681                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5682                         ring = &adev->gfx.compute_ring[i];
5683                         /* Per-queue interrupt is supported for MEC starting from VI.
5684                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5685                          */
5686                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5687                                 amdgpu_fence_process(ring);
5688                 }
5689                 break;
5690         }
5691         return 0;
5692 }
5693 
5694 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5695                            struct amdgpu_iv_entry *entry)
5696 {
5697         u8 me_id, pipe_id, queue_id;
5698         struct amdgpu_ring *ring;
5699         int i;
5700 
5701         me_id = (entry->ring_id & 0x0c) >> 2;
5702         pipe_id = (entry->ring_id & 0x03) >> 0;
5703         queue_id = (entry->ring_id & 0x70) >> 4;
5704 
5705         switch (me_id) {
5706         case 0:
5707                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5708                 break;
5709         case 1:
5710         case 2:
5711                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5712                         ring = &adev->gfx.compute_ring[i];
5713                         if (ring->me == me_id && ring->pipe == pipe_id &&
5714                             ring->queue == queue_id)
5715                                 drm_sched_fault(&ring->sched);
5716                 }
5717                 break;
5718         }
5719 }
5720 
5721 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5722                                  struct amdgpu_irq_src *source,
5723                                  struct amdgpu_iv_entry *entry)
5724 {
5725         DRM_ERROR("Illegal register access in command stream\n");
5726         gfx_v9_0_fault(adev, entry);
5727         return 0;
5728 }
5729 
5730 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5731                                   struct amdgpu_irq_src *source,
5732                                   struct amdgpu_iv_entry *entry)
5733 {
5734         DRM_ERROR("Illegal instruction in command stream\n");
5735         gfx_v9_0_fault(adev, entry);
5736         return 0;
5737 }
5738 
5739 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5740                 struct ras_err_data *err_data,
5741                 struct amdgpu_iv_entry *entry)
5742 {
5743         /* TODO: an uncorrectable error (UE) will trigger an interrupt. */
5744         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5745         if (adev->gfx.funcs->query_ras_error_count)
5746                 adev->gfx.funcs->query_ras_error_count(adev, err_data);
5747         amdgpu_ras_reset_gpu(adev, 0);
5748         return AMDGPU_RAS_SUCCESS;
5749 }
5750 
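/*
 * Table of GFX EDC/ECC counter registers: each entry names the block, its
 * register, whether instances are per-SE, how many instances to read, and
 * the SEC/DED count field masks; presumably consumed by the
 * query_ras_error_count callback referenced above.
 */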
5751 static const struct {
5752         const char *name;
5753         uint32_t ip;
5754         uint32_t inst;
5755         uint32_t seg;
5756         uint32_t reg_offset;
5757         uint32_t per_se_instance;
5758         int32_t num_instance;
5759         uint32_t sec_count_mask;
5760         uint32_t ded_count_mask;
5761 } gfx_ras_edc_regs[] = {
5762         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5763           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5764           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5765         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5766           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5767           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5768         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5769           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5770         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5771           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5772         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5773           REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5774           REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5775         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5776           REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5777         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5778           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5779           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5780         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5781           REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5782           REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5783         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5784           REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5785         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5786           REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5787         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5788           REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5789         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5790           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5791           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5792         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5793           REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5794         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5795           0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5796           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5797         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5798           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5799           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5800           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5801         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5802           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5803           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5804         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5805           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5806           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5807           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5808         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5809           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5810           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5811           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5812         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5813           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5814           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5815           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5816         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5817           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5818           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5819           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5820         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5821           REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5822         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5823           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5824           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5825         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5826           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5827         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5828           REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5829         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5830           REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5831         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5832           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5833         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5834           REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5835         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5836           REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5837         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5838           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5839           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5840         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5841           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5842           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5843         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5844           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5845           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5846         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5847           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5848           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5849         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5850           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5851           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5852         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5853           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5854         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5855           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5856         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5857           REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5858         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5859           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5860         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5861           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5862         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5863           REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5864         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5865           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5866         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5867           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5868         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5869           16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5870         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5871           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5872           0 },
5873         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5874           16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5875         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5876           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5877           0 },
5878         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5879           16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5880         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5881           REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5882         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5883           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5884           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5885         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5886           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5887           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5888         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5889           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5890         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5891           REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5892         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5893           REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5894         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5895           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5896           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5897         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5898           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5899           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5900         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5901           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5902           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5903         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5904           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5905           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5906         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5907           REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5908         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5909           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5910           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5911         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5912           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5913           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5914         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5915           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5916           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5917         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5918           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5919           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5920         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5921           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5922           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5923         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5924           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5925           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5926         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5927           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5928           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5929         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5930           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5931           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5932         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5933           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5934           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5935         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5936           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5937           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5938         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5939           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5940           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5941         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5942           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5943           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5944         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5945           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5946           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5947         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5948           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5949           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5950         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5951           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5952           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5953         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5954           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5955           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5956         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5957           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5958           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5959         { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5960           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5961           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5962           0 },
5963         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5964           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5965         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5966           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5967         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5968           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5969         { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5970           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5971           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5972         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5973           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5974           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5975         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5976           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5977           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5978         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5979           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5980           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5981         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5982           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5983           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5984         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5985           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5986           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5987         { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5988           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5989           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5990           0 },
5991         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5992           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5993         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5994           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5995         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5996           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5997         { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5998           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5999           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
6000         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6001           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6002           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
6003         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6004           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6005           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
6006         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6007           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6008           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
6009         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6010           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6011           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
6012         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6013           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6014           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6015         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6016           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6017         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6018           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6019         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6020           REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6021         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6022           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6023         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6024           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6025         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6026           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6027           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6028         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6029           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6030           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6031         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6032           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6033           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6034         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6035           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6036         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6037           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6038         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6039           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6040         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6041           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6042         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6043           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6044         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6045           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6046 };
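
/*
 * Reading the initializers above against gfx_v9_0_query_ras_error_count()
 * below, each gfx_ras_edc_regs entry lines up as:
 *
 *   { name, SOC15_REG_ENTRY() -> (ip, inst, seg, reg_offset),
 *     per_se_instance, num_instance, sec_count_mask, ded_count_mask }
 *
 * Counters that only report single-error-detect (SED) leave the DED mask
 * as 0, e.g. the "TCP_CMD_FIFO" entry.
 */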
6047 
6048 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6049                                      void *inject_if)
6050 {
6051         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6052         int ret;
6053         struct ta_ras_trigger_error_input block_info = { 0 };
6054 
6055         if (adev->asic_type != CHIP_VEGA20)
6056                 return -EINVAL;
6057 
6058         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6059                 return -EINVAL;
6060 
6061         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6062                 return -EPERM;
6063 
6064         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6065               info->head.type)) {
6066                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6067                         ras_gfx_subblocks[info->head.sub_block_index].name,
6068                         info->head.type);
6069                 return -EPERM;
6070         }
6071 
6072         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6073               info->head.type)) {
6074                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6075                         ras_gfx_subblocks[info->head.sub_block_index].name,
6076                         info->head.type);
6077                 return -EPERM;
6078         }
6079 
6080         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6081         block_info.sub_block_index =
6082                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6083         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6084         block_info.address = info->address;
6085         block_info.value = info->value;
6086 
6087         mutex_lock(&adev->grbm_idx_mutex);
6088         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6089         mutex_unlock(&adev->grbm_idx_mutex);
6090 
6091         return ret;
6092 }
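
/*
 * A minimal sketch of how this inject hook is driven, using only the
 * ras_inject_if fields dereferenced above (the real plumbing goes through
 * the generic RAS code in amdgpu_ras.c); the placeholder values are
 * illustrative, not taken from this file:
 *
 *   struct ras_inject_if info = { 0 };
 *
 *   info.head.block = AMDGPU_RAS_BLOCK__GFX;
 *   info.head.type = <error type present in both hw_ and
 *                     sw_supported_error_type of the chosen sub-block>;
 *   info.head.sub_block_index = <index into ras_gfx_subblocks[]>;
 *   info.address = <inject address>;
 *   info.value = <inject value>;
 *   gfx_v9_0_ras_error_inject(adev, &info);
 */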
6093 
6094 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6095                                           void *ras_error_status)
6096 {
6097         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6098         uint32_t sec_count, ded_count;
6099         uint32_t i;
6100         uint32_t reg_value;
6101         uint32_t se_id, instance_id;
6102 
6103         if (adev->asic_type != CHIP_VEGA20)
6104                 return -EINVAL;
6105 
6106         err_data->ue_count = 0;
6107         err_data->ce_count = 0;
6108 
6109         mutex_lock(&adev->grbm_idx_mutex);
6110         for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6111                 for (instance_id = 0; instance_id < 256; instance_id++) {
6112                         for (i = 0;
6113                              i < ARRAY_SIZE(gfx_ras_edc_regs);
6114                              i++) {
6115                                 if (se_id != 0 &&
6116                                     !gfx_ras_edc_regs[i].per_se_instance)
6117                                         continue;
6118                                 if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6119                                         continue;
6120 
6121                                 gfx_v9_0_select_se_sh(adev, se_id, 0,
6122                                                       instance_id);
6123 
6124                                 reg_value = RREG32(
6125                                         adev->reg_offset[gfx_ras_edc_regs[i].ip]
6126                                                         [gfx_ras_edc_regs[i].inst]
6127                                                         [gfx_ras_edc_regs[i].seg] +
6128                                         gfx_ras_edc_regs[i].reg_offset);
6129                                 sec_count = reg_value &
6130                                             gfx_ras_edc_regs[i].sec_count_mask;
6131                                 ded_count = reg_value &
6132                                             gfx_ras_edc_regs[i].ded_count_mask;
6133                                 if (sec_count) {
6134                                         DRM_INFO(
6135                                                 "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6136                                                 se_id, instance_id,
6137                                                 gfx_ras_edc_regs[i].name,
6138                                                 sec_count);
6139                                         err_data->ce_count++;
6140                                 }
6141 
6142                                 if (ded_count) {
6143                                         DRM_INFO(
6144                                                 "Instance[%d][%d]: SubBlock %s, DED %d\n",
6145                                                 se_id, instance_id,
6146                                                 gfx_ras_edc_regs[i].name,
6147                                                 ded_count);
6148                                         err_data->ue_count++;
6149                                 }
6150                         }
6151                 }
6152         }
6153         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6154         mutex_unlock(&adev->grbm_idx_mutex);
6155 
6156         return 0;
6157 }
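
/*
 * The query above walks every shader engine and every register instance,
 * selects it through gfx_v9_0_select_se_sh(), and bumps ce_count once for
 * each counter read with a non-zero SEC field and ue_count once for each
 * counter read with a non-zero DED field.
 */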
6158 
6159 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6160                                   struct amdgpu_irq_src *source,
6161                                   struct amdgpu_iv_entry *entry)
6162 {
6163         struct ras_common_if *ras_if = adev->gfx.ras_if;
6164         struct ras_dispatch_if ih_data = {
6165                 .entry = entry,
6166         };
6167 
6168         if (!ras_if)
6169                 return 0;
6170 
6171         ih_data.head = *ras_if;
6172 
6173         DRM_ERROR("CP ECC ERROR IRQ\n");
6174         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6175         return 0;
6176 }
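
/*
 * The CP ECC interrupt handler above does no decoding of its own; it simply
 * forwards the IV entry to the generic RAS dispatcher keyed by
 * adev->gfx.ras_if, and returns quietly when RAS is not enabled for GFX.
 */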
6177 
6178 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6179         .name = "gfx_v9_0",
6180         .early_init = gfx_v9_0_early_init,
6181         .late_init = gfx_v9_0_late_init,
6182         .sw_init = gfx_v9_0_sw_init,
6183         .sw_fini = gfx_v9_0_sw_fini,
6184         .hw_init = gfx_v9_0_hw_init,
6185         .hw_fini = gfx_v9_0_hw_fini,
6186         .suspend = gfx_v9_0_suspend,
6187         .resume = gfx_v9_0_resume,
6188         .is_idle = gfx_v9_0_is_idle,
6189         .wait_for_idle = gfx_v9_0_wait_for_idle,
6190         .soft_reset = gfx_v9_0_soft_reset,
6191         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6192         .set_powergating_state = gfx_v9_0_set_powergating_state,
6193         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6194 };
6195 
6196 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6197         .type = AMDGPU_RING_TYPE_GFX,
6198         .align_mask = 0xff,
6199         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6200         .support_64bit_ptrs = true,
6201         .vmhub = AMDGPU_GFXHUB_0,
6202         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6203         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6204         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6205         .emit_frame_size = /* 242 maximum in total if 16 IBs */
6206                 5 +  /* COND_EXEC */
6207                 7 +  /* PIPELINE_SYNC */
6208                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6209                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6210                 2 + /* VM_FLUSH */
6211                 8 +  /* FENCE for VM_FLUSH */
6212                 20 + /* GDS switch */
6213                 4 + /* double SWITCH_BUFFER,
6214                        the first COND_EXEC jumps to the place just
6215                        prior to this double SWITCH_BUFFER */
6216                 5 + /* COND_EXEC */
6217                 7 + /* HDP_flush */
6218                 4 + /* VGT_flush */
6219                 14 + /* CE_META */
6220                 31 + /* DE_META */
6221                 3 + /* CNTX_CTRL */
6222                 5 + /* HDP_INVL */
6223                 8 + 8 + /* FENCE x2 */
6224                 2, /* SWITCH_BUFFER */
6225         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6226         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6227         .emit_fence = gfx_v9_0_ring_emit_fence,
6228         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6229         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6230         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6231         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6232         .test_ring = gfx_v9_0_ring_test_ring,
6233         .test_ib = gfx_v9_0_ring_test_ib,
6234         .insert_nop = amdgpu_ring_insert_nop,
6235         .pad_ib = amdgpu_ring_generic_pad_ib,
6236         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6237         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6238         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6239         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6240         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6241         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6242         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6243         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6244         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6245 };
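
/*
 * In the table above, emit_frame_size is the worst-case dword count of the
 * non-IB packets a single gfx submission may need (each term is annotated
 * with the packet it budgets for), while emit_ib_size is the dword cost of
 * one gfx_v9_0_ring_emit_ib_gfx() call.
 */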
6246 
6247 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6248         .type = AMDGPU_RING_TYPE_COMPUTE,
6249         .align_mask = 0xff,
6250         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6251         .support_64bit_ptrs = true,
6252         .vmhub = AMDGPU_GFXHUB_0,
6253         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6254         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6255         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6256         .emit_frame_size =
6257                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6258                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6259                 5 + /* hdp invalidate */
6260                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6261                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6262                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6263                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6264                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6265         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6266         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6267         .emit_fence = gfx_v9_0_ring_emit_fence,
6268         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6269         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6270         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6271         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6272         .test_ring = gfx_v9_0_ring_test_ring,
6273         .test_ib = gfx_v9_0_ring_test_ib,
6274         .insert_nop = amdgpu_ring_insert_nop,
6275         .pad_ib = amdgpu_ring_generic_pad_ib,
6276         .set_priority = gfx_v9_0_ring_set_priority_compute,
6277         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6278         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6279         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6280 };
6281 
6282 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6283         .type = AMDGPU_RING_TYPE_KIQ,
6284         .align_mask = 0xff,
6285         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6286         .support_64bit_ptrs = true,
6287         .vmhub = AMDGPU_GFXHUB_0,
6288         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6289         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6290         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6291         .emit_frame_size =
6292                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6293                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6294                 5 + /* hdp invalidate */
6295                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6296                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6297                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6298                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6299                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6300         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6301         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6302         .test_ring = gfx_v9_0_ring_test_ring,
6303         .insert_nop = amdgpu_ring_insert_nop,
6304         .pad_ib = amdgpu_ring_generic_pad_ib,
6305         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6306         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6307         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6308         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6309 };
6310 
6311 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6312 {
6313         int i;
6314 
6315         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6316 
6317         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6318                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6319 
6320         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6321                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6322 }
6323 
6324 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6325         .set = gfx_v9_0_set_eop_interrupt_state,
6326         .process = gfx_v9_0_eop_irq,
6327 };
6328 
6329 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6330         .set = gfx_v9_0_set_priv_reg_fault_state,
6331         .process = gfx_v9_0_priv_reg_irq,
6332 };
6333 
6334 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6335         .set = gfx_v9_0_set_priv_inst_fault_state,
6336         .process = gfx_v9_0_priv_inst_irq,
6337 };
6338 
6339 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6340         .set = gfx_v9_0_set_cp_ecc_error_state,
6341         .process = gfx_v9_0_cp_ecc_error_irq,
6342 };
6343 
6344 
6345 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6346 {
6347         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6348         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6349 
6350         adev->gfx.priv_reg_irq.num_types = 1;
6351         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6352 
6353         adev->gfx.priv_inst_irq.num_types = 1;
6354         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6355 
6356         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6357         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6358 }
6359 
6360 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6361 {
6362         switch (adev->asic_type) {
6363         case CHIP_VEGA10:
6364         case CHIP_VEGA12:
6365         case CHIP_VEGA20:
6366         case CHIP_RAVEN:
6367         case CHIP_ARCTURUS:
6368         case CHIP_RENOIR:
6369                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6370                 break;
6371         default:
6372                 break;
6373         }
6374 }
6375 
6376 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6377 {
6378         /* init asic gds info */
6379         switch (adev->asic_type) {
6380         case CHIP_VEGA10:
6381         case CHIP_VEGA12:
6382         case CHIP_VEGA20:
6383                 adev->gds.gds_size = 0x10000;
6384                 break;
6385         case CHIP_RAVEN:
6386         case CHIP_ARCTURUS:
6387                 adev->gds.gds_size = 0x1000;
6388                 break;
6389         default:
6390                 adev->gds.gds_size = 0x10000;
6391                 break;
6392         }
6393 
6394         switch (adev->asic_type) {
6395         case CHIP_VEGA10:
6396         case CHIP_VEGA20:
6397                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6398                 break;
6399         case CHIP_VEGA12:
6400                 adev->gds.gds_compute_max_wave_id = 0x27f;
6401                 break;
6402         case CHIP_RAVEN:
6403                 if (adev->rev_id >= 0x8)
6404                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6405                 else
6406                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6407                 break;
6408         case CHIP_ARCTURUS:
6409                 adev->gds.gds_compute_max_wave_id = 0xfff;
6410                 break;
6411         default:
6412                 /* this really depends on the chip */
6413                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6414                 break;
6415         }
6416 
6417         adev->gds.gws_size = 64;
6418         adev->gds.oa_size = 16;
6419 }
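
/*
 * For reference on the values above: 0x10000 bytes of GDS is 64 KiB and
 * 0x1000 is 4 KiB, while gws_size and oa_size are resource counts
 * (64 GWS entries, 16 OA entries) rather than byte sizes.
 */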
6420 
6421 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6422                                                  u32 bitmap)
6423 {
6424         u32 data;
6425 
6426         if (!bitmap)
6427                 return;
6428 
6429         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6430         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6431 
6432         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6433 }
6434 
6435 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6436 {
6437         u32 data, mask;
6438 
6439         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6440         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6441 
6442         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6443         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6444 
6445         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6446 
6447         return (~data) & mask;
6448 }
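
/*
 * Worked example of the helper above, assuming max_cu_per_sh = 10 and a
 * combined inactive-CU field of 0x3, and taking amdgpu_gfx_create_bitmask()
 * as the usual (1 << n) - 1 helper:
 *
 *   mask   = (1 << 10) - 1 = 0x3ff
 *   active = ~0x3 & 0x3ff  = 0x3fc   (CUs 2..9 active)
 */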
6449 
6450 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6451                                  struct amdgpu_cu_info *cu_info)
6452 {
6453         int i, j, k, counter, active_cu_number = 0;
6454         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6455         unsigned disable_masks[4 * 4];
6456 
6457         if (!adev || !cu_info)
6458                 return -EINVAL;
6459 
6460         /*
6461          * 16 comes from the bitmap array size 4*4, which covers all gfx9 ASICs
6462          */
6463         if (adev->gfx.config.max_shader_engines *
6464                 adev->gfx.config.max_sh_per_se > 16)
6465                 return -EINVAL;
6466 
6467         amdgpu_gfx_parse_disable_cu(disable_masks,
6468                                     adev->gfx.config.max_shader_engines,
6469                                     adev->gfx.config.max_sh_per_se);
6470 
6471         mutex_lock(&adev->grbm_idx_mutex);
6472         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6473                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6474                         mask = 1;
6475                         ao_bitmap = 0;
6476                         counter = 0;
6477                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6478                         gfx_v9_0_set_user_cu_inactive_bitmap(
6479                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6480                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6481 
6482                         /*
6483                          * The bitmap (and ao_cu_bitmap) in the cu_info
6484                          * structure is a 4x4 array, which is usually
6485                          * suitable for Vega ASICs with a 4*2 SE/SH layout.
6486                          * For Arcturus, however, the SE/SH layout changes
6487                          * to 8*1. To minimize the impact, we keep it
6488                          * compatible with the current bitmap array as below:
6489                          *    SE4,SH0 --> bitmap[0][1]
6490                          *    SE5,SH0 --> bitmap[1][1]
6491                          *    SE6,SH0 --> bitmap[2][1]
6492                          *    SE7,SH0 --> bitmap[3][1]
6493                          */
6494                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6495 
6496                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6497                                 if (bitmap & mask) {
6498                                         if (counter < adev->gfx.config.max_cu_per_sh)
6499                                                 ao_bitmap |= mask;
6500                                         counter++;
6501                                 }
6502                                 mask <<= 1;
6503                         }
6504                         active_cu_number += counter;
6505                         if (i < 2 && j < 2)
6506                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6507                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6508                 }
6509         }
6510         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6511         mutex_unlock(&adev->grbm_idx_mutex);
6512 
6513         cu_info->number = active_cu_number;
6514         cu_info->ao_cu_mask = ao_cu_mask;
6515         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6516 
6517         return 0;
6518 }
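
/*
 * Worked example of the index mapping used above: for SE5/SH0, i = 5 and
 * j = 0, so the CU bitmap lands at [i % 4][j + i / 4] = [1][1], matching
 * the SE5,SH0 row of the comment. The ao_cu_mask packing only covers
 * i < 2 and j < 2, placing each ao_bitmap at bit offset i * 16 + j * 8.
 */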
6519 
6520 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6521 {
6522         .type = AMD_IP_BLOCK_TYPE_GFX,
6523         .major = 9,
6524         .minor = 0,
6525         .rev = 0,
6526         .funcs = &gfx_v9_0_ip_funcs,
6527 };

/* [<][>][^][v][top][bottom][index][help] */