root/drivers/gpu/drm/radeon/cik.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. cik_get_allowed_info_register
  2. cik_didt_rreg
  3. cik_didt_wreg
  4. ci_get_temp
  5. kv_get_temp
  6. cik_pciep_rreg
  7. cik_pciep_wreg
  8. cik_init_golden_registers
  9. cik_get_xclk
  10. cik_mm_rdoorbell
  11. cik_mm_wdoorbell
  12. cik_srbm_select
  13. ci_mc_load_microcode
  14. cik_init_microcode
  15. cik_tiling_mode_table_init
  16. cik_select_se_sh
  17. cik_create_bitmask
  18. cik_get_rb_disabled
  19. cik_setup_rb
  20. cik_gpu_init
  21. cik_scratch_init
  22. cik_ring_test
  23. cik_hdp_flush_cp_ring_emit
  24. cik_fence_gfx_ring_emit
  25. cik_fence_compute_ring_emit
  26. cik_semaphore_ring_emit
  27. cik_copy_cpdma
  28. cik_ring_ib_execute
  29. cik_ib_test
  30. cik_cp_gfx_enable
  31. cik_cp_gfx_load_microcode
  32. cik_cp_gfx_start
  33. cik_cp_gfx_fini
  34. cik_cp_gfx_resume
  35. cik_gfx_get_rptr
  36. cik_gfx_get_wptr
  37. cik_gfx_set_wptr
  38. cik_compute_get_rptr
  39. cik_compute_get_wptr
  40. cik_compute_set_wptr
  41. cik_compute_stop
  42. cik_cp_compute_enable
  43. cik_cp_compute_load_microcode
  44. cik_cp_compute_start
  45. cik_cp_compute_fini
  46. cik_mec_fini
  47. cik_mec_init
  48. cik_cp_compute_resume
  49. cik_cp_enable
  50. cik_cp_load_microcode
  51. cik_cp_fini
  52. cik_cp_resume
  53. cik_print_gpu_status_regs
  54. cik_gpu_check_soft_reset
  55. cik_gpu_soft_reset
  56. kv_save_regs_for_reset
  57. kv_restore_regs_for_reset
  58. cik_gpu_pci_config_reset
  59. cik_asic_reset
  60. cik_gfx_is_lockup
  61. cik_mc_program
  62. cik_mc_init
  63. cik_pcie_gart_tlb_flush
  64. cik_pcie_gart_enable
  65. cik_pcie_gart_disable
  66. cik_pcie_gart_fini
  67. cik_ib_parse
  68. cik_vm_init
  69. cik_vm_fini
  70. cik_vm_decode_fault
  71. cik_vm_flush
  72. cik_enable_gui_idle_interrupt
  73. cik_enable_lbpw
  74. cik_wait_for_rlc_serdes
  75. cik_update_rlc
  76. cik_halt_rlc
  77. cik_enter_rlc_safe_mode
  78. cik_exit_rlc_safe_mode
  79. cik_rlc_stop
  80. cik_rlc_start
  81. cik_rlc_resume
  82. cik_enable_cgcg
  83. cik_enable_mgcg
  84. cik_enable_mc_ls
  85. cik_enable_mc_mgcg
  86. cik_enable_sdma_mgcg
  87. cik_enable_sdma_mgls
  88. cik_enable_uvd_mgcg
  89. cik_enable_bif_mgls
  90. cik_enable_hdp_mgcg
  91. cik_enable_hdp_ls
  92. cik_update_cg
  93. cik_init_cg
  94. cik_fini_cg
  95. cik_enable_sck_slowdown_on_pu
  96. cik_enable_sck_slowdown_on_pd
  97. cik_enable_cp_pg
  98. cik_enable_gds_pg
  99. cik_init_cp_pg_table
  100. cik_enable_gfx_cgpg
  101. cik_get_cu_active_bitmap
  102. cik_init_ao_cu_mask
  103. cik_enable_gfx_static_mgpg
  104. cik_enable_gfx_dynamic_mgpg
  105. cik_init_gfx_cgpg
  106. cik_update_gfx_pg
  107. cik_get_csb_size
  108. cik_get_csb_buffer
  109. cik_init_pg
  110. cik_fini_pg
  111. cik_enable_interrupts
  112. cik_disable_interrupts
  113. cik_disable_interrupt_state
  114. cik_irq_init
  115. cik_irq_set
  116. cik_irq_ack
  117. cik_irq_disable
  118. cik_irq_suspend
  119. cik_irq_fini
  120. cik_get_ih_wptr
  121. cik_irq_process
  122. cik_uvd_init
  123. cik_uvd_start
  124. cik_uvd_resume
  125. cik_vce_init
  126. cik_vce_start
  127. cik_vce_resume
  128. cik_startup
  129. cik_resume
  130. cik_suspend
  131. cik_init
  132. cik_fini
  133. dce8_program_fmt
  134. dce8_line_buffer_adjust
  135. cik_get_number_of_dram_channels
  136. dce8_dram_bandwidth
  137. dce8_dram_bandwidth_for_display
  138. dce8_data_return_bandwidth
  139. dce8_dmif_request_bandwidth
  140. dce8_available_bandwidth
  141. dce8_average_bandwidth
  142. dce8_latency_watermark
  143. dce8_average_bandwidth_vs_dram_bandwidth_for_display
  144. dce8_average_bandwidth_vs_available_bandwidth
  145. dce8_check_latency_hiding
  146. dce8_program_watermarks
  147. dce8_bandwidth_update
  148. cik_get_gpu_clock_counter
  149. cik_set_uvd_clock
  150. cik_set_uvd_clocks
  151. cik_set_vce_clocks
  152. cik_pcie_gen3_enable
  153. cik_program_aspm

   1 /*
   2  * Copyright 2012 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Alex Deucher
  23  */
  24 
  25 #include <linux/firmware.h>
  26 #include <linux/slab.h>
  27 #include <linux/module.h>
  28 
  29 #include <drm/drm_pci.h>
  30 #include <drm/drm_vblank.h>
  31 
  32 #include "atom.h"
  33 #include "cik_blit_shaders.h"
  34 #include "cikd.h"
  35 #include "clearstate_ci.h"
  36 #include "radeon.h"
  37 #include "radeon_asic.h"
  38 #include "radeon_audio.h"
  39 #include "radeon_ucode.h"
  40 
   41 #define SH_MEM_CONFIG_GFX_DEFAULT \
   42         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) /* default value: unaligned alignment mode only */
  43 
   44 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); /* Bonaire, upper-case file names */
   45 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
   46 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
   47 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
   48 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
   49 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin"); /* mc2: listed for the upper-case set only */
   50 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
   51 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
   52 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
   53 
   54 MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); /* Bonaire, lower-case file names */
   55 MODULE_FIRMWARE("radeon/bonaire_me.bin");
   56 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
   57 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
   58 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
   59 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
   60 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
   61 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
   62 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin"); /* extra "_k" smc image, lower-case set only */
   63 
   64 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin"); /* Hawaii, upper-case file names */
   65 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
   66 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
   67 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
   68 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
   69 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin"); /* mc2: listed for the upper-case set only */
   70 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
   71 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
   72 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
   73 
   74 MODULE_FIRMWARE("radeon/hawaii_pfp.bin"); /* Hawaii, lower-case file names */
   75 MODULE_FIRMWARE("radeon/hawaii_me.bin");
   76 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
   77 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
   78 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
   79 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
   80 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
   81 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
   82 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin"); /* extra "_k" smc image, lower-case set only */
   83 
   84 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); /* Kaveri, upper-case file names */
   85 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
   86 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
   87 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
   88 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
   89 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
   90 
   91 MODULE_FIRMWARE("radeon/kaveri_pfp.bin"); /* Kaveri, lower-case file names */
   92 MODULE_FIRMWARE("radeon/kaveri_me.bin");
   93 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
   94 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
   95 MODULE_FIRMWARE("radeon/kaveri_mec2.bin"); /* mec2: listed for the lower-case set only */
   96 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
   97 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
   98 
   99 MODULE_FIRMWARE("radeon/KABINI_pfp.bin"); /* Kabini, upper-case file names */
  100 MODULE_FIRMWARE("radeon/KABINI_me.bin");
  101 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
  102 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
  103 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
  104 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
  105 
  106 MODULE_FIRMWARE("radeon/kabini_pfp.bin"); /* Kabini, lower-case file names */
  107 MODULE_FIRMWARE("radeon/kabini_me.bin");
  108 MODULE_FIRMWARE("radeon/kabini_ce.bin");
  109 MODULE_FIRMWARE("radeon/kabini_mec.bin");
  110 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
  111 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
  112 
  113 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin"); /* Mullins, upper-case file names */
  114 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
  115 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
  116 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
  117 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
  118 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
  119 
  120 MODULE_FIRMWARE("radeon/mullins_pfp.bin"); /* Mullins, lower-case file names */
  121 MODULE_FIRMWARE("radeon/mullins_me.bin");
  122 MODULE_FIRMWARE("radeon/mullins_ce.bin");
  123 MODULE_FIRMWARE("radeon/mullins_mec.bin");
  124 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
  125 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 126 
  127 extern int r600_ih_ring_alloc(struct radeon_device *rdev); /* externs below: not defined in this file */
  128 extern void r600_ih_ring_fini(struct radeon_device *rdev);
  129 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
  130 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
  131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
  132 extern void sumo_rlc_fini(struct radeon_device *rdev);
  133 extern int sumo_rlc_init(struct radeon_device *rdev);
  134 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
  135 extern void si_rlc_reset(struct radeon_device *rdev);
  136 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
  137 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh); /* static: forward declaration, defined later in this file */
  138 extern int cik_sdma_resume(struct radeon_device *rdev);
  139 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
  140 extern void cik_sdma_fini(struct radeon_device *rdev);
  141 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
  142 static void cik_rlc_stop(struct radeon_device *rdev); /* remaining statics: forward declarations for this file */
  143 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
  144 static void cik_program_aspm(struct radeon_device *rdev);
  145 static void cik_init_pg(struct radeon_device *rdev);
  146 static void cik_init_cg(struct radeon_device *rdev);
  147 static void cik_fini_pg(struct radeon_device *rdev);
  148 static void cik_fini_cg(struct radeon_device *rdev);
  149 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
  150                                           bool enable);
 151 
 152 /**
 153  * cik_get_allowed_info_register - fetch the register for the info ioctl
 154  *
 155  * @rdev: radeon_device pointer
 156  * @reg: register offset in bytes
 157  * @val: register value
 158  *
 159  * Returns 0 for success or -EINVAL for an invalid register
 160  *
 161  */
 162 int cik_get_allowed_info_register(struct radeon_device *rdev,
 163                                   u32 reg, u32 *val)
 164 {
 165         switch (reg) {
 166         case GRBM_STATUS:
 167         case GRBM_STATUS2:
 168         case GRBM_STATUS_SE0:
 169         case GRBM_STATUS_SE1:
 170         case GRBM_STATUS_SE2:
 171         case GRBM_STATUS_SE3:
 172         case SRBM_STATUS:
 173         case SRBM_STATUS2:
 174         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 175         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 176         case UVD_STATUS:
 177         /* TODO VCE */
 178                 *val = RREG32(reg);
 179                 return 0;
 180         default:
 181                 return -EINVAL;
 182         }
 183 }
 184 
 185 /*
 186  * Indirect registers accessor
 187  */
 188 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
 189 {
 190         unsigned long flags;
 191         u32 r;
 192 
 193         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 194         WREG32(CIK_DIDT_IND_INDEX, (reg));
 195         r = RREG32(CIK_DIDT_IND_DATA);
 196         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 197         return r;
 198 }
 199 
 200 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 201 {
 202         unsigned long flags;
 203 
 204         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 205         WREG32(CIK_DIDT_IND_INDEX, (reg));
 206         WREG32(CIK_DIDT_IND_DATA, (v));
 207         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 208 }
 209 
 210 /* get temperature in millidegrees */
 211 int ci_get_temp(struct radeon_device *rdev)
 212 {
 213         u32 temp;
 214         int actual_temp = 0;
 215 
 216         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 217                 CTF_TEMP_SHIFT;
 218 
 219         if (temp & 0x200)
 220                 actual_temp = 255;
 221         else
 222                 actual_temp = temp & 0x1ff;
 223 
 224         actual_temp = actual_temp * 1000;
 225 
 226         return actual_temp;
 227 }
 228 
 229 /* get temperature in millidegrees */
 230 int kv_get_temp(struct radeon_device *rdev)
 231 {
 232         u32 temp;
 233         int actual_temp = 0;
 234 
 235         temp = RREG32_SMC(0xC0300E0C);
 236 
 237         if (temp)
 238                 actual_temp = (temp / 8) - 49;
 239         else
 240                 actual_temp = 0;
 241 
 242         actual_temp = actual_temp * 1000;
 243 
 244         return actual_temp;
 245 }
 246 
 247 /*
 248  * Indirect registers accessor
 249  */
 250 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
 251 {
 252         unsigned long flags;
 253         u32 r;
 254 
 255         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 256         WREG32(PCIE_INDEX, reg);
 257         (void)RREG32(PCIE_INDEX);
 258         r = RREG32(PCIE_DATA);
 259         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 260         return r;
 261 }
 262 
 263 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 264 {
 265         unsigned long flags;
 266 
 267         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 268         WREG32(PCIE_INDEX, reg);
 269         (void)RREG32(PCIE_INDEX);
 270         WREG32(PCIE_DATA, v);
 271         (void)RREG32(PCIE_DATA);
 272         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 273 }
 274 
  275 static const u32 spectre_rlc_save_restore_register_list[] =
  276 { /* layout: (hi16 << 16) | (offset >> 2) words, each followed by a 0x00000000 word, until the 0x5 section */
  277         (0x0e00 << 16) | (0xc12c >> 2), /* NOTE(review): hi16 presumably a selector, low part a dword register index - confirm */
  278         0x00000000,
  279         (0x0e00 << 16) | (0xc140 >> 2),
  280         0x00000000,
  281         (0x0e00 << 16) | (0xc150 >> 2),
  282         0x00000000,
  283         (0x0e00 << 16) | (0xc15c >> 2),
  284         0x00000000,
  285         (0x0e00 << 16) | (0xc168 >> 2),
  286         0x00000000,
  287         (0x0e00 << 16) | (0xc170 >> 2),
  288         0x00000000,
  289         (0x0e00 << 16) | (0xc178 >> 2),
  290         0x00000000,
  291         (0x0e00 << 16) | (0xc204 >> 2),
  292         0x00000000,
  293         (0x0e00 << 16) | (0xc2b4 >> 2),
  294         0x00000000,
  295         (0x0e00 << 16) | (0xc2b8 >> 2),
  296         0x00000000,
  297         (0x0e00 << 16) | (0xc2bc >> 2),
  298         0x00000000,
  299         (0x0e00 << 16) | (0xc2c0 >> 2),
  300         0x00000000,
  301         (0x0e00 << 16) | (0x8228 >> 2),
  302         0x00000000,
  303         (0x0e00 << 16) | (0x829c >> 2),
  304         0x00000000,
  305         (0x0e00 << 16) | (0x869c >> 2),
  306         0x00000000,
  307         (0x0600 << 16) | (0x98f4 >> 2),
  308         0x00000000,
  309         (0x0e00 << 16) | (0x98f8 >> 2),
  310         0x00000000,
  311         (0x0e00 << 16) | (0x9900 >> 2),
  312         0x00000000,
  313         (0x0e00 << 16) | (0xc260 >> 2),
  314         0x00000000,
  315         (0x0e00 << 16) | (0x90e8 >> 2),
  316         0x00000000,
  317         (0x0e00 << 16) | (0x3c000 >> 2),
  318         0x00000000,
  319         (0x0e00 << 16) | (0x3c00c >> 2),
  320         0x00000000,
  321         (0x0e00 << 16) | (0x8c1c >> 2),
  322         0x00000000,
  323         (0x0e00 << 16) | (0x9700 >> 2),
  324         0x00000000,
  325         (0x0e00 << 16) | (0xcd20 >> 2),
  326         0x00000000,
  327         (0x4e00 << 16) | (0xcd20 >> 2),
  328         0x00000000,
  329         (0x5e00 << 16) | (0xcd20 >> 2),
  330         0x00000000,
  331         (0x6e00 << 16) | (0xcd20 >> 2),
  332         0x00000000,
  333         (0x7e00 << 16) | (0xcd20 >> 2),
  334         0x00000000,
  335         (0x8e00 << 16) | (0xcd20 >> 2),
  336         0x00000000,
  337         (0x9e00 << 16) | (0xcd20 >> 2),
  338         0x00000000,
  339         (0xae00 << 16) | (0xcd20 >> 2),
  340         0x00000000,
  341         (0xbe00 << 16) | (0xcd20 >> 2),
  342         0x00000000,
  343         (0x0e00 << 16) | (0x89bc >> 2),
  344         0x00000000,
  345         (0x0e00 << 16) | (0x8900 >> 2),
  346         0x00000000,
  347         0x3, /* bare word, not an entry/zero pair; NOTE(review): meaning not visible here - confirm */
  348         (0x0e00 << 16) | (0xc130 >> 2),
  349         0x00000000,
  350         (0x0e00 << 16) | (0xc134 >> 2),
  351         0x00000000,
  352         (0x0e00 << 16) | (0xc1fc >> 2),
  353         0x00000000,
  354         (0x0e00 << 16) | (0xc208 >> 2),
  355         0x00000000,
  356         (0x0e00 << 16) | (0xc264 >> 2),
  357         0x00000000,
  358         (0x0e00 << 16) | (0xc268 >> 2),
  359         0x00000000,
  360         (0x0e00 << 16) | (0xc26c >> 2),
  361         0x00000000,
  362         (0x0e00 << 16) | (0xc270 >> 2),
  363         0x00000000,
  364         (0x0e00 << 16) | (0xc274 >> 2),
  365         0x00000000,
  366         (0x0e00 << 16) | (0xc278 >> 2),
  367         0x00000000,
  368         (0x0e00 << 16) | (0xc27c >> 2),
  369         0x00000000,
  370         (0x0e00 << 16) | (0xc280 >> 2),
  371         0x00000000,
  372         (0x0e00 << 16) | (0xc284 >> 2),
  373         0x00000000,
  374         (0x0e00 << 16) | (0xc288 >> 2),
  375         0x00000000,
  376         (0x0e00 << 16) | (0xc28c >> 2),
  377         0x00000000,
  378         (0x0e00 << 16) | (0xc290 >> 2),
  379         0x00000000,
  380         (0x0e00 << 16) | (0xc294 >> 2),
  381         0x00000000,
  382         (0x0e00 << 16) | (0xc298 >> 2),
  383         0x00000000,
  384         (0x0e00 << 16) | (0xc29c >> 2),
  385         0x00000000,
  386         (0x0e00 << 16) | (0xc2a0 >> 2),
  387         0x00000000,
  388         (0x0e00 << 16) | (0xc2a4 >> 2),
  389         0x00000000,
  390         (0x0e00 << 16) | (0xc2a8 >> 2),
  391         0x00000000,
  392         (0x0e00 << 16) | (0xc2ac  >> 2),
  393         0x00000000,
  394         (0x0e00 << 16) | (0xc2b0 >> 2),
  395         0x00000000,
  396         (0x0e00 << 16) | (0x301d0 >> 2),
  397         0x00000000,
  398         (0x0e00 << 16) | (0x30238 >> 2),
  399         0x00000000,
  400         (0x0e00 << 16) | (0x30250 >> 2),
  401         0x00000000,
  402         (0x0e00 << 16) | (0x30254 >> 2),
  403         0x00000000,
  404         (0x0e00 << 16) | (0x30258 >> 2),
  405         0x00000000,
  406         (0x0e00 << 16) | (0x3025c >> 2),
  407         0x00000000,
  408         (0x4e00 << 16) | (0xc900 >> 2),
  409         0x00000000,
  410         (0x5e00 << 16) | (0xc900 >> 2),
  411         0x00000000,
  412         (0x6e00 << 16) | (0xc900 >> 2),
  413         0x00000000,
  414         (0x7e00 << 16) | (0xc900 >> 2),
  415         0x00000000,
  416         (0x8e00 << 16) | (0xc900 >> 2),
  417         0x00000000,
  418         (0x9e00 << 16) | (0xc900 >> 2),
  419         0x00000000,
  420         (0xae00 << 16) | (0xc900 >> 2),
  421         0x00000000,
  422         (0xbe00 << 16) | (0xc900 >> 2),
  423         0x00000000,
  424         (0x4e00 << 16) | (0xc904 >> 2),
  425         0x00000000,
  426         (0x5e00 << 16) | (0xc904 >> 2),
  427         0x00000000,
  428         (0x6e00 << 16) | (0xc904 >> 2),
  429         0x00000000,
  430         (0x7e00 << 16) | (0xc904 >> 2),
  431         0x00000000,
  432         (0x8e00 << 16) | (0xc904 >> 2),
  433         0x00000000,
  434         (0x9e00 << 16) | (0xc904 >> 2),
  435         0x00000000,
  436         (0xae00 << 16) | (0xc904 >> 2),
  437         0x00000000,
  438         (0xbe00 << 16) | (0xc904 >> 2),
  439         0x00000000,
  440         (0x4e00 << 16) | (0xc908 >> 2),
  441         0x00000000,
  442         (0x5e00 << 16) | (0xc908 >> 2),
  443         0x00000000,
  444         (0x6e00 << 16) | (0xc908 >> 2),
  445         0x00000000,
  446         (0x7e00 << 16) | (0xc908 >> 2),
  447         0x00000000,
  448         (0x8e00 << 16) | (0xc908 >> 2),
  449         0x00000000,
  450         (0x9e00 << 16) | (0xc908 >> 2),
  451         0x00000000,
  452         (0xae00 << 16) | (0xc908 >> 2),
  453         0x00000000,
  454         (0xbe00 << 16) | (0xc908 >> 2),
  455         0x00000000,
  456         (0x4e00 << 16) | (0xc90c >> 2),
  457         0x00000000,
  458         (0x5e00 << 16) | (0xc90c >> 2),
  459         0x00000000,
  460         (0x6e00 << 16) | (0xc90c >> 2),
  461         0x00000000,
  462         (0x7e00 << 16) | (0xc90c >> 2),
  463         0x00000000,
  464         (0x8e00 << 16) | (0xc90c >> 2),
  465         0x00000000,
  466         (0x9e00 << 16) | (0xc90c >> 2),
  467         0x00000000,
  468         (0xae00 << 16) | (0xc90c >> 2),
  469         0x00000000,
  470         (0xbe00 << 16) | (0xc90c >> 2),
  471         0x00000000,
  472         (0x4e00 << 16) | (0xc910 >> 2),
  473         0x00000000,
  474         (0x5e00 << 16) | (0xc910 >> 2),
  475         0x00000000,
  476         (0x6e00 << 16) | (0xc910 >> 2),
  477         0x00000000,
  478         (0x7e00 << 16) | (0xc910 >> 2),
  479         0x00000000,
  480         (0x8e00 << 16) | (0xc910 >> 2),
  481         0x00000000,
  482         (0x9e00 << 16) | (0xc910 >> 2),
  483         0x00000000,
  484         (0xae00 << 16) | (0xc910 >> 2),
  485         0x00000000,
  486         (0xbe00 << 16) | (0xc910 >> 2),
  487         0x00000000,
  488         (0x0e00 << 16) | (0xc99c >> 2),
  489         0x00000000,
  490         (0x0e00 << 16) | (0x9834 >> 2),
  491         0x00000000,
  492         (0x0000 << 16) | (0x30f00 >> 2),
  493         0x00000000,
  494         (0x0001 << 16) | (0x30f00 >> 2),
  495         0x00000000,
  496         (0x0000 << 16) | (0x30f04 >> 2),
  497         0x00000000,
  498         (0x0001 << 16) | (0x30f04 >> 2),
  499         0x00000000,
  500         (0x0000 << 16) | (0x30f08 >> 2),
  501         0x00000000,
  502         (0x0001 << 16) | (0x30f08 >> 2),
  503         0x00000000,
  504         (0x0000 << 16) | (0x30f0c >> 2),
  505         0x00000000,
  506         (0x0001 << 16) | (0x30f0c >> 2),
  507         0x00000000,
  508         (0x0600 << 16) | (0x9b7c >> 2),
  509         0x00000000,
  510         (0x0e00 << 16) | (0x8a14 >> 2),
  511         0x00000000,
  512         (0x0e00 << 16) | (0x8a18 >> 2),
  513         0x00000000,
  514         (0x0600 << 16) | (0x30a00 >> 2),
  515         0x00000000,
  516         (0x0e00 << 16) | (0x8bf0 >> 2),
  517         0x00000000,
  518         (0x0e00 << 16) | (0x8bcc >> 2),
  519         0x00000000,
  520         (0x0e00 << 16) | (0x8b24 >> 2),
  521         0x00000000,
  522         (0x0e00 << 16) | (0x30a04 >> 2),
  523         0x00000000,
  524         (0x0600 << 16) | (0x30a10 >> 2),
  525         0x00000000,
  526         (0x0600 << 16) | (0x30a14 >> 2),
  527         0x00000000,
  528         (0x0600 << 16) | (0x30a18 >> 2),
  529         0x00000000,
  530         (0x0600 << 16) | (0x30a2c >> 2),
  531         0x00000000,
  532         (0x0e00 << 16) | (0xc700 >> 2),
  533         0x00000000,
  534         (0x0e00 << 16) | (0xc704 >> 2),
  535         0x00000000,
  536         (0x0e00 << 16) | (0xc708 >> 2),
  537         0x00000000,
  538         (0x0e00 << 16) | (0xc768 >> 2),
  539         0x00000000,
  540         (0x0400 << 16) | (0xc770 >> 2),
  541         0x00000000,
  542         (0x0400 << 16) | (0xc774 >> 2),
  543         0x00000000,
  544         (0x0400 << 16) | (0xc778 >> 2),
  545         0x00000000,
  546         (0x0400 << 16) | (0xc77c >> 2),
  547         0x00000000,
  548         (0x0400 << 16) | (0xc780 >> 2),
  549         0x00000000,
  550         (0x0400 << 16) | (0xc784 >> 2),
  551         0x00000000,
  552         (0x0400 << 16) | (0xc788 >> 2),
  553         0x00000000,
  554         (0x0400 << 16) | (0xc78c >> 2),
  555         0x00000000,
  556         (0x0400 << 16) | (0xc798 >> 2),
  557         0x00000000,
  558         (0x0400 << 16) | (0xc79c >> 2),
  559         0x00000000,
  560         (0x0400 << 16) | (0xc7a0 >> 2),
  561         0x00000000,
  562         (0x0400 << 16) | (0xc7a4 >> 2),
  563         0x00000000,
  564         (0x0400 << 16) | (0xc7a8 >> 2),
  565         0x00000000,
  566         (0x0400 << 16) | (0xc7ac >> 2),
  567         0x00000000,
  568         (0x0400 << 16) | (0xc7b0 >> 2),
  569         0x00000000,
  570         (0x0400 << 16) | (0xc7b4 >> 2),
  571         0x00000000,
  572         (0x0e00 << 16) | (0x9100 >> 2),
  573         0x00000000,
  574         (0x0e00 << 16) | (0x3c010 >> 2),
  575         0x00000000,
  576         (0x0e00 << 16) | (0x92a8 >> 2),
  577         0x00000000,
  578         (0x0e00 << 16) | (0x92ac >> 2),
  579         0x00000000,
  580         (0x0e00 << 16) | (0x92b4 >> 2),
  581         0x00000000,
  582         (0x0e00 << 16) | (0x92b8 >> 2),
  583         0x00000000,
  584         (0x0e00 << 16) | (0x92bc >> 2),
  585         0x00000000,
  586         (0x0e00 << 16) | (0x92c0 >> 2),
  587         0x00000000,
  588         (0x0e00 << 16) | (0x92c4 >> 2),
  589         0x00000000,
  590         (0x0e00 << 16) | (0x92c8 >> 2),
  591         0x00000000,
  592         (0x0e00 << 16) | (0x92cc >> 2),
  593         0x00000000,
  594         (0x0e00 << 16) | (0x92d0 >> 2),
  595         0x00000000,
  596         (0x0e00 << 16) | (0x8c00 >> 2),
  597         0x00000000,
  598         (0x0e00 << 16) | (0x8c04 >> 2),
  599         0x00000000,
  600         (0x0e00 << 16) | (0x8c20 >> 2),
  601         0x00000000,
  602         (0x0e00 << 16) | (0x8c38 >> 2),
  603         0x00000000,
  604         (0x0e00 << 16) | (0x8c3c >> 2),
  605         0x00000000,
  606         (0x0e00 << 16) | (0xae00 >> 2),
  607         0x00000000,
  608         (0x0e00 << 16) | (0x9604 >> 2),
  609         0x00000000,
  610         (0x0e00 << 16) | (0xac08 >> 2),
  611         0x00000000,
  612         (0x0e00 << 16) | (0xac0c >> 2),
  613         0x00000000,
  614         (0x0e00 << 16) | (0xac10 >> 2),
  615         0x00000000,
  616         (0x0e00 << 16) | (0xac14 >> 2),
  617         0x00000000,
  618         (0x0e00 << 16) | (0xac58 >> 2),
  619         0x00000000,
  620         (0x0e00 << 16) | (0xac68 >> 2),
  621         0x00000000,
  622         (0x0e00 << 16) | (0xac6c >> 2),
  623         0x00000000,
  624         (0x0e00 << 16) | (0xac70 >> 2),
  625         0x00000000,
  626         (0x0e00 << 16) | (0xac74 >> 2),
  627         0x00000000,
  628         (0x0e00 << 16) | (0xac78 >> 2),
  629         0x00000000,
  630         (0x0e00 << 16) | (0xac7c >> 2),
  631         0x00000000,
  632         (0x0e00 << 16) | (0xac80 >> 2),
  633         0x00000000,
  634         (0x0e00 << 16) | (0xac84 >> 2),
  635         0x00000000,
  636         (0x0e00 << 16) | (0xac88 >> 2),
  637         0x00000000,
  638         (0x0e00 << 16) | (0xac8c >> 2),
  639         0x00000000,
  640         (0x0e00 << 16) | (0x970c >> 2),
  641         0x00000000,
  642         (0x0e00 << 16) | (0x9714 >> 2),
  643         0x00000000,
  644         (0x0e00 << 16) | (0x9718 >> 2),
  645         0x00000000,
  646         (0x0e00 << 16) | (0x971c >> 2),
  647         0x00000000,
  648         (0x0e00 << 16) | (0x31068 >> 2),
  649         0x00000000,
  650         (0x4e00 << 16) | (0x31068 >> 2),
  651         0x00000000,
  652         (0x5e00 << 16) | (0x31068 >> 2),
  653         0x00000000,
  654         (0x6e00 << 16) | (0x31068 >> 2),
  655         0x00000000,
  656         (0x7e00 << 16) | (0x31068 >> 2),
  657         0x00000000,
  658         (0x8e00 << 16) | (0x31068 >> 2),
  659         0x00000000,
  660         (0x9e00 << 16) | (0x31068 >> 2),
  661         0x00000000,
  662         (0xae00 << 16) | (0x31068 >> 2),
  663         0x00000000,
  664         (0xbe00 << 16) | (0x31068 >> 2),
  665         0x00000000,
  666         (0x0e00 << 16) | (0xcd10 >> 2),
  667         0x00000000,
  668         (0x0e00 << 16) | (0xcd14 >> 2),
  669         0x00000000,
  670         (0x0e00 << 16) | (0x88b0 >> 2),
  671         0x00000000,
  672         (0x0e00 << 16) | (0x88b4 >> 2),
  673         0x00000000,
  674         (0x0e00 << 16) | (0x88b8 >> 2),
  675         0x00000000,
  676         (0x0e00 << 16) | (0x88bc >> 2),
  677         0x00000000,
  678         (0x0400 << 16) | (0x89c0 >> 2),
  679         0x00000000,
  680         (0x0e00 << 16) | (0x88c4 >> 2),
  681         0x00000000,
  682         (0x0e00 << 16) | (0x88c8 >> 2),
  683         0x00000000,
  684         (0x0e00 << 16) | (0x88d0 >> 2),
  685         0x00000000,
  686         (0x0e00 << 16) | (0x88d4 >> 2),
  687         0x00000000,
  688         (0x0e00 << 16) | (0x88d8 >> 2),
  689         0x00000000,
  690         (0x0e00 << 16) | (0x8980 >> 2),
  691         0x00000000,
  692         (0x0e00 << 16) | (0x30938 >> 2),
  693         0x00000000,
  694         (0x0e00 << 16) | (0x3093c >> 2),
  695         0x00000000,
  696         (0x0e00 << 16) | (0x30940 >> 2),
  697         0x00000000,
  698         (0x0e00 << 16) | (0x89a0 >> 2),
  699         0x00000000,
  700         (0x0e00 << 16) | (0x30900 >> 2),
  701         0x00000000,
  702         (0x0e00 << 16) | (0x30904 >> 2),
  703         0x00000000,
  704         (0x0e00 << 16) | (0x89b4 >> 2),
  705         0x00000000,
  706         (0x0e00 << 16) | (0x3c210 >> 2),
  707         0x00000000,
  708         (0x0e00 << 16) | (0x3c214 >> 2),
  709         0x00000000,
  710         (0x0e00 << 16) | (0x3c218 >> 2),
  711         0x00000000,
  712         (0x0e00 << 16) | (0x8904 >> 2),
  713         0x00000000,
  714         0x5, /* bare word; the five entries after it have no 0x00000000 word following them */
  715         (0x0e00 << 16) | (0x8c28 >> 2),
  716         (0x0e00 << 16) | (0x8c2c >> 2),
  717         (0x0e00 << 16) | (0x8c30 >> 2),
  718         (0x0e00 << 16) | (0x8c34 >> 2),
  719         (0x0e00 << 16) | (0x9600 >> 2),
  720 };
 721 
 /*
  * Kalindi RLC save/restore register list.
  *
  * Most entries are a pair of words: a descriptor built as
  * (selector << 16) | (register offset >> 2), followed by a 0x00000000 word.
  * Two bare words (0x3 and 0x5) are interleaved with the pairs; the five
  * descriptors that follow 0x5 have no trailing zero word.
  *
  * NOTE(review): the zero words are presumably placeholders for saved values
  * and the upper 16 bits presumably select which instance(s) the access
  * targets -- neither is established in this part of the file; confirm
  * against the code that consumes the list.
  */
 722 static const u32 kalindi_rlc_save_restore_register_list[] =
 723 {
 724         (0x0e00 << 16) | (0xc12c >> 2),
 725         0x00000000,
 726         (0x0e00 << 16) | (0xc140 >> 2),
 727         0x00000000,
 728         (0x0e00 << 16) | (0xc150 >> 2),
 729         0x00000000,
 730         (0x0e00 << 16) | (0xc15c >> 2),
 731         0x00000000,
 732         (0x0e00 << 16) | (0xc168 >> 2),
 733         0x00000000,
 734         (0x0e00 << 16) | (0xc170 >> 2),
 735         0x00000000,
 736         (0x0e00 << 16) | (0xc204 >> 2),
 737         0x00000000,
 738         (0x0e00 << 16) | (0xc2b4 >> 2),
 739         0x00000000,
 740         (0x0e00 << 16) | (0xc2b8 >> 2),
 741         0x00000000,
 742         (0x0e00 << 16) | (0xc2bc >> 2),
 743         0x00000000,
 744         (0x0e00 << 16) | (0xc2c0 >> 2),
 745         0x00000000,
 746         (0x0e00 << 16) | (0x8228 >> 2),
 747         0x00000000,
 748         (0x0e00 << 16) | (0x829c >> 2),
 749         0x00000000,
 750         (0x0e00 << 16) | (0x869c >> 2),
 751         0x00000000,
 752         (0x0600 << 16) | (0x98f4 >> 2),
 753         0x00000000,
 754         (0x0e00 << 16) | (0x98f8 >> 2),
 755         0x00000000,
 756         (0x0e00 << 16) | (0x9900 >> 2),
 757         0x00000000,
 758         (0x0e00 << 16) | (0xc260 >> 2),
 759         0x00000000,
 760         (0x0e00 << 16) | (0x90e8 >> 2),
 761         0x00000000,
 762         (0x0e00 << 16) | (0x3c000 >> 2),
 763         0x00000000,
 764         (0x0e00 << 16) | (0x3c00c >> 2),
 765         0x00000000,
 766         (0x0e00 << 16) | (0x8c1c >> 2),
 767         0x00000000,
 768         (0x0e00 << 16) | (0x9700 >> 2),
 769         0x00000000,
 770         (0x0e00 << 16) | (0xcd20 >> 2),
 771         0x00000000,
 772         (0x4e00 << 16) | (0xcd20 >> 2),
 773         0x00000000,
 774         (0x5e00 << 16) | (0xcd20 >> 2),
 775         0x00000000,
 776         (0x6e00 << 16) | (0xcd20 >> 2),
 777         0x00000000,
 778         (0x7e00 << 16) | (0xcd20 >> 2),
 779         0x00000000,
 780         (0x0e00 << 16) | (0x89bc >> 2),
 781         0x00000000,
 782         (0x0e00 << 16) | (0x8900 >> 2),
 783         0x00000000,
             /*
              * Bare word, not in descriptor form.
              * NOTE(review): looks like a section marker -- meaning is not
              * shown in this chunk; confirm.
              */
 784         0x3,
 785         (0x0e00 << 16) | (0xc130 >> 2),
 786         0x00000000,
 787         (0x0e00 << 16) | (0xc134 >> 2),
 788         0x00000000,
 789         (0x0e00 << 16) | (0xc1fc >> 2),
 790         0x00000000,
 791         (0x0e00 << 16) | (0xc208 >> 2),
 792         0x00000000,
 793         (0x0e00 << 16) | (0xc264 >> 2),
 794         0x00000000,
 795         (0x0e00 << 16) | (0xc268 >> 2),
 796         0x00000000,
 797         (0x0e00 << 16) | (0xc26c >> 2),
 798         0x00000000,
 799         (0x0e00 << 16) | (0xc270 >> 2),
 800         0x00000000,
 801         (0x0e00 << 16) | (0xc274 >> 2),
 802         0x00000000,
 803         (0x0e00 << 16) | (0xc28c >> 2),
 804         0x00000000,
 805         (0x0e00 << 16) | (0xc290 >> 2),
 806         0x00000000,
 807         (0x0e00 << 16) | (0xc294 >> 2),
 808         0x00000000,
 809         (0x0e00 << 16) | (0xc298 >> 2),
 810         0x00000000,
 811         (0x0e00 << 16) | (0xc2a0 >> 2),
 812         0x00000000,
 813         (0x0e00 << 16) | (0xc2a4 >> 2),
 814         0x00000000,
 815         (0x0e00 << 16) | (0xc2a8 >> 2),
 816         0x00000000,
 817         (0x0e00 << 16) | (0xc2ac >> 2),
 818         0x00000000,
 819         (0x0e00 << 16) | (0x301d0 >> 2),
 820         0x00000000,
 821         (0x0e00 << 16) | (0x30238 >> 2),
 822         0x00000000,
 823         (0x0e00 << 16) | (0x30250 >> 2),
 824         0x00000000,
 825         (0x0e00 << 16) | (0x30254 >> 2),
 826         0x00000000,
 827         (0x0e00 << 16) | (0x30258 >> 2),
 828         0x00000000,
 829         (0x0e00 << 16) | (0x3025c >> 2),
 830         0x00000000,
 831         (0x4e00 << 16) | (0xc900 >> 2),
 832         0x00000000,
 833         (0x5e00 << 16) | (0xc900 >> 2),
 834         0x00000000,
 835         (0x6e00 << 16) | (0xc900 >> 2),
 836         0x00000000,
 837         (0x7e00 << 16) | (0xc900 >> 2),
 838         0x00000000,
 839         (0x4e00 << 16) | (0xc904 >> 2),
 840         0x00000000,
 841         (0x5e00 << 16) | (0xc904 >> 2),
 842         0x00000000,
 843         (0x6e00 << 16) | (0xc904 >> 2),
 844         0x00000000,
 845         (0x7e00 << 16) | (0xc904 >> 2),
 846         0x00000000,
 847         (0x4e00 << 16) | (0xc908 >> 2),
 848         0x00000000,
 849         (0x5e00 << 16) | (0xc908 >> 2),
 850         0x00000000,
 851         (0x6e00 << 16) | (0xc908 >> 2),
 852         0x00000000,
 853         (0x7e00 << 16) | (0xc908 >> 2),
 854         0x00000000,
 855         (0x4e00 << 16) | (0xc90c >> 2),
 856         0x00000000,
 857         (0x5e00 << 16) | (0xc90c >> 2),
 858         0x00000000,
 859         (0x6e00 << 16) | (0xc90c >> 2),
 860         0x00000000,
 861         (0x7e00 << 16) | (0xc90c >> 2),
 862         0x00000000,
 863         (0x4e00 << 16) | (0xc910 >> 2),
 864         0x00000000,
 865         (0x5e00 << 16) | (0xc910 >> 2),
 866         0x00000000,
 867         (0x6e00 << 16) | (0xc910 >> 2),
 868         0x00000000,
 869         (0x7e00 << 16) | (0xc910 >> 2),
 870         0x00000000,
 871         (0x0e00 << 16) | (0xc99c >> 2),
 872         0x00000000,
 873         (0x0e00 << 16) | (0x9834 >> 2),
 874         0x00000000,
 875         (0x0000 << 16) | (0x30f00 >> 2),
 876         0x00000000,
 877         (0x0000 << 16) | (0x30f04 >> 2),
 878         0x00000000,
 879         (0x0000 << 16) | (0x30f08 >> 2),
 880         0x00000000,
 881         (0x0000 << 16) | (0x30f0c >> 2),
 882         0x00000000,
 883         (0x0600 << 16) | (0x9b7c >> 2),
 884         0x00000000,
 885         (0x0e00 << 16) | (0x8a14 >> 2),
 886         0x00000000,
 887         (0x0e00 << 16) | (0x8a18 >> 2),
 888         0x00000000,
 889         (0x0600 << 16) | (0x30a00 >> 2),
 890         0x00000000,
 891         (0x0e00 << 16) | (0x8bf0 >> 2),
 892         0x00000000,
 893         (0x0e00 << 16) | (0x8bcc >> 2),
 894         0x00000000,
 895         (0x0e00 << 16) | (0x8b24 >> 2),
 896         0x00000000,
 897         (0x0e00 << 16) | (0x30a04 >> 2),
 898         0x00000000,
 899         (0x0600 << 16) | (0x30a10 >> 2),
 900         0x00000000,
 901         (0x0600 << 16) | (0x30a14 >> 2),
 902         0x00000000,
 903         (0x0600 << 16) | (0x30a18 >> 2),
 904         0x00000000,
 905         (0x0600 << 16) | (0x30a2c >> 2),
 906         0x00000000,
 907         (0x0e00 << 16) | (0xc700 >> 2),
 908         0x00000000,
 909         (0x0e00 << 16) | (0xc704 >> 2),
 910         0x00000000,
 911         (0x0e00 << 16) | (0xc708 >> 2),
 912         0x00000000,
 913         (0x0e00 << 16) | (0xc768 >> 2),
 914         0x00000000,
 915         (0x0400 << 16) | (0xc770 >> 2),
 916         0x00000000,
 917         (0x0400 << 16) | (0xc774 >> 2),
 918         0x00000000,
 919         (0x0400 << 16) | (0xc798 >> 2),
 920         0x00000000,
 921         (0x0400 << 16) | (0xc79c >> 2),
 922         0x00000000,
 923         (0x0e00 << 16) | (0x9100 >> 2),
 924         0x00000000,
 925         (0x0e00 << 16) | (0x3c010 >> 2),
 926         0x00000000,
 927         (0x0e00 << 16) | (0x8c00 >> 2),
 928         0x00000000,
 929         (0x0e00 << 16) | (0x8c04 >> 2),
 930         0x00000000,
 931         (0x0e00 << 16) | (0x8c20 >> 2),
 932         0x00000000,
 933         (0x0e00 << 16) | (0x8c38 >> 2),
 934         0x00000000,
 935         (0x0e00 << 16) | (0x8c3c >> 2),
 936         0x00000000,
 937         (0x0e00 << 16) | (0xae00 >> 2),
 938         0x00000000,
 939         (0x0e00 << 16) | (0x9604 >> 2),
 940         0x00000000,
 941         (0x0e00 << 16) | (0xac08 >> 2),
 942         0x00000000,
 943         (0x0e00 << 16) | (0xac0c >> 2),
 944         0x00000000,
 945         (0x0e00 << 16) | (0xac10 >> 2),
 946         0x00000000,
 947         (0x0e00 << 16) | (0xac14 >> 2),
 948         0x00000000,
 949         (0x0e00 << 16) | (0xac58 >> 2),
 950         0x00000000,
 951         (0x0e00 << 16) | (0xac68 >> 2),
 952         0x00000000,
 953         (0x0e00 << 16) | (0xac6c >> 2),
 954         0x00000000,
 955         (0x0e00 << 16) | (0xac70 >> 2),
 956         0x00000000,
 957         (0x0e00 << 16) | (0xac74 >> 2),
 958         0x00000000,
 959         (0x0e00 << 16) | (0xac78 >> 2),
 960         0x00000000,
 961         (0x0e00 << 16) | (0xac7c >> 2),
 962         0x00000000,
 963         (0x0e00 << 16) | (0xac80 >> 2),
 964         0x00000000,
 965         (0x0e00 << 16) | (0xac84 >> 2),
 966         0x00000000,
 967         (0x0e00 << 16) | (0xac88 >> 2),
 968         0x00000000,
 969         (0x0e00 << 16) | (0xac8c >> 2),
 970         0x00000000,
 971         (0x0e00 << 16) | (0x970c >> 2),
 972         0x00000000,
 973         (0x0e00 << 16) | (0x9714 >> 2),
 974         0x00000000,
 975         (0x0e00 << 16) | (0x9718 >> 2),
 976         0x00000000,
 977         (0x0e00 << 16) | (0x971c >> 2),
 978         0x00000000,
 979         (0x0e00 << 16) | (0x31068 >> 2),
 980         0x00000000,
 981         (0x4e00 << 16) | (0x31068 >> 2),
 982         0x00000000,
 983         (0x5e00 << 16) | (0x31068 >> 2),
 984         0x00000000,
 985         (0x6e00 << 16) | (0x31068 >> 2),
 986         0x00000000,
 987         (0x7e00 << 16) | (0x31068 >> 2),
 988         0x00000000,
 989         (0x0e00 << 16) | (0xcd10 >> 2),
 990         0x00000000,
 991         (0x0e00 << 16) | (0xcd14 >> 2),
 992         0x00000000,
 993         (0x0e00 << 16) | (0x88b0 >> 2),
 994         0x00000000,
 995         (0x0e00 << 16) | (0x88b4 >> 2),
 996         0x00000000,
 997         (0x0e00 << 16) | (0x88b8 >> 2),
 998         0x00000000,
 999         (0x0e00 << 16) | (0x88bc >> 2),
1000         0x00000000,
1001         (0x0400 << 16) | (0x89c0 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88c4 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88c8 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x88d0 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x88d4 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x88d8 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x8980 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30938 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x3093c >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30940 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x89a0 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x30900 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x30904 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x89b4 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3e1fc >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x3c210 >> 2),
1032         0x00000000,
1033         (0x0e00 << 16) | (0x3c214 >> 2),
1034         0x00000000,
1035         (0x0e00 << 16) | (0x3c218 >> 2),
1036         0x00000000,
1037         (0x0e00 << 16) | (0x8904 >> 2),
1038         0x00000000,
             /*
              * Bare word; exactly five descriptors follow it, none with a
              * trailing zero word.
              * NOTE(review): presumably a count of the entries below -- confirm.
              */
1039         0x5,
1040         (0x0e00 << 16) | (0x8c28 >> 2),
1041         (0x0e00 << 16) | (0x8c2c >> 2),
1042         (0x0e00 << 16) | (0x8c30 >> 2),
1043         (0x0e00 << 16) | (0x8c34 >> 2),
1044         (0x0e00 << 16) | (0x9600 >> 2),
1045 };
1046 
/*
 * Bonaire golden SPM register setting: a single row of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value }, applied
 * by cik_init_golden_registers() -- the consumer is not visible in this part
 * of the file; confirm.
 */
1047 static const u32 bonaire_golden_spm_registers[] =
1048 {
1049         0x30800, 0xe0ffffff, 0xe0000000
1050 };
1051 
/*
 * Bonaire golden "common" register settings: four rows of three words, every
 * row using a full 0xffffffff second word.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 */
1052 static const u32 bonaire_golden_common_registers[] =
1053 {
1054         0xc770, 0xffffffff, 0x00000800,
1055         0xc774, 0xffffffff, 0x00000800,
1056         0xc798, 0xffffffff, 0x00007fbf,
1057         0xc79c, 0xffffffff, 0x00007faf
1058 };
1059 
/*
 * Bonaire golden register settings, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 * NOTE(review): several rows carry third-word bits that lie outside the
 * second word (e.g. 0x3e78: 0x00000001 / 0x00000002, 0x9a10: 0x00010000 /
 * 0x00058208); whether that is intended depends on how the consumer combines
 * the two words.
 */
1060 static const u32 bonaire_golden_registers[] =
1061 {
1062         0x3354, 0x00000333, 0x00000333,
1063         0x3350, 0x000c0fc0, 0x00040200,
1064         0x9a10, 0x00010000, 0x00058208,
1065         0x3c000, 0xffff1fff, 0x00140000,
1066         0x3c200, 0xfdfc0fff, 0x00000100,
1067         0x3c234, 0x40000000, 0x40000200,
1068         0x9830, 0xffffffff, 0x00000000,
1069         0x9834, 0xf00fffff, 0x00000400,
1070         0x9838, 0x0002021c, 0x00020200,
1071         0xc78, 0x00000080, 0x00000000,
1072         0x5bb0, 0x000000f0, 0x00000070,
1073         0x5bc0, 0xf0311fff, 0x80300000,
1074         0x98f8, 0x73773777, 0x12010001,
1075         0x350c, 0x00810000, 0x408af000,
1076         0x7030, 0x31000111, 0x00000011,
1077         0x2f48, 0x73773777, 0x12010001,
1078         0x220c, 0x00007fb6, 0x0021a1b1,
1079         0x2210, 0x00007fb6, 0x002021b1,
1080         0x2180, 0x00007fb6, 0x00002191,
1081         0x2218, 0x00007fb6, 0x002121b1,
1082         0x221c, 0x00007fb6, 0x002021b1,
1083         0x21dc, 0x00007fb6, 0x00002191,
1084         0x21e0, 0x00007fb6, 0x00002191,
1085         0x3628, 0x0000003f, 0x0000000a,
1086         0x362c, 0x0000003f, 0x0000000a,
1087         0x2ae4, 0x00073ffe, 0x000022a2,
1088         0x240c, 0x000007ff, 0x00000000,
1089         0x8a14, 0xf000003f, 0x00000007,
1090         0x8bf0, 0x00002001, 0x00000001,
1091         0x8b24, 0xffffffff, 0x00ffffff,
1092         0x30a04, 0x0000ff0f, 0x00000000,
1093         0x28a4c, 0x07ffffff, 0x06000000,
1094         0x4d8, 0x00000fff, 0x00000100,
1095         0x3e78, 0x00000001, 0x00000002,
1096         0x9100, 0x03000000, 0x0362c688,
1097         0x8c00, 0x000000ff, 0x00000001,
1098         0xe40, 0x00001fff, 0x00001fff,
1099         0x9060, 0x0000007f, 0x00000020,
1100         0x9508, 0x00010000, 0x00010000,
1101         0xac14, 0x000003ff, 0x000000f3,
1102         0xac0c, 0xffffffff, 0x00001032
1103 };
1104 
/*
 * Bonaire MGCG/CGCG init table, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows of
 * clock-gating defaults (going by the name), applied by
 * cik_init_golden_registers() -- confirm against the consumer.
 *
 * Row order matters if rows are applied sequentially: 0x30800 appears twice
 * with the same contents and 0x55e4 appears twice with different second and
 * third words.
 *
 * Offsets 0x3c020..0x3c0a8 form seven groups of five consecutive registers,
 * each group repeating the same five third-word values.
 */
1105 static const u32 bonaire_mgcg_cgcg_init[] =
1106 {
1107         0xc420, 0xffffffff, 0xfffffffc,
1108         0x30800, 0xffffffff, 0xe0000000,
1109         0x3c2a0, 0xffffffff, 0x00000100,
1110         0x3c208, 0xffffffff, 0x00000100,
1111         0x3c2c0, 0xffffffff, 0xc0000100,
1112         0x3c2c8, 0xffffffff, 0xc0000100,
1113         0x3c2c4, 0xffffffff, 0xc0000100,
1114         0x55e4, 0xffffffff, 0x00600100,
1115         0x3c280, 0xffffffff, 0x00000100,
1116         0x3c214, 0xffffffff, 0x06000100,
1117         0x3c220, 0xffffffff, 0x00000100,
1118         0x3c218, 0xffffffff, 0x06000100,
1119         0x3c204, 0xffffffff, 0x00000100,
1120         0x3c2e0, 0xffffffff, 0x00000100,
1121         0x3c224, 0xffffffff, 0x00000100,
1122         0x3c200, 0xffffffff, 0x00000100,
1123         0x3c230, 0xffffffff, 0x00000100,
1124         0x3c234, 0xffffffff, 0x00000100,
1125         0x3c250, 0xffffffff, 0x00000100,
1126         0x3c254, 0xffffffff, 0x00000100,
1127         0x3c258, 0xffffffff, 0x00000100,
1128         0x3c25c, 0xffffffff, 0x00000100,
1129         0x3c260, 0xffffffff, 0x00000100,
1130         0x3c27c, 0xffffffff, 0x00000100,
1131         0x3c278, 0xffffffff, 0x00000100,
1132         0x3c210, 0xffffffff, 0x06000100,
1133         0x3c290, 0xffffffff, 0x00000100,
1134         0x3c274, 0xffffffff, 0x00000100,
1135         0x3c2b4, 0xffffffff, 0x00000100,
1136         0x3c2b0, 0xffffffff, 0x00000100,
1137         0x3c270, 0xffffffff, 0x00000100,
1138         0x30800, 0xffffffff, 0xe0000000,
1139         0x3c020, 0xffffffff, 0x00010000,
1140         0x3c024, 0xffffffff, 0x00030002,
1141         0x3c028, 0xffffffff, 0x00040007,
1142         0x3c02c, 0xffffffff, 0x00060005,
1143         0x3c030, 0xffffffff, 0x00090008,
1144         0x3c034, 0xffffffff, 0x00010000,
1145         0x3c038, 0xffffffff, 0x00030002,
1146         0x3c03c, 0xffffffff, 0x00040007,
1147         0x3c040, 0xffffffff, 0x00060005,
1148         0x3c044, 0xffffffff, 0x00090008,
1149         0x3c048, 0xffffffff, 0x00010000,
1150         0x3c04c, 0xffffffff, 0x00030002,
1151         0x3c050, 0xffffffff, 0x00040007,
1152         0x3c054, 0xffffffff, 0x00060005,
1153         0x3c058, 0xffffffff, 0x00090008,
1154         0x3c05c, 0xffffffff, 0x00010000,
1155         0x3c060, 0xffffffff, 0x00030002,
1156         0x3c064, 0xffffffff, 0x00040007,
1157         0x3c068, 0xffffffff, 0x00060005,
1158         0x3c06c, 0xffffffff, 0x00090008,
1159         0x3c070, 0xffffffff, 0x00010000,
1160         0x3c074, 0xffffffff, 0x00030002,
1161         0x3c078, 0xffffffff, 0x00040007,
1162         0x3c07c, 0xffffffff, 0x00060005,
1163         0x3c080, 0xffffffff, 0x00090008,
1164         0x3c084, 0xffffffff, 0x00010000,
1165         0x3c088, 0xffffffff, 0x00030002,
1166         0x3c08c, 0xffffffff, 0x00040007,
1167         0x3c090, 0xffffffff, 0x00060005,
1168         0x3c094, 0xffffffff, 0x00090008,
1169         0x3c098, 0xffffffff, 0x00010000,
1170         0x3c09c, 0xffffffff, 0x00030002,
1171         0x3c0a0, 0xffffffff, 0x00040007,
1172         0x3c0a4, 0xffffffff, 0x00060005,
1173         0x3c0a8, 0xffffffff, 0x00090008,
1174         0x3c000, 0xffffffff, 0x96e00200,
1175         0x8708, 0xffffffff, 0x00900100,
1176         0xc424, 0xffffffff, 0x0020003f,
1177         0x38, 0xffffffff, 0x0140001c,
1178         0x3c, 0x000f0000, 0x000f0000,
1179         0x220, 0xffffffff, 0xC060000C,
1180         0x224, 0xc0000fff, 0x00000100,
1181         0xf90, 0xffffffff, 0x00000100,
1182         0xf98, 0x00000101, 0x00000000,
1183         0x20a8, 0xffffffff, 0x00000104,
1184         0x55e4, 0xff000fff, 0x00000100,
1185         0x30cc, 0xc0000fff, 0x00000104,
1186         0xc1e4, 0x00000001, 0x00000001,
1187         0xd00c, 0xff000ff0, 0x00000100,
1188         0xd80c, 0xff000ff0, 0x00000100
1189 };
1190 
/*
 * Spectre golden SPM register setting: a single row of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value }, applied
 * by cik_init_golden_registers() -- the consumer is not visible in this part
 * of the file; confirm.
 */
1191 static const u32 spectre_golden_spm_registers[] =
1192 {
1193         0x30800, 0xe0ffffff, 0xe0000000
1194 };
1195 
/*
 * Spectre golden "common" register settings: four rows of three words, every
 * row using a full 0xffffffff second word.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 */
1196 static const u32 spectre_golden_common_registers[] =
1197 {
1198         0xc770, 0xffffffff, 0x00000800,
1199         0xc774, 0xffffffff, 0x00000800,
1200         0xc798, 0xffffffff, 0x00007fbf,
1201         0xc79c, 0xffffffff, 0x00007faf
1202 };
1203 
/*
 * Spectre golden register settings, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 * NOTE(review): some rows carry third-word bits that lie outside the second
 * word (e.g. 0x3e78: 0x00000001 / 0x00000002); whether that is intended
 * depends on how the consumer combines the two words.
 */
1204 static const u32 spectre_golden_registers[] =
1205 {
1206         0x3c000, 0xffff1fff, 0x96940200,
1207         0x3c00c, 0xffff0001, 0xff000000,
1208         0x3c200, 0xfffc0fff, 0x00000100,
1209         0x6ed8, 0x00010101, 0x00010000,
1210         0x9834, 0xf00fffff, 0x00000400,
1211         0x9838, 0xfffffffc, 0x00020200,
1212         0x5bb0, 0x000000f0, 0x00000070,
1213         0x5bc0, 0xf0311fff, 0x80300000,
1214         0x98f8, 0x73773777, 0x12010001,
1215         0x9b7c, 0x00ff0000, 0x00fc0000,
1216         0x2f48, 0x73773777, 0x12010001,
1217         0x8a14, 0xf000003f, 0x00000007,
1218         0x8b24, 0xffffffff, 0x00ffffff,
1219         0x28350, 0x3f3f3fff, 0x00000082,
1220         0x28354, 0x0000003f, 0x00000000,
1221         0x3e78, 0x00000001, 0x00000002,
1222         0x913c, 0xffff03df, 0x00000004,
1223         0xc768, 0x00000008, 0x00000008,
1224         0x8c00, 0x000008ff, 0x00000800,
1225         0x9508, 0x00010000, 0x00010000,
1226         0xac0c, 0xffffffff, 0x54763210,
1227         0x214f8, 0x01ff01ff, 0x00000002,
1228         0x21498, 0x007ff800, 0x00200000,
1229         0x2015c, 0xffffffff, 0x00000f40,
1230         0x30934, 0xffffffff, 0x00000001
1231 };
1232 
/*
 * Spectre MGCG/CGCG init table, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows of
 * clock-gating defaults (going by the name), applied by
 * cik_init_golden_registers() -- confirm against the consumer.
 *
 * Row order matters if rows are applied sequentially: 0x30800 appears twice
 * with the same contents and 0x55e4 appears twice with different second and
 * third words.
 *
 * Offsets 0x3c020..0x3c0bc form eight groups of five consecutive registers,
 * each group repeating the same five third-word values.
 */
1233 static const u32 spectre_mgcg_cgcg_init[] =
1234 {
1235         0xc420, 0xffffffff, 0xfffffffc,
1236         0x30800, 0xffffffff, 0xe0000000,
1237         0x3c2a0, 0xffffffff, 0x00000100,
1238         0x3c208, 0xffffffff, 0x00000100,
1239         0x3c2c0, 0xffffffff, 0x00000100,
1240         0x3c2c8, 0xffffffff, 0x00000100,
1241         0x3c2c4, 0xffffffff, 0x00000100,
1242         0x55e4, 0xffffffff, 0x00600100,
1243         0x3c280, 0xffffffff, 0x00000100,
1244         0x3c214, 0xffffffff, 0x06000100,
1245         0x3c220, 0xffffffff, 0x00000100,
1246         0x3c218, 0xffffffff, 0x06000100,
1247         0x3c204, 0xffffffff, 0x00000100,
1248         0x3c2e0, 0xffffffff, 0x00000100,
1249         0x3c224, 0xffffffff, 0x00000100,
1250         0x3c200, 0xffffffff, 0x00000100,
1251         0x3c230, 0xffffffff, 0x00000100,
1252         0x3c234, 0xffffffff, 0x00000100,
1253         0x3c250, 0xffffffff, 0x00000100,
1254         0x3c254, 0xffffffff, 0x00000100,
1255         0x3c258, 0xffffffff, 0x00000100,
1256         0x3c25c, 0xffffffff, 0x00000100,
1257         0x3c260, 0xffffffff, 0x00000100,
1258         0x3c27c, 0xffffffff, 0x00000100,
1259         0x3c278, 0xffffffff, 0x00000100,
1260         0x3c210, 0xffffffff, 0x06000100,
1261         0x3c290, 0xffffffff, 0x00000100,
1262         0x3c274, 0xffffffff, 0x00000100,
1263         0x3c2b4, 0xffffffff, 0x00000100,
1264         0x3c2b0, 0xffffffff, 0x00000100,
1265         0x3c270, 0xffffffff, 0x00000100,
1266         0x30800, 0xffffffff, 0xe0000000,
1267         0x3c020, 0xffffffff, 0x00010000,
1268         0x3c024, 0xffffffff, 0x00030002,
1269         0x3c028, 0xffffffff, 0x00040007,
1270         0x3c02c, 0xffffffff, 0x00060005,
1271         0x3c030, 0xffffffff, 0x00090008,
1272         0x3c034, 0xffffffff, 0x00010000,
1273         0x3c038, 0xffffffff, 0x00030002,
1274         0x3c03c, 0xffffffff, 0x00040007,
1275         0x3c040, 0xffffffff, 0x00060005,
1276         0x3c044, 0xffffffff, 0x00090008,
1277         0x3c048, 0xffffffff, 0x00010000,
1278         0x3c04c, 0xffffffff, 0x00030002,
1279         0x3c050, 0xffffffff, 0x00040007,
1280         0x3c054, 0xffffffff, 0x00060005,
1281         0x3c058, 0xffffffff, 0x00090008,
1282         0x3c05c, 0xffffffff, 0x00010000,
1283         0x3c060, 0xffffffff, 0x00030002,
1284         0x3c064, 0xffffffff, 0x00040007,
1285         0x3c068, 0xffffffff, 0x00060005,
1286         0x3c06c, 0xffffffff, 0x00090008,
1287         0x3c070, 0xffffffff, 0x00010000,
1288         0x3c074, 0xffffffff, 0x00030002,
1289         0x3c078, 0xffffffff, 0x00040007,
1290         0x3c07c, 0xffffffff, 0x00060005,
1291         0x3c080, 0xffffffff, 0x00090008,
1292         0x3c084, 0xffffffff, 0x00010000,
1293         0x3c088, 0xffffffff, 0x00030002,
1294         0x3c08c, 0xffffffff, 0x00040007,
1295         0x3c090, 0xffffffff, 0x00060005,
1296         0x3c094, 0xffffffff, 0x00090008,
1297         0x3c098, 0xffffffff, 0x00010000,
1298         0x3c09c, 0xffffffff, 0x00030002,
1299         0x3c0a0, 0xffffffff, 0x00040007,
1300         0x3c0a4, 0xffffffff, 0x00060005,
1301         0x3c0a8, 0xffffffff, 0x00090008,
1302         0x3c0ac, 0xffffffff, 0x00010000,
1303         0x3c0b0, 0xffffffff, 0x00030002,
1304         0x3c0b4, 0xffffffff, 0x00040007,
1305         0x3c0b8, 0xffffffff, 0x00060005,
1306         0x3c0bc, 0xffffffff, 0x00090008,
1307         0x3c000, 0xffffffff, 0x96e00200,
1308         0x8708, 0xffffffff, 0x00900100,
1309         0xc424, 0xffffffff, 0x0020003f,
1310         0x38, 0xffffffff, 0x0140001c,
1311         0x3c, 0x000f0000, 0x000f0000,
1312         0x220, 0xffffffff, 0xC060000C,
1313         0x224, 0xc0000fff, 0x00000100,
1314         0xf90, 0xffffffff, 0x00000100,
1315         0xf98, 0x00000101, 0x00000000,
1316         0x20a8, 0xffffffff, 0x00000104,
1317         0x55e4, 0xff000fff, 0x00000100,
1318         0x30cc, 0xc0000fff, 0x00000104,
1319         0xc1e4, 0x00000001, 0x00000001,
1320         0xd00c, 0xff000ff0, 0x00000100,
1321         0xd80c, 0xff000ff0, 0x00000100
1322 };
1323 
/*
 * Kalindi golden SPM register setting: a single row of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value }, applied
 * by cik_init_golden_registers() -- the consumer is not visible in this part
 * of the file; confirm.
 */
1324 static const u32 kalindi_golden_spm_registers[] =
1325 {
1326         0x30800, 0xe0ffffff, 0xe0000000
1327 };
1328 
/*
 * Kalindi golden "common" register settings: four rows of three words, every
 * row using a full 0xffffffff second word.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 */
1329 static const u32 kalindi_golden_common_registers[] =
1330 {
1331         0xc770, 0xffffffff, 0x00000800,
1332         0xc774, 0xffffffff, 0x00000800,
1333         0xc798, 0xffffffff, 0x00007fbf,
1334         0xc79c, 0xffffffff, 0x00007faf
1335 };
1336 
/*
 * Kalindi golden register settings, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 * NOTE(review): some rows carry third-word bits that lie outside the second
 * word (e.g. 0x3e78: 0x00000001 / 0x00000002); whether that is intended
 * depends on how the consumer combines the two words.
 */
1337 static const u32 kalindi_golden_registers[] =
1338 {
1339         0x3c000, 0xffffdfff, 0x6e944040,
1340         0x55e4, 0xff607fff, 0xfc000100,
1341         0x3c220, 0xff000fff, 0x00000100,
1342         0x3c224, 0xff000fff, 0x00000100,
1343         0x3c200, 0xfffc0fff, 0x00000100,
1344         0x6ed8, 0x00010101, 0x00010000,
1345         0x9830, 0xffffffff, 0x00000000,
1346         0x9834, 0xf00fffff, 0x00000400,
1347         0x5bb0, 0x000000f0, 0x00000070,
1348         0x5bc0, 0xf0311fff, 0x80300000,
1349         0x98f8, 0x73773777, 0x12010001,
1350         0x98fc, 0xffffffff, 0x00000010,
1351         0x9b7c, 0x00ff0000, 0x00fc0000,
1352         0x8030, 0x00001f0f, 0x0000100a,
1353         0x2f48, 0x73773777, 0x12010001,
1354         0x2408, 0x000fffff, 0x000c007f,
1355         0x8a14, 0xf000003f, 0x00000007,
1356         0x8b24, 0x3fff3fff, 0x00ffcfff,
1357         0x30a04, 0x0000ff0f, 0x00000000,
1358         0x28a4c, 0x07ffffff, 0x06000000,
1359         0x4d8, 0x00000fff, 0x00000100,
1360         0x3e78, 0x00000001, 0x00000002,
1361         0xc768, 0x00000008, 0x00000008,
1362         0x8c00, 0x000000ff, 0x00000003,
1363         0x214f8, 0x01ff01ff, 0x00000002,
1364         0x21498, 0x007ff800, 0x00200000,
1365         0x2015c, 0xffffffff, 0x00000f40,
1366         0x88c4, 0x001f3ae3, 0x00000082,
1367         0x88d4, 0x0000001f, 0x00000010,
1368         0x30934, 0xffffffff, 0x00000000
1369 };
1370 
/*
 * Kalindi MGCG/CGCG init table, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows of
 * clock-gating defaults (going by the name), applied by
 * cik_init_golden_registers() -- confirm against the consumer.
 *
 * Row order matters if rows are applied sequentially: 0x30800 appears twice
 * with the same contents and 0x55e4 appears twice with different second and
 * third words.
 *
 * Offsets 0x3c020..0x3c044 form two groups of five consecutive registers,
 * each group repeating the same five third-word values.
 */
1371 static const u32 kalindi_mgcg_cgcg_init[] =
1372 {
1373         0xc420, 0xffffffff, 0xfffffffc,
1374         0x30800, 0xffffffff, 0xe0000000,
1375         0x3c2a0, 0xffffffff, 0x00000100,
1376         0x3c208, 0xffffffff, 0x00000100,
1377         0x3c2c0, 0xffffffff, 0x00000100,
1378         0x3c2c8, 0xffffffff, 0x00000100,
1379         0x3c2c4, 0xffffffff, 0x00000100,
1380         0x55e4, 0xffffffff, 0x00600100,
1381         0x3c280, 0xffffffff, 0x00000100,
1382         0x3c214, 0xffffffff, 0x06000100,
1383         0x3c220, 0xffffffff, 0x00000100,
1384         0x3c218, 0xffffffff, 0x06000100,
1385         0x3c204, 0xffffffff, 0x00000100,
1386         0x3c2e0, 0xffffffff, 0x00000100,
1387         0x3c224, 0xffffffff, 0x00000100,
1388         0x3c200, 0xffffffff, 0x00000100,
1389         0x3c230, 0xffffffff, 0x00000100,
1390         0x3c234, 0xffffffff, 0x00000100,
1391         0x3c250, 0xffffffff, 0x00000100,
1392         0x3c254, 0xffffffff, 0x00000100,
1393         0x3c258, 0xffffffff, 0x00000100,
1394         0x3c25c, 0xffffffff, 0x00000100,
1395         0x3c260, 0xffffffff, 0x00000100,
1396         0x3c27c, 0xffffffff, 0x00000100,
1397         0x3c278, 0xffffffff, 0x00000100,
1398         0x3c210, 0xffffffff, 0x06000100,
1399         0x3c290, 0xffffffff, 0x00000100,
1400         0x3c274, 0xffffffff, 0x00000100,
1401         0x3c2b4, 0xffffffff, 0x00000100,
1402         0x3c2b0, 0xffffffff, 0x00000100,
1403         0x3c270, 0xffffffff, 0x00000100,
1404         0x30800, 0xffffffff, 0xe0000000,
1405         0x3c020, 0xffffffff, 0x00010000,
1406         0x3c024, 0xffffffff, 0x00030002,
1407         0x3c028, 0xffffffff, 0x00040007,
1408         0x3c02c, 0xffffffff, 0x00060005,
1409         0x3c030, 0xffffffff, 0x00090008,
1410         0x3c034, 0xffffffff, 0x00010000,
1411         0x3c038, 0xffffffff, 0x00030002,
1412         0x3c03c, 0xffffffff, 0x00040007,
1413         0x3c040, 0xffffffff, 0x00060005,
1414         0x3c044, 0xffffffff, 0x00090008,
1415         0x3c000, 0xffffffff, 0x96e00200,
1416         0x8708, 0xffffffff, 0x00900100,
1417         0xc424, 0xffffffff, 0x0020003f,
1418         0x38, 0xffffffff, 0x0140001c,
1419         0x3c, 0x000f0000, 0x000f0000,
1420         0x220, 0xffffffff, 0xC060000C,
1421         0x224, 0xc0000fff, 0x00000100,
1422         0x20a8, 0xffffffff, 0x00000104,
1423         0x55e4, 0xff000fff, 0x00000100,
1424         0x30cc, 0xc0000fff, 0x00000104,
1425         0xc1e4, 0x00000001, 0x00000001,
1426         0xd00c, 0xff000ff0, 0x00000100,
1427         0xd80c, 0xff000ff0, 0x00000100
1428 };
1429 
/*
 * Hawaii golden SPM register setting: a single row of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value }, applied
 * by cik_init_golden_registers() -- the consumer is not visible in this part
 * of the file; confirm.
 */
1430 static const u32 hawaii_golden_spm_registers[] =
1431 {
1432         0x30800, 0xe0ffffff, 0xe0000000
1433 };
1434 
/*
 * Hawaii golden "common" register settings: five rows of three words, every
 * row using a full 0xffffffff second word.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 */
1435 static const u32 hawaii_golden_common_registers[] =
1436 {
1437         0x30800, 0xffffffff, 0xe0000000,
1438         0x28350, 0xffffffff, 0x3a00161a,
1439         0x28354, 0xffffffff, 0x0000002e,
1440         0x9a10, 0xffffffff, 0x00018208,
1441         0x98f8, 0xffffffff, 0x12011003
1442 };
1443 
/*
 * Hawaii golden register settings, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows
 * applied by cik_init_golden_registers() -- confirm against the consumer.
 * NOTE(review): several rows carry third-word bits that lie outside the
 * second word (e.g. 0x3e78: 0x00000001 / 0x00000002, 0x8c00: 0x000000ff /
 * 0x00000800); whether that is intended depends on how the consumer combines
 * the two words.
 */
1444 static const u32 hawaii_golden_registers[] =
1445 {
1446         0x3354, 0x00000333, 0x00000333,
1447         0x9a10, 0x00010000, 0x00058208,
1448         0x9830, 0xffffffff, 0x00000000,
1449         0x9834, 0xf00fffff, 0x00000400,
1450         0x9838, 0x0002021c, 0x00020200,
1451         0xc78, 0x00000080, 0x00000000,
1452         0x5bb0, 0x000000f0, 0x00000070,
1453         0x5bc0, 0xf0311fff, 0x80300000,
1454         0x350c, 0x00810000, 0x408af000,
1455         0x7030, 0x31000111, 0x00000011,
1456         0x2f48, 0x73773777, 0x12010001,
1457         0x2120, 0x0000007f, 0x0000001b,
1458         0x21dc, 0x00007fb6, 0x00002191,
1459         0x3628, 0x0000003f, 0x0000000a,
1460         0x362c, 0x0000003f, 0x0000000a,
1461         0x2ae4, 0x00073ffe, 0x000022a2,
1462         0x240c, 0x000007ff, 0x00000000,
1463         0x8bf0, 0x00002001, 0x00000001,
1464         0x8b24, 0xffffffff, 0x00ffffff,
1465         0x30a04, 0x0000ff0f, 0x00000000,
1466         0x28a4c, 0x07ffffff, 0x06000000,
1467         0x3e78, 0x00000001, 0x00000002,
1468         0xc768, 0x00000008, 0x00000008,
1469         0xc770, 0x00000f00, 0x00000800,
1470         0xc774, 0x00000f00, 0x00000800,
1471         0xc798, 0x00ffffff, 0x00ff7fbf,
1472         0xc79c, 0x00ffffff, 0x00ff7faf,
1473         0x8c00, 0x000000ff, 0x00000800,
1474         0xe40, 0x00001fff, 0x00001fff,
1475         0x9060, 0x0000007f, 0x00000020,
1476         0x9508, 0x00010000, 0x00010000,
1477         0xae00, 0x00100000, 0x000ff07c,
1478         0xac14, 0x000003ff, 0x0000000f,
1479         0xac10, 0xffffffff, 0x7564fdec,
1480         0xac0c, 0xffffffff, 0x3120b9a8,
1481         0xac08, 0x20000000, 0x0f9c0000
1482 };
1483 
/*
 * Hawaii MGCG/CGCG init table, stored as rows of three words.
 * NOTE(review): presumably { register offset, AND mask, OR value } rows of
 * clock-gating defaults (going by the name), applied by
 * cik_init_golden_registers() -- confirm against the consumer.
 *
 * Row order matters if rows are applied sequentially: 0x30800 appears twice
 * with the same contents and 0x55e4 appears twice with different second and
 * third words.
 *
 * Offsets 0x3c020..0x3c0f8 form eleven groups of five consecutive registers,
 * each group repeating the same five third-word values.
 */
1484 static const u32 hawaii_mgcg_cgcg_init[] =
1485 {
1486         0xc420, 0xffffffff, 0xfffffffd,
1487         0x30800, 0xffffffff, 0xe0000000,
1488         0x3c2a0, 0xffffffff, 0x00000100,
1489         0x3c208, 0xffffffff, 0x00000100,
1490         0x3c2c0, 0xffffffff, 0x00000100,
1491         0x3c2c8, 0xffffffff, 0x00000100,
1492         0x3c2c4, 0xffffffff, 0x00000100,
1493         0x55e4, 0xffffffff, 0x00200100,
1494         0x3c280, 0xffffffff, 0x00000100,
1495         0x3c214, 0xffffffff, 0x06000100,
1496         0x3c220, 0xffffffff, 0x00000100,
1497         0x3c218, 0xffffffff, 0x06000100,
1498         0x3c204, 0xffffffff, 0x00000100,
1499         0x3c2e0, 0xffffffff, 0x00000100,
1500         0x3c224, 0xffffffff, 0x00000100,
1501         0x3c200, 0xffffffff, 0x00000100,
1502         0x3c230, 0xffffffff, 0x00000100,
1503         0x3c234, 0xffffffff, 0x00000100,
1504         0x3c250, 0xffffffff, 0x00000100,
1505         0x3c254, 0xffffffff, 0x00000100,
1506         0x3c258, 0xffffffff, 0x00000100,
1507         0x3c25c, 0xffffffff, 0x00000100,
1508         0x3c260, 0xffffffff, 0x00000100,
1509         0x3c27c, 0xffffffff, 0x00000100,
1510         0x3c278, 0xffffffff, 0x00000100,
1511         0x3c210, 0xffffffff, 0x06000100,
1512         0x3c290, 0xffffffff, 0x00000100,
1513         0x3c274, 0xffffffff, 0x00000100,
1514         0x3c2b4, 0xffffffff, 0x00000100,
1515         0x3c2b0, 0xffffffff, 0x00000100,
1516         0x3c270, 0xffffffff, 0x00000100,
1517         0x30800, 0xffffffff, 0xe0000000,
1518         0x3c020, 0xffffffff, 0x00010000,
1519         0x3c024, 0xffffffff, 0x00030002,
1520         0x3c028, 0xffffffff, 0x00040007,
1521         0x3c02c, 0xffffffff, 0x00060005,
1522         0x3c030, 0xffffffff, 0x00090008,
1523         0x3c034, 0xffffffff, 0x00010000,
1524         0x3c038, 0xffffffff, 0x00030002,
1525         0x3c03c, 0xffffffff, 0x00040007,
1526         0x3c040, 0xffffffff, 0x00060005,
1527         0x3c044, 0xffffffff, 0x00090008,
1528         0x3c048, 0xffffffff, 0x00010000,
1529         0x3c04c, 0xffffffff, 0x00030002,
1530         0x3c050, 0xffffffff, 0x00040007,
1531         0x3c054, 0xffffffff, 0x00060005,
1532         0x3c058, 0xffffffff, 0x00090008,
1533         0x3c05c, 0xffffffff, 0x00010000,
1534         0x3c060, 0xffffffff, 0x00030002,
1535         0x3c064, 0xffffffff, 0x00040007,
1536         0x3c068, 0xffffffff, 0x00060005,
1537         0x3c06c, 0xffffffff, 0x00090008,
1538         0x3c070, 0xffffffff, 0x00010000,
1539         0x3c074, 0xffffffff, 0x00030002,
1540         0x3c078, 0xffffffff, 0x00040007,
1541         0x3c07c, 0xffffffff, 0x00060005,
1542         0x3c080, 0xffffffff, 0x00090008,
1543         0x3c084, 0xffffffff, 0x00010000,
1544         0x3c088, 0xffffffff, 0x00030002,
1545         0x3c08c, 0xffffffff, 0x00040007,
1546         0x3c090, 0xffffffff, 0x00060005,
1547         0x3c094, 0xffffffff, 0x00090008,
1548         0x3c098, 0xffffffff, 0x00010000,
1549         0x3c09c, 0xffffffff, 0x00030002,
1550         0x3c0a0, 0xffffffff, 0x00040007,
1551         0x3c0a4, 0xffffffff, 0x00060005,
1552         0x3c0a8, 0xffffffff, 0x00090008,
1553         0x3c0ac, 0xffffffff, 0x00010000,
1554         0x3c0b0, 0xffffffff, 0x00030002,
1555         0x3c0b4, 0xffffffff, 0x00040007,
1556         0x3c0b8, 0xffffffff, 0x00060005,
1557         0x3c0bc, 0xffffffff, 0x00090008,
1558         0x3c0c0, 0xffffffff, 0x00010000,
1559         0x3c0c4, 0xffffffff, 0x00030002,
1560         0x3c0c8, 0xffffffff, 0x00040007,
1561         0x3c0cc, 0xffffffff, 0x00060005,
1562         0x3c0d0, 0xffffffff, 0x00090008,
1563         0x3c0d4, 0xffffffff, 0x00010000,
1564         0x3c0d8, 0xffffffff, 0x00030002,
1565         0x3c0dc, 0xffffffff, 0x00040007,
1566         0x3c0e0, 0xffffffff, 0x00060005,
1567         0x3c0e4, 0xffffffff, 0x00090008,
1568         0x3c0e8, 0xffffffff, 0x00010000,
1569         0x3c0ec, 0xffffffff, 0x00030002,
1570         0x3c0f0, 0xffffffff, 0x00040007,
1571         0x3c0f4, 0xffffffff, 0x00060005,
1572         0x3c0f8, 0xffffffff, 0x00090008,
1573         0xc318, 0xffffffff, 0x00020200,
1574         0x3350, 0xffffffff, 0x00000200,
1575         0x15c0, 0xffffffff, 0x00000400,
1576         0x55e8, 0xffffffff, 0x00000000,
1577         0x2f50, 0xffffffff, 0x00000902,
1578         0x3c000, 0xffffffff, 0x96940200,
1579         0x8708, 0xffffffff, 0x00900100,
1580         0xc424, 0xffffffff, 0x0020003f,
1581         0x38, 0xffffffff, 0x0140001c,
1582         0x3c, 0x000f0000, 0x000f0000,
1583         0x220, 0xffffffff, 0xc060000c,
1584         0x224, 0xc0000fff, 0x00000100,
1585         0xf90, 0xffffffff, 0x00000100,
1586         0xf98, 0x00000101, 0x00000000,
1587         0x20a8, 0xffffffff, 0x00000104,
1588         0x55e4, 0xff000fff, 0x00000100,
1589         0x30cc, 0xc0000fff, 0x00000104,
1590         0xc1e4, 0x00000001, 0x00000001,
1591         0xd00c, 0xff000ff0, 0x00000100,
1592         0xd80c, 0xff000ff0, 0x00000100
1593 };
1594 
/*
 * Golden register settings for Godavari.  cik_init_golden_registers()
 * programs this table on CHIP_MULLINS via
 * radeon_program_register_sequence().
 *
 * NOTE(review): rows are presumably { register offset, mask, value }
 * triplets - confirm against radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * Was 0x98302: not dword aligned, unlike every other offset in
	 * this table, and it directly follows 0x9830, so it reads as a
	 * typo for 0x9834.
	 */
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1630 
1631 
1632 static void cik_init_golden_registers(struct radeon_device *rdev)
1633 {
1634         switch (rdev->family) {
1635         case CHIP_BONAIRE:
1636                 radeon_program_register_sequence(rdev,
1637                                                  bonaire_mgcg_cgcg_init,
1638                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1639                 radeon_program_register_sequence(rdev,
1640                                                  bonaire_golden_registers,
1641                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1642                 radeon_program_register_sequence(rdev,
1643                                                  bonaire_golden_common_registers,
1644                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1645                 radeon_program_register_sequence(rdev,
1646                                                  bonaire_golden_spm_registers,
1647                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1648                 break;
1649         case CHIP_KABINI:
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_mgcg_cgcg_init,
1652                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1653                 radeon_program_register_sequence(rdev,
1654                                                  kalindi_golden_registers,
1655                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1656                 radeon_program_register_sequence(rdev,
1657                                                  kalindi_golden_common_registers,
1658                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1659                 radeon_program_register_sequence(rdev,
1660                                                  kalindi_golden_spm_registers,
1661                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1662                 break;
1663         case CHIP_MULLINS:
1664                 radeon_program_register_sequence(rdev,
1665                                                  kalindi_mgcg_cgcg_init,
1666                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1667                 radeon_program_register_sequence(rdev,
1668                                                  godavari_golden_registers,
1669                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1670                 radeon_program_register_sequence(rdev,
1671                                                  kalindi_golden_common_registers,
1672                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1673                 radeon_program_register_sequence(rdev,
1674                                                  kalindi_golden_spm_registers,
1675                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1676                 break;
1677         case CHIP_KAVERI:
1678                 radeon_program_register_sequence(rdev,
1679                                                  spectre_mgcg_cgcg_init,
1680                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1681                 radeon_program_register_sequence(rdev,
1682                                                  spectre_golden_registers,
1683                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1684                 radeon_program_register_sequence(rdev,
1685                                                  spectre_golden_common_registers,
1686                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1687                 radeon_program_register_sequence(rdev,
1688                                                  spectre_golden_spm_registers,
1689                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1690                 break;
1691         case CHIP_HAWAII:
1692                 radeon_program_register_sequence(rdev,
1693                                                  hawaii_mgcg_cgcg_init,
1694                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1695                 radeon_program_register_sequence(rdev,
1696                                                  hawaii_golden_registers,
1697                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1698                 radeon_program_register_sequence(rdev,
1699                                                  hawaii_golden_common_registers,
1700                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1701                 radeon_program_register_sequence(rdev,
1702                                                  hawaii_golden_spm_registers,
1703                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1704                 break;
1705         default:
1706                 break;
1707         }
1708 }
1709 
1710 /**
1711  * cik_get_xclk - get the xclk
1712  *
1713  * @rdev: radeon_device pointer
1714  *
1715  * Returns the reference clock used by the gfx engine
1716  * (CIK).
1717  */
1718 u32 cik_get_xclk(struct radeon_device *rdev)
1719 {
1720         u32 reference_clock = rdev->clock.spll.reference_freq;
1721 
1722         if (rdev->flags & RADEON_IS_IGP) {
1723                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724                         return reference_clock / 2;
1725         } else {
1726                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727                         return reference_clock / 4;
1728         }
1729         return reference_clock;
1730 }
1731 
1732 /**
1733  * cik_mm_rdoorbell - read a doorbell dword
1734  *
1735  * @rdev: radeon_device pointer
1736  * @index: doorbell index
1737  *
1738  * Returns the value in the doorbell aperture at the
1739  * requested doorbell index (CIK).
1740  */
1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742 {
1743         if (index < rdev->doorbell.num_doorbells) {
1744                 return readl(rdev->doorbell.ptr + index);
1745         } else {
1746                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747                 return 0;
1748         }
1749 }
1750 
1751 /**
1752  * cik_mm_wdoorbell - write a doorbell dword
1753  *
1754  * @rdev: radeon_device pointer
1755  * @index: doorbell index
1756  * @v: value to write
1757  *
1758  * Writes @v to the doorbell aperture at the
1759  * requested doorbell index (CIK).
1760  */
1761 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762 {
1763         if (index < rdev->doorbell.num_doorbells) {
1764                 writel(v, rdev->doorbell.ptr + index);
1765         } else {
1766                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767         }
1768 }
1769 
1770 #define BONAIRE_IO_MC_REGS_SIZE 36
1771 
1772 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1773 {
1774         {0x00000070, 0x04400000},
1775         {0x00000071, 0x80c01803},
1776         {0x00000072, 0x00004004},
1777         {0x00000073, 0x00000100},
1778         {0x00000074, 0x00ff0000},
1779         {0x00000075, 0x34000000},
1780         {0x00000076, 0x08000014},
1781         {0x00000077, 0x00cc08ec},
1782         {0x00000078, 0x00000400},
1783         {0x00000079, 0x00000000},
1784         {0x0000007a, 0x04090000},
1785         {0x0000007c, 0x00000000},
1786         {0x0000007e, 0x4408a8e8},
1787         {0x0000007f, 0x00000304},
1788         {0x00000080, 0x00000000},
1789         {0x00000082, 0x00000001},
1790         {0x00000083, 0x00000002},
1791         {0x00000084, 0xf3e4f400},
1792         {0x00000085, 0x052024e3},
1793         {0x00000087, 0x00000000},
1794         {0x00000088, 0x01000000},
1795         {0x0000008a, 0x1c0a0000},
1796         {0x0000008b, 0xff010000},
1797         {0x0000008d, 0xffffefff},
1798         {0x0000008e, 0xfff3efff},
1799         {0x0000008f, 0xfff3efbf},
1800         {0x00000092, 0xf7ffffff},
1801         {0x00000093, 0xffffff7f},
1802         {0x00000095, 0x00101101},
1803         {0x00000096, 0x00000fff},
1804         {0x00000097, 0x00116fff},
1805         {0x00000098, 0x60010000},
1806         {0x00000099, 0x10010000},
1807         {0x0000009a, 0x00006000},
1808         {0x0000009b, 0x00001000},
1809         {0x0000009f, 0x00b48000}
1810 };
1811 
1812 #define HAWAII_IO_MC_REGS_SIZE 22
1813 
1814 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1815 {
1816         {0x0000007d, 0x40000000},
1817         {0x0000007e, 0x40180304},
1818         {0x0000007f, 0x0000ff00},
1819         {0x00000081, 0x00000000},
1820         {0x00000083, 0x00000800},
1821         {0x00000086, 0x00000000},
1822         {0x00000087, 0x00000100},
1823         {0x00000088, 0x00020100},
1824         {0x00000089, 0x00000000},
1825         {0x0000008b, 0x00040000},
1826         {0x0000008c, 0x00000100},
1827         {0x0000008e, 0xff010000},
1828         {0x00000090, 0xffffefff},
1829         {0x00000091, 0xfff3efff},
1830         {0x00000092, 0xfff3efbf},
1831         {0x00000093, 0xf7ffffff},
1832         {0x00000094, 0xffffff7f},
1833         {0x00000095, 0x00000fff},
1834         {0x00000096, 0x00116fff},
1835         {0x00000097, 0x60010000},
1836         {0x00000098, 0x10010000},
1837         {0x0000009f, 0x00c79000}
1838 };
1839 
1840 
1841 /**
1842  * cik_srbm_select - select specific register instances
1843  *
1844  * @rdev: radeon_device pointer
1845  * @me: selected ME (micro engine)
1846  * @pipe: pipe
1847  * @queue: queue
1848  * @vmid: VMID
1849  *
1850  * Switches the currently active registers instances.  Some
1851  * registers are instanced per VMID, others are instanced per
1852  * me/pipe/queue combination.
1853  */
1854 static void cik_srbm_select(struct radeon_device *rdev,
1855                             u32 me, u32 pipe, u32 queue, u32 vmid)
1856 {
1857         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858                              MEID(me & 0x3) |
1859                              VMID(vmid & 0xf) |
1860                              QUEUEID(queue & 0x7));
1861         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862 }
1863 
1864 /* ucode loading */
1865 /**
1866  * ci_mc_load_microcode - load MC ucode into the hw
1867  *
1868  * @rdev: radeon_device pointer
1869  *
1870  * Load the GDDR MC ucode into the hw (CIK).
1871  * Returns 0 on success, error on failure.
1872  */
1873 int ci_mc_load_microcode(struct radeon_device *rdev)
1874 {
1875         const __be32 *fw_data = NULL;
1876         const __le32 *new_fw_data = NULL;
1877         u32 running, tmp;
1878         u32 *io_mc_regs = NULL;
1879         const __le32 *new_io_mc_regs = NULL;
1880         int i, regs_size, ucode_size;
1881 
1882         if (!rdev->mc_fw)
1883                 return -EINVAL;
1884 
1885         if (rdev->new_fw) {
1886                 const struct mc_firmware_header_v1_0 *hdr =
1887                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1888 
1889                 radeon_ucode_print_mc_hdr(&hdr->header);
1890 
1891                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1892                 new_io_mc_regs = (const __le32 *)
1893                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1894                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1895                 new_fw_data = (const __le32 *)
1896                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897         } else {
1898                 ucode_size = rdev->mc_fw->size / 4;
1899 
1900                 switch (rdev->family) {
1901                 case CHIP_BONAIRE:
1902                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1903                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1904                         break;
1905                 case CHIP_HAWAII:
1906                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1907                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1908                         break;
1909                 default:
1910                         return -EINVAL;
1911                 }
1912                 fw_data = (const __be32 *)rdev->mc_fw->data;
1913         }
1914 
1915         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1916 
1917         if (running == 0) {
1918                 /* reset the engine and set to writable */
1919                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922                 /* load mc io regs */
1923                 for (i = 0; i < regs_size; i++) {
1924                         if (rdev->new_fw) {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                         } else {
1928                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                         }
1931                 }
1932 
1933                 tmp = RREG32(MC_SEQ_MISC0);
1934                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                 }
1940 
1941                 /* load the MC ucode */
1942                 for (i = 0; i < ucode_size; i++) {
1943                         if (rdev->new_fw)
1944                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                         else
1946                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                 }
1948 
1949                 /* put the engine back into the active state */
1950                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954                 /* wait for training to complete */
1955                 for (i = 0; i < rdev->usec_timeout; i++) {
1956                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                 break;
1958                         udelay(1);
1959                 }
1960                 for (i = 0; i < rdev->usec_timeout; i++) {
1961                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                 break;
1963                         udelay(1);
1964                 }
1965         }
1966 
1967         return 0;
1968 }
1969 
1970 /**
1971  * cik_init_microcode - load ucode images from disk
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Use the firmware interface to load the ucode images into
1976  * the driver (not loaded into hw).
1977  * Returns 0 on success, error on failure.
1978  */
1979 static int cik_init_microcode(struct radeon_device *rdev)
1980 {
1981         const char *chip_name;
1982         const char *new_chip_name;
1983         size_t pfp_req_size, me_req_size, ce_req_size,
1984                 mec_req_size, rlc_req_size, mc_req_size = 0,
1985                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986         char fw_name[30];
1987         int new_fw = 0;
1988         int err;
1989         int num_fw;
1990         bool new_smc = false;
1991 
1992         DRM_DEBUG("\n");
1993 
1994         switch (rdev->family) {
1995         case CHIP_BONAIRE:
1996                 chip_name = "BONAIRE";
1997                 if ((rdev->pdev->revision == 0x80) ||
1998                     (rdev->pdev->revision == 0x81) ||
1999                     (rdev->pdev->device == 0x665f))
2000                         new_smc = true;
2001                 new_chip_name = "bonaire";
2002                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2004                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011                 num_fw = 8;
2012                 break;
2013         case CHIP_HAWAII:
2014                 chip_name = "HAWAII";
2015                 if (rdev->pdev->revision == 0x80)
2016                         new_smc = true;
2017                 new_chip_name = "hawaii";
2018                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2020                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027                 num_fw = 8;
2028                 break;
2029         case CHIP_KAVERI:
2030                 chip_name = "KAVERI";
2031                 new_chip_name = "kaveri";
2032                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2034                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038                 num_fw = 7;
2039                 break;
2040         case CHIP_KABINI:
2041                 chip_name = "KABINI";
2042                 new_chip_name = "kabini";
2043                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2045                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049                 num_fw = 6;
2050                 break;
2051         case CHIP_MULLINS:
2052                 chip_name = "MULLINS";
2053                 new_chip_name = "mullins";
2054                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2056                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060                 num_fw = 6;
2061                 break;
2062         default: BUG();
2063         }
2064 
2065         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066 
2067         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069         if (err) {
2070                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072                 if (err)
2073                         goto out;
2074                 if (rdev->pfp_fw->size != pfp_req_size) {
2075                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076                                rdev->pfp_fw->size, fw_name);
2077                         err = -EINVAL;
2078                         goto out;
2079                 }
2080         } else {
2081                 err = radeon_ucode_validate(rdev->pfp_fw);
2082                 if (err) {
2083                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084                                fw_name);
2085                         goto out;
2086                 } else {
2087                         new_fw++;
2088                 }
2089         }
2090 
2091         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093         if (err) {
2094                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096                 if (err)
2097                         goto out;
2098                 if (rdev->me_fw->size != me_req_size) {
2099                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100                                rdev->me_fw->size, fw_name);
2101                         err = -EINVAL;
2102                 }
2103         } else {
2104                 err = radeon_ucode_validate(rdev->me_fw);
2105                 if (err) {
2106                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113 
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123                                rdev->ce_fw->size, fw_name);
2124                         err = -EINVAL;
2125                 }
2126         } else {
2127                 err = radeon_ucode_validate(rdev->ce_fw);
2128                 if (err) {
2129                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130                                fw_name);
2131                         goto out;
2132                 } else {
2133                         new_fw++;
2134                 }
2135         }
2136 
2137         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139         if (err) {
2140                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142                 if (err)
2143                         goto out;
2144                 if (rdev->mec_fw->size != mec_req_size) {
2145                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146                                rdev->mec_fw->size, fw_name);
2147                         err = -EINVAL;
2148                 }
2149         } else {
2150                 err = radeon_ucode_validate(rdev->mec_fw);
2151                 if (err) {
2152                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153                                fw_name);
2154                         goto out;
2155                 } else {
2156                         new_fw++;
2157                 }
2158         }
2159 
2160         if (rdev->family == CHIP_KAVERI) {
2161                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163                 if (err) {
2164                         goto out;
2165                 } else {
2166                         err = radeon_ucode_validate(rdev->mec2_fw);
2167                         if (err) {
2168                                 goto out;
2169                         } else {
2170                                 new_fw++;
2171                         }
2172                 }
2173         }
2174 
2175         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177         if (err) {
2178                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180                 if (err)
2181                         goto out;
2182                 if (rdev->rlc_fw->size != rlc_req_size) {
2183                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184                                rdev->rlc_fw->size, fw_name);
2185                         err = -EINVAL;
2186                 }
2187         } else {
2188                 err = radeon_ucode_validate(rdev->rlc_fw);
2189                 if (err) {
2190                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191                                fw_name);
2192                         goto out;
2193                 } else {
2194                         new_fw++;
2195                 }
2196         }
2197 
2198         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200         if (err) {
2201                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203                 if (err)
2204                         goto out;
2205                 if (rdev->sdma_fw->size != sdma_req_size) {
2206                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207                                rdev->sdma_fw->size, fw_name);
2208                         err = -EINVAL;
2209                 }
2210         } else {
2211                 err = radeon_ucode_validate(rdev->sdma_fw);
2212                 if (err) {
2213                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214                                fw_name);
2215                         goto out;
2216                 } else {
2217                         new_fw++;
2218                 }
2219         }
2220 
2221         /* No SMC, MC ucode on APUs */
2222         if (!(rdev->flags & RADEON_IS_IGP)) {
2223                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225                 if (err) {
2226                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228                         if (err) {
2229                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231                                 if (err)
2232                                         goto out;
2233                         }
2234                         if ((rdev->mc_fw->size != mc_req_size) &&
2235                             (rdev->mc_fw->size != mc2_req_size)){
2236                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237                                        rdev->mc_fw->size, fw_name);
2238                                 err = -EINVAL;
2239                         }
2240                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241                 } else {
2242                         err = radeon_ucode_validate(rdev->mc_fw);
2243                         if (err) {
2244                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245                                        fw_name);
2246                                 goto out;
2247                         } else {
2248                                 new_fw++;
2249                         }
2250                 }
2251 
2252                 if (new_smc)
2253                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254                 else
2255                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257                 if (err) {
2258                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260                         if (err) {
2261                                 pr_err("smc: error loading firmware \"%s\"\n",
2262                                        fw_name);
2263                                 release_firmware(rdev->smc_fw);
2264                                 rdev->smc_fw = NULL;
2265                                 err = 0;
2266                         } else if (rdev->smc_fw->size != smc_req_size) {
2267                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268                                        rdev->smc_fw->size, fw_name);
2269                                 err = -EINVAL;
2270                         }
2271                 } else {
2272                         err = radeon_ucode_validate(rdev->smc_fw);
2273                         if (err) {
2274                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275                                        fw_name);
2276                                 goto out;
2277                         } else {
2278                                 new_fw++;
2279                         }
2280                 }
2281         }
2282 
2283         if (new_fw == 0) {
2284                 rdev->new_fw = false;
2285         } else if (new_fw < num_fw) {
2286                 pr_err("ci_fw: mixing new and old firmware!\n");
2287                 err = -EINVAL;
2288         } else {
2289                 rdev->new_fw = true;
2290         }
2291 
2292 out:
2293         if (err) {
2294                 if (err != -EINVAL)
2295                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296                                fw_name);
2297                 release_firmware(rdev->pfp_fw);
2298                 rdev->pfp_fw = NULL;
2299                 release_firmware(rdev->me_fw);
2300                 rdev->me_fw = NULL;
2301                 release_firmware(rdev->ce_fw);
2302                 rdev->ce_fw = NULL;
2303                 release_firmware(rdev->mec_fw);
2304                 rdev->mec_fw = NULL;
2305                 release_firmware(rdev->mec2_fw);
2306                 rdev->mec2_fw = NULL;
2307                 release_firmware(rdev->rlc_fw);
2308                 rdev->rlc_fw = NULL;
2309                 release_firmware(rdev->sdma_fw);
2310                 rdev->sdma_fw = NULL;
2311                 release_firmware(rdev->mc_fw);
2312                 rdev->mc_fw = NULL;
2313                 release_firmware(rdev->smc_fw);
2314                 rdev->smc_fw = NULL;
2315         }
2316         return err;
2317 }
2318 
2319 /*
2320  * Core functions
2321  */
2322 /**
2323  * cik_tiling_mode_table_init - init the hw tiling table
2324  *
2325  * @rdev: radeon_device pointer
2326  *
2327  * Starting with SI, the tiling setup is done globally in a
2328  * set of 32 tiling modes.  Rather than selecting each set of
2329  * parameters per surface as on older ASICs, we just select
2330  * which index in the tiling table we want to use, and the
2331  * surface uses those parameters (CIK).
2332  */
2333 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334 {
2335         u32 *tile = rdev->config.cik.tile_mode_array;
2336         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337         const u32 num_tile_mode_states =
2338                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339         const u32 num_secondary_tile_mode_states =
2340                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341         u32 reg_offset, split_equal_to_row_size;
2342         u32 num_pipe_configs;
2343         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344                 rdev->config.cik.max_shader_engines;
2345 
2346         switch (rdev->config.cik.mem_row_size_in_kb) {
2347         case 1:
2348                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349                 break;
2350         case 2:
2351         default:
2352                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353                 break;
2354         case 4:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356                 break;
2357         }
2358 
2359         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360         if (num_pipe_configs > 8)
2361                 num_pipe_configs = 16;
2362 
2363         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364                 tile[reg_offset] = 0;
2365         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366                 macrotile[reg_offset] = 0;
2367 
2368         switch(num_pipe_configs) {
2369         case 16:
2370                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                            TILE_SPLIT(split_equal_to_row_size));
2390                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(split_equal_to_row_size));
2401                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 
2449                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                            NUM_BANKS(ADDR_SURF_16_BANK));
2453                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                            NUM_BANKS(ADDR_SURF_16_BANK));
2457                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                            NUM_BANKS(ADDR_SURF_16_BANK));
2461                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                            NUM_BANKS(ADDR_SURF_16_BANK));
2465                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                            NUM_BANKS(ADDR_SURF_8_BANK));
2469                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                            NUM_BANKS(ADDR_SURF_4_BANK));
2473                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                            NUM_BANKS(ADDR_SURF_2_BANK));
2477                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK));
2481                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                            NUM_BANKS(ADDR_SURF_16_BANK));
2485                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                             NUM_BANKS(ADDR_SURF_16_BANK));
2489                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                             NUM_BANKS(ADDR_SURF_8_BANK));
2493                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                             NUM_BANKS(ADDR_SURF_4_BANK));
2497                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                             NUM_BANKS(ADDR_SURF_2_BANK));
2501                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504                             NUM_BANKS(ADDR_SURF_2_BANK));
2505 
2506                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510                 break;
2511 
2512         case 8:
2513                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                            TILE_SPLIT(split_equal_to_row_size));
2533                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(split_equal_to_row_size));
2544                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 
2592                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595                                 NUM_BANKS(ADDR_SURF_16_BANK));
2596                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599                                 NUM_BANKS(ADDR_SURF_16_BANK));
2600                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603                                 NUM_BANKS(ADDR_SURF_16_BANK));
2604                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607                                 NUM_BANKS(ADDR_SURF_16_BANK));
2608                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611                                 NUM_BANKS(ADDR_SURF_8_BANK));
2612                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615                                 NUM_BANKS(ADDR_SURF_4_BANK));
2616                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619                                 NUM_BANKS(ADDR_SURF_2_BANK));
2620                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635                                 NUM_BANKS(ADDR_SURF_16_BANK));
2636                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                 NUM_BANKS(ADDR_SURF_8_BANK));
2640                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                 NUM_BANKS(ADDR_SURF_4_BANK));
2644                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647                                 NUM_BANKS(ADDR_SURF_2_BANK));
2648 
2649                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653                 break;
2654 
2655         case 4:
2656                 if (num_rbs == 4) {
2657                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676                            TILE_SPLIT(split_equal_to_row_size));
2677                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(split_equal_to_row_size));
2688                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 
2736                 } else if (num_rbs < 4) {
2737                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756                            TILE_SPLIT(split_equal_to_row_size));
2757                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(split_equal_to_row_size));
2768                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 }
2816 
2817                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                 NUM_BANKS(ADDR_SURF_8_BANK));
2841                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844                                 NUM_BANKS(ADDR_SURF_4_BANK));
2845                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868                                 NUM_BANKS(ADDR_SURF_8_BANK));
2869                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872                                 NUM_BANKS(ADDR_SURF_4_BANK));
2873 
2874                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878                 break;
2879 
2880         case 2:
2881                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883                            PIPE_CONFIG(ADDR_SURF_P2) |
2884                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                            PIPE_CONFIG(ADDR_SURF_P2) |
2888                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891                            PIPE_CONFIG(ADDR_SURF_P2) |
2892                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                            PIPE_CONFIG(ADDR_SURF_P2) |
2896                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899                            PIPE_CONFIG(ADDR_SURF_P2) |
2900                            TILE_SPLIT(split_equal_to_row_size));
2901                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(split_equal_to_row_size));
2912                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913                            PIPE_CONFIG(ADDR_SURF_P2);
2914                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                            PIPE_CONFIG(ADDR_SURF_P2));
2917                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                             PIPE_CONFIG(ADDR_SURF_P2) |
2920                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                             PIPE_CONFIG(ADDR_SURF_P2) |
2924                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                             PIPE_CONFIG(ADDR_SURF_P2) |
2928                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                             PIPE_CONFIG(ADDR_SURF_P2) |
2931                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946                             PIPE_CONFIG(ADDR_SURF_P2));
2947                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 
2960                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979                                 NUM_BANKS(ADDR_SURF_16_BANK));
2980                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                 NUM_BANKS(ADDR_SURF_16_BANK));
2984                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987                                 NUM_BANKS(ADDR_SURF_8_BANK));
2988                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011                                 NUM_BANKS(ADDR_SURF_16_BANK));
3012                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015                                 NUM_BANKS(ADDR_SURF_8_BANK));
3016 
3017                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021                 break;
3022 
3023         default:
3024                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025         }
3026 }
3027 
3028 /**
3029  * cik_select_se_sh - select which SE, SH to address
3030  *
3031  * @rdev: radeon_device pointer
3032  * @se_num: shader engine to address
3033  * @sh_num: sh block to address
3034  *
3035  * Select which SE, SH combinations to address. Certain
3036  * registers are instanced per SE or SH.  0xffffffff means
3037  * broadcast to all SEs or SHs (CIK).
3038  */
3039 static void cik_select_se_sh(struct radeon_device *rdev,
3040                              u32 se_num, u32 sh_num)
3041 {
3042         u32 data = INSTANCE_BROADCAST_WRITES;
3043 
3044         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046         else if (se_num == 0xffffffff)
3047                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048         else if (sh_num == 0xffffffff)
3049                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050         else
3051                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052         WREG32(GRBM_GFX_INDEX, data);
3053 }
3054 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask in bits
 *
 * Builds a mask with the low @bit_width bits set (CIK).
 * A width of 0 yields 0; a width of 32 or more yields 0xffffffff.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* shifting by the full type width is undefined, so saturate first */
	if (bit_width >= 32)
		return 0xffffffff;

	return (1U << bit_width) - 1;
}
3073 
3074 /**
3075  * cik_get_rb_disabled - computes the mask of disabled RBs
3076  *
3077  * @rdev: radeon_device pointer
3078  * @max_rb_num: max RBs (render backends) for the asic
3079  * @se_num: number of SEs (shader engines) for the asic
3080  * @sh_per_se: number of SH blocks per SE for the asic
3081  *
3082  * Calculates the bitmask of disabled RBs (CIK).
3083  * Returns the disabled RB bitmask.
3084  */
3085 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086                               u32 max_rb_num_per_se,
3087                               u32 sh_per_se)
3088 {
3089         u32 data, mask;
3090 
3091         data = RREG32(CC_RB_BACKEND_DISABLE);
3092         if (data & 1)
3093                 data &= BACKEND_DISABLE_MASK;
3094         else
3095                 data = 0;
3096         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097 
3098         data >>= BACKEND_DISABLE_SHIFT;
3099 
3100         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101 
3102         return data & mask;
3103 }
3104 
3105 /**
3106  * cik_setup_rb - setup the RBs on the asic
3107  *
3108  * @rdev: radeon_device pointer
3109  * @se_num: number of SEs (shader engines) for the asic
3110  * @sh_per_se: number of SH blocks per SE for the asic
3111  * @max_rb_num: max RBs (render backends) for the asic
3112  *
3113  * Configures per-SE/SH RB registers (CIK).
3114  */
3115 static void cik_setup_rb(struct radeon_device *rdev,
3116                          u32 se_num, u32 sh_per_se,
3117                          u32 max_rb_num_per_se)
3118 {
3119         int i, j;
3120         u32 data, mask;
3121         u32 disabled_rbs = 0;
3122         u32 enabled_rbs = 0;
3123 
3124         for (i = 0; i < se_num; i++) {
3125                 for (j = 0; j < sh_per_se; j++) {
3126                         cik_select_se_sh(rdev, i, j);
3127                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128                         if (rdev->family == CHIP_HAWAII)
3129                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130                         else
3131                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132                 }
3133         }
3134         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135 
3136         mask = 1;
3137         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3138                 if (!(disabled_rbs & mask))
3139                         enabled_rbs |= mask;
3140                 mask <<= 1;
3141         }
3142 
3143         rdev->config.cik.backend_enable_mask = enabled_rbs;
3144 
3145         for (i = 0; i < se_num; i++) {
3146                 cik_select_se_sh(rdev, i, 0xffffffff);
3147                 data = 0;
3148                 for (j = 0; j < sh_per_se; j++) {
3149                         switch (enabled_rbs & 3) {
3150                         case 0:
3151                                 if (j == 0)
3152                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3153                                 else
3154                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3155                                 break;
3156                         case 1:
3157                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3158                                 break;
3159                         case 2:
3160                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3161                                 break;
3162                         case 3:
3163                         default:
3164                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3165                                 break;
3166                         }
3167                         enabled_rbs >>= 2;
3168                 }
3169                 WREG32(PA_SC_RASTER_CONFIG, data);
3170         }
3171         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3172 }
3173 
3174 /**
3175  * cik_gpu_init - setup the 3D engine
3176  *
3177  * @rdev: radeon_device pointer
3178  *
3179  * Configures the 3D engine and tiling configuration
3180  * registers so that the 3D engine is usable.
3181  */
3182 static void cik_gpu_init(struct radeon_device *rdev)
3183 {
3184         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3185         u32 mc_shared_chmap, mc_arb_ramcfg;
3186         u32 hdp_host_path_cntl;
3187         u32 tmp;
3188         int i, j;
3189 
3190         switch (rdev->family) {
3191         case CHIP_BONAIRE:
3192                 rdev->config.cik.max_shader_engines = 2;
3193                 rdev->config.cik.max_tile_pipes = 4;
3194                 rdev->config.cik.max_cu_per_sh = 7;
3195                 rdev->config.cik.max_sh_per_se = 1;
3196                 rdev->config.cik.max_backends_per_se = 2;
3197                 rdev->config.cik.max_texture_channel_caches = 4;
3198                 rdev->config.cik.max_gprs = 256;
3199                 rdev->config.cik.max_gs_threads = 32;
3200                 rdev->config.cik.max_hw_contexts = 8;
3201 
3202                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3203                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3204                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3205                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3206                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3207                 break;
3208         case CHIP_HAWAII:
3209                 rdev->config.cik.max_shader_engines = 4;
3210                 rdev->config.cik.max_tile_pipes = 16;
3211                 rdev->config.cik.max_cu_per_sh = 11;
3212                 rdev->config.cik.max_sh_per_se = 1;
3213                 rdev->config.cik.max_backends_per_se = 4;
3214                 rdev->config.cik.max_texture_channel_caches = 16;
3215                 rdev->config.cik.max_gprs = 256;
3216                 rdev->config.cik.max_gs_threads = 32;
3217                 rdev->config.cik.max_hw_contexts = 8;
3218 
3219                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3220                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3221                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3222                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3223                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3224                 break;
3225         case CHIP_KAVERI:
3226                 rdev->config.cik.max_shader_engines = 1;
3227                 rdev->config.cik.max_tile_pipes = 4;
3228                 rdev->config.cik.max_cu_per_sh = 8;
3229                 rdev->config.cik.max_backends_per_se = 2;
3230                 rdev->config.cik.max_sh_per_se = 1;
3231                 rdev->config.cik.max_texture_channel_caches = 4;
3232                 rdev->config.cik.max_gprs = 256;
3233                 rdev->config.cik.max_gs_threads = 16;
3234                 rdev->config.cik.max_hw_contexts = 8;
3235 
3236                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241                 break;
3242         case CHIP_KABINI:
3243         case CHIP_MULLINS:
3244         default:
3245                 rdev->config.cik.max_shader_engines = 1;
3246                 rdev->config.cik.max_tile_pipes = 2;
3247                 rdev->config.cik.max_cu_per_sh = 2;
3248                 rdev->config.cik.max_sh_per_se = 1;
3249                 rdev->config.cik.max_backends_per_se = 1;
3250                 rdev->config.cik.max_texture_channel_caches = 2;
3251                 rdev->config.cik.max_gprs = 256;
3252                 rdev->config.cik.max_gs_threads = 16;
3253                 rdev->config.cik.max_hw_contexts = 8;
3254 
3255                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260                 break;
3261         }
3262 
3263         /* Initialize HDP */
3264         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265                 WREG32((0x2c14 + j), 0x00000000);
3266                 WREG32((0x2c18 + j), 0x00000000);
3267                 WREG32((0x2c1c + j), 0x00000000);
3268                 WREG32((0x2c20 + j), 0x00000000);
3269                 WREG32((0x2c24 + j), 0x00000000);
3270         }
3271 
3272         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273         WREG32(SRBM_INT_CNTL, 0x1);
3274         WREG32(SRBM_INT_ACK, 0x1);
3275 
3276         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3277 
3278         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3279         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3280 
3281         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3282         rdev->config.cik.mem_max_burst_length_bytes = 256;
3283         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3284         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3285         if (rdev->config.cik.mem_row_size_in_kb > 4)
3286                 rdev->config.cik.mem_row_size_in_kb = 4;
3287         /* XXX use MC settings? */
3288         rdev->config.cik.shader_engine_tile_size = 32;
3289         rdev->config.cik.num_gpus = 1;
3290         rdev->config.cik.multi_gpu_tile_size = 64;
3291 
3292         /* fix up row size */
3293         gb_addr_config &= ~ROW_SIZE_MASK;
3294         switch (rdev->config.cik.mem_row_size_in_kb) {
3295         case 1:
3296         default:
3297                 gb_addr_config |= ROW_SIZE(0);
3298                 break;
3299         case 2:
3300                 gb_addr_config |= ROW_SIZE(1);
3301                 break;
3302         case 4:
3303                 gb_addr_config |= ROW_SIZE(2);
3304                 break;
3305         }
3306 
3307         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3308          * not have bank info, so create a custom tiling dword.
3309          * bits 3:0   num_pipes
3310          * bits 7:4   num_banks
3311          * bits 11:8  group_size
3312          * bits 15:12 row_size
3313          */
3314         rdev->config.cik.tile_config = 0;
3315         switch (rdev->config.cik.num_tile_pipes) {
3316         case 1:
3317                 rdev->config.cik.tile_config |= (0 << 0);
3318                 break;
3319         case 2:
3320                 rdev->config.cik.tile_config |= (1 << 0);
3321                 break;
3322         case 4:
3323                 rdev->config.cik.tile_config |= (2 << 0);
3324                 break;
3325         case 8:
3326         default:
3327                 /* XXX what about 12? */
3328                 rdev->config.cik.tile_config |= (3 << 0);
3329                 break;
3330         }
3331         rdev->config.cik.tile_config |=
3332                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3333         rdev->config.cik.tile_config |=
3334                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3335         rdev->config.cik.tile_config |=
3336                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3337 
3338         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3339         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3340         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3341         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3342         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3343         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3344         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3345         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3346 
3347         cik_tiling_mode_table_init(rdev);
3348 
3349         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3350                      rdev->config.cik.max_sh_per_se,
3351                      rdev->config.cik.max_backends_per_se);
3352 
3353         rdev->config.cik.active_cus = 0;
3354         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3355                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3356                         rdev->config.cik.active_cus +=
3357                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3358                 }
3359         }
3360 
3361         /* set HW defaults for 3D engine */
3362         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3363 
3364         WREG32(SX_DEBUG_1, 0x20);
3365 
3366         WREG32(TA_CNTL_AUX, 0x00010000);
3367 
3368         tmp = RREG32(SPI_CONFIG_CNTL);
3369         tmp |= 0x03000000;
3370         WREG32(SPI_CONFIG_CNTL, tmp);
3371 
3372         WREG32(SQ_CONFIG, 1);
3373 
3374         WREG32(DB_DEBUG, 0);
3375 
3376         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3377         tmp |= 0x00000400;
3378         WREG32(DB_DEBUG2, tmp);
3379 
3380         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3381         tmp |= 0x00020200;
3382         WREG32(DB_DEBUG3, tmp);
3383 
3384         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3385         tmp |= 0x00018208;
3386         WREG32(CB_HW_CONTROL, tmp);
3387 
3388         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3389 
3390         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3391                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3392                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3393                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3394 
3395         WREG32(VGT_NUM_INSTANCES, 1);
3396 
3397         WREG32(CP_PERFMON_CNTL, 0);
3398 
3399         WREG32(SQ_CONFIG, 0);
3400 
3401         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3402                                           FORCE_EOV_MAX_REZ_CNT(255)));
3403 
3404         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3405                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3406 
3407         WREG32(VGT_GS_VERTEX_REUSE, 16);
3408         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3409 
3410         tmp = RREG32(HDP_MISC_CNTL);
3411         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3412         WREG32(HDP_MISC_CNTL, tmp);
3413 
3414         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3415         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3416 
3417         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3418         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3419 
3420         udelay(50);
3421 }
3422 
3423 /*
3424  * GPU scratch register helper functions.
3425  */
3426 /**
3427  * cik_scratch_init - setup driver info for CP scratch regs
3428  *
3429  * @rdev: radeon_device pointer
3430  *
3431  * Set up the number and offset of the CP scratch registers.
3432  * NOTE: use of CP scratch registers is a legacy inferface and
3433  * is not used by default on newer asics (r6xx+).  On newer asics,
3434  * memory buffers are used for fences rather than scratch regs.
3435  */
3436 static void cik_scratch_init(struct radeon_device *rdev)
3437 {
3438         int i;
3439 
3440         rdev->scratch.num_reg = 7;
3441         rdev->scratch.reg_base = SCRATCH_REG0;
3442         for (i = 0; i < rdev->scratch.num_reg; i++) {
3443                 rdev->scratch.free[i] = true;
3444                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3445         }
3446 }
3447 
3448 /**
3449  * cik_ring_test - basic gfx ring test
3450  *
3451  * @rdev: radeon_device pointer
3452  * @ring: radeon_ring structure holding ring information
3453  *
3454  * Allocate a scratch register and write to it using the gfx ring (CIK).
3455  * Provides a basic gfx ring test to verify that the ring is working.
3456  * Used by cik_cp_gfx_resume();
3457  * Returns 0 on success, error on failure.
3458  */
3459 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3460 {
3461         uint32_t scratch;
3462         uint32_t tmp = 0;
3463         unsigned i;
3464         int r;
3465 
3466         r = radeon_scratch_get(rdev, &scratch);
3467         if (r) {
3468                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3469                 return r;
3470         }
3471         WREG32(scratch, 0xCAFEDEAD);
3472         r = radeon_ring_lock(rdev, ring, 3);
3473         if (r) {
3474                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3475                 radeon_scratch_free(rdev, scratch);
3476                 return r;
3477         }
3478         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3479         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3480         radeon_ring_write(ring, 0xDEADBEEF);
3481         radeon_ring_unlock_commit(rdev, ring, false);
3482 
3483         for (i = 0; i < rdev->usec_timeout; i++) {
3484                 tmp = RREG32(scratch);
3485                 if (tmp == 0xDEADBEEF)
3486                         break;
3487                 udelay(1);
3488         }
3489         if (i < rdev->usec_timeout) {
3490                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3491         } else {
3492                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3493                           ring->idx, scratch, tmp);
3494                 r = -EINVAL;
3495         }
3496         radeon_scratch_free(rdev, scratch);
3497         return r;
3498 }
3499 
3500 /**
3501  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3502  *
3503  * @rdev: radeon_device pointer
3504  * @ridx: radeon ring index
3505  *
3506  * Emits an hdp flush on the cp.
3507  */
3508 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3509                                        int ridx)
3510 {
3511         struct radeon_ring *ring = &rdev->ring[ridx];
3512         u32 ref_and_mask;
3513 
3514         switch (ring->idx) {
3515         case CAYMAN_RING_TYPE_CP1_INDEX:
3516         case CAYMAN_RING_TYPE_CP2_INDEX:
3517         default:
3518                 switch (ring->me) {
3519                 case 0:
3520                         ref_and_mask = CP2 << ring->pipe;
3521                         break;
3522                 case 1:
3523                         ref_and_mask = CP6 << ring->pipe;
3524                         break;
3525                 default:
3526                         return;
3527                 }
3528                 break;
3529         case RADEON_RING_TYPE_GFX_INDEX:
3530                 ref_and_mask = CP0;
3531                 break;
3532         }
3533 
3534         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3535         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3536                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3537                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3538         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3539         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3540         radeon_ring_write(ring, ref_and_mask);
3541         radeon_ring_write(ring, ref_and_mask);
3542         radeon_ring_write(ring, 0x20); /* poll interval */
3543 }
3544 
3545 /**
3546  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3547  *
3548  * @rdev: radeon_device pointer
3549  * @fence: radeon fence object
3550  *
3551  * Emits a fence sequnce number on the gfx ring and flushes
3552  * GPU caches.
3553  */
3554 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3555                              struct radeon_fence *fence)
3556 {
3557         struct radeon_ring *ring = &rdev->ring[fence->ring];
3558         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3559 
3560         /* Workaround for cache flush problems. First send a dummy EOP
3561          * event down the pipe with seq one below.
3562          */
3563         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3564         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3565                                  EOP_TC_ACTION_EN |
3566                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3567                                  EVENT_INDEX(5)));
3568         radeon_ring_write(ring, addr & 0xfffffffc);
3569         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3570                                 DATA_SEL(1) | INT_SEL(0));
3571         radeon_ring_write(ring, fence->seq - 1);
3572         radeon_ring_write(ring, 0);
3573 
3574         /* Then send the real EOP event down the pipe. */
3575         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3576         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3577                                  EOP_TC_ACTION_EN |
3578                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3579                                  EVENT_INDEX(5)));
3580         radeon_ring_write(ring, addr & 0xfffffffc);
3581         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3582         radeon_ring_write(ring, fence->seq);
3583         radeon_ring_write(ring, 0);
3584 }
3585 
3586 /**
3587  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3588  *
3589  * @rdev: radeon_device pointer
3590  * @fence: radeon fence object
3591  *
3592  * Emits a fence sequnce number on the compute ring and flushes
3593  * GPU caches.
3594  */
3595 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3596                                  struct radeon_fence *fence)
3597 {
3598         struct radeon_ring *ring = &rdev->ring[fence->ring];
3599         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3600 
3601         /* RELEASE_MEM - flush caches, send int */
3602         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3603         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3604                                  EOP_TC_ACTION_EN |
3605                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3606                                  EVENT_INDEX(5)));
3607         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3608         radeon_ring_write(ring, addr & 0xfffffffc);
3609         radeon_ring_write(ring, upper_32_bits(addr));
3610         radeon_ring_write(ring, fence->seq);
3611         radeon_ring_write(ring, 0);
3612 }
3613 
3614 /**
3615  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3616  *
3617  * @rdev: radeon_device pointer
3618  * @ring: radeon ring buffer object
3619  * @semaphore: radeon semaphore object
3620  * @emit_wait: Is this a sempahore wait?
3621  *
3622  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3623  * from running ahead of semaphore waits.
3624  */
3625 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626                              struct radeon_ring *ring,
3627                              struct radeon_semaphore *semaphore,
3628                              bool emit_wait)
3629 {
3630         uint64_t addr = semaphore->gpu_addr;
3631         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632 
3633         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634         radeon_ring_write(ring, lower_32_bits(addr));
3635         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636 
3637         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3638                 /* Prevent the PFP from running ahead of the semaphore wait */
3639                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3640                 radeon_ring_write(ring, 0x0);
3641         }
3642 
3643         return true;
3644 }
3645 
3646 /**
3647  * cik_copy_cpdma - copy pages using the CP DMA engine
3648  *
3649  * @rdev: radeon_device pointer
3650  * @src_offset: src GPU address
3651  * @dst_offset: dst GPU address
3652  * @num_gpu_pages: number of GPU pages to xfer
3653  * @resv: reservation object to sync to
3654  *
3655  * Copy GPU paging using the CP DMA engine (CIK+).
3656  * Used by the radeon ttm implementation to move pages if
3657  * registered as the asic copy callback.
3658  */
3659 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3660                                     uint64_t src_offset, uint64_t dst_offset,
3661                                     unsigned num_gpu_pages,
3662                                     struct dma_resv *resv)
3663 {
3664         struct radeon_fence *fence;
3665         struct radeon_sync sync;
3666         int ring_index = rdev->asic->copy.blit_ring_index;
3667         struct radeon_ring *ring = &rdev->ring[ring_index];
3668         u32 size_in_bytes, cur_size_in_bytes, control;
3669         int i, num_loops;
3670         int r = 0;
3671 
3672         radeon_sync_create(&sync);
3673 
3674         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3675         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3676         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3677         if (r) {
3678                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3679                 radeon_sync_free(rdev, &sync, NULL);
3680                 return ERR_PTR(r);
3681         }
3682 
3683         radeon_sync_resv(rdev, &sync, resv, false);
3684         radeon_sync_rings(rdev, &sync, ring->idx);
3685 
3686         for (i = 0; i < num_loops; i++) {
3687                 cur_size_in_bytes = size_in_bytes;
3688                 if (cur_size_in_bytes > 0x1fffff)
3689                         cur_size_in_bytes = 0x1fffff;
3690                 size_in_bytes -= cur_size_in_bytes;
3691                 control = 0;
3692                 if (size_in_bytes == 0)
3693                         control |= PACKET3_DMA_DATA_CP_SYNC;
3694                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3695                 radeon_ring_write(ring, control);
3696                 radeon_ring_write(ring, lower_32_bits(src_offset));
3697                 radeon_ring_write(ring, upper_32_bits(src_offset));
3698                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3699                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3700                 radeon_ring_write(ring, cur_size_in_bytes);
3701                 src_offset += cur_size_in_bytes;
3702                 dst_offset += cur_size_in_bytes;
3703         }
3704 
3705         r = radeon_fence_emit(rdev, &fence, ring->idx);
3706         if (r) {
3707                 radeon_ring_unlock_undo(rdev, ring);
3708                 radeon_sync_free(rdev, &sync, NULL);
3709                 return ERR_PTR(r);
3710         }
3711 
3712         radeon_ring_unlock_commit(rdev, ring, false);
3713         radeon_sync_free(rdev, &sync, fence);
3714 
3715         return fence;
3716 }
3717 
3718 /*
3719  * IB stuff
3720  */
3721 /**
3722  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3723  *
3724  * @rdev: radeon_device pointer
3725  * @ib: radeon indirect buffer object
3726  *
3727  * Emits a DE (drawing engine) or CE (constant engine) IB
3728  * on the gfx ring.  IBs are usually generated by userspace
3729  * acceleration drivers and submitted to the kernel for
3730  * scheduling on the ring.  This function schedules the IB
3731  * on the gfx ring for execution by the GPU.
3732  */
3733 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3734 {
3735         struct radeon_ring *ring = &rdev->ring[ib->ring];
3736         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3737         u32 header, control = INDIRECT_BUFFER_VALID;
3738 
3739         if (ib->is_const_ib) {
3740                 /* set switch buffer packet before const IB */
3741                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3742                 radeon_ring_write(ring, 0);
3743 
3744                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3745         } else {
3746                 u32 next_rptr;
3747                 if (ring->rptr_save_reg) {
3748                         next_rptr = ring->wptr + 3 + 4;
3749                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3750                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3751                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3752                         radeon_ring_write(ring, next_rptr);
3753                 } else if (rdev->wb.enabled) {
3754                         next_rptr = ring->wptr + 5 + 4;
3755                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3756                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3757                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3758                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3759                         radeon_ring_write(ring, next_rptr);
3760                 }
3761 
3762                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3763         }
3764 
3765         control |= ib->length_dw | (vm_id << 24);
3766 
3767         radeon_ring_write(ring, header);
3768         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3769         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770         radeon_ring_write(ring, control);
3771 }
3772 
3773 /**
3774  * cik_ib_test - basic gfx ring IB test
3775  *
3776  * @rdev: radeon_device pointer
3777  * @ring: radeon_ring structure holding ring information
3778  *
3779  * Allocate an IB and execute it on the gfx ring (CIK).
3780  * Provides a basic gfx ring test to verify that IBs are working.
3781  * Returns 0 on success, error on failure.
3782  */
3783 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784 {
3785         struct radeon_ib ib;
3786         uint32_t scratch;
3787         uint32_t tmp = 0;
3788         unsigned i;
3789         int r;
3790 
3791         r = radeon_scratch_get(rdev, &scratch);
3792         if (r) {
3793                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794                 return r;
3795         }
3796         WREG32(scratch, 0xCAFEDEAD);
3797         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798         if (r) {
3799                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800                 radeon_scratch_free(rdev, scratch);
3801                 return r;
3802         }
3803         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805         ib.ptr[2] = 0xDEADBEEF;
3806         ib.length_dw = 3;
3807         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3808         if (r) {
3809                 radeon_scratch_free(rdev, scratch);
3810                 radeon_ib_free(rdev, &ib);
3811                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812                 return r;
3813         }
3814         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3815                 RADEON_USEC_IB_TEST_TIMEOUT));
3816         if (r < 0) {
3817                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3818                 radeon_scratch_free(rdev, scratch);
3819                 radeon_ib_free(rdev, &ib);
3820                 return r;
3821         } else if (r == 0) {
3822                 DRM_ERROR("radeon: fence wait timed out.\n");
3823                 radeon_scratch_free(rdev, scratch);
3824                 radeon_ib_free(rdev, &ib);
3825                 return -ETIMEDOUT;
3826         }
3827         r = 0;
3828         for (i = 0; i < rdev->usec_timeout; i++) {
3829                 tmp = RREG32(scratch);
3830                 if (tmp == 0xDEADBEEF)
3831                         break;
3832                 udelay(1);
3833         }
3834         if (i < rdev->usec_timeout) {
3835                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3836         } else {
3837                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3838                           scratch, tmp);
3839                 r = -EINVAL;
3840         }
3841         radeon_scratch_free(rdev, scratch);
3842         radeon_ib_free(rdev, &ib);
3843         return r;
3844 }
3845 
3846 /*
3847  * CP.
3848  * On CIK, gfx and compute now have independent command processors.
3849  *
3850  * GFX
3851  * Gfx consists of a single ring and can process both gfx jobs and
3852  * compute jobs.  The gfx CP consists of three microengines (ME):
3853  * PFP - Pre-Fetch Parser
3854  * ME - Micro Engine
3855  * CE - Constant Engine
3856  * The PFP and ME make up what is considered the Drawing Engine (DE).
3857  * The CE is an asynchronous engine used for updating buffer descriptors
3858  * used by the DE so that they can be loaded into cache in parallel
3859  * while the DE is processing state update packets.
3860  *
3861  * Compute
3862  * The compute CP consists of two microengines (ME):
3863  * MEC1 - Compute MicroEngine 1
3864  * MEC2 - Compute MicroEngine 2
3865  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3866  * The queues are exposed to userspace and are programmed directly
3867  * by the compute runtime.
3868  */
3869 /**
3870  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3871  *
3872  * @rdev: radeon_device pointer
3873  * @enable: enable or disable the MEs
3874  *
3875  * Halts or unhalts the gfx MEs.
3876  */
3877 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3878 {
3879         if (enable)
3880                 WREG32(CP_ME_CNTL, 0);
3881         else {
3882                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3883                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3884                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3885                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886         }
3887         udelay(50);
3888 }
3889 
3890 /**
3891  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3892  *
3893  * @rdev: radeon_device pointer
3894  *
3895  * Loads the gfx PFP, ME, and CE ucode.
3896  * Returns 0 for success, -EINVAL if the ucode is not available.
3897  */
3898 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3899 {
3900         int i;
3901 
3902         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3903                 return -EINVAL;
3904 
3905         cik_cp_gfx_enable(rdev, false);
3906 
3907         if (rdev->new_fw) {
3908                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3909                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3910                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3911                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3912                 const struct gfx_firmware_header_v1_0 *me_hdr =
3913                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3914                 const __le32 *fw_data;
3915                 u32 fw_size;
3916 
3917                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3918                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3919                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3920 
3921                 /* PFP */
3922                 fw_data = (const __le32 *)
3923                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3924                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3925                 WREG32(CP_PFP_UCODE_ADDR, 0);
3926                 for (i = 0; i < fw_size; i++)
3927                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3928                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3929 
3930                 /* CE */
3931                 fw_data = (const __le32 *)
3932                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3933                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3934                 WREG32(CP_CE_UCODE_ADDR, 0);
3935                 for (i = 0; i < fw_size; i++)
3936                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3937                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3938 
3939                 /* ME */
3940                 fw_data = (const __be32 *)
3941                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3942                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3943                 WREG32(CP_ME_RAM_WADDR, 0);
3944                 for (i = 0; i < fw_size; i++)
3945                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3946                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3947                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3948         } else {
3949                 const __be32 *fw_data;
3950 
3951                 /* PFP */
3952                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3953                 WREG32(CP_PFP_UCODE_ADDR, 0);
3954                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3955                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3956                 WREG32(CP_PFP_UCODE_ADDR, 0);
3957 
3958                 /* CE */
3959                 fw_data = (const __be32 *)rdev->ce_fw->data;
3960                 WREG32(CP_CE_UCODE_ADDR, 0);
3961                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3962                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3963                 WREG32(CP_CE_UCODE_ADDR, 0);
3964 
3965                 /* ME */
3966                 fw_data = (const __be32 *)rdev->me_fw->data;
3967                 WREG32(CP_ME_RAM_WADDR, 0);
3968                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3969                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3970                 WREG32(CP_ME_RAM_WADDR, 0);
3971         }
3972 
3973         return 0;
3974 }
3975 
3976 /**
3977  * cik_cp_gfx_start - start the gfx ring
3978  *
3979  * @rdev: radeon_device pointer
3980  *
3981  * Enables the ring and loads the clear state context and other
3982  * packets required to init the ring.
3983  * Returns 0 for success, error for failure.
3984  */
3985 static int cik_cp_gfx_start(struct radeon_device *rdev)
3986 {
3987         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3988         int r, i;
3989 
3990         /* init the CP */
3991         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3992         WREG32(CP_ENDIAN_SWAP, 0);
3993         WREG32(CP_DEVICE_ID, 1);
3994 
3995         cik_cp_gfx_enable(rdev, true);
3996 
3997         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3998         if (r) {
3999                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4000                 return r;
4001         }
4002 
4003         /* init the CE partitions.  CE only used for gfx on CIK */
4004         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4005         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4006         radeon_ring_write(ring, 0x8000);
4007         radeon_ring_write(ring, 0x8000);
4008 
4009         /* setup clear context state */
4010         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4011         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4012 
4013         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4014         radeon_ring_write(ring, 0x80000000);
4015         radeon_ring_write(ring, 0x80000000);
4016 
4017         for (i = 0; i < cik_default_size; i++)
4018                 radeon_ring_write(ring, cik_default_state[i]);
4019 
4020         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4021         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4022 
4023         /* set clear context state */
4024         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4025         radeon_ring_write(ring, 0);
4026 
4027         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4028         radeon_ring_write(ring, 0x00000316);
4029         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4030         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4031 
4032         radeon_ring_unlock_commit(rdev, ring, false);
4033 
4034         return 0;
4035 }
4036 
4037 /**
4038  * cik_cp_gfx_fini - stop the gfx ring
4039  *
4040  * @rdev: radeon_device pointer
4041  *
4042  * Stop the gfx ring and tear down the driver ring
4043  * info.
4044  */
4045 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4046 {
4047         cik_cp_gfx_enable(rdev, false);
4048         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4049 }
4050 
4051 /**
4052  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4053  *
4054  * @rdev: radeon_device pointer
4055  *
4056  * Program the location and size of the gfx ring buffer
4057  * and test it to make sure it's working.
4058  * Returns 0 for success, error for failure.
4059  */
4060 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4061 {
4062         struct radeon_ring *ring;
4063         u32 tmp;
4064         u32 rb_bufsz;
4065         u64 rb_addr;
4066         int r;
4067 
4068         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4069         if (rdev->family != CHIP_HAWAII)
4070                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4071 
4072         /* Set the write pointer delay */
4073         WREG32(CP_RB_WPTR_DELAY, 0);
4074 
4075         /* set the RB to use vmid 0 */
4076         WREG32(CP_RB_VMID, 0);
4077 
4078         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4079 
4080         /* ring 0 - compute and gfx */
4081         /* Set ring buffer size */
4082         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4083         rb_bufsz = order_base_2(ring->ring_size / 8);
4084         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4085 #ifdef __BIG_ENDIAN
4086         tmp |= BUF_SWAP_32BIT;
4087 #endif
4088         WREG32(CP_RB0_CNTL, tmp);
4089 
4090         /* Initialize the ring buffer's read and write pointers */
4091         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4092         ring->wptr = 0;
4093         WREG32(CP_RB0_WPTR, ring->wptr);
4094 
4095         /* set the wb address wether it's enabled or not */
4096         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4097         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4098 
4099         /* scratch register shadowing is no longer supported */
4100         WREG32(SCRATCH_UMSK, 0);
4101 
4102         if (!rdev->wb.enabled)
4103                 tmp |= RB_NO_UPDATE;
4104 
4105         mdelay(1);
4106         WREG32(CP_RB0_CNTL, tmp);
4107 
4108         rb_addr = ring->gpu_addr >> 8;
4109         WREG32(CP_RB0_BASE, rb_addr);
4110         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4111 
4112         /* start the ring */
4113         cik_cp_gfx_start(rdev);
4114         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4115         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116         if (r) {
4117                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4118                 return r;
4119         }
4120 
4121         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4122                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4123 
4124         return 0;
4125 }
4126 
4127 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4128                      struct radeon_ring *ring)
4129 {
4130         u32 rptr;
4131 
4132         if (rdev->wb.enabled)
4133                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4134         else
4135                 rptr = RREG32(CP_RB0_RPTR);
4136 
4137         return rptr;
4138 }
4139 
4140 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4141                      struct radeon_ring *ring)
4142 {
4143         return RREG32(CP_RB0_WPTR);
4144 }
4145 
4146 void cik_gfx_set_wptr(struct radeon_device *rdev,
4147                       struct radeon_ring *ring)
4148 {
4149         WREG32(CP_RB0_WPTR, ring->wptr);
4150         (void)RREG32(CP_RB0_WPTR);
4151 }
4152 
4153 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4154                          struct radeon_ring *ring)
4155 {
4156         u32 rptr;
4157 
4158         if (rdev->wb.enabled) {
4159                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4160         } else {
4161                 mutex_lock(&rdev->srbm_mutex);
4162                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4163                 rptr = RREG32(CP_HQD_PQ_RPTR);
4164                 cik_srbm_select(rdev, 0, 0, 0, 0);
4165                 mutex_unlock(&rdev->srbm_mutex);
4166         }
4167 
4168         return rptr;
4169 }
4170 
4171 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4172                          struct radeon_ring *ring)
4173 {
4174         u32 wptr;
4175 
4176         if (rdev->wb.enabled) {
4177                 /* XXX check if swapping is necessary on BE */
4178                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4179         } else {
4180                 mutex_lock(&rdev->srbm_mutex);
4181                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4182                 wptr = RREG32(CP_HQD_PQ_WPTR);
4183                 cik_srbm_select(rdev, 0, 0, 0, 0);
4184                 mutex_unlock(&rdev->srbm_mutex);
4185         }
4186 
4187         return wptr;
4188 }
4189 
4190 void cik_compute_set_wptr(struct radeon_device *rdev,
4191                           struct radeon_ring *ring)
4192 {
4193         /* XXX check if swapping is necessary on BE */
4194         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4195         WDOORBELL32(ring->doorbell_index, ring->wptr);
4196 }
4197 
4198 static void cik_compute_stop(struct radeon_device *rdev,
4199                              struct radeon_ring *ring)
4200 {
4201         u32 j, tmp;
4202 
4203         cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4204         /* Disable wptr polling. */
4205         tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4206         tmp &= ~WPTR_POLL_EN;
4207         WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4208         /* Disable HQD. */
4209         if (RREG32(CP_HQD_ACTIVE) & 1) {
4210                 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4211                 for (j = 0; j < rdev->usec_timeout; j++) {
4212                         if (!(RREG32(CP_HQD_ACTIVE) & 1))
4213                                 break;
4214                         udelay(1);
4215                 }
4216                 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4217                 WREG32(CP_HQD_PQ_RPTR, 0);
4218                 WREG32(CP_HQD_PQ_WPTR, 0);
4219         }
4220         cik_srbm_select(rdev, 0, 0, 0, 0);
4221 }
4222 
4223 /**
4224  * cik_cp_compute_enable - enable/disable the compute CP MEs
4225  *
4226  * @rdev: radeon_device pointer
4227  * @enable: enable or disable the MEs
4228  *
4229  * Halts or unhalts the compute MEs.
4230  */
4231 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232 {
4233         if (enable)
4234                 WREG32(CP_MEC_CNTL, 0);
4235         else {
4236                 /*
4237                  * To make hibernation reliable we need to clear compute ring
4238                  * configuration before halting the compute ring.
4239                  */
4240                 mutex_lock(&rdev->srbm_mutex);
4241                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4242                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4243                 mutex_unlock(&rdev->srbm_mutex);
4244 
4245                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4246                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4247                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4248         }
4249         udelay(50);
4250 }
4251 
4252 /**
4253  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4254  *
4255  * @rdev: radeon_device pointer
4256  *
4257  * Loads the compute MEC1&2 ucode.
4258  * Returns 0 for success, -EINVAL if the ucode is not available.
4259  */
4260 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4261 {
4262         int i;
4263 
4264         if (!rdev->mec_fw)
4265                 return -EINVAL;
4266 
4267         cik_cp_compute_enable(rdev, false);
4268 
4269         if (rdev->new_fw) {
4270                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4271                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4272                 const __le32 *fw_data;
4273                 u32 fw_size;
4274 
4275                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4276 
4277                 /* MEC1 */
4278                 fw_data = (const __le32 *)
4279                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4280                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4281                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282                 for (i = 0; i < fw_size; i++)
4283                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4284                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4285 
4286                 /* MEC2 */
4287                 if (rdev->family == CHIP_KAVERI) {
4288                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4289                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4290 
4291                         fw_data = (const __le32 *)
4292                                 (rdev->mec2_fw->data +
4293                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4294                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4295                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4296                         for (i = 0; i < fw_size; i++)
4297                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4298                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4299                 }
4300         } else {
4301                 const __be32 *fw_data;
4302 
4303                 /* MEC1 */
4304                 fw_data = (const __be32 *)rdev->mec_fw->data;
4305                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4306                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4307                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4308                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4309 
4310                 if (rdev->family == CHIP_KAVERI) {
4311                         /* MEC2 */
4312                         fw_data = (const __be32 *)rdev->mec_fw->data;
4313                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4314                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4315                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4316                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4317                 }
4318         }
4319 
4320         return 0;
4321 }
4322 
4323 /**
4324  * cik_cp_compute_start - start the compute queues
4325  *
4326  * @rdev: radeon_device pointer
4327  *
4328  * Enable the compute queues.
4329  * Returns 0 for success, error for failure.
4330  */
4331 static int cik_cp_compute_start(struct radeon_device *rdev)
4332 {
4333         cik_cp_compute_enable(rdev, true);
4334 
4335         return 0;
4336 }
4337 
4338 /**
4339  * cik_cp_compute_fini - stop the compute queues
4340  *
4341  * @rdev: radeon_device pointer
4342  *
4343  * Stop the compute queues and tear down the driver queue
4344  * info.
4345  */
4346 static void cik_cp_compute_fini(struct radeon_device *rdev)
4347 {
4348         int i, idx, r;
4349 
4350         cik_cp_compute_enable(rdev, false);
4351 
4352         for (i = 0; i < 2; i++) {
4353                 if (i == 0)
4354                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4355                 else
4356                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4357 
4358                 if (rdev->ring[idx].mqd_obj) {
4359                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4360                         if (unlikely(r != 0))
4361                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4362 
4363                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4364                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4365 
4366                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4367                         rdev->ring[idx].mqd_obj = NULL;
4368                 }
4369         }
4370 }
4371 
4372 static void cik_mec_fini(struct radeon_device *rdev)
4373 {
4374         int r;
4375 
4376         if (rdev->mec.hpd_eop_obj) {
4377                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4378                 if (unlikely(r != 0))
4379                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4380                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4381                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4382 
4383                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4384                 rdev->mec.hpd_eop_obj = NULL;
4385         }
4386 }
4387 
4388 #define MEC_HPD_SIZE 2048
4389 
4390 static int cik_mec_init(struct radeon_device *rdev)
4391 {
4392         int r;
4393         u32 *hpd;
4394 
4395         /*
4396          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4397          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4398          */
4399         if (rdev->family == CHIP_KAVERI)
4400                 rdev->mec.num_mec = 2;
4401         else
4402                 rdev->mec.num_mec = 1;
4403         rdev->mec.num_pipe = 4;
4404         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4405 
4406         if (rdev->mec.hpd_eop_obj == NULL) {
4407                 r = radeon_bo_create(rdev,
4408                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4409                                      PAGE_SIZE, true,
4410                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4411                                      &rdev->mec.hpd_eop_obj);
4412                 if (r) {
4413                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4414                         return r;
4415                 }
4416         }
4417 
4418         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4419         if (unlikely(r != 0)) {
4420                 cik_mec_fini(rdev);
4421                 return r;
4422         }
4423         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4424                           &rdev->mec.hpd_eop_gpu_addr);
4425         if (r) {
4426                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4427                 cik_mec_fini(rdev);
4428                 return r;
4429         }
4430         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4431         if (r) {
4432                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4433                 cik_mec_fini(rdev);
4434                 return r;
4435         }
4436 
4437         /* clear memory.  Not sure if this is required or not */
4438         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4439 
4440         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4441         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4442 
4443         return 0;
4444 }
4445 
4446 struct hqd_registers
4447 {
4448         u32 cp_mqd_base_addr;
4449         u32 cp_mqd_base_addr_hi;
4450         u32 cp_hqd_active;
4451         u32 cp_hqd_vmid;
4452         u32 cp_hqd_persistent_state;
4453         u32 cp_hqd_pipe_priority;
4454         u32 cp_hqd_queue_priority;
4455         u32 cp_hqd_quantum;
4456         u32 cp_hqd_pq_base;
4457         u32 cp_hqd_pq_base_hi;
4458         u32 cp_hqd_pq_rptr;
4459         u32 cp_hqd_pq_rptr_report_addr;
4460         u32 cp_hqd_pq_rptr_report_addr_hi;
4461         u32 cp_hqd_pq_wptr_poll_addr;
4462         u32 cp_hqd_pq_wptr_poll_addr_hi;
4463         u32 cp_hqd_pq_doorbell_control;
4464         u32 cp_hqd_pq_wptr;
4465         u32 cp_hqd_pq_control;
4466         u32 cp_hqd_ib_base_addr;
4467         u32 cp_hqd_ib_base_addr_hi;
4468         u32 cp_hqd_ib_rptr;
4469         u32 cp_hqd_ib_control;
4470         u32 cp_hqd_iq_timer;
4471         u32 cp_hqd_iq_rptr;
4472         u32 cp_hqd_dequeue_request;
4473         u32 cp_hqd_dma_offload;
4474         u32 cp_hqd_sema_cmd;
4475         u32 cp_hqd_msg_type;
4476         u32 cp_hqd_atomic0_preop_lo;
4477         u32 cp_hqd_atomic0_preop_hi;
4478         u32 cp_hqd_atomic1_preop_lo;
4479         u32 cp_hqd_atomic1_preop_hi;
4480         u32 cp_hqd_hq_scheduler0;
4481         u32 cp_hqd_hq_scheduler1;
4482         u32 cp_mqd_control;
4483 };
4484 
4485 struct bonaire_mqd
4486 {
4487         u32 header;
4488         u32 dispatch_initiator;
4489         u32 dimensions[3];
4490         u32 start_idx[3];
4491         u32 num_threads[3];
4492         u32 pipeline_stat_enable;
4493         u32 perf_counter_enable;
4494         u32 pgm[2];
4495         u32 tba[2];
4496         u32 tma[2];
4497         u32 pgm_rsrc[2];
4498         u32 vmid;
4499         u32 resource_limits;
4500         u32 static_thread_mgmt01[2];
4501         u32 tmp_ring_size;
4502         u32 static_thread_mgmt23[2];
4503         u32 restart[3];
4504         u32 thread_trace_enable;
4505         u32 reserved1;
4506         u32 user_data[16];
4507         u32 vgtcs_invoke_count[2];
4508         struct hqd_registers queue_state;
4509         u32 dequeue_cntr;
4510         u32 interrupt_queue[64];
4511 };
4512 
4513 /**
4514  * cik_cp_compute_resume - setup the compute queue registers
4515  *
4516  * @rdev: radeon_device pointer
4517  *
4518  * Program the compute queues and test them to make sure they
4519  * are working.
4520  * Returns 0 for success, error for failure.
4521  */
4522 static int cik_cp_compute_resume(struct radeon_device *rdev)
4523 {
4524         int r, i, j, idx;
4525         u32 tmp;
4526         bool use_doorbell = true;
4527         u64 hqd_gpu_addr;
4528         u64 mqd_gpu_addr;
4529         u64 eop_gpu_addr;
4530         u64 wb_gpu_addr;
4531         u32 *buf;
4532         struct bonaire_mqd *mqd;
4533 
4534         r = cik_cp_compute_start(rdev);
4535         if (r)
4536                 return r;
4537 
4538         /* fix up chicken bits */
4539         tmp = RREG32(CP_CPF_DEBUG);
4540         tmp |= (1 << 23);
4541         WREG32(CP_CPF_DEBUG, tmp);
4542 
4543         /* init the pipes */
4544         mutex_lock(&rdev->srbm_mutex);
4545 
4546         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4547                 int me = (i < 4) ? 1 : 2;
4548                 int pipe = (i < 4) ? i : (i - 4);
4549 
4550                 cik_srbm_select(rdev, me, pipe, 0, 0);
4551 
4552                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4553                 /* write the EOP addr */
4554                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4555                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4556 
4557                 /* set the VMID assigned */
4558                 WREG32(CP_HPD_EOP_VMID, 0);
4559 
4560                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4561                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4562                 tmp &= ~EOP_SIZE_MASK;
4563                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4564                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4565 
4566         }
4567         cik_srbm_select(rdev, 0, 0, 0, 0);
4568         mutex_unlock(&rdev->srbm_mutex);
4569 
4570         /* init the queues.  Just two for now. */
4571         for (i = 0; i < 2; i++) {
4572                 if (i == 0)
4573                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4574                 else
4575                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4576 
4577                 if (rdev->ring[idx].mqd_obj == NULL) {
4578                         r = radeon_bo_create(rdev,
4579                                              sizeof(struct bonaire_mqd),
4580                                              PAGE_SIZE, true,
4581                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4582                                              NULL, &rdev->ring[idx].mqd_obj);
4583                         if (r) {
4584                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4585                                 return r;
4586                         }
4587                 }
4588 
4589                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4590                 if (unlikely(r != 0)) {
4591                         cik_cp_compute_fini(rdev);
4592                         return r;
4593                 }
4594                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4595                                   &mqd_gpu_addr);
4596                 if (r) {
4597                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4598                         cik_cp_compute_fini(rdev);
4599                         return r;
4600                 }
4601                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4602                 if (r) {
4603                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4604                         cik_cp_compute_fini(rdev);
4605                         return r;
4606                 }
4607 
4608                 /* init the mqd struct */
4609                 memset(buf, 0, sizeof(struct bonaire_mqd));
4610 
4611                 mqd = (struct bonaire_mqd *)buf;
4612                 mqd->header = 0xC0310800;
4613                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4614                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4615                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4616                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4617 
4618                 mutex_lock(&rdev->srbm_mutex);
4619                 cik_srbm_select(rdev, rdev->ring[idx].me,
4620                                 rdev->ring[idx].pipe,
4621                                 rdev->ring[idx].queue, 0);
4622 
4623                 /* disable wptr polling */
4624                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4625                 tmp &= ~WPTR_POLL_EN;
4626                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4627 
4628                 /* enable doorbell? */
4629                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4630                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4631                 if (use_doorbell)
4632                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4633                 else
4634                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4635                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4636                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4637 
4638                 /* disable the queue if it's active */
4639                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4640                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4641                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4642                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4643                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4644                         for (j = 0; j < rdev->usec_timeout; j++) {
4645                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4646                                         break;
4647                                 udelay(1);
4648                         }
4649                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4650                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4651                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4652                 }
4653 
4654                 /* set the pointer to the MQD */
4655                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4656                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4657                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4658                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4659                 /* set MQD vmid to 0 */
4660                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4661                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4662                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4663 
4664                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4666                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4667                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4669                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4670 
4671                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4672                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4673                 mqd->queue_state.cp_hqd_pq_control &=
4674                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4675 
4676                 mqd->queue_state.cp_hqd_pq_control |=
4677                         order_base_2(rdev->ring[idx].ring_size / 8);
4678                 mqd->queue_state.cp_hqd_pq_control |=
4679                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4680 #ifdef __BIG_ENDIAN
4681                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4682 #endif
4683                 mqd->queue_state.cp_hqd_pq_control &=
4684                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4685                 mqd->queue_state.cp_hqd_pq_control |=
4686                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4687                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4688 
4689                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4690                 if (i == 0)
4691                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4692                 else
4693                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4694                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4695                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4696                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4697                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4698                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4699 
4700                 /* set the wb address wether it's enabled or not */
4701                 if (i == 0)
4702                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4703                 else
4704                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4705                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4706                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4707                         upper_32_bits(wb_gpu_addr) & 0xffff;
4708                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4709                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4710                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4711                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4712 
4713                 /* enable the doorbell if requested */
4714                 if (use_doorbell) {
4715                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4716                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4717                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4718                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4719                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4720                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4721                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4722                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4723 
4724                 } else {
4725                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4726                 }
4727                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4728                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4729 
4730                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4731                 rdev->ring[idx].wptr = 0;
4732                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4733                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4734                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4735 
4736                 /* set the vmid for the queue */
4737                 mqd->queue_state.cp_hqd_vmid = 0;
4738                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4739 
4740                 /* activate the queue */
4741                 mqd->queue_state.cp_hqd_active = 1;
4742                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4743 
4744                 cik_srbm_select(rdev, 0, 0, 0, 0);
4745                 mutex_unlock(&rdev->srbm_mutex);
4746 
4747                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4748                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749 
4750                 rdev->ring[idx].ready = true;
4751                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4752                 if (r)
4753                         rdev->ring[idx].ready = false;
4754         }
4755 
4756         return 0;
4757 }
4758 
4759 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4760 {
4761         cik_cp_gfx_enable(rdev, enable);
4762         cik_cp_compute_enable(rdev, enable);
4763 }
4764 
/*
 * cik_cp_load_microcode - load the gfx and the compute CP ucode
 *
 * Loads the gfx ucode first; the compute ucode is loaded only if that
 * succeeded.  Returns 0 for success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r = cik_cp_gfx_load_microcode(rdev);

	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4778 
/*
 * cik_cp_fini - tear down the gfx ring and the compute queues
 *
 * Calls cik_cp_gfx_fini() followed by cik_cp_compute_fini().
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* gfx first, compute second */
	cik_cp_gfx_fini(rdev);

	cik_cp_compute_fini(rdev);
}
4784 
4785 static int cik_cp_resume(struct radeon_device *rdev)
4786 {
4787         int r;
4788 
4789         cik_enable_gui_idle_interrupt(rdev, false);
4790 
4791         r = cik_cp_load_microcode(rdev);
4792         if (r)
4793                 return r;
4794 
4795         r = cik_cp_gfx_resume(rdev);
4796         if (r)
4797                 return r;
4798         r = cik_cp_compute_resume(rdev);
4799         if (r)
4800                 return r;
4801 
4802         cik_enable_gui_idle_interrupt(rdev, true);
4803 
4804         return 0;
4805 }
4806 
4807 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4808 {
4809         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4810                 RREG32(GRBM_STATUS));
4811         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4812                 RREG32(GRBM_STATUS2));
4813         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4814                 RREG32(GRBM_STATUS_SE0));
4815         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4816                 RREG32(GRBM_STATUS_SE1));
4817         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4818                 RREG32(GRBM_STATUS_SE2));
4819         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4820                 RREG32(GRBM_STATUS_SE3));
4821         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4822                 RREG32(SRBM_STATUS));
4823         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4824                 RREG32(SRBM_STATUS2));
4825         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4826                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4827         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4828                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4829         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4830         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4831                  RREG32(CP_STALLED_STAT1));
4832         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4833                  RREG32(CP_STALLED_STAT2));
4834         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4835                  RREG32(CP_STALLED_STAT3));
4836         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4837                  RREG32(CP_CPF_BUSY_STAT));
4838         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4839                  RREG32(CP_CPF_STALLED_STAT1));
4840         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4841         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4842         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4843                  RREG32(CP_CPC_STALLED_STAT1));
4844         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4845 }
4846 
4847 /**
4848  * cik_gpu_check_soft_reset - check which blocks are busy
4849  *
4850  * @rdev: radeon_device pointer
4851  *
4852  * Check which blocks are busy and return the relevant reset
4853  * mask to be used by cik_gpu_soft_reset().
4854  * Returns a mask of the blocks to be reset.
4855  */
4856 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4857 {
4858         u32 reset_mask = 0;
4859         u32 tmp;
4860 
4861         /* GRBM_STATUS */
4862         tmp = RREG32(GRBM_STATUS);
4863         if (tmp & (PA_BUSY | SC_BUSY |
4864                    BCI_BUSY | SX_BUSY |
4865                    TA_BUSY | VGT_BUSY |
4866                    DB_BUSY | CB_BUSY |
4867                    GDS_BUSY | SPI_BUSY |
4868                    IA_BUSY | IA_BUSY_NO_DMA))
4869                 reset_mask |= RADEON_RESET_GFX;
4870 
4871         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4872                 reset_mask |= RADEON_RESET_CP;
4873 
4874         /* GRBM_STATUS2 */
4875         tmp = RREG32(GRBM_STATUS2);
4876         if (tmp & RLC_BUSY)
4877                 reset_mask |= RADEON_RESET_RLC;
4878 
4879         /* SDMA0_STATUS_REG */
4880         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4881         if (!(tmp & SDMA_IDLE))
4882                 reset_mask |= RADEON_RESET_DMA;
4883 
4884         /* SDMA1_STATUS_REG */
4885         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4886         if (!(tmp & SDMA_IDLE))
4887                 reset_mask |= RADEON_RESET_DMA1;
4888 
4889         /* SRBM_STATUS2 */
4890         tmp = RREG32(SRBM_STATUS2);
4891         if (tmp & SDMA_BUSY)
4892                 reset_mask |= RADEON_RESET_DMA;
4893 
4894         if (tmp & SDMA1_BUSY)
4895                 reset_mask |= RADEON_RESET_DMA1;
4896 
4897         /* SRBM_STATUS */
4898         tmp = RREG32(SRBM_STATUS);
4899 
4900         if (tmp & IH_BUSY)
4901                 reset_mask |= RADEON_RESET_IH;
4902 
4903         if (tmp & SEM_BUSY)
4904                 reset_mask |= RADEON_RESET_SEM;
4905 
4906         if (tmp & GRBM_RQ_PENDING)
4907                 reset_mask |= RADEON_RESET_GRBM;
4908 
4909         if (tmp & VMC_BUSY)
4910                 reset_mask |= RADEON_RESET_VMC;
4911 
4912         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4913                    MCC_BUSY | MCD_BUSY))
4914                 reset_mask |= RADEON_RESET_MC;
4915 
4916         if (evergreen_is_display_hung(rdev))
4917                 reset_mask |= RADEON_RESET_DISPLAY;
4918 
4919         /* Skip MC reset as it's mostly likely not hung, just busy */
4920         if (reset_mask & RADEON_RESET_MC) {
4921                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4922                 reset_mask &= ~RADEON_RESET_MC;
4923         }
4924 
4925         return reset_mask;
4926 }
4927 
4928 /**
4929  * cik_gpu_soft_reset - soft reset GPU
4930  *
4931  * @rdev: radeon_device pointer
4932  * @reset_mask: mask of which blocks to reset
4933  *
4934  * Soft reset the blocks specified in @reset_mask.
4935  */
4936 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4937 {
4938         struct evergreen_mc_save save;
4939         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4940         u32 tmp;
4941 
4942         if (reset_mask == 0)
4943                 return;
4944 
4945         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4946 
4947         cik_print_gpu_status_regs(rdev);
4948         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4949                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4950         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4951                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4952 
4953         /* disable CG/PG */
4954         cik_fini_pg(rdev);
4955         cik_fini_cg(rdev);
4956 
4957         /* stop the rlc */
4958         cik_rlc_stop(rdev);
4959 
4960         /* Disable GFX parsing/prefetching */
4961         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4962 
4963         /* Disable MEC parsing/prefetching */
4964         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4965 
4966         if (reset_mask & RADEON_RESET_DMA) {
4967                 /* sdma0 */
4968                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4969                 tmp |= SDMA_HALT;
4970                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4971         }
4972         if (reset_mask & RADEON_RESET_DMA1) {
4973                 /* sdma1 */
4974                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4975                 tmp |= SDMA_HALT;
4976                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4977         }
4978 
4979         evergreen_mc_stop(rdev, &save);
4980         if (evergreen_mc_wait_for_idle(rdev)) {
4981                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4982         }
4983 
4984         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4985                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4986 
4987         if (reset_mask & RADEON_RESET_CP) {
4988                 grbm_soft_reset |= SOFT_RESET_CP;
4989 
4990                 srbm_soft_reset |= SOFT_RESET_GRBM;
4991         }
4992 
4993         if (reset_mask & RADEON_RESET_DMA)
4994                 srbm_soft_reset |= SOFT_RESET_SDMA;
4995 
4996         if (reset_mask & RADEON_RESET_DMA1)
4997                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4998 
4999         if (reset_mask & RADEON_RESET_DISPLAY)
5000                 srbm_soft_reset |= SOFT_RESET_DC;
5001 
5002         if (reset_mask & RADEON_RESET_RLC)
5003                 grbm_soft_reset |= SOFT_RESET_RLC;
5004 
5005         if (reset_mask & RADEON_RESET_SEM)
5006                 srbm_soft_reset |= SOFT_RESET_SEM;
5007 
5008         if (reset_mask & RADEON_RESET_IH)
5009                 srbm_soft_reset |= SOFT_RESET_IH;
5010 
5011         if (reset_mask & RADEON_RESET_GRBM)
5012                 srbm_soft_reset |= SOFT_RESET_GRBM;
5013 
5014         if (reset_mask & RADEON_RESET_VMC)
5015                 srbm_soft_reset |= SOFT_RESET_VMC;
5016 
5017         if (!(rdev->flags & RADEON_IS_IGP)) {
5018                 if (reset_mask & RADEON_RESET_MC)
5019                         srbm_soft_reset |= SOFT_RESET_MC;
5020         }
5021 
5022         if (grbm_soft_reset) {
5023                 tmp = RREG32(GRBM_SOFT_RESET);
5024                 tmp |= grbm_soft_reset;
5025                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5026                 WREG32(GRBM_SOFT_RESET, tmp);
5027                 tmp = RREG32(GRBM_SOFT_RESET);
5028 
5029                 udelay(50);
5030 
5031                 tmp &= ~grbm_soft_reset;
5032                 WREG32(GRBM_SOFT_RESET, tmp);
5033                 tmp = RREG32(GRBM_SOFT_RESET);
5034         }
5035 
5036         if (srbm_soft_reset) {
5037                 tmp = RREG32(SRBM_SOFT_RESET);
5038                 tmp |= srbm_soft_reset;
5039                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5040                 WREG32(SRBM_SOFT_RESET, tmp);
5041                 tmp = RREG32(SRBM_SOFT_RESET);
5042 
5043                 udelay(50);
5044 
5045                 tmp &= ~srbm_soft_reset;
5046                 WREG32(SRBM_SOFT_RESET, tmp);
5047                 tmp = RREG32(SRBM_SOFT_RESET);
5048         }
5049 
5050         /* Wait a little for things to settle down */
5051         udelay(50);
5052 
5053         evergreen_mc_resume(rdev, &save);
5054         udelay(50);
5055 
5056         cik_print_gpu_status_regs(rdev);
5057 }
5058 
/**
 * struct kv_reset_save_regs - GMCON register values kept across a reset
 *
 * @gmcon_reng_execute: value read from GMCON_RENG_EXECUTE
 * @gmcon_misc: value read from GMCON_MISC
 * @gmcon_misc3: value read from GMCON_MISC3
 *
 * Filled in by kv_save_regs_for_reset() and written back to the
 * hardware by kv_restore_regs_for_reset().
 */
5059 struct kv_reset_save_regs {
5060         u32 gmcon_reng_execute;
5061         u32 gmcon_misc;
5062         u32 gmcon_misc3;
5063 };
5064 
5065 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5066                                    struct kv_reset_save_regs *save)
5067 {
5068         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5069         save->gmcon_misc = RREG32(GMCON_MISC);
5070         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5071 
5072         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5073         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5074                                                 STCTRL_STUTTER_EN));
5075 }
5076 
5077 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5078                                       struct kv_reset_save_regs *save)
5079 {
5080         int i;
5081 
5082         WREG32(GMCON_PGFSM_WRITE, 0);
5083         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5084 
5085         for (i = 0; i < 5; i++)
5086                 WREG32(GMCON_PGFSM_WRITE, 0);
5087 
5088         WREG32(GMCON_PGFSM_WRITE, 0);
5089         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5090 
5091         for (i = 0; i < 5; i++)
5092                 WREG32(GMCON_PGFSM_WRITE, 0);
5093 
5094         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5095         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5096 
5097         for (i = 0; i < 5; i++)
5098                 WREG32(GMCON_PGFSM_WRITE, 0);
5099 
5100         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5101         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5102 
5103         for (i = 0; i < 5; i++)
5104                 WREG32(GMCON_PGFSM_WRITE, 0);
5105 
5106         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5107         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5108 
5109         for (i = 0; i < 5; i++)
5110                 WREG32(GMCON_PGFSM_WRITE, 0);
5111 
5112         WREG32(GMCON_PGFSM_WRITE, 0);
5113         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5114 
5115         for (i = 0; i < 5; i++)
5116                 WREG32(GMCON_PGFSM_WRITE, 0);
5117 
5118         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5119         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5120 
5121         for (i = 0; i < 5; i++)
5122                 WREG32(GMCON_PGFSM_WRITE, 0);
5123 
5124         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5125         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5126 
5127         for (i = 0; i < 5; i++)
5128                 WREG32(GMCON_PGFSM_WRITE, 0);
5129 
5130         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5131         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5132 
5133         for (i = 0; i < 5; i++)
5134                 WREG32(GMCON_PGFSM_WRITE, 0);
5135 
5136         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5137         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5138 
5139         for (i = 0; i < 5; i++)
5140                 WREG32(GMCON_PGFSM_WRITE, 0);
5141 
5142         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5143         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5144 
5145         WREG32(GMCON_MISC3, save->gmcon_misc3);
5146         WREG32(GMCON_MISC, save->gmcon_misc);
5147         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5148 }
5149 
5150 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5151 {
5152         struct evergreen_mc_save save;
5153         struct kv_reset_save_regs kv_save = { 0 };
5154         u32 tmp, i;
5155 
5156         dev_info(rdev->dev, "GPU pci config reset\n");
5157 
5158         /* disable dpm? */
5159 
5160         /* disable cg/pg */
5161         cik_fini_pg(rdev);
5162         cik_fini_cg(rdev);
5163 
5164         /* Disable GFX parsing/prefetching */
5165         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5166 
5167         /* Disable MEC parsing/prefetching */
5168         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5169 
5170         /* sdma0 */
5171         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5172         tmp |= SDMA_HALT;
5173         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5174         /* sdma1 */
5175         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5176         tmp |= SDMA_HALT;
5177         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5178         /* XXX other engines? */
5179 
5180         /* halt the rlc, disable cp internal ints */
5181         cik_rlc_stop(rdev);
5182 
5183         udelay(50);
5184 
5185         /* disable mem access */
5186         evergreen_mc_stop(rdev, &save);
5187         if (evergreen_mc_wait_for_idle(rdev)) {
5188                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5189         }
5190 
5191         if (rdev->flags & RADEON_IS_IGP)
5192                 kv_save_regs_for_reset(rdev, &kv_save);
5193 
5194         /* disable BM */
5195         pci_clear_master(rdev->pdev);
5196         /* reset */
5197         radeon_pci_config_reset(rdev);
5198 
5199         udelay(100);
5200 
5201         /* wait for asic to come out of reset */
5202         for (i = 0; i < rdev->usec_timeout; i++) {
5203                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5204                         break;
5205                 udelay(1);
5206         }
5207 
5208         /* does asic init need to be run first??? */
5209         if (rdev->flags & RADEON_IS_IGP)
5210                 kv_restore_regs_for_reset(rdev, &kv_save);
5211 }
5212 
5213 /**
5214  * cik_asic_reset - soft reset GPU
5215  *
5216  * @rdev: radeon_device pointer
5217  * @hard: force hard reset
5218  *
5219  * Look up which blocks are hung and attempt
5220  * to reset them.
5221  * Returns 0 for success.
5222  */
5223 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5224 {
5225         u32 reset_mask;
5226 
5227         if (hard) {
5228                 cik_gpu_pci_config_reset(rdev);
5229                 return 0;
5230         }
5231 
5232         reset_mask = cik_gpu_check_soft_reset(rdev);
5233 
5234         if (reset_mask)
5235                 r600_set_bios_scratch_engine_hung(rdev, true);
5236 
5237         /* try soft reset */
5238         cik_gpu_soft_reset(rdev, reset_mask);
5239 
5240         reset_mask = cik_gpu_check_soft_reset(rdev);
5241 
5242         /* try pci config reset */
5243         if (reset_mask && radeon_hard_reset)
5244                 cik_gpu_pci_config_reset(rdev);
5245 
5246         reset_mask = cik_gpu_check_soft_reset(rdev);
5247 
5248         if (!reset_mask)
5249                 r600_set_bios_scratch_engine_hung(rdev, false);
5250 
5251         return 0;
5252 }
5253 
5254 /**
5255  * cik_gfx_is_lockup - check if the 3D engine is locked up
5256  *
5257  * @rdev: radeon_device pointer
5258  * @ring: radeon_ring structure holding ring information
5259  *
5260  * Check if the 3D engine is locked up (CIK).
5261  * Returns true if the engine is locked, false if not.
5262  */
5263 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5264 {
5265         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5266 
5267         if (!(reset_mask & (RADEON_RESET_GFX |
5268                             RADEON_RESET_COMPUTE |
5269                             RADEON_RESET_CP))) {
5270                 radeon_ring_lockup_update(rdev, ring);
5271                 return false;
5272         }
5273         return radeon_ring_test_lockup(rdev, ring);
5274 }
5275 
5276 /* MC */
5277 /**
5278  * cik_mc_program - program the GPU memory controller
5279  *
5280  * @rdev: radeon_device pointer
5281  *
5282  * Set the location of vram, gart, and AGP in the GPU's
5283  * physical address space (CIK).
5284  */
5285 static void cik_mc_program(struct radeon_device *rdev)
5286 {
5287         struct evergreen_mc_save save;
5288         u32 tmp;
5289         int i, j;
5290 
5291         /* Initialize HDP */
5292         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5293                 WREG32((0x2c14 + j), 0x00000000);
5294                 WREG32((0x2c18 + j), 0x00000000);
5295                 WREG32((0x2c1c + j), 0x00000000);
5296                 WREG32((0x2c20 + j), 0x00000000);
5297                 WREG32((0x2c24 + j), 0x00000000);
5298         }
5299         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5300 
5301         evergreen_mc_stop(rdev, &save);
5302         if (radeon_mc_wait_for_idle(rdev)) {
5303                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5304         }
5305         /* Lockout access through VGA aperture*/
5306         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5307         /* Update configuration */
5308         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5309                rdev->mc.vram_start >> 12);
5310         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5311                rdev->mc.vram_end >> 12);
5312         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5313                rdev->vram_scratch.gpu_addr >> 12);
5314         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5315         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5316         WREG32(MC_VM_FB_LOCATION, tmp);
5317         /* XXX double check these! */
5318         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5319         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5320         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5321         WREG32(MC_VM_AGP_BASE, 0);
5322         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5323         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5324         if (radeon_mc_wait_for_idle(rdev)) {
5325                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5326         }
5327         evergreen_mc_resume(rdev, &save);
5328         /* we need to own VRAM, so turn off the VGA renderer here
5329          * to stop it overwriting our objects */
5330         rv515_vga_render_disable(rdev);
5331 }
5332 
5333 /**
5334  * cik_mc_init - initialize the memory controller driver params
5335  *
5336  * @rdev: radeon_device pointer
5337  *
5338  * Look up the amount of vram, vram width, and decide how to place
5339  * vram and gart within the GPU's physical address space (CIK).
5340  * Returns 0 for success.
5341  */
5342 static int cik_mc_init(struct radeon_device *rdev)
5343 {
5344         u32 tmp;
5345         int chansize, numchan;
5346 
5347         /* Get VRAM informations */
5348         rdev->mc.vram_is_ddr = true;
5349         tmp = RREG32(MC_ARB_RAMCFG);
5350         if (tmp & CHANSIZE_MASK) {
5351                 chansize = 64;
5352         } else {
5353                 chansize = 32;
5354         }
5355         tmp = RREG32(MC_SHARED_CHMAP);
5356         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5357         case 0:
5358         default:
5359                 numchan = 1;
5360                 break;
5361         case 1:
5362                 numchan = 2;
5363                 break;
5364         case 2:
5365                 numchan = 4;
5366                 break;
5367         case 3:
5368                 numchan = 8;
5369                 break;
5370         case 4:
5371                 numchan = 3;
5372                 break;
5373         case 5:
5374                 numchan = 6;
5375                 break;
5376         case 6:
5377                 numchan = 10;
5378                 break;
5379         case 7:
5380                 numchan = 12;
5381                 break;
5382         case 8:
5383                 numchan = 16;
5384                 break;
5385         }
5386         rdev->mc.vram_width = numchan * chansize;
5387         /* Could aper size report 0 ? */
5388         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5389         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5390         /* size in MB on si */
5391         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5392         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5393         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5394         si_vram_gtt_location(rdev, &rdev->mc);
5395         radeon_update_bandwidth_info(rdev);
5396 
5397         return 0;
5398 }
5399 
5400 /*
5401  * GART
5402  * VMID 0 is the physical GPU addresses as used by the kernel.
5403  * VMIDs 1-15 are used for userspace clients and are handled
5404  * by the radeon vm/hsa code.
5405  */
5406 /**
5407  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5408  *
5409  * @rdev: radeon_device pointer
5410  *
5411  * Flush the TLB for the VMID 0 page table (CIK).
5412  */
5413 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5414 {
5415         /* flush hdp cache */
5416         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5417 
5418         /* bits 0-15 are the VM contexts0-15 */
5419         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5420 }
5421 
5422 /**
5423  * cik_pcie_gart_enable - gart enable
5424  *
5425  * @rdev: radeon_device pointer
5426  *
5427  * This sets up the TLBs, programs the page tables for VMID0,
5428  * sets up the hw for VMIDs 1-15 which are allocated on
5429  * demand, and sets up the global locations for the LDS, GDS,
5430  * and GPUVM for FSA64 clients (CIK).
5431  * Returns 0 for success, errors for failure.
5432  */
5433 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5434 {
5435         int r, i;
5436 
5437         if (rdev->gart.robj == NULL) {
5438                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5439                 return -EINVAL;
5440         }
5441         r = radeon_gart_table_vram_pin(rdev);
5442         if (r)
5443                 return r;
5444         /* Setup TLB control */
5445         WREG32(MC_VM_MX_L1_TLB_CNTL,
5446                (0xA << 7) |
5447                ENABLE_L1_TLB |
5448                ENABLE_L1_FRAGMENT_PROCESSING |
5449                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5450                ENABLE_ADVANCED_DRIVER_MODEL |
5451                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5452         /* Setup L2 cache */
5453         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5454                ENABLE_L2_FRAGMENT_PROCESSING |
5455                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5456                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5457                EFFECTIVE_L2_QUEUE_SIZE(7) |
5458                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5459         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5460         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5461                BANK_SELECT(4) |
5462                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5463         /* setup context0 */
5464         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5465         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5466         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5467         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5468                         (u32)(rdev->dummy_page.addr >> 12));
5469         WREG32(VM_CONTEXT0_CNTL2, 0);
5470         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5471                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5472 
5473         WREG32(0x15D4, 0);
5474         WREG32(0x15D8, 0);
5475         WREG32(0x15DC, 0);
5476 
5477         /* restore context1-15 */
5478         /* set vm size, must be a multiple of 4 */
5479         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5480         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5481         for (i = 1; i < 16; i++) {
5482                 if (i < 8)
5483                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5484                                rdev->vm_manager.saved_table_addr[i]);
5485                 else
5486                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5487                                rdev->vm_manager.saved_table_addr[i]);
5488         }
5489 
5490         /* enable context1-15 */
5491         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5492                (u32)(rdev->dummy_page.addr >> 12));
5493         WREG32(VM_CONTEXT1_CNTL2, 4);
5494         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5495                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5496                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5498                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5500                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5502                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5504                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5505                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5506                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5507                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5508 
5509         if (rdev->family == CHIP_KAVERI) {
5510                 u32 tmp = RREG32(CHUB_CONTROL);
5511                 tmp &= ~BYPASS_VM;
5512                 WREG32(CHUB_CONTROL, tmp);
5513         }
5514 
5515         /* XXX SH_MEM regs */
5516         /* where to put LDS, scratch, GPUVM in FSA64 space */
5517         mutex_lock(&rdev->srbm_mutex);
5518         for (i = 0; i < 16; i++) {
5519                 cik_srbm_select(rdev, 0, 0, 0, i);
5520                 /* CP and shaders */
5521                 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5522                 WREG32(SH_MEM_APE1_BASE, 1);
5523                 WREG32(SH_MEM_APE1_LIMIT, 0);
5524                 WREG32(SH_MEM_BASES, 0);
5525                 /* SDMA GFX */
5526                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5527                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5528                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5529                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5530                 /* XXX SDMA RLC - todo */
5531         }
5532         cik_srbm_select(rdev, 0, 0, 0, 0);
5533         mutex_unlock(&rdev->srbm_mutex);
5534 
5535         cik_pcie_gart_tlb_flush(rdev);
5536         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5537                  (unsigned)(rdev->mc.gtt_size >> 20),
5538                  (unsigned long long)rdev->gart.table_addr);
5539         rdev->gart.ready = true;
5540         return 0;
5541 }
5542 
5543 /**
5544  * cik_pcie_gart_disable - gart disable
5545  *
5546  * @rdev: radeon_device pointer
5547  *
5548  * This disables all VM page table (CIK).
5549  */
5550 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5551 {
5552         unsigned i;
5553 
5554         for (i = 1; i < 16; ++i) {
5555                 uint32_t reg;
5556                 if (i < 8)
5557                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5558                 else
5559                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5560                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5561         }
5562 
5563         /* Disable all tables */
5564         WREG32(VM_CONTEXT0_CNTL, 0);
5565         WREG32(VM_CONTEXT1_CNTL, 0);
5566         /* Setup TLB control */
5567         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5568                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5569         /* Setup L2 cache */
5570         WREG32(VM_L2_CNTL,
5571                ENABLE_L2_FRAGMENT_PROCESSING |
5572                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5573                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5574                EFFECTIVE_L2_QUEUE_SIZE(7) |
5575                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5576         WREG32(VM_L2_CNTL2, 0);
5577         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5578                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5579         radeon_gart_table_vram_unpin(rdev);
5580 }
5581 
/**
 * cik_pcie_gart_fini - gart fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): the GART is disabled
 * first, then its table is freed and the common GART state is
 * finalized.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the table must be out of use before it is freed */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5595 
5596 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking, so there is nothing to parse here (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nop: IB checking is done by the hardware */
	return 0;
}
5609 
5610 /*
5611  * vm
5612  * VMID 0 is the physical GPU addresses as used by the kernel.
5613  * VMIDs 1-15 are used for userspace clients and are handled
5614  * by the radeon vm/hsa code.
5615  */
5616 /**
5617  * cik_vm_init - cik vm init callback
5618  *
5619  * @rdev: radeon_device pointer
5620  *
5621  * Inits cik specific vm parameters (number of VMs, base of vram for
5622  * VMIDs 1-15) (CIK).
5623  * Returns 0 for success.
5624  */
5625 int cik_vm_init(struct radeon_device *rdev)
5626 {
5627         /*
5628          * number of VMs
5629          * VMID 0 is reserved for System
5630          * radeon graphics/compute will use VMIDs 1-15
5631          */
5632         rdev->vm_manager.nvm = 16;
5633         /* base offset of vram pages */
5634         if (rdev->flags & RADEON_IS_IGP) {
5635                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5636                 tmp <<= 22;
5637                 rdev->vm_manager.vram_base_offset = tmp;
5638         } else
5639                 rdev->vm_manager.vram_base_offset = 0;
5640 
5641         return 0;
5642 }
5643 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Tear down any asic specific VM setup (CIK).  Nothing needs to be
 * undone, so this is intentionally empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
5654 
5655 /**
5656  * cik_vm_decode_fault - print human readable fault info
5657  *
5658  * @rdev: radeon_device pointer
5659  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5660  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5661  *
5662  * Print human readable fault information (CIK).
5663  */
5664 static void cik_vm_decode_fault(struct radeon_device *rdev,
5665                                 u32 status, u32 addr, u32 mc_client)
5666 {
5667         u32 mc_id;
5668         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5669         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5670         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5671                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5672 
5673         if (rdev->family == CHIP_HAWAII)
5674                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5675         else
5676                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5677 
5678         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5679                protections, vmid, addr,
5680                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5681                block, mc_client, mc_id);
5682 }
5683 
5684 /**
5685  * cik_vm_flush - cik vm flush using the CP
5686  *
5687  * @rdev: radeon_device pointer
5688  *
5689  * Update the page table base and flush the VM TLB
5690  * using the CP (CIK).
5691  */
5692 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5693                   unsigned vm_id, uint64_t pd_addr)
5694 {
5695         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5696 
5697         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5698         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5699                                  WRITE_DATA_DST_SEL(0)));
5700         if (vm_id < 8) {
5701                 radeon_ring_write(ring,
5702                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5703         } else {
5704                 radeon_ring_write(ring,
5705                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5706         }
5707         radeon_ring_write(ring, 0);
5708         radeon_ring_write(ring, pd_addr >> 12);
5709 
5710         /* update SH_MEM_* regs */
5711         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5712         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5713                                  WRITE_DATA_DST_SEL(0)));
5714         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5715         radeon_ring_write(ring, 0);
5716         radeon_ring_write(ring, VMID(vm_id));
5717 
5718         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5719         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720                                  WRITE_DATA_DST_SEL(0)));
5721         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5722         radeon_ring_write(ring, 0);
5723 
5724         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5725         radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5726         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5727         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5728 
5729         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731                                  WRITE_DATA_DST_SEL(0)));
5732         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5733         radeon_ring_write(ring, 0);
5734         radeon_ring_write(ring, VMID(0));
5735 
5736         /* HDP flush */
5737         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5738 
5739         /* bits 0-15 are the VM contexts0-15 */
5740         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5741         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5742                                  WRITE_DATA_DST_SEL(0)));
5743         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5744         radeon_ring_write(ring, 0);
5745         radeon_ring_write(ring, 1 << vm_id);
5746 
5747         /* wait for the invalidate to complete */
5748         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5749         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5750                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5751                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5752         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5753         radeon_ring_write(ring, 0);
5754         radeon_ring_write(ring, 0); /* ref */
5755         radeon_ring_write(ring, 0); /* mask */
5756         radeon_ring_write(ring, 0x20); /* poll interval */
5757 
5758         /* compute doesn't have PFP */
5759         if (usepfp) {
5760                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5761                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5762                 radeon_ring_write(ring, 0x0);
5763         }
5764 }
5765 
5766 /*
5767  * RLC
5768  * The RLC is a multi-purpose microengine that handles a
5769  * variety of functions, the most important of which is
5770  * the interrupt controller.
5771  */
5772 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5773                                           bool enable)
5774 {
5775         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5776 
5777         if (enable)
5778                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5779         else
5780                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5781         WREG32(CP_INT_CNTL_RING0, tmp);
5782 }
5783 
5784 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5785 {
5786         u32 tmp;
5787 
5788         tmp = RREG32(RLC_LB_CNTL);
5789         if (enable)
5790                 tmp |= LOAD_BALANCE_ENABLE;
5791         else
5792                 tmp &= ~LOAD_BALANCE_ENABLE;
5793         WREG32(RLC_LB_CNTL, tmp);
5794 }
5795 
5796 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5797 {
5798         u32 i, j, k;
5799         u32 mask;
5800 
5801         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5802                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5803                         cik_select_se_sh(rdev, i, j);
5804                         for (k = 0; k < rdev->usec_timeout; k++) {
5805                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5806                                         break;
5807                                 udelay(1);
5808                         }
5809                 }
5810         }
5811         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812 
5813         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5814         for (k = 0; k < rdev->usec_timeout; k++) {
5815                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5816                         break;
5817                 udelay(1);
5818         }
5819 }
5820 
5821 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5822 {
5823         u32 tmp;
5824 
5825         tmp = RREG32(RLC_CNTL);
5826         if (tmp != rlc)
5827                 WREG32(RLC_CNTL, rlc);
5828 }
5829 
5830 static u32 cik_halt_rlc(struct radeon_device *rdev)
5831 {
5832         u32 data, orig;
5833 
5834         orig = data = RREG32(RLC_CNTL);
5835 
5836         if (data & RLC_ENABLE) {
5837                 u32 i;
5838 
5839                 data &= ~RLC_ENABLE;
5840                 WREG32(RLC_CNTL, data);
5841 
5842                 for (i = 0; i < rdev->usec_timeout; i++) {
5843                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5844                                 break;
5845                         udelay(1);
5846                 }
5847 
5848                 cik_wait_for_rlc_serdes(rdev);
5849         }
5850 
5851         return orig;
5852 }
5853 
5854 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5855 {
5856         u32 tmp, i, mask;
5857 
5858         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5859         WREG32(RLC_GPR_REG2, tmp);
5860 
5861         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5862         for (i = 0; i < rdev->usec_timeout; i++) {
5863                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5864                         break;
5865                 udelay(1);
5866         }
5867 
5868         for (i = 0; i < rdev->usec_timeout; i++) {
5869                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5870                         break;
5871                 udelay(1);
5872         }
5873 }
5874 
5875 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5876 {
5877         u32 tmp;
5878 
5879         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5880         WREG32(RLC_GPR_REG2, tmp);
5881 }
5882 
5883 /**
5884  * cik_rlc_stop - stop the RLC ME
5885  *
5886  * @rdev: radeon_device pointer
5887  *
5888  * Halt the RLC ME (MicroEngine) (CIK).
5889  */
5890 static void cik_rlc_stop(struct radeon_device *rdev)
5891 {
        /* writing 0 clears every bit of RLC_CNTL, RLC_ENABLE included */
5892         WREG32(RLC_CNTL, 0);
5893 
        /* clear the context busy/empty interrupt enables in CP_INT_CNTL_RING0 */
5894         cik_enable_gui_idle_interrupt(rdev, false);
5895 
        /* poll the serdes busy registers until idle or until the poll times out */
5896         cik_wait_for_rlc_serdes(rdev);
5897 }
5898 
5899 /**
5900  * cik_rlc_start - start the RLC ME
5901  *
5902  * @rdev: radeon_device pointer
5903  *
5904  * Unhalt the RLC ME (MicroEngine) (CIK).
5905  */
5906 static void cik_rlc_start(struct radeon_device *rdev)
5907 {
        /* RLC_CNTL is overwritten (not read-modify-written) with only RLC_ENABLE set */
5908         WREG32(RLC_CNTL, RLC_ENABLE);
5909 
        /* set the context busy/empty interrupt enables in CP_INT_CNTL_RING0 */
5910         cik_enable_gui_idle_interrupt(rdev, true);
5911 
        /* NOTE(review): fixed 50 us delay, presumably to let the RLC come up; confirm */
5912         udelay(50);
5913 }
5914 
5915 /**
5916  * cik_rlc_resume - setup the RLC hw
5917  *
5918  * @rdev: radeon_device pointer
5919  *
5920  * Initialize the RLC registers, load the ucode,
5921  * and start the RLC (CIK).
5922  * Returns 0 for success, -EINVAL if the ucode is not available.
5923  */
5924 static int cik_rlc_resume(struct radeon_device *rdev)
5925 {
5926         u32 i, size, tmp;
5927 
5928         if (!rdev->rlc_fw)
5929                 return -EINVAL;
5930 
5931         cik_rlc_stop(rdev);
5932 
5933         /* disable CG */
5934         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5935         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5936 
5937         si_rlc_reset(rdev);
5938 
5939         cik_init_pg(rdev);
5940 
5941         cik_init_cg(rdev);
5942 
5943         WREG32(RLC_LB_CNTR_INIT, 0);
5944         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5945 
5946         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5947         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5948         WREG32(RLC_LB_PARAMS, 0x00600408);
5949         WREG32(RLC_LB_CNTL, 0x80000004);
5950 
5951         WREG32(RLC_MC_CNTL, 0);
5952         WREG32(RLC_UCODE_CNTL, 0);
5953 
5954         if (rdev->new_fw) {
5955                 const struct rlc_firmware_header_v1_0 *hdr =
5956                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5957                 const __le32 *fw_data = (const __le32 *)
5958                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5959 
5960                 radeon_ucode_print_rlc_hdr(&hdr->header);
5961 
5962                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5963                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5964                 for (i = 0; i < size; i++)
5965                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5966                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5967         } else {
5968                 const __be32 *fw_data;
5969 
5970                 switch (rdev->family) {
5971                 case CHIP_BONAIRE:
5972                 case CHIP_HAWAII:
5973                 default:
5974                         size = BONAIRE_RLC_UCODE_SIZE;
5975                         break;
5976                 case CHIP_KAVERI:
5977                         size = KV_RLC_UCODE_SIZE;
5978                         break;
5979                 case CHIP_KABINI:
5980                         size = KB_RLC_UCODE_SIZE;
5981                         break;
5982                 case CHIP_MULLINS:
5983                         size = ML_RLC_UCODE_SIZE;
5984                         break;
5985                 }
5986 
5987                 fw_data = (const __be32 *)rdev->rlc_fw->data;
5988                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5989                 for (i = 0; i < size; i++)
5990                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5991                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5992         }
5993 
5994         /* XXX - find out what chips support lbpw */
5995         cik_enable_lbpw(rdev, false);
5996 
5997         if (rdev->family == CHIP_BONAIRE)
5998                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5999 
6000         cik_rlc_start(rdev);
6001 
6002         return 0;
6003 }
6004 
6005 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6006 {
6007         u32 data, orig, tmp, tmp2;
6008 
6009         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6010 
6011         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6012                 cik_enable_gui_idle_interrupt(rdev, true);
6013 
6014                 tmp = cik_halt_rlc(rdev);
6015 
6016                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6017                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6018                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6019                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6020                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6021 
6022                 cik_update_rlc(rdev, tmp);
6023 
6024                 data |= CGCG_EN | CGLS_EN;
6025         } else {
6026                 cik_enable_gui_idle_interrupt(rdev, false);
6027 
6028                 RREG32(CB_CGTT_SCLK_CTRL);
6029                 RREG32(CB_CGTT_SCLK_CTRL);
6030                 RREG32(CB_CGTT_SCLK_CTRL);
6031                 RREG32(CB_CGTT_SCLK_CTRL);
6032 
6033                 data &= ~(CGCG_EN | CGLS_EN);
6034         }
6035 
6036         if (orig != data)
6037                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6038 
6039 }
6040 
6041 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6042 {
6043         u32 data, orig, tmp = 0;
6044 
6045         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6046                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6047                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6048                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6049                                 data |= CP_MEM_LS_EN;
6050                                 if (orig != data)
6051                                         WREG32(CP_MEM_SLP_CNTL, data);
6052                         }
6053                 }
6054 
6055                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6056                 data |= 0x00000001;
6057                 data &= 0xfffffffd;
6058                 if (orig != data)
6059                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6060 
6061                 tmp = cik_halt_rlc(rdev);
6062 
6063                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6064                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6065                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6066                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6067                 WREG32(RLC_SERDES_WR_CTRL, data);
6068 
6069                 cik_update_rlc(rdev, tmp);
6070 
6071                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6072                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6073                         data &= ~SM_MODE_MASK;
6074                         data |= SM_MODE(0x2);
6075                         data |= SM_MODE_ENABLE;
6076                         data &= ~CGTS_OVERRIDE;
6077                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6078                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6079                                 data &= ~CGTS_LS_OVERRIDE;
6080                         data &= ~ON_MONITOR_ADD_MASK;
6081                         data |= ON_MONITOR_ADD_EN;
6082                         data |= ON_MONITOR_ADD(0x96);
6083                         if (orig != data)
6084                                 WREG32(CGTS_SM_CTRL_REG, data);
6085                 }
6086         } else {
6087                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6088                 data |= 0x00000003;
6089                 if (orig != data)
6090                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6091 
6092                 data = RREG32(RLC_MEM_SLP_CNTL);
6093                 if (data & RLC_MEM_LS_EN) {
6094                         data &= ~RLC_MEM_LS_EN;
6095                         WREG32(RLC_MEM_SLP_CNTL, data);
6096                 }
6097 
6098                 data = RREG32(CP_MEM_SLP_CNTL);
6099                 if (data & CP_MEM_LS_EN) {
6100                         data &= ~CP_MEM_LS_EN;
6101                         WREG32(CP_MEM_SLP_CNTL, data);
6102                 }
6103 
6104                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6105                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6106                 if (orig != data)
6107                         WREG32(CGTS_SM_CTRL_REG, data);
6108 
6109                 tmp = cik_halt_rlc(rdev);
6110 
6111                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6112                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6113                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6114                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6115                 WREG32(RLC_SERDES_WR_CTRL, data);
6116 
6117                 cik_update_rlc(rdev, tmp);
6118         }
6119 }
6120 
/*
 * Registers walked by cik_enable_mc_ls() and cik_enable_mc_mgcg(), which
 * toggle MC_LS_ENABLE and MC_CG_ENABLE respectively in each entry.
 */
6121 static const u32 mc_cg_registers[] =
6122 {
6123         MC_HUB_MISC_HUB_CG,
6124         MC_HUB_MISC_SIP_CG,
6125         MC_HUB_MISC_VM_CG,
6126         MC_XPB_CLK_GAT,
6127         ATC_MISC_CG,
6128         MC_CITF_MISC_WR_CG,
6129         MC_CITF_MISC_RD_CG,
6130         MC_CITF_MISC_VM_CG,
6131         VM_L2_CG,
6132 };
6133 
6134 static void cik_enable_mc_ls(struct radeon_device *rdev,
6135                              bool enable)
6136 {
6137         int i;
6138         u32 orig, data;
6139 
6140         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6141                 orig = data = RREG32(mc_cg_registers[i]);
6142                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6143                         data |= MC_LS_ENABLE;
6144                 else
6145                         data &= ~MC_LS_ENABLE;
6146                 if (data != orig)
6147                         WREG32(mc_cg_registers[i], data);
6148         }
6149 }
6150 
6151 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6152                                bool enable)
6153 {
6154         int i;
6155         u32 orig, data;
6156 
6157         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6158                 orig = data = RREG32(mc_cg_registers[i]);
6159                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6160                         data |= MC_CG_ENABLE;
6161                 else
6162                         data &= ~MC_CG_ENABLE;
6163                 if (data != orig)
6164                         WREG32(mc_cg_registers[i], data);
6165         }
6166 }
6167 
6168 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6169                                  bool enable)
6170 {
6171         u32 orig, data;
6172 
6173         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6174                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6175                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6176         } else {
6177                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6178                 data |= 0xff000000;
6179                 if (data != orig)
6180                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6181 
6182                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6183                 data |= 0xff000000;
6184                 if (data != orig)
6185                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6186         }
6187 }
6188 
6189 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6190                                  bool enable)
6191 {
6192         u32 orig, data;
6193 
6194         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6195                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6196                 data |= 0x100;
6197                 if (orig != data)
6198                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6199 
6200                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6201                 data |= 0x100;
6202                 if (orig != data)
6203                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6204         } else {
6205                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6206                 data &= ~0x100;
6207                 if (orig != data)
6208                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6209 
6210                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6211                 data &= ~0x100;
6212                 if (orig != data)
6213                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6214         }
6215 }
6216 
6217 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6218                                 bool enable)
6219 {
6220         u32 orig, data;
6221 
6222         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6223                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6224                 data = 0xfff;
6225                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6226 
6227                 orig = data = RREG32(UVD_CGC_CTRL);
6228                 data |= DCM;
6229                 if (orig != data)
6230                         WREG32(UVD_CGC_CTRL, data);
6231         } else {
6232                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6233                 data &= ~0xfff;
6234                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6235 
6236                 orig = data = RREG32(UVD_CGC_CTRL);
6237                 data &= ~DCM;
6238                 if (orig != data)
6239                         WREG32(UVD_CGC_CTRL, data);
6240         }
6241 }
6242 
6243 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6244                                bool enable)
6245 {
6246         u32 orig, data;
6247 
6248         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6249 
6250         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6251                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6252                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6253         else
6254                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6255                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6256 
6257         if (orig != data)
6258                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6259 }
6260 
6261 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6262                                 bool enable)
6263 {
6264         u32 orig, data;
6265 
6266         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6267 
6268         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6269                 data &= ~CLOCK_GATING_DIS;
6270         else
6271                 data |= CLOCK_GATING_DIS;
6272 
6273         if (orig != data)
6274                 WREG32(HDP_HOST_PATH_CNTL, data);
6275 }
6276 
6277 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6278                               bool enable)
6279 {
6280         u32 orig, data;
6281 
6282         orig = data = RREG32(HDP_MEM_POWER_LS);
6283 
6284         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6285                 data |= HDP_LS_ENABLE;
6286         else
6287                 data &= ~HDP_LS_ENABLE;
6288 
6289         if (orig != data)
6290                 WREG32(HDP_MEM_POWER_LS, data);
6291 }
6292 
6293 void cik_update_cg(struct radeon_device *rdev,
6294                    u32 block, bool enable)
6295 {
6296 
6297         if (block & RADEON_CG_BLOCK_GFX) {
6298                 cik_enable_gui_idle_interrupt(rdev, false);
6299                 /* order matters! */
6300                 if (enable) {
6301                         cik_enable_mgcg(rdev, true);
6302                         cik_enable_cgcg(rdev, true);
6303                 } else {
6304                         cik_enable_cgcg(rdev, false);
6305                         cik_enable_mgcg(rdev, false);
6306                 }
6307                 cik_enable_gui_idle_interrupt(rdev, true);
6308         }
6309 
6310         if (block & RADEON_CG_BLOCK_MC) {
6311                 if (!(rdev->flags & RADEON_IS_IGP)) {
6312                         cik_enable_mc_mgcg(rdev, enable);
6313                         cik_enable_mc_ls(rdev, enable);
6314                 }
6315         }
6316 
6317         if (block & RADEON_CG_BLOCK_SDMA) {
6318                 cik_enable_sdma_mgcg(rdev, enable);
6319                 cik_enable_sdma_mgls(rdev, enable);
6320         }
6321 
6322         if (block & RADEON_CG_BLOCK_BIF) {
6323                 cik_enable_bif_mgls(rdev, enable);
6324         }
6325 
6326         if (block & RADEON_CG_BLOCK_UVD) {
6327                 if (rdev->has_uvd)
6328                         cik_enable_uvd_mgcg(rdev, enable);
6329         }
6330 
6331         if (block & RADEON_CG_BLOCK_HDP) {
6332                 cik_enable_hdp_mgcg(rdev, enable);
6333                 cik_enable_hdp_ls(rdev, enable);
6334         }
6335 
6336         if (block & RADEON_CG_BLOCK_VCE) {
6337                 vce_v2_0_enable_mgcg(rdev, enable);
6338         }
6339 }
6340 
6341 static void cik_init_cg(struct radeon_device *rdev)
6342 {
6343 
6344         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6345 
6346         if (rdev->has_uvd)
6347                 si_init_uvd_internal_cg(rdev);
6348 
6349         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6350                              RADEON_CG_BLOCK_SDMA |
6351                              RADEON_CG_BLOCK_BIF |
6352                              RADEON_CG_BLOCK_UVD |
6353                              RADEON_CG_BLOCK_HDP), true);
6354 }
6355 
6356 static void cik_fini_cg(struct radeon_device *rdev)
6357 {
6358         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6359                              RADEON_CG_BLOCK_SDMA |
6360                              RADEON_CG_BLOCK_BIF |
6361                              RADEON_CG_BLOCK_UVD |
6362                              RADEON_CG_BLOCK_HDP), false);
6363 
6364         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6365 }
6366 
6367 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6368                                           bool enable)
6369 {
6370         u32 data, orig;
6371 
6372         orig = data = RREG32(RLC_PG_CNTL);
6373         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6374                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6375         else
6376                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6377         if (orig != data)
6378                 WREG32(RLC_PG_CNTL, data);
6379 }
6380 
6381 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6382                                           bool enable)
6383 {
6384         u32 data, orig;
6385 
6386         orig = data = RREG32(RLC_PG_CNTL);
6387         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6388                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6389         else
6390                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6391         if (orig != data)
6392                 WREG32(RLC_PG_CNTL, data);
6393 }
6394 
6395 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6396 {
6397         u32 data, orig;
6398 
6399         orig = data = RREG32(RLC_PG_CNTL);
6400         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6401                 data &= ~DISABLE_CP_PG;
6402         else
6403                 data |= DISABLE_CP_PG;
6404         if (orig != data)
6405                 WREG32(RLC_PG_CNTL, data);
6406 }
6407 
6408 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6409 {
6410         u32 data, orig;
6411 
6412         orig = data = RREG32(RLC_PG_CNTL);
6413         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6414                 data &= ~DISABLE_GDS_PG;
6415         else
6416                 data |= DISABLE_GDS_PG;
6417         if (orig != data)
6418                 WREG32(RLC_PG_CNTL, data);
6419 }
6420 
6421 #define CP_ME_TABLE_SIZE    96
6422 #define CP_ME_TABLE_OFFSET  2048
6423 #define CP_MEC_TABLE_OFFSET 4096
6424 
6425 void cik_init_cp_pg_table(struct radeon_device *rdev)
6426 {
6427         volatile u32 *dst_ptr;
6428         int me, i, max_me = 4;
6429         u32 bo_offset = 0;
6430         u32 table_offset, table_size;
6431 
6432         if (rdev->family == CHIP_KAVERI)
6433                 max_me = 5;
6434 
6435         if (rdev->rlc.cp_table_ptr == NULL)
6436                 return;
6437 
6438         /* write the cp table buffer */
6439         dst_ptr = rdev->rlc.cp_table_ptr;
6440         for (me = 0; me < max_me; me++) {
6441                 if (rdev->new_fw) {
6442                         const __le32 *fw_data;
6443                         const struct gfx_firmware_header_v1_0 *hdr;
6444 
6445                         if (me == 0) {
6446                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6447                                 fw_data = (const __le32 *)
6448                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6449                                 table_offset = le32_to_cpu(hdr->jt_offset);
6450                                 table_size = le32_to_cpu(hdr->jt_size);
6451                         } else if (me == 1) {
6452                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6453                                 fw_data = (const __le32 *)
6454                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6455                                 table_offset = le32_to_cpu(hdr->jt_offset);
6456                                 table_size = le32_to_cpu(hdr->jt_size);
6457                         } else if (me == 2) {
6458                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6459                                 fw_data = (const __le32 *)
6460                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6461                                 table_offset = le32_to_cpu(hdr->jt_offset);
6462                                 table_size = le32_to_cpu(hdr->jt_size);
6463                         } else if (me == 3) {
6464                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6465                                 fw_data = (const __le32 *)
6466                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6467                                 table_offset = le32_to_cpu(hdr->jt_offset);
6468                                 table_size = le32_to_cpu(hdr->jt_size);
6469                         } else {
6470                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6471                                 fw_data = (const __le32 *)
6472                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6473                                 table_offset = le32_to_cpu(hdr->jt_offset);
6474                                 table_size = le32_to_cpu(hdr->jt_size);
6475                         }
6476 
6477                         for (i = 0; i < table_size; i ++) {
6478                                 dst_ptr[bo_offset + i] =
6479                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6480                         }
6481                         bo_offset += table_size;
6482                 } else {
6483                         const __be32 *fw_data;
6484                         table_size = CP_ME_TABLE_SIZE;
6485 
6486                         if (me == 0) {
6487                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6488                                 table_offset = CP_ME_TABLE_OFFSET;
6489                         } else if (me == 1) {
6490                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6491                                 table_offset = CP_ME_TABLE_OFFSET;
6492                         } else if (me == 2) {
6493                                 fw_data = (const __be32 *)rdev->me_fw->data;
6494                                 table_offset = CP_ME_TABLE_OFFSET;
6495                         } else {
6496                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6497                                 table_offset = CP_MEC_TABLE_OFFSET;
6498                         }
6499 
6500                         for (i = 0; i < table_size; i ++) {
6501                                 dst_ptr[bo_offset + i] =
6502                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6503                         }
6504                         bo_offset += table_size;
6505                 }
6506         }
6507 }
6508 
6509 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6510                                 bool enable)
6511 {
6512         u32 data, orig;
6513 
6514         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6515                 orig = data = RREG32(RLC_PG_CNTL);
6516                 data |= GFX_PG_ENABLE;
6517                 if (orig != data)
6518                         WREG32(RLC_PG_CNTL, data);
6519 
6520                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6521                 data |= AUTO_PG_EN;
6522                 if (orig != data)
6523                         WREG32(RLC_AUTO_PG_CTRL, data);
6524         } else {
6525                 orig = data = RREG32(RLC_PG_CNTL);
6526                 data &= ~GFX_PG_ENABLE;
6527                 if (orig != data)
6528                         WREG32(RLC_PG_CNTL, data);
6529 
6530                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6531                 data &= ~AUTO_PG_EN;
6532                 if (orig != data)
6533                         WREG32(RLC_AUTO_PG_CTRL, data);
6534 
6535                 data = RREG32(DB_RENDER_CONTROL);
6536         }
6537 }
6538 
6539 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6540 {
6541         u32 mask = 0, tmp, tmp1;
6542         int i;
6543 
6544         cik_select_se_sh(rdev, se, sh);
6545         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6546         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6547         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6548 
6549         tmp &= 0xffff0000;
6550 
6551         tmp |= tmp1;
6552         tmp >>= 16;
6553 
6554         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6555                 mask <<= 1;
6556                 mask |= 1;
6557         }
6558 
6559         return (~tmp) & mask;
6560 }
6561 
6562 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6563 {
6564         u32 i, j, k, active_cu_number = 0;
6565         u32 mask, counter, cu_bitmap;
6566         u32 tmp = 0;
6567 
6568         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6569                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6570                         mask = 1;
6571                         cu_bitmap = 0;
6572                         counter = 0;
6573                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6574                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6575                                         if (counter < 2)
6576                                                 cu_bitmap |= mask;
6577                                         counter ++;
6578                                 }
6579                                 mask <<= 1;
6580                         }
6581 
6582                         active_cu_number += counter;
6583                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6584                 }
6585         }
6586 
6587         WREG32(RLC_PG_AO_CU_MASK, tmp);
6588 
6589         tmp = RREG32(RLC_MAX_PG_CU);
6590         tmp &= ~MAX_PU_CU_MASK;
6591         tmp |= MAX_PU_CU(active_cu_number);
6592         WREG32(RLC_MAX_PG_CU, tmp);
6593 }
6594 
6595 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6596                                        bool enable)
6597 {
6598         u32 data, orig;
6599 
6600         orig = data = RREG32(RLC_PG_CNTL);
6601         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6602                 data |= STATIC_PER_CU_PG_ENABLE;
6603         else
6604                 data &= ~STATIC_PER_CU_PG_ENABLE;
6605         if (orig != data)
6606                 WREG32(RLC_PG_CNTL, data);
6607 }
6608 
6609 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6610                                         bool enable)
6611 {
6612         u32 data, orig;
6613 
6614         orig = data = RREG32(RLC_PG_CNTL);
6615         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6616                 data |= DYN_PER_CU_PG_ENABLE;
6617         else
6618                 data &= ~DYN_PER_CU_PG_ENABLE;
6619         if (orig != data)
6620                 WREG32(RLC_PG_CNTL, data);
6621 }
6622 
6623 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6624 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6625 
6626 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6627 {
6628         u32 data, orig;
6629         u32 i;
6630 
6631         if (rdev->rlc.cs_data) {
6632                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6633                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6634                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6635                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6636         } else {
6637                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6638                 for (i = 0; i < 3; i++)
6639                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6640         }
6641         if (rdev->rlc.reg_list) {
6642                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6643                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6644                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6645         }
6646 
6647         orig = data = RREG32(RLC_PG_CNTL);
6648         data |= GFX_PG_SRC;
6649         if (orig != data)
6650                 WREG32(RLC_PG_CNTL, data);
6651 
6652         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6653         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6654 
6655         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6656         data &= ~IDLE_POLL_COUNT_MASK;
6657         data |= IDLE_POLL_COUNT(0x60);
6658         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6659 
6660         data = 0x10101010;
6661         WREG32(RLC_PG_DELAY, data);
6662 
6663         data = RREG32(RLC_PG_DELAY_2);
6664         data &= ~0xff;
6665         data |= 0x3;
6666         WREG32(RLC_PG_DELAY_2, data);
6667 
6668         data = RREG32(RLC_AUTO_PG_CTRL);
6669         data &= ~GRBM_REG_SGIT_MASK;
6670         data |= GRBM_REG_SGIT(0x700);
6671         WREG32(RLC_AUTO_PG_CTRL, data);
6672 
6673 }
6674 
6675 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6676 {
6677         cik_enable_gfx_cgpg(rdev, enable);
6678         cik_enable_gfx_static_mgpg(rdev, enable);
6679         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6680 }
6681 
6682 u32 cik_get_csb_size(struct radeon_device *rdev)
6683 {
6684         u32 count = 0;
6685         const struct cs_section_def *sect = NULL;
6686         const struct cs_extent_def *ext = NULL;
6687 
6688         if (rdev->rlc.cs_data == NULL)
6689                 return 0;
6690 
6691         /* begin clear state */
6692         count += 2;
6693         /* context control state */
6694         count += 3;
6695 
6696         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6697                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6698                         if (sect->id == SECT_CONTEXT)
6699                                 count += 2 + ext->reg_count;
6700                         else
6701                                 return 0;
6702                 }
6703         }
6704         /* pa_sc_raster_config/pa_sc_raster_config1 */
6705         count += 4;
6706         /* end clear state */
6707         count += 2;
6708         /* clear state */
6709         count += 2;
6710 
6711         return count;
6712 }
6713 
6714 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6715 {
6716         u32 count = 0, i;
6717         const struct cs_section_def *sect = NULL;
6718         const struct cs_extent_def *ext = NULL;
6719 
6720         if (rdev->rlc.cs_data == NULL)
6721                 return;
6722         if (buffer == NULL)
6723                 return;
6724 
6725         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6726         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6727 
6728         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6729         buffer[count++] = cpu_to_le32(0x80000000);
6730         buffer[count++] = cpu_to_le32(0x80000000);
6731 
6732         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6733                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6734                         if (sect->id == SECT_CONTEXT) {
6735                                 buffer[count++] =
6736                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6737                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6738                                 for (i = 0; i < ext->reg_count; i++)
6739                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6740                         } else {
6741                                 return;
6742                         }
6743                 }
6744         }
6745 
6746         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6747         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6748         switch (rdev->family) {
6749         case CHIP_BONAIRE:
6750                 buffer[count++] = cpu_to_le32(0x16000012);
6751                 buffer[count++] = cpu_to_le32(0x00000000);
6752                 break;
6753         case CHIP_KAVERI:
6754                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6755                 buffer[count++] = cpu_to_le32(0x00000000);
6756                 break;
6757         case CHIP_KABINI:
6758         case CHIP_MULLINS:
6759                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6760                 buffer[count++] = cpu_to_le32(0x00000000);
6761                 break;
6762         case CHIP_HAWAII:
6763                 buffer[count++] = cpu_to_le32(0x3a00161a);
6764                 buffer[count++] = cpu_to_le32(0x0000002e);
6765                 break;
6766         default:
6767                 buffer[count++] = cpu_to_le32(0x00000000);
6768                 buffer[count++] = cpu_to_le32(0x00000000);
6769                 break;
6770         }
6771 
6772         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6773         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6774 
6775         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6776         buffer[count++] = cpu_to_le32(0);
6777 }
6778 
6779 static void cik_init_pg(struct radeon_device *rdev)
6780 {
6781         if (rdev->pg_flags) {
6782                 cik_enable_sck_slowdown_on_pu(rdev, true);
6783                 cik_enable_sck_slowdown_on_pd(rdev, true);
6784                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6785                         cik_init_gfx_cgpg(rdev);
6786                         cik_enable_cp_pg(rdev, true);
6787                         cik_enable_gds_pg(rdev, true);
6788                 }
6789                 cik_init_ao_cu_mask(rdev);
6790                 cik_update_gfx_pg(rdev, true);
6791         }
6792 }
6793 
6794 static void cik_fini_pg(struct radeon_device *rdev)
6795 {
6796         if (rdev->pg_flags) {
6797                 cik_update_gfx_pg(rdev, false);
6798                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6799                         cik_enable_cp_pg(rdev, false);
6800                         cik_enable_gds_pg(rdev, false);
6801                 }
6802         }
6803 }
6804 
6805 /*
6806  * Interrupts
6807  * Starting with r6xx, interrupts are handled via a ring buffer.
6808  * Ring buffers are areas of GPU accessible memory that the GPU
6809  * writes interrupt vectors into and the host reads vectors out of.
6810  * There is a rptr (read pointer) that determines where the
6811  * host is currently reading, and a wptr (write pointer)
6812  * which determines where the GPU has written.  When the
6813  * pointers are equal, the ring is idle.  When the GPU
6814  * writes vectors to the ring buffer, it increments the
6815  * wptr.  When there is an interrupt, the host then starts
6816  * fetching commands and processing them until the pointers are
6817  * equal again at which point it updates the rptr.
6818  */
6819 
6820 /**
6821  * cik_enable_interrupts - Enable the interrupt ring buffer
6822  *
6823  * @rdev: radeon_device pointer
6824  *
6825  * Enable the interrupt ring buffer (CIK).
6826  */
6827 static void cik_enable_interrupts(struct radeon_device *rdev)
6828 {
6829         u32 ih_cntl = RREG32(IH_CNTL);
6830         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6831 
6832         ih_cntl |= ENABLE_INTR;
6833         ih_rb_cntl |= IH_RB_ENABLE;
6834         WREG32(IH_CNTL, ih_cntl);
6835         WREG32(IH_RB_CNTL, ih_rb_cntl);
6836         rdev->ih.enabled = true;
6837 }
6838 
6839 /**
6840  * cik_disable_interrupts - Disable the interrupt ring buffer
6841  *
6842  * @rdev: radeon_device pointer
6843  *
6844  * Disable the interrupt ring buffer (CIK).
6845  */
6846 static void cik_disable_interrupts(struct radeon_device *rdev)
6847 {
6848         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6849         u32 ih_cntl = RREG32(IH_CNTL);
6850 
6851         ih_rb_cntl &= ~IH_RB_ENABLE;
6852         ih_cntl &= ~ENABLE_INTR;
6853         WREG32(IH_RB_CNTL, ih_rb_cntl);
6854         WREG32(IH_CNTL, ih_cntl);
6855         /* set rptr, wptr to 0 */
6856         WREG32(IH_RB_RPTR, 0);
6857         WREG32(IH_RB_WPTR, 0);
6858         rdev->ih.enabled = false;
6859         rdev->ih.rptr = 0;
6860 }
6861 
6862 /**
6863  * cik_disable_interrupt_state - Disable all interrupt sources
6864  *
6865  * @rdev: radeon_device pointer
6866  *
6867  * Clear all interrupt enable bits used by the driver (CIK).
6868  */
6869 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6870 {
6871         u32 tmp;
6872 
6873         /* gfx ring */
6874         tmp = RREG32(CP_INT_CNTL_RING0) &
6875                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6876         WREG32(CP_INT_CNTL_RING0, tmp);
6877         /* sdma */
6878         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6879         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6880         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6881         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6882         /* compute queues */
6883         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6884         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6885         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6886         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6887         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6888         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6889         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6890         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6891         /* grbm */
6892         WREG32(GRBM_INT_CNTL, 0);
6893         /* SRBM */
6894         WREG32(SRBM_INT_CNTL, 0);
6895         /* vline/vblank, etc. */
6896         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6897         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6898         if (rdev->num_crtc >= 4) {
6899                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6900                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6901         }
6902         if (rdev->num_crtc >= 6) {
6903                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6904                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6905         }
6906         /* pflip */
6907         if (rdev->num_crtc >= 2) {
6908                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6909                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6910         }
6911         if (rdev->num_crtc >= 4) {
6912                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6913                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6914         }
6915         if (rdev->num_crtc >= 6) {
6916                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6917                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6918         }
6919 
6920         /* dac hotplug */
6921         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6922 
6923         /* digital hotplug */
6924         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6925         WREG32(DC_HPD1_INT_CONTROL, tmp);
6926         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6927         WREG32(DC_HPD2_INT_CONTROL, tmp);
6928         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6929         WREG32(DC_HPD3_INT_CONTROL, tmp);
6930         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6931         WREG32(DC_HPD4_INT_CONTROL, tmp);
6932         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6933         WREG32(DC_HPD5_INT_CONTROL, tmp);
6934         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6935         WREG32(DC_HPD6_INT_CONTROL, tmp);
6936 
6937 }
6938 
6939 /**
6940  * cik_irq_init - init and enable the interrupt ring
6941  *
6942  * @rdev: radeon_device pointer
6943  *
6944  * Allocate a ring buffer for the interrupt controller,
6945  * enable the RLC, disable interrupts, enable the IH
6946  * ring buffer and enable it (CIK).
6947  * Called at device load and reume.
6948  * Returns 0 for success, errors for failure.
6949  */
6950 static int cik_irq_init(struct radeon_device *rdev)
6951 {
6952         int ret = 0;
6953         int rb_bufsz;
6954         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6955 
6956         /* allocate ring */
6957         ret = r600_ih_ring_alloc(rdev);
6958         if (ret)
6959                 return ret;
6960 
6961         /* disable irqs */
6962         cik_disable_interrupts(rdev);
6963 
6964         /* init rlc */
6965         ret = cik_rlc_resume(rdev);
6966         if (ret) {
6967                 r600_ih_ring_fini(rdev);
6968                 return ret;
6969         }
6970 
6971         /* setup interrupt control */
6972         /* set dummy read address to dummy page address */
6973         WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6974         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6975         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6976          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6977          */
6978         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6979         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6980         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6981         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6982 
6983         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6984         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6985 
6986         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6987                       IH_WPTR_OVERFLOW_CLEAR |
6988                       (rb_bufsz << 1));
6989 
6990         if (rdev->wb.enabled)
6991                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6992 
6993         /* set the writeback address whether it's enabled or not */
6994         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6995         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6996 
6997         WREG32(IH_RB_CNTL, ih_rb_cntl);
6998 
6999         /* set rptr, wptr to 0 */
7000         WREG32(IH_RB_RPTR, 0);
7001         WREG32(IH_RB_WPTR, 0);
7002 
7003         /* Default settings for IH_CNTL (disabled at first) */
7004         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7005         /* RPTR_REARM only works if msi's are enabled */
7006         if (rdev->msi_enabled)
7007                 ih_cntl |= RPTR_REARM;
7008         WREG32(IH_CNTL, ih_cntl);
7009 
7010         /* force the active interrupt state to all disabled */
7011         cik_disable_interrupt_state(rdev);
7012 
7013         pci_set_master(rdev->pdev);
7014 
7015         /* enable irqs */
7016         cik_enable_interrupts(rdev);
7017 
7018         return ret;
7019 }
7020 
7021 /**
7022  * cik_irq_set - enable/disable interrupt sources
7023  *
7024  * @rdev: radeon_device pointer
7025  *
7026  * Enable interrupt sources on the GPU (vblanks, hpd,
7027  * etc.) (CIK).
7028  * Returns 0 for success, errors for failure.
7029  */
7030 int cik_irq_set(struct radeon_device *rdev)
7031 {
7032         u32 cp_int_cntl;
7033         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7034         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7035         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7036         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7037         u32 grbm_int_cntl = 0;
7038         u32 dma_cntl, dma_cntl1;
7039 
7040         if (!rdev->irq.installed) {
7041                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7042                 return -EINVAL;
7043         }
7044         /* don't enable anything if the ih is disabled */
7045         if (!rdev->ih.enabled) {
7046                 cik_disable_interrupts(rdev);
7047                 /* force the active interrupt state to all disabled */
7048                 cik_disable_interrupt_state(rdev);
7049                 return 0;
7050         }
7051 
7052         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7053                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7054         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7055 
7056         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7059         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7060         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7061         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7062 
7063         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7064         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7065 
7066         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7071         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7072         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7073         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7074 
7075         /* enable CP interrupts on all rings */
7076         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7077                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7078                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7079         }
7080         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7081                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7082                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7083                 if (ring->me == 1) {
7084                         switch (ring->pipe) {
7085                         case 0:
7086                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7087                                 break;
7088                         case 1:
7089                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7090                                 break;
7091                         case 2:
7092                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7093                                 break;
7094                         case 3:
7095                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7096                                 break;
7097                         default:
7098                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7099                                 break;
7100                         }
7101                 } else if (ring->me == 2) {
7102                         switch (ring->pipe) {
7103                         case 0:
7104                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7105                                 break;
7106                         case 1:
7107                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7108                                 break;
7109                         case 2:
7110                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7111                                 break;
7112                         case 3:
7113                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7114                                 break;
7115                         default:
7116                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7117                                 break;
7118                         }
7119                 } else {
7120                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7121                 }
7122         }
7123         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7124                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7125                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7126                 if (ring->me == 1) {
7127                         switch (ring->pipe) {
7128                         case 0:
7129                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7130                                 break;
7131                         case 1:
7132                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7133                                 break;
7134                         case 2:
7135                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7136                                 break;
7137                         case 3:
7138                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7139                                 break;
7140                         default:
7141                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7142                                 break;
7143                         }
7144                 } else if (ring->me == 2) {
7145                         switch (ring->pipe) {
7146                         case 0:
7147                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7148                                 break;
7149                         case 1:
7150                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7151                                 break;
7152                         case 2:
7153                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7154                                 break;
7155                         case 3:
7156                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7157                                 break;
7158                         default:
7159                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7160                                 break;
7161                         }
7162                 } else {
7163                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7164                 }
7165         }
7166 
7167         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7168                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7169                 dma_cntl |= TRAP_ENABLE;
7170         }
7171 
7172         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7173                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7174                 dma_cntl1 |= TRAP_ENABLE;
7175         }
7176 
7177         if (rdev->irq.crtc_vblank_int[0] ||
7178             atomic_read(&rdev->irq.pflip[0])) {
7179                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7180                 crtc1 |= VBLANK_INTERRUPT_MASK;
7181         }
7182         if (rdev->irq.crtc_vblank_int[1] ||
7183             atomic_read(&rdev->irq.pflip[1])) {
7184                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7185                 crtc2 |= VBLANK_INTERRUPT_MASK;
7186         }
7187         if (rdev->irq.crtc_vblank_int[2] ||
7188             atomic_read(&rdev->irq.pflip[2])) {
7189                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7190                 crtc3 |= VBLANK_INTERRUPT_MASK;
7191         }
7192         if (rdev->irq.crtc_vblank_int[3] ||
7193             atomic_read(&rdev->irq.pflip[3])) {
7194                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7195                 crtc4 |= VBLANK_INTERRUPT_MASK;
7196         }
7197         if (rdev->irq.crtc_vblank_int[4] ||
7198             atomic_read(&rdev->irq.pflip[4])) {
7199                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7200                 crtc5 |= VBLANK_INTERRUPT_MASK;
7201         }
7202         if (rdev->irq.crtc_vblank_int[5] ||
7203             atomic_read(&rdev->irq.pflip[5])) {
7204                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7205                 crtc6 |= VBLANK_INTERRUPT_MASK;
7206         }
7207         if (rdev->irq.hpd[0]) {
7208                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7209                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210         }
7211         if (rdev->irq.hpd[1]) {
7212                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7213                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214         }
7215         if (rdev->irq.hpd[2]) {
7216                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7217                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218         }
7219         if (rdev->irq.hpd[3]) {
7220                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7221                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222         }
7223         if (rdev->irq.hpd[4]) {
7224                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7225                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226         }
7227         if (rdev->irq.hpd[5]) {
7228                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7229                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230         }
7231 
7232         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7233 
7234         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7235         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7236 
7237         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7238         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7239         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7240         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7241         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7242         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7243         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7244         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7245 
7246         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247 
7248         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250         if (rdev->num_crtc >= 4) {
7251                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253         }
7254         if (rdev->num_crtc >= 6) {
7255                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257         }
7258 
7259         if (rdev->num_crtc >= 2) {
7260                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261                        GRPH_PFLIP_INT_MASK);
7262                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263                        GRPH_PFLIP_INT_MASK);
7264         }
7265         if (rdev->num_crtc >= 4) {
7266                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267                        GRPH_PFLIP_INT_MASK);
7268                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269                        GRPH_PFLIP_INT_MASK);
7270         }
7271         if (rdev->num_crtc >= 6) {
7272                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273                        GRPH_PFLIP_INT_MASK);
7274                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275                        GRPH_PFLIP_INT_MASK);
7276         }
7277 
7278         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284 
7285         /* posting read */
7286         RREG32(SRBM_STATUS);
7287 
7288         return 0;
7289 }
7290 
7291 /**
7292  * cik_irq_ack - ack interrupt sources
7293  *
7294  * @rdev: radeon_device pointer
7295  *
7296  * Ack interrupt sources on the GPU (vblanks, vlines, page flips,
7297  * hpd, etc.) (CIK).  Certain interrupt sources are sw
7298  * generated and do not require an explicit ack.
      *
      * The display interrupt status registers and the per-CRTC
      * GRPH_INT_STATUS registers are first latched into
      * rdev->irq.stat_regs.cik; every pending source found in those
      * latched values is then acked in hardware.  The latched copies are
      * left in place and are consulted by cik_irq_process().
7299  */
7300 static inline void cik_irq_ack(struct radeon_device *rdev)
7301 {
7302         u32 tmp;
7303 
             /*
              * Latch the display interrupt status registers.
              * disp_int_cont6 is stored but not tested in this function.
              */
7304         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7305         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7306         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7307         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7308         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7309         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7310         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7311 
             /*
              * Latch the per-CRTC GRPH_INT_STATUS registers.  CRTC 0 and 1 are
              * always read; CRTC 2/3 and 4/5 only when num_crtc is at least
              * 4 and 6 respectively.
              */
7312         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7313                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7314         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7315                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7316         if (rdev->num_crtc >= 4) {
7317                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7318                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7319                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7320                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7321         }
7322         if (rdev->num_crtc >= 6) {
7323                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7324                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7325                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7326                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7327         }
7328 
             /* CRTC 0/1: clear pending page flips, then ack vblank and vline. */
7329         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7330                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7331                        GRPH_PFLIP_INT_CLEAR);
7332         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7333                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7334                        GRPH_PFLIP_INT_CLEAR);
7335         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7336                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7337         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7338                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7339         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7340                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7341         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7342                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7343 
             /* CRTC 2/3: same acks, only when these CRTCs were latched above. */
7344         if (rdev->num_crtc >= 4) {
7345                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7346                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7347                                GRPH_PFLIP_INT_CLEAR);
7348                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7349                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7350                                GRPH_PFLIP_INT_CLEAR);
7351                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7352                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7353                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7354                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7355                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7356                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7357                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7358                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7359         }
7360 
             /* CRTC 4/5: same acks, only when these CRTCs were latched above. */
7361         if (rdev->num_crtc >= 6) {
7362                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7363                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7364                                GRPH_PFLIP_INT_CLEAR);
7365                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7366                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7367                                GRPH_PFLIP_INT_CLEAR);
7368                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7369                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7370                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7371                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7372                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7373                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7374                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7375                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7376         }
7377 
             /*
              * HPD 1-6: read-modify-write of DC_HPDx_INT_CONTROL so that
              * DC_HPDx_INT_ACK is set on top of the bits currently in the
              * register.  These are checked regardless of num_crtc.
              */
7378         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7379                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7380                 tmp |= DC_HPDx_INT_ACK;
7381                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7382         }
7383         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7384                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7385                 tmp |= DC_HPDx_INT_ACK;
7386                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7387         }
7388         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7389                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7390                 tmp |= DC_HPDx_INT_ACK;
7391                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7392         }
7393         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7394                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7395                 tmp |= DC_HPDx_INT_ACK;
7396                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7397         }
7398         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7399                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7400                 tmp |= DC_HPDx_INT_ACK;
7401                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7402         }
7403         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7404                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7405                 tmp |= DC_HPDx_INT_ACK;
7406                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7407         }
             /*
              * HPD RX 1-6: same read-modify-write on the same control
              * registers, this time setting DC_HPDx_RX_INT_ACK.
              */
7408         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7409                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7410                 tmp |= DC_HPDx_RX_INT_ACK;
7411                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7412         }
7413         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7414                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7415                 tmp |= DC_HPDx_RX_INT_ACK;
7416                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7417         }
7418         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7419                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7420                 tmp |= DC_HPDx_RX_INT_ACK;
7421                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7422         }
7423         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7424                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7425                 tmp |= DC_HPDx_RX_INT_ACK;
7426                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7427         }
7428         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7429                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7430                 tmp |= DC_HPDx_RX_INT_ACK;
7431                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7432         }
7433         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7434                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7435                 tmp |= DC_HPDx_RX_INT_ACK;
7436                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7437         }
7438 }
7439 
7440 /**
7441  * cik_irq_disable - disable interrupts
7442  *
7443  * @rdev: radeon_device pointer
7444  *
7445  * Disable interrupts on the hw (CIK).
      *
      * Sequence: cik_disable_interrupts(), a 1 ms busy-wait, an ack of
      * whatever is still pending via cik_irq_ack(), and finally
      * cik_disable_interrupt_state().
7446  */
7447 static void cik_irq_disable(struct radeon_device *rdev)
7448 {
7449         cik_disable_interrupts(rdev);
7450         /* Wait 1 ms, then acknowledge any irq that is still pending */
7451         mdelay(1);
7452         cik_irq_ack(rdev);
             /* NOTE(review): presumably clears the per-source enable bits - helper not visible here, confirm */
7453         cik_disable_interrupt_state(rdev);
7454 }
7455 
7456 /**
7457  * cik_irq_suspend - disable interrupts for suspend
7458  *
7459  * @rdev: radeon_device pointer
7460  *
7461  * Disable interrupts (cik_irq_disable()) and then stop the RLC
7462  * (cik_rlc_stop()) (CIK).  Used for suspend.
7463  */
7464 static void cik_irq_suspend(struct radeon_device *rdev)
7465 {
7466         cik_irq_disable(rdev);
7467         cik_rlc_stop(rdev);
7468 }
7469 
7470 /**
7471  * cik_irq_fini - tear down interrupt support
7472  *
7473  * @rdev: radeon_device pointer
7474  *
7475  * Disable interrupts on the hw and free the IH ring
7476  * buffer (CIK).  The disable step goes through cik_irq_suspend(),
      * so the RLC is stopped as well before r600_ih_ring_fini() runs.
7477  * Used for driver unload.
7478  */
7479 static void cik_irq_fini(struct radeon_device *rdev)
7480 {
7481         cik_irq_suspend(rdev);
7482         r600_ih_ring_fini(rdev);
7483 }
7484 
7485 /**
7486  * cik_get_ih_wptr - get the IH ring buffer wptr
7487  *
7488  * @rdev: radeon_device pointer
7489  *
7490  * Get the IH ring buffer wptr from the writeback memory buffer when
7491  * writeback is enabled, otherwise from the IH_RB_WPTR register (CIK).
7492  * Also check for ring buffer overflow and deal with it: the
      * RB_OVERFLOW flag is stripped from the value, rdev->ih.rptr is moved
      * to (wptr + 16) and IH_WPTR_OVERFLOW_CLEAR is set in IH_RB_CNTL.
7493  * Used by cik_irq_process().
7494  * Returns the value of the wptr, masked with rdev->ih.ptr_mask.
7495  */
7496 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7497 {
7498         u32 wptr, tmp;
7499 
             /* The writeback slot is little-endian; the offset is in bytes, the array in dwords. */
7500         if (rdev->wb.enabled)
7501                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7502         else
7503                 wptr = RREG32(IH_RB_WPTR);
7504 
7505         if (wptr & RB_OVERFLOW) {
7506                 wptr &= ~RB_OVERFLOW;
7507                 /* When a ring buffer overflow happens, start parsing interrupts
7508                  * from the last vector that was not overwritten (wptr + 16).
7509                  * Hopefully this should allow us to catch up.
7510                  */
7511                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7512                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7513                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
                     /* Read-modify-write so only the overflow-clear bit is added. */
7514                 tmp = RREG32(IH_RB_CNTL);
7515                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7516                 WREG32(IH_RB_CNTL, tmp);
7517         }
7518         return (wptr & rdev->ih.ptr_mask);
7519 }
7520 
7521 /*        CIK IV Ring
7522  * Each IV ring entry is 128 bits:
7523  * [7:0]    - interrupt source id
7524  * [31:8]   - reserved
7525  * [59:32]  - interrupt source data
7526  * [63:60]  - reserved
7527  * [71:64]  - RINGID
7528  *            CP:
7529  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533  *            PIPE_ID - ME0 0=3D
7534  *                    - ME1&2 compute dispatcher (4 pipes each)
7535  *            SDMA:
7536  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539  * [79:72]  - VMID
7540  * [95:80]  - PASID
7541  * [127:96] - reserved
7542  */
7543 /**
7544  * cik_irq_process - interrupt handler
7545  *
7546  * @rdev: radeon_device pointer
7547  *
7548  * Interrupt handler (CIK).  Walk the IH ring,
7549  * ack interrupts and schedule work to handle
7550  * interrupt events.
7551  * Returns irq process return code.
7552  */
7553 int cik_irq_process(struct radeon_device *rdev)
7554 {
7555         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557         u32 wptr;
7558         u32 rptr;
7559         u32 src_id, src_data, ring_id;
7560         u8 me_id, pipe_id, queue_id;
7561         u32 ring_index;
7562         bool queue_hotplug = false;
7563         bool queue_dp = false;
7564         bool queue_reset = false;
7565         u32 addr, status, mc_client;
7566         bool queue_thermal = false;
7567 
7568         if (!rdev->ih.enabled || rdev->shutdown)
7569                 return IRQ_NONE;
7570 
7571         wptr = cik_get_ih_wptr(rdev);
7572 
7573 restart_ih:
7574         /* is somebody else already processing irqs? */
7575         if (atomic_xchg(&rdev->ih.lock, 1))
7576                 return IRQ_NONE;
7577 
7578         rptr = rdev->ih.rptr;
7579         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580 
7581         /* Order reading of wptr vs. reading of IH ring data */
7582         rmb();
7583 
7584         /* display interrupts */
7585         cik_irq_ack(rdev);
7586 
7587         while (rptr != wptr) {
7588                 /* wptr/rptr are in bytes! */
7589                 ring_index = rptr / 4;
7590 
7591                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7592                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7593                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7594 
7595                 switch (src_id) {
7596                 case 1: /* D1 vblank/vline */
7597                         switch (src_data) {
7598                         case 0: /* D1 vblank */
7599                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7600                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601 
7602                                 if (rdev->irq.crtc_vblank_int[0]) {
7603                                         drm_handle_vblank(rdev->ddev, 0);
7604                                         rdev->pm.vblank_sync = true;
7605                                         wake_up(&rdev->irq.vblank_queue);
7606                                 }
7607                                 if (atomic_read(&rdev->irq.pflip[0]))
7608                                         radeon_crtc_handle_vblank(rdev, 0);
7609                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7610                                 DRM_DEBUG("IH: D1 vblank\n");
7611 
7612                                 break;
7613                         case 1: /* D1 vline */
7614                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7615                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616 
7617                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7618                                 DRM_DEBUG("IH: D1 vline\n");
7619 
7620                                 break;
7621                         default:
7622                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623                                 break;
7624                         }
7625                         break;
7626                 case 2: /* D2 vblank/vline */
7627                         switch (src_data) {
7628                         case 0: /* D2 vblank */
7629                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7630                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631 
7632                                 if (rdev->irq.crtc_vblank_int[1]) {
7633                                         drm_handle_vblank(rdev->ddev, 1);
7634                                         rdev->pm.vblank_sync = true;
7635                                         wake_up(&rdev->irq.vblank_queue);
7636                                 }
7637                                 if (atomic_read(&rdev->irq.pflip[1]))
7638                                         radeon_crtc_handle_vblank(rdev, 1);
7639                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7640                                 DRM_DEBUG("IH: D2 vblank\n");
7641 
7642                                 break;
7643                         case 1: /* D2 vline */
7644                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7645                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646 
7647                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7648                                 DRM_DEBUG("IH: D2 vline\n");
7649 
7650                                 break;
7651                         default:
7652                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653                                 break;
7654                         }
7655                         break;
7656                 case 3: /* D3 vblank/vline */
7657                         switch (src_data) {
7658                         case 0: /* D3 vblank */
7659                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7660                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661 
7662                                 if (rdev->irq.crtc_vblank_int[2]) {
7663                                         drm_handle_vblank(rdev->ddev, 2);
7664                                         rdev->pm.vblank_sync = true;
7665                                         wake_up(&rdev->irq.vblank_queue);
7666                                 }
7667                                 if (atomic_read(&rdev->irq.pflip[2]))
7668                                         radeon_crtc_handle_vblank(rdev, 2);
7669                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7670                                 DRM_DEBUG("IH: D3 vblank\n");
7671 
7672                                 break;
7673                         case 1: /* D3 vline */
7674                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7675                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676 
7677                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7678                                 DRM_DEBUG("IH: D3 vline\n");
7679 
7680                                 break;
7681                         default:
7682                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683                                 break;
7684                         }
7685                         break;
7686                 case 4: /* D4 vblank/vline */
7687                         switch (src_data) {
7688                         case 0: /* D4 vblank */
7689                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7690                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691 
7692                                 if (rdev->irq.crtc_vblank_int[3]) {
7693                                         drm_handle_vblank(rdev->ddev, 3);
7694                                         rdev->pm.vblank_sync = true;
7695                                         wake_up(&rdev->irq.vblank_queue);
7696                                 }
7697                                 if (atomic_read(&rdev->irq.pflip[3]))
7698                                         radeon_crtc_handle_vblank(rdev, 3);
7699                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7700                                 DRM_DEBUG("IH: D4 vblank\n");
7701 
7702                                 break;
7703                         case 1: /* D4 vline */
7704                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7705                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706 
7707                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7708                                 DRM_DEBUG("IH: D4 vline\n");
7709 
7710                                 break;
7711                         default:
7712                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713                                 break;
7714                         }
7715                         break;
7716                 case 5: /* D5 vblank/vline */
7717                         switch (src_data) {
7718                         case 0: /* D5 vblank */
7719                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7720                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721 
7722                                 if (rdev->irq.crtc_vblank_int[4]) {
7723                                         drm_handle_vblank(rdev->ddev, 4);
7724                                         rdev->pm.vblank_sync = true;
7725                                         wake_up(&rdev->irq.vblank_queue);
7726                                 }
7727                                 if (atomic_read(&rdev->irq.pflip[4]))
7728                                         radeon_crtc_handle_vblank(rdev, 4);
7729                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7730                                 DRM_DEBUG("IH: D5 vblank\n");
7731 
7732                                 break;
7733                         case 1: /* D5 vline */
7734                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7735                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736 
7737                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7738                                 DRM_DEBUG("IH: D5 vline\n");
7739 
7740                                 break;
7741                         default:
7742                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743                                 break;
7744                         }
7745                         break;
7746                 case 6: /* D6 vblank/vline */
7747                         switch (src_data) {
7748                         case 0: /* D6 vblank */
7749                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7750                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751 
7752                                 if (rdev->irq.crtc_vblank_int[5]) {
7753                                         drm_handle_vblank(rdev->ddev, 5);
7754                                         rdev->pm.vblank_sync = true;
7755                                         wake_up(&rdev->irq.vblank_queue);
7756                                 }
7757                                 if (atomic_read(&rdev->irq.pflip[5]))
7758                                         radeon_crtc_handle_vblank(rdev, 5);
7759                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7760                                 DRM_DEBUG("IH: D6 vblank\n");
7761 
7762                                 break;
7763                         case 1: /* D6 vline */
7764                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7765                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766 
7767                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7768                                 DRM_DEBUG("IH: D6 vline\n");
7769 
7770                                 break;
7771                         default:
7772                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773                                 break;
7774                         }
7775                         break;
7776                 case 8: /* D1 page flip */
7777                 case 10: /* D2 page flip */
7778                 case 12: /* D3 page flip */
7779                 case 14: /* D4 page flip */
7780                 case 16: /* D5 page flip */
7781                 case 18: /* D6 page flip */
7782                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7783                         if (radeon_use_pflipirq > 0)
7784                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7785                         break;
7786                 case 42: /* HPD hotplug */
7787                         switch (src_data) {
7788                         case 0:
7789                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7790                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791 
7792                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7793                                 queue_hotplug = true;
7794                                 DRM_DEBUG("IH: HPD1\n");
7795 
7796                                 break;
7797                         case 1:
7798                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7799                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800 
7801                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7802                                 queue_hotplug = true;
7803                                 DRM_DEBUG("IH: HPD2\n");
7804 
7805                                 break;
7806                         case 2:
7807                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7808                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809 
7810                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7811                                 queue_hotplug = true;
7812                                 DRM_DEBUG("IH: HPD3\n");
7813 
7814                                 break;
7815                         case 3:
7816                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7817                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818 
7819                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7820                                 queue_hotplug = true;
7821                                 DRM_DEBUG("IH: HPD4\n");
7822 
7823                                 break;
7824                         case 4:
7825                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7826                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827 
7828                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7829                                 queue_hotplug = true;
7830                                 DRM_DEBUG("IH: HPD5\n");
7831 
7832                                 break;
7833                         case 5:
7834                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7835                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836 
7837                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7838                                 queue_hotplug = true;
7839                                 DRM_DEBUG("IH: HPD6\n");
7840 
7841                                 break;
7842                         case 6:
7843                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7844                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845 
7846                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7847                                 queue_dp = true;
7848                                 DRM_DEBUG("IH: HPD_RX 1\n");
7849 
7850                                 break;
7851                         case 7:
7852                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7853                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854 
7855                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7856                                 queue_dp = true;
7857                                 DRM_DEBUG("IH: HPD_RX 2\n");
7858 
7859                                 break;
7860                         case 8:
7861                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7862                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863 
7864                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7865                                 queue_dp = true;
7866                                 DRM_DEBUG("IH: HPD_RX 3\n");
7867 
7868                                 break;
7869                         case 9:
7870                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7871                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872 
7873                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7874                                 queue_dp = true;
7875                                 DRM_DEBUG("IH: HPD_RX 4\n");
7876 
7877                                 break;
7878                         case 10:
7879                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7880                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881 
7882                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7883                                 queue_dp = true;
7884                                 DRM_DEBUG("IH: HPD_RX 5\n");
7885 
7886                                 break;
7887                         case 11:
7888                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7889                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890 
7891                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7892                                 queue_dp = true;
7893                                 DRM_DEBUG("IH: HPD_RX 6\n");
7894 
7895                                 break;
7896                         default:
7897                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7898                                 break;
7899                         }
7900                         break;
7901                 case 96:
7902                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7903                         WREG32(SRBM_INT_ACK, 0x1);
7904                         break;
7905                 case 124: /* UVD */
7906                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7907                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7908                         break;
7909                 case 146:
7910                 case 147:
7911                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7912                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7913                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7914                         /* reset addr and status */
7915                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7916                         if (addr == 0x0 && status == 0x0)
7917                                 break;
7918                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7919                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7920                                 addr);
7921                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7922                                 status);
7923                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7924                         break;
7925                 case 167: /* VCE */
7926                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7927                         switch (src_data) {
7928                         case 0:
7929                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7930                                 break;
7931                         case 1:
7932                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7933                                 break;
7934                         default:
7935                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7936                                 break;
7937                         }
7938                         break;
7939                 case 176: /* GFX RB CP_INT */
7940                 case 177: /* GFX IB CP_INT */
7941                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7942                         break;
7943                 case 181: /* CP EOP event */
7944                         DRM_DEBUG("IH: CP EOP\n");
7945                         /* XXX check the bitfield order! */
7946                         me_id = (ring_id & 0x60) >> 5;
7947                         pipe_id = (ring_id & 0x18) >> 3;
7948                         queue_id = (ring_id & 0x7) >> 0;
7949                         switch (me_id) {
7950                         case 0:
7951                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7952                                 break;
7953                         case 1:
7954                         case 2:
7955                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7956                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7957                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7958                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7959                                 break;
7960                         }
7961                         break;
7962                 case 184: /* CP Privileged reg access */
7963                         DRM_ERROR("Illegal register access in command stream\n");
7964                         /* XXX check the bitfield order! */
7965                         me_id = (ring_id & 0x60) >> 5;
7966                         pipe_id = (ring_id & 0x18) >> 3;
7967                         queue_id = (ring_id & 0x7) >> 0;
7968                         switch (me_id) {
7969                         case 0:
7970                                 /* This results in a full GPU reset, but all we need to do is soft
7971                                  * reset the CP for gfx
7972                                  */
7973                                 queue_reset = true;
7974                                 break;
7975                         case 1:
7976                                 /* XXX compute */
7977                                 queue_reset = true;
7978                                 break;
7979                         case 2:
7980                                 /* XXX compute */
7981                                 queue_reset = true;
7982                                 break;
7983                         }
7984                         break;
7985                 case 185: /* CP Privileged inst */
7986                         DRM_ERROR("Illegal instruction in command stream\n");
7987                         /* XXX check the bitfield order! */
7988                         me_id = (ring_id & 0x60) >> 5;
7989                         pipe_id = (ring_id & 0x18) >> 3;
7990                         queue_id = (ring_id & 0x7) >> 0;
7991                         switch (me_id) {
7992                         case 0:
7993                                 /* This results in a full GPU reset, but all we need to do is soft
7994                                  * reset the CP for gfx
7995                                  */
7996                                 queue_reset = true;
7997                                 break;
7998                         case 1:
7999                                 /* XXX compute */
8000                                 queue_reset = true;
8001                                 break;
8002                         case 2:
8003                                 /* XXX compute */
8004                                 queue_reset = true;
8005                                 break;
8006                         }
8007                         break;
8008                 case 224: /* SDMA trap event */
8009                         /* XXX check the bitfield order! */
8010                         me_id = (ring_id & 0x3) >> 0;
8011                         queue_id = (ring_id & 0xc) >> 2;
8012                         DRM_DEBUG("IH: SDMA trap\n");
8013                         switch (me_id) {
8014                         case 0:
8015                                 switch (queue_id) {
8016                                 case 0:
8017                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8018                                         break;
8019                                 case 1:
8020                                         /* XXX compute */
8021                                         break;
8022                                 case 2:
8023                                         /* XXX compute */
8024                                         break;
8025                                 }
8026                                 break;
8027                         case 1:
8028                                 switch (queue_id) {
8029                                 case 0:
8030                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8031                                         break;
8032                                 case 1:
8033                                         /* XXX compute */
8034                                         break;
8035                                 case 2:
8036                                         /* XXX compute */
8037                                         break;
8038                                 }
8039                                 break;
8040                         }
8041                         break;
8042                 case 230: /* thermal low to high */
8043                         DRM_DEBUG("IH: thermal low to high\n");
8044                         rdev->pm.dpm.thermal.high_to_low = false;
8045                         queue_thermal = true;
8046                         break;
8047                 case 231: /* thermal high to low */
8048                         DRM_DEBUG("IH: thermal high to low\n");
8049                         rdev->pm.dpm.thermal.high_to_low = true;
8050                         queue_thermal = true;
8051                         break;
8052                 case 233: /* GUI IDLE */
8053                         DRM_DEBUG("IH: GUI idle\n");
8054                         break;
8055                 case 241: /* SDMA Privileged inst */
8056                 case 247: /* SDMA Privileged inst */
8057                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8058                         /* XXX check the bitfield order! */
8059                         me_id = (ring_id & 0x3) >> 0;
8060                         queue_id = (ring_id & 0xc) >> 2;
8061                         switch (me_id) {
8062                         case 0:
8063                                 switch (queue_id) {
8064                                 case 0:
8065                                         queue_reset = true;
8066                                         break;
8067                                 case 1:
8068                                         /* XXX compute */
8069                                         queue_reset = true;
8070                                         break;
8071                                 case 2:
8072                                         /* XXX compute */
8073                                         queue_reset = true;
8074                                         break;
8075                                 }
8076                                 break;
8077                         case 1:
8078                                 switch (queue_id) {
8079                                 case 0:
8080                                         queue_reset = true;
8081                                         break;
8082                                 case 1:
8083                                         /* XXX compute */
8084                                         queue_reset = true;
8085                                         break;
8086                                 case 2:
8087                                         /* XXX compute */
8088                                         queue_reset = true;
8089                                         break;
8090                                 }
8091                                 break;
8092                         }
8093                         break;
8094                 default:
8095                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8096                         break;
8097                 }
8098 
8099                 /* wptr/rptr are in bytes! */
8100                 rptr += 16;
8101                 rptr &= rdev->ih.ptr_mask;
8102                 WREG32(IH_RB_RPTR, rptr);
8103         }
8104         if (queue_dp)
8105                 schedule_work(&rdev->dp_work);
8106         if (queue_hotplug)
8107                 schedule_delayed_work(&rdev->hotplug_work, 0);
8108         if (queue_reset) {
8109                 rdev->needs_reset = true;
8110                 wake_up_all(&rdev->fence_queue);
8111         }
8112         if (queue_thermal)
8113                 schedule_work(&rdev->pm.dpm.thermal.work);
8114         rdev->ih.rptr = rptr;
8115         atomic_set(&rdev->ih.lock, 0);
8116 
8117         /* make sure wptr hasn't changed while processing */
8118         wptr = cik_get_ih_wptr(rdev);
8119         if (wptr != rptr)
8120                 goto restart_ih;
8121 
8122         return IRQ_HANDLED;
8123 }
8124 
8125 /*
8126  * startup/shutdown callbacks
8127  */
8128 static void cik_uvd_init(struct radeon_device *rdev)
8129 {
8130         int r;
8131 
8132         if (!rdev->has_uvd)
8133                 return;
8134 
8135         r = radeon_uvd_init(rdev);
8136         if (r) {
8137                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8138                 /*
8139                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8140                  * to early fails cik_uvd_start() and thus nothing happens
8141                  * there. So it is pointless to try to go through that code
8142                  * hence why we disable uvd here.
8143                  */
8144                 rdev->has_uvd = 0;
8145                 return;
8146         }
8147         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8148         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8149 }
8150 
8151 static void cik_uvd_start(struct radeon_device *rdev)
8152 {
8153         int r;
8154 
8155         if (!rdev->has_uvd)
8156                 return;
8157 
8158         r = radeon_uvd_resume(rdev);
8159         if (r) {
8160                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8161                 goto error;
8162         }
8163         r = uvd_v4_2_resume(rdev);
8164         if (r) {
8165                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8166                 goto error;
8167         }
8168         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8169         if (r) {
8170                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8171                 goto error;
8172         }
8173         return;
8174 
8175 error:
8176         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8177 }
8178 
8179 static void cik_uvd_resume(struct radeon_device *rdev)
8180 {
8181         struct radeon_ring *ring;
8182         int r;
8183 
8184         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8185                 return;
8186 
8187         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8188         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8189         if (r) {
8190                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8191                 return;
8192         }
8193         r = uvd_v1_0_init(rdev);
8194         if (r) {
8195                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8196                 return;
8197         }
8198 }
8199 
8200 static void cik_vce_init(struct radeon_device *rdev)
8201 {
8202         int r;
8203 
8204         if (!rdev->has_vce)
8205                 return;
8206 
8207         r = radeon_vce_init(rdev);
8208         if (r) {
8209                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8210                 /*
8211                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8212                  * to early fails cik_vce_start() and thus nothing happens
8213                  * there. So it is pointless to try to go through that code
8214                  * hence why we disable vce here.
8215                  */
8216                 rdev->has_vce = 0;
8217                 return;
8218         }
8219         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8220         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8221         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8222         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8223 }
8224 
8225 static void cik_vce_start(struct radeon_device *rdev)
8226 {
8227         int r;
8228 
8229         if (!rdev->has_vce)
8230                 return;
8231 
8232         r = radeon_vce_resume(rdev);
8233         if (r) {
8234                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8235                 goto error;
8236         }
8237         r = vce_v2_0_resume(rdev);
8238         if (r) {
8239                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8240                 goto error;
8241         }
8242         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8243         if (r) {
8244                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8245                 goto error;
8246         }
8247         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8248         if (r) {
8249                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8250                 goto error;
8251         }
8252         return;
8253 
8254 error:
8255         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8256         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8257 }
8258 
8259 static void cik_vce_resume(struct radeon_device *rdev)
8260 {
8261         struct radeon_ring *ring;
8262         int r;
8263 
8264         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8265                 return;
8266 
8267         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8268         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8269         if (r) {
8270                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8271                 return;
8272         }
8273         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8274         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8275         if (r) {
8276                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8277                 return;
8278         }
8279         r = vce_v1_0_init(rdev);
8280         if (r) {
8281                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8282                 return;
8283         }
8284 }
8285 
8286 /**
8287  * cik_startup - program the asic to a functional state
8288  *
8289  * @rdev: radeon_device pointer
8290  *
8291  * Programs the asic to a functional state (CIK).
8292  * Called by cik_init() and cik_resume().
8293  * Returns 0 for success, error for failure.
8294  */
8295 static int cik_startup(struct radeon_device *rdev)
8296 {
8297         struct radeon_ring *ring;
8298         u32 nop;
8299         int r;
8300 
8301         /* enable pcie gen2/3 link */
8302         cik_pcie_gen3_enable(rdev);
8303         /* enable aspm */
8304         cik_program_aspm(rdev);
8305 
8306         /* scratch needs to be initialized before MC */
8307         r = r600_vram_scratch_init(rdev);
8308         if (r)
8309                 return r;
8310 
8311         cik_mc_program(rdev);
8312 
8313         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8314                 r = ci_mc_load_microcode(rdev);
8315                 if (r) {
8316                         DRM_ERROR("Failed to load MC firmware!\n");
8317                         return r;
8318                 }
8319         }
8320 
8321         r = cik_pcie_gart_enable(rdev);
8322         if (r)
8323                 return r;
8324         cik_gpu_init(rdev);
8325 
8326         /* allocate rlc buffers */
8327         if (rdev->flags & RADEON_IS_IGP) {
8328                 if (rdev->family == CHIP_KAVERI) {
8329                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8330                         rdev->rlc.reg_list_size =
8331                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8332                 } else {
8333                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8334                         rdev->rlc.reg_list_size =
8335                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8336                 }
8337         }
8338         rdev->rlc.cs_data = ci_cs_data;
8339         rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8340         rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8341         r = sumo_rlc_init(rdev);
8342         if (r) {
8343                 DRM_ERROR("Failed to init rlc BOs!\n");
8344                 return r;
8345         }
8346 
8347         /* allocate wb buffer */
8348         r = radeon_wb_init(rdev);
8349         if (r)
8350                 return r;
8351 
8352         /* allocate mec buffers */
8353         r = cik_mec_init(rdev);
8354         if (r) {
8355                 DRM_ERROR("Failed to init MEC BOs!\n");
8356                 return r;
8357         }
8358 
8359         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8360         if (r) {
8361                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362                 return r;
8363         }
8364 
8365         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8366         if (r) {
8367                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368                 return r;
8369         }
8370 
8371         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8372         if (r) {
8373                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8374                 return r;
8375         }
8376 
8377         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8378         if (r) {
8379                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380                 return r;
8381         }
8382 
8383         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8384         if (r) {
8385                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8386                 return r;
8387         }
8388 
8389         cik_uvd_start(rdev);
8390         cik_vce_start(rdev);
8391 
8392         /* Enable IRQ */
8393         if (!rdev->irq.installed) {
8394                 r = radeon_irq_kms_init(rdev);
8395                 if (r)
8396                         return r;
8397         }
8398 
8399         r = cik_irq_init(rdev);
8400         if (r) {
8401                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8402                 radeon_irq_kms_fini(rdev);
8403                 return r;
8404         }
8405         cik_irq_set(rdev);
8406 
8407         if (rdev->family == CHIP_HAWAII) {
8408                 if (rdev->new_fw)
8409                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8410                 else
8411                         nop = RADEON_CP_PACKET2;
8412         } else {
8413                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8414         }
8415 
8416         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8417         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8418                              nop);
8419         if (r)
8420                 return r;
8421 
8422         /* set up the compute queues */
8423         /* type-2 packets are deprecated on MEC, use type-3 instead */
8424         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8425         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8426                              nop);
8427         if (r)
8428                 return r;
8429         ring->me = 1; /* first MEC */
8430         ring->pipe = 0; /* first pipe */
8431         ring->queue = 0; /* first queue */
8432         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8433 
8434         /* type-2 packets are deprecated on MEC, use type-3 instead */
8435         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8436         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8437                              nop);
8438         if (r)
8439                 return r;
8440         /* dGPU only have 1 MEC */
8441         ring->me = 1; /* first MEC */
8442         ring->pipe = 0; /* first pipe */
8443         ring->queue = 1; /* second queue */
8444         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8445 
8446         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8447         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8448                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8449         if (r)
8450                 return r;
8451 
8452         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8453         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8454                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8455         if (r)
8456                 return r;
8457 
8458         r = cik_cp_resume(rdev);
8459         if (r)
8460                 return r;
8461 
8462         r = cik_sdma_resume(rdev);
8463         if (r)
8464                 return r;
8465 
8466         cik_uvd_resume(rdev);
8467         cik_vce_resume(rdev);
8468 
8469         r = radeon_ib_pool_init(rdev);
8470         if (r) {
8471                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8472                 return r;
8473         }
8474 
8475         r = radeon_vm_manager_init(rdev);
8476         if (r) {
8477                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8478                 return r;
8479         }
8480 
8481         r = radeon_audio_init(rdev);
8482         if (r)
8483                 return r;
8484 
8485         return 0;
8486 }
8487 
8488 /**
8489  * cik_resume - resume the asic to a functional state
8490  *
8491  * @rdev: radeon_device pointer
8492  *
8493  * Programs the asic to a functional state (CIK).
8494  * Called at resume.
8495  * Returns 0 for success, error for failure.
8496  */
8497 int cik_resume(struct radeon_device *rdev)
8498 {
8499         int r;
8500 
8501         /* post card */
8502         atom_asic_init(rdev->mode_info.atom_context);
8503 
8504         /* init golden registers */
8505         cik_init_golden_registers(rdev);
8506 
8507         if (rdev->pm.pm_method == PM_METHOD_DPM)
8508                 radeon_pm_resume(rdev);
8509 
8510         rdev->accel_working = true;
8511         r = cik_startup(rdev);
8512         if (r) {
8513                 DRM_ERROR("cik startup failed on resume\n");
8514                 rdev->accel_working = false;
8515                 return r;
8516         }
8517 
8518         return r;
8519 
8520 }
8521 
8522 /**
8523  * cik_suspend - suspend the asic
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Bring the chip into a state suitable for suspend (CIK).
8528  * Called at suspend.
8529  * Returns 0 for success.
8530  */
8531 int cik_suspend(struct radeon_device *rdev)
8532 {
8533         radeon_pm_suspend(rdev);
8534         radeon_audio_fini(rdev);
8535         radeon_vm_manager_fini(rdev);
8536         cik_cp_enable(rdev, false);
8537         cik_sdma_enable(rdev, false);
8538         if (rdev->has_uvd) {
8539                 uvd_v1_0_fini(rdev);
8540                 radeon_uvd_suspend(rdev);
8541         }
8542         if (rdev->has_vce)
8543                 radeon_vce_suspend(rdev);
8544         cik_fini_pg(rdev);
8545         cik_fini_cg(rdev);
8546         cik_irq_suspend(rdev);
8547         radeon_wb_disable(rdev);
8548         cik_pcie_gart_disable(rdev);
8549         return 0;
8550 }
8551 
8552 /* Plan is to move initialization in that function and use
8553  * helper function so that radeon_device_init pretty much
8554  * do nothing more than calling asic specific function. This
8555  * should also allow to remove a bunch of callback function
8556  * like vram_info.
8557  */
8558 /**
8559  * cik_init - asic specific driver and hw init
8560  *
8561  * @rdev: radeon_device pointer
8562  *
8563  * Setup asic specific driver variables and program the hw
8564  * to a functional state (CIK).
8565  * Called at driver startup.
8566  * Returns 0 for success, errors for failure.
8567  */
8568 int cik_init(struct radeon_device *rdev)
8569 {
8570         struct radeon_ring *ring;
8571         int r;
8572 
8573         /* Read BIOS */
8574         if (!radeon_get_bios(rdev)) {
8575                 if (ASIC_IS_AVIVO(rdev))
8576                         return -EINVAL;
8577         }
8578         /* Must be an ATOMBIOS */
8579         if (!rdev->is_atom_bios) {
8580                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8581                 return -EINVAL;
8582         }
8583         r = radeon_atombios_init(rdev);
8584         if (r)
8585                 return r;
8586 
8587         /* Post card if necessary */
8588         if (!radeon_card_posted(rdev)) {
8589                 if (!rdev->bios) {
8590                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8591                         return -EINVAL;
8592                 }
8593                 DRM_INFO("GPU not posted. posting now...\n");
8594                 atom_asic_init(rdev->mode_info.atom_context);
8595         }
8596         /* init golden registers */
8597         cik_init_golden_registers(rdev);
8598         /* Initialize scratch registers */
8599         cik_scratch_init(rdev);
8600         /* Initialize surface registers */
8601         radeon_surface_init(rdev);
8602         /* Initialize clocks */
8603         radeon_get_clock_info(rdev->ddev);
8604 
8605         /* Fence driver */
8606         r = radeon_fence_driver_init(rdev);
8607         if (r)
8608                 return r;
8609 
8610         /* initialize memory controller */
8611         r = cik_mc_init(rdev);
8612         if (r)
8613                 return r;
8614         /* Memory manager */
8615         r = radeon_bo_init(rdev);
8616         if (r)
8617                 return r;
8618 
8619         if (rdev->flags & RADEON_IS_IGP) {
8620                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8621                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8622                         r = cik_init_microcode(rdev);
8623                         if (r) {
8624                                 DRM_ERROR("Failed to load firmware!\n");
8625                                 return r;
8626                         }
8627                 }
8628         } else {
8629                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8631                     !rdev->mc_fw) {
8632                         r = cik_init_microcode(rdev);
8633                         if (r) {
8634                                 DRM_ERROR("Failed to load firmware!\n");
8635                                 return r;
8636                         }
8637                 }
8638         }
8639 
8640         /* Initialize power management */
8641         radeon_pm_init(rdev);
8642 
8643         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8644         ring->ring_obj = NULL;
8645         r600_ring_init(rdev, ring, 1024 * 1024);
8646 
8647         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8648         ring->ring_obj = NULL;
8649         r600_ring_init(rdev, ring, 1024 * 1024);
8650         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8651         if (r)
8652                 return r;
8653 
8654         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8655         ring->ring_obj = NULL;
8656         r600_ring_init(rdev, ring, 1024 * 1024);
8657         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8658         if (r)
8659                 return r;
8660 
8661         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8662         ring->ring_obj = NULL;
8663         r600_ring_init(rdev, ring, 256 * 1024);
8664 
8665         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8666         ring->ring_obj = NULL;
8667         r600_ring_init(rdev, ring, 256 * 1024);
8668 
8669         cik_uvd_init(rdev);
8670         cik_vce_init(rdev);
8671 
8672         rdev->ih.ring_obj = NULL;
8673         r600_ih_ring_init(rdev, 64 * 1024);
8674 
8675         r = r600_pcie_gart_init(rdev);
8676         if (r)
8677                 return r;
8678 
8679         rdev->accel_working = true;
8680         r = cik_startup(rdev);
8681         if (r) {
8682                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8683                 cik_cp_fini(rdev);
8684                 cik_sdma_fini(rdev);
8685                 cik_irq_fini(rdev);
8686                 sumo_rlc_fini(rdev);
8687                 cik_mec_fini(rdev);
8688                 radeon_wb_fini(rdev);
8689                 radeon_ib_pool_fini(rdev);
8690                 radeon_vm_manager_fini(rdev);
8691                 radeon_irq_kms_fini(rdev);
8692                 cik_pcie_gart_fini(rdev);
8693                 rdev->accel_working = false;
8694         }
8695 
8696         /* Don't start up if the MC ucode is missing.
8697          * The default clocks and voltages before the MC ucode
8698          * is loaded are not sufficient for advanced operations.
8699          */
8700         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8701                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8702                 return -EINVAL;
8703         }
8704 
8705         return 0;
8706 }
8707 
8708 /**
8709  * cik_fini - asic specific driver and hw fini
8710  *
8711  * @rdev: radeon_device pointer
8712  *
8713  * Tear down the asic specific driver variables and program the hw
8714  * to an idle state (CIK).  Finally frees rdev->bios and clears the pointer.
8715  * Called at driver unload.
8716  */
8717 void cik_fini(struct radeon_device *rdev)
8718 {
8719         radeon_pm_fini(rdev); /* pairs with radeon_pm_init() in cik_init() */
8720         cik_cp_fini(rdev);
8721         cik_sdma_fini(rdev);
8722         cik_fini_pg(rdev);
8723         cik_fini_cg(rdev);
8724         cik_irq_fini(rdev);
8725         sumo_rlc_fini(rdev);
8726         cik_mec_fini(rdev);
8727         radeon_wb_fini(rdev);
8728         radeon_vm_manager_fini(rdev);
8729         radeon_ib_pool_fini(rdev);
8730         radeon_irq_kms_fini(rdev);
8731         uvd_v1_0_fini(rdev);
8732         radeon_uvd_fini(rdev);
8733         radeon_vce_fini(rdev);
8734         cik_pcie_gart_fini(rdev);
8735         r600_vram_scratch_fini(rdev);
8736         radeon_gem_fini(rdev);
8737         radeon_fence_driver_fini(rdev); /* pairs with radeon_fence_driver_init() in cik_init() */
8738         radeon_bo_fini(rdev); /* pairs with radeon_bo_init() in cik_init() */
8739         radeon_atombios_fini(rdev);
8740         kfree(rdev->bios);
8741         rdev->bios = NULL; /* do not leave a dangling pointer behind */
8742 }
8743 
8744 void dce8_program_fmt(struct drm_encoder *encoder)
8745 {
8746         struct drm_device *dev = encoder->dev;
8747         struct radeon_device *rdev = dev->dev_private;
8748         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8749         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8750         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8751         int bpc = 0;
8752         u32 tmp = 0;
8753         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8754 
8755         if (connector) {
8756                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8757                 bpc = radeon_get_monitor_bpc(connector);
8758                 dither = radeon_connector->dither;
8759         }
8760 
8761         /* LVDS/eDP FMT is set up by atom */
8762         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8763                 return;
8764 
8765         /* not needed for analog */
8766         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8767             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8768                 return;
8769 
8770         if (bpc == 0)
8771                 return;
8772 
8773         switch (bpc) {
8774         case 6:
8775                 if (dither == RADEON_FMT_DITHER_ENABLE)
8776                         /* XXX sort out optimal dither settings */
8777                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8778                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8779                 else
8780                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8781                 break;
8782         case 8:
8783                 if (dither == RADEON_FMT_DITHER_ENABLE)
8784                         /* XXX sort out optimal dither settings */
8785                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786                                 FMT_RGB_RANDOM_ENABLE |
8787                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8788                 else
8789                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8790                 break;
8791         case 10:
8792                 if (dither == RADEON_FMT_DITHER_ENABLE)
8793                         /* XXX sort out optimal dither settings */
8794                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795                                 FMT_RGB_RANDOM_ENABLE |
8796                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8797                 else
8798                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8799                 break;
8800         default:
8801                 /* not needed */
8802                 break;
8803         }
8804 
8805         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8806 }
8807 
8808 /* display watermark setup */
8809 /**
8810  * dce8_line_buffer_adjust - Set up the line buffer
8811  *
8812  * @rdev: radeon_device pointer
8813  * @radeon_crtc: the selected display controller
8814  * @mode: the current display mode on the selected display
8815  * controller
8816  *
8817  * Setup up the line buffer allocation for
8818  * the selected display controller (CIK).
8819  * Returns the line buffer size in pixels.
8820  */
8821 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8822                                    struct radeon_crtc *radeon_crtc,
8823                                    struct drm_display_mode *mode)
8824 {
8825         u32 tmp, buffer_alloc, i;
8826         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8827         /*
8828          * Line Buffer Setup
8829          * There are 6 line buffers, one for each display controllers.
8830          * There are 3 partitions per LB. Select the number of partitions
8831          * to enable based on the display width.  For display widths larger
8832          * than 4096, you need use to use 2 display controllers and combine
8833          * them using the stereo blender.
8834          */
8835         if (radeon_crtc->base.enabled && mode) {
8836                 if (mode->crtc_hdisplay < 1920) {
8837                         tmp = 1;
8838                         buffer_alloc = 2;
8839                 } else if (mode->crtc_hdisplay < 2560) {
8840                         tmp = 2;
8841                         buffer_alloc = 2;
8842                 } else if (mode->crtc_hdisplay < 4096) {
8843                         tmp = 0;
8844                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845                 } else {
8846                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8847                         tmp = 0;
8848                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8849                 }
8850         } else {
8851                 tmp = 1;
8852                 buffer_alloc = 0;
8853         }
8854 
8855         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8856                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8857 
8858         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8859                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8860         for (i = 0; i < rdev->usec_timeout; i++) {
8861                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8862                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8863                         break;
8864                 udelay(1);
8865         }
8866 
8867         if (radeon_crtc->base.enabled && mode) {
8868                 switch (tmp) {
8869                 case 0:
8870                 default:
8871                         return 4096 * 2;
8872                 case 1:
8873                         return 1920 * 2;
8874                 case 2:
8875                         return 2560 * 2;
8876                 }
8877         }
8878 
8879         /* controller not enabled, so no lb used */
8880         return 0;
8881 }
8882 
8883 /**
8884  * cik_get_number_of_dram_channels - get the number of dram channels
8885  *
8886  * @rdev: radeon_device pointer
8887  *
8888  * Look up the number of video ram channels (CIK).
8889  * Used for display watermark bandwidth calculations
8890  * Returns the number of dram channels
8891  */
8892 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8893 {
8894         u32 tmp = RREG32(MC_SHARED_CHMAP);
8895 
8896         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8897         case 0:
8898         default:
8899                 return 1;
8900         case 1:
8901                 return 2;
8902         case 2:
8903                 return 4;
8904         case 3:
8905                 return 8;
8906         case 4:
8907                 return 3;
8908         case 5:
8909                 return 6;
8910         case 6:
8911                 return 10;
8912         case 7:
8913                 return 12;
8914         case 8:
8915                 return 16;
8916         }
8917 }
8918 
8919 struct dce8_wm_params {
8920         u32 dram_channels; /* number of dram channels */
8921         u32 yclk;          /* bandwidth per dram data pin in kHz */
8922         u32 sclk;          /* engine clock in kHz */
8923         u32 disp_clk;      /* display clock in kHz */
8924         u32 src_width;     /* viewport width */
8925         u32 active_time;   /* active display time in ns */
8926         u32 blank_time;    /* blank time in ns */
8927         bool interlaced;    /* mode is interlaced */
8928         fixed20_12 vsc;    /* vertical scale ratio */
8929         u32 num_heads;     /* number of active crtcs */
8930         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8931         u32 lb_size;       /* line buffer allocated to pipe */
8932         u32 vtaps;         /* vertical scaler taps */
8933 };
8934 
8935 /**
8936  * dce8_dram_bandwidth - get the dram bandwidth
8937  *
8938  * @wm: watermark calculation data
8939  *
8940  * Calculate the raw dram bandwidth (CIK).
8941  * Used for display watermark bandwidth calculations
8942  * Returns the dram bandwidth in MBytes/s
8943  */
8944 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8945 {
8946         /* Calculate raw DRAM Bandwidth */
8947         fixed20_12 dram_efficiency; /* 0.7 */
8948         fixed20_12 yclk, dram_channels, bandwidth;
8949         fixed20_12 a;
8950 
8951         a.full = dfixed_const(1000);
8952         yclk.full = dfixed_const(wm->yclk);
8953         yclk.full = dfixed_div(yclk, a);
8954         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8955         a.full = dfixed_const(10);
8956         dram_efficiency.full = dfixed_const(7);
8957         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8958         bandwidth.full = dfixed_mul(dram_channels, yclk);
8959         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8960 
8961         return dfixed_trunc(bandwidth);
8962 }
8963 
8964 /**
8965  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8966  *
8967  * @wm: watermark calculation data
8968  *
8969  * Calculate the dram bandwidth used for display (CIK).
8970  * Used for display watermark bandwidth calculations
8971  * Returns the dram bandwidth for display in MBytes/s
8972  */
8973 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8974 {
8975         /* Calculate DRAM Bandwidth and the part allocated to display. */
8976         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8977         fixed20_12 yclk, dram_channels, bandwidth;
8978         fixed20_12 a;
8979 
8980         a.full = dfixed_const(1000);
8981         yclk.full = dfixed_const(wm->yclk);
8982         yclk.full = dfixed_div(yclk, a);
8983         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8984         a.full = dfixed_const(10);
8985         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8986         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8987         bandwidth.full = dfixed_mul(dram_channels, yclk);
8988         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8989 
8990         return dfixed_trunc(bandwidth);
8991 }
8992 
8993 /**
8994  * dce8_data_return_bandwidth - get the data return bandwidth
8995  *
8996  * @wm: watermark calculation data
8997  *
8998  * Calculate the data return bandwidth used for display (CIK).
8999  * Used for display watermark bandwidth calculations
9000  * Returns the data return bandwidth in MBytes/s
9001  */
9002 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9003 {
9004         /* Calculate the display Data return Bandwidth */
9005         fixed20_12 return_efficiency; /* 0.8 */
9006         fixed20_12 sclk, bandwidth;
9007         fixed20_12 a;
9008 
9009         a.full = dfixed_const(1000);
9010         sclk.full = dfixed_const(wm->sclk);
9011         sclk.full = dfixed_div(sclk, a);
9012         a.full = dfixed_const(10);
9013         return_efficiency.full = dfixed_const(8);
9014         return_efficiency.full = dfixed_div(return_efficiency, a);
9015         a.full = dfixed_const(32);
9016         bandwidth.full = dfixed_mul(a, sclk);
9017         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9018 
9019         return dfixed_trunc(bandwidth);
9020 }
9021 
9022 /**
9023  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9024  *
9025  * @wm: watermark calculation data
9026  *
9027  * Calculate the dmif bandwidth used for display (CIK).
9028  * Used for display watermark bandwidth calculations
9029  * Returns the dmif bandwidth in MBytes/s
9030  */
9031 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9032 {
9033         /* Calculate the DMIF Request Bandwidth */
9034         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9035         fixed20_12 disp_clk, bandwidth;
9036         fixed20_12 a, b;
9037 
9038         a.full = dfixed_const(1000);
9039         disp_clk.full = dfixed_const(wm->disp_clk);
9040         disp_clk.full = dfixed_div(disp_clk, a);
9041         a.full = dfixed_const(32);
9042         b.full = dfixed_mul(a, disp_clk);
9043 
9044         a.full = dfixed_const(10);
9045         disp_clk_request_efficiency.full = dfixed_const(8);
9046         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9047 
9048         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9049 
9050         return dfixed_trunc(bandwidth);
9051 }
9052 
9053 /**
9054  * dce8_available_bandwidth - get the min available bandwidth
9055  *
9056  * @wm: watermark calculation data
9057  *
9058  * Calculate the min available bandwidth used for display (CIK).
9059  * Used for display watermark bandwidth calculations
9060  * Returns the min available bandwidth in MBytes/s
9061  */
9062 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9063 {
9064         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9065         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9066         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9067         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9068 
9069         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9070 }
9071 
9072 /**
9073  * dce8_average_bandwidth - get the average available bandwidth
9074  *
9075  * @wm: watermark calculation data
9076  *
9077  * Calculate the average available bandwidth used for display (CIK).
9078  * Used for display watermark bandwidth calculations
9079  * Returns the average available bandwidth in MBytes/s
9080  */
9081 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9082 {
9083         /* Calculate the display mode Average Bandwidth
9084          * DisplayMode should contain the source and destination dimensions,
9085          * timing, etc.
9086          */
9087         fixed20_12 bpp;
9088         fixed20_12 line_time;
9089         fixed20_12 src_width;
9090         fixed20_12 bandwidth;
9091         fixed20_12 a;
9092 
9093         a.full = dfixed_const(1000);
9094         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9095         line_time.full = dfixed_div(line_time, a);
9096         bpp.full = dfixed_const(wm->bytes_per_pixel);
9097         src_width.full = dfixed_const(wm->src_width);
9098         bandwidth.full = dfixed_mul(src_width, bpp);
9099         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9100         bandwidth.full = dfixed_div(bandwidth, line_time);
9101 
9102         return dfixed_trunc(bandwidth);
9103 }
9104 
9105 /**
9106  * dce8_latency_watermark - get the latency watermark
9107  *
9108  * @wm: watermark calculation data
9109  *
9110  * Calculate the latency watermark (CIK).
9111  * Used for display watermark bandwidth calculations
9112  * Returns the latency watermark in ns
9113  */
9114 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9115 {
9116         /* First calculate the latency in ns */
9117         u32 mc_latency = 2000; /* 2000 ns. */
9118         u32 available_bandwidth = dce8_available_bandwidth(wm);
9119         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9120         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9121         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9122         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9123                 (wm->num_heads * cursor_line_pair_return_time);
9124         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9125         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9126         u32 tmp, dmif_size = 12288;
9127         fixed20_12 a, b, c;
9128 
9129         if (wm->num_heads == 0)
9130                 return 0;
9131 
9132         a.full = dfixed_const(2);
9133         b.full = dfixed_const(1);
9134         if ((wm->vsc.full > a.full) ||
9135             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9136             (wm->vtaps >= 5) ||
9137             ((wm->vsc.full >= a.full) && wm->interlaced))
9138                 max_src_lines_per_dst_line = 4;
9139         else
9140                 max_src_lines_per_dst_line = 2;
9141 
9142         a.full = dfixed_const(available_bandwidth);
9143         b.full = dfixed_const(wm->num_heads);
9144         a.full = dfixed_div(a, b);
9145         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9146         tmp = min(dfixed_trunc(a), tmp);
9147 
9148         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9149 
9150         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9151         b.full = dfixed_const(1000);
9152         c.full = dfixed_const(lb_fill_bw);
9153         b.full = dfixed_div(c, b);
9154         a.full = dfixed_div(a, b);
9155         line_fill_time = dfixed_trunc(a);
9156 
9157         if (line_fill_time < wm->active_time)
9158                 return latency;
9159         else
9160                 return latency + (line_fill_time - wm->active_time);
9161 
9162 }
9163 
9164 /**
9165  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9166  * average and available dram bandwidth
9167  *
9168  * @wm: watermark calculation data
9169  *
9170  * Check if the display average bandwidth fits in the display
9171  * dram bandwidth (CIK).
9172  * Used for display watermark bandwidth calculations
9173  * Returns true if the display fits, false if not.
9174  */
9175 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9176 {
9177         if (dce8_average_bandwidth(wm) <=
9178             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9179                 return true;
9180         else
9181                 return false;
9182 }
9183 
9184 /**
9185  * dce8_average_bandwidth_vs_available_bandwidth - check
9186  * average and available bandwidth
9187  *
9188  * @wm: watermark calculation data
9189  *
9190  * Check if the display average bandwidth fits in the display
9191  * available bandwidth (CIK).
9192  * Used for display watermark bandwidth calculations
9193  * Returns true if the display fits, false if not.
9194  */
9195 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9196 {
9197         if (dce8_average_bandwidth(wm) <=
9198             (dce8_available_bandwidth(wm) / wm->num_heads))
9199                 return true;
9200         else
9201                 return false;
9202 }
9203 
9204 /**
9205  * dce8_check_latency_hiding - check latency hiding
9206  *
9207  * @wm: watermark calculation data
9208  *
9209  * Check latency hiding (CIK).
9210  * Used for display watermark bandwidth calculations
9211  * Returns true if the display fits, false if not.
9212  */
9213 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9214 {
9215         u32 lb_partitions = wm->lb_size / wm->src_width;
9216         u32 line_time = wm->active_time + wm->blank_time;
9217         u32 latency_tolerant_lines;
9218         u32 latency_hiding;
9219         fixed20_12 a;
9220 
9221         a.full = dfixed_const(1);
9222         if (wm->vsc.full > a.full)
9223                 latency_tolerant_lines = 1;
9224         else {
9225                 if (lb_partitions <= (wm->vtaps + 1))
9226                         latency_tolerant_lines = 1;
9227                 else
9228                         latency_tolerant_lines = 2;
9229         }
9230 
9231         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9232 
9233         if (dce8_latency_watermark(wm) <= latency_hiding)
9234                 return true;
9235         else
9236                 return false;
9237 }
9238 
9239 /**
9240  * dce8_program_watermarks - program display watermarks
9241  *
9242  * @rdev: radeon_device pointer
9243  * @radeon_crtc: the selected display controller
9244  * @lb_size: line buffer size
9245  * @num_heads: number of display controllers in use
9246  *
9247  * Calculate and program the display watermarks for the
9248  * selected display controller (CIK).
9249  */
9250 static void dce8_program_watermarks(struct radeon_device *rdev,
9251                                     struct radeon_crtc *radeon_crtc,
9252                                     u32 lb_size, u32 num_heads)
9253 {
9254         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9255         struct dce8_wm_params wm_low, wm_high;
9256         u32 active_time;
9257         u32 line_time = 0;
9258         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9259         u32 tmp, wm_mask;
9260 
9261         if (radeon_crtc->base.enabled && num_heads && mode) {
9262                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9263                                             (u32)mode->clock);
9264                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9265                                           (u32)mode->clock);
9266                 line_time = min(line_time, (u32)65535);
9267 
9268                 /* watermark for high clocks */
9269                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9270                     rdev->pm.dpm_enabled) {
9271                         wm_high.yclk =
9272                                 radeon_dpm_get_mclk(rdev, false) * 10;
9273                         wm_high.sclk =
9274                                 radeon_dpm_get_sclk(rdev, false) * 10;
9275                 } else {
9276                         wm_high.yclk = rdev->pm.current_mclk * 10;
9277                         wm_high.sclk = rdev->pm.current_sclk * 10;
9278                 }
9279 
9280                 wm_high.disp_clk = mode->clock;
9281                 wm_high.src_width = mode->crtc_hdisplay;
9282                 wm_high.active_time = active_time;
9283                 wm_high.blank_time = line_time - wm_high.active_time;
9284                 wm_high.interlaced = false;
9285                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9286                         wm_high.interlaced = true;
9287                 wm_high.vsc = radeon_crtc->vsc;
9288                 wm_high.vtaps = 1;
9289                 if (radeon_crtc->rmx_type != RMX_OFF)
9290                         wm_high.vtaps = 2;
9291                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9292                 wm_high.lb_size = lb_size;
9293                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9294                 wm_high.num_heads = num_heads;
9295 
9296                 /* set for high clocks */
9297                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9298 
9299                 /* possibly force display priority to high */
9300                 /* should really do this at mode validation time... */
9301                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9302                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9303                     !dce8_check_latency_hiding(&wm_high) ||
9304                     (rdev->disp_priority == 2)) {
9305                         DRM_DEBUG_KMS("force priority to high\n");
9306                 }
9307 
9308                 /* watermark for low clocks */
9309                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9310                     rdev->pm.dpm_enabled) {
9311                         wm_low.yclk =
9312                                 radeon_dpm_get_mclk(rdev, true) * 10;
9313                         wm_low.sclk =
9314                                 radeon_dpm_get_sclk(rdev, true) * 10;
9315                 } else {
9316                         wm_low.yclk = rdev->pm.current_mclk * 10;
9317                         wm_low.sclk = rdev->pm.current_sclk * 10;
9318                 }
9319 
9320                 wm_low.disp_clk = mode->clock;
9321                 wm_low.src_width = mode->crtc_hdisplay;
9322                 wm_low.active_time = active_time;
9323                 wm_low.blank_time = line_time - wm_low.active_time;
9324                 wm_low.interlaced = false;
9325                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9326                         wm_low.interlaced = true;
9327                 wm_low.vsc = radeon_crtc->vsc;
9328                 wm_low.vtaps = 1;
9329                 if (radeon_crtc->rmx_type != RMX_OFF)
9330                         wm_low.vtaps = 2;
9331                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9332                 wm_low.lb_size = lb_size;
9333                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9334                 wm_low.num_heads = num_heads;
9335 
9336                 /* set for low clocks */
9337                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9338 
9339                 /* possibly force display priority to high */
9340                 /* should really do this at mode validation time... */
9341                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9342                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9343                     !dce8_check_latency_hiding(&wm_low) ||
9344                     (rdev->disp_priority == 2)) {
9345                         DRM_DEBUG_KMS("force priority to high\n");
9346                 }
9347 
9348                 /* Save number of lines the linebuffer leads before the scanout */
9349                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9350         }
9351 
9352         /* select wm A */
9353         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9354         tmp = wm_mask;
9355         tmp &= ~LATENCY_WATERMARK_MASK(3);
9356         tmp |= LATENCY_WATERMARK_MASK(1);
9357         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9358         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9359                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9360                 LATENCY_HIGH_WATERMARK(line_time)));
9361         /* select wm B */
9362         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9363         tmp &= ~LATENCY_WATERMARK_MASK(3);
9364         tmp |= LATENCY_WATERMARK_MASK(2);
9365         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9366         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9367                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9368                 LATENCY_HIGH_WATERMARK(line_time)));
9369         /* restore original selection */
9370         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9371 
9372         /* save values for DPM */
9373         radeon_crtc->line_time = line_time;
9374         radeon_crtc->wm_high = latency_watermark_a;
9375         radeon_crtc->wm_low = latency_watermark_b;
9376 }
9377 
9378 /**
9379  * dce8_bandwidth_update - program display watermarks
9380  *
9381  * @rdev: radeon_device pointer
9382  *
9383  * Calculate and program the display watermarks and line
9384  * buffer allocation (CIK).
9385  */
9386 void dce8_bandwidth_update(struct radeon_device *rdev)
9387 {
9388         struct drm_display_mode *mode = NULL;
9389         u32 num_heads = 0, lb_size;
9390         int i;
9391 
9392         if (!rdev->mode_info.mode_config_initialized)
9393                 return;
9394 
9395         radeon_update_display_priority(rdev);
9396 
9397         for (i = 0; i < rdev->num_crtc; i++) {
9398                 if (rdev->mode_info.crtcs[i]->base.enabled)
9399                         num_heads++;
9400         }
9401         for (i = 0; i < rdev->num_crtc; i++) {
9402                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9403                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9404                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9405         }
9406 }
9407 
9408 /**
9409  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9410  *
9411  * @rdev: radeon_device pointer
9412  *
9413  * Fetches a GPU clock counter snapshot (SI).
9414  * Returns the 64 bit clock counter snapshot.
9415  */
9416 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9417 {
9418         uint64_t clock;
9419 
9420         mutex_lock(&rdev->gpu_clock_mutex);
9421         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9422         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9423                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9424         mutex_unlock(&rdev->gpu_clock_mutex);
9425         return clock;
9426 }
9427 
9428 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9429                              u32 cntl_reg, u32 status_reg)
9430 {
9431         int r, i;
9432         struct atom_clock_dividers dividers;
9433         uint32_t tmp;
9434 
9435         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9436                                            clock, false, &dividers);
9437         if (r)
9438                 return r;
9439 
9440         tmp = RREG32_SMC(cntl_reg);
9441         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9442         tmp |= dividers.post_divider;
9443         WREG32_SMC(cntl_reg, tmp);
9444 
9445         for (i = 0; i < 100; i++) {
9446                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9447                         break;
9448                 mdelay(10);
9449         }
9450         if (i == 100)
9451                 return -ETIMEDOUT;
9452 
9453         return 0;
9454 }
9455 
9456 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9457 {
9458         int r = 0;
9459 
9460         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9461         if (r)
9462                 return r;
9463 
9464         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9465         return r;
9466 }
9467 
9468 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9469 {
9470         int r, i;
9471         struct atom_clock_dividers dividers;
9472         u32 tmp;
9473 
9474         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475                                            ecclk, false, &dividers);
9476         if (r)
9477                 return r;
9478 
9479         for (i = 0; i < 100; i++) {
9480                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9481                         break;
9482                 mdelay(10);
9483         }
9484         if (i == 100)
9485                 return -ETIMEDOUT;
9486 
9487         tmp = RREG32_SMC(CG_ECLK_CNTL);
9488         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9489         tmp |= dividers.post_divider;
9490         WREG32_SMC(CG_ECLK_CNTL, tmp);
9491 
9492         for (i = 0; i < 100; i++) {
9493                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9494                         break;
9495                 mdelay(10);
9496         }
9497         if (i == 100)
9498                 return -ETIMEDOUT;
9499 
9500         return 0;
9501 }
9502 
9503 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9504 {
9505         struct pci_dev *root = rdev->pdev->bus->self;
9506         enum pci_bus_speed speed_cap;
9507         int bridge_pos, gpu_pos;
9508         u32 speed_cntl, current_data_rate;
9509         int i;
9510         u16 tmp16;
9511 
9512         if (pci_is_root_bus(rdev->pdev->bus))
9513                 return;
9514 
9515         if (radeon_pcie_gen2 == 0)
9516                 return;
9517 
9518         if (rdev->flags & RADEON_IS_IGP)
9519                 return;
9520 
9521         if (!(rdev->flags & RADEON_IS_PCIE))
9522                 return;
9523 
9524         speed_cap = pcie_get_speed_cap(root);
9525         if (speed_cap == PCI_SPEED_UNKNOWN)
9526                 return;
9527 
9528         if ((speed_cap != PCIE_SPEED_8_0GT) &&
9529             (speed_cap != PCIE_SPEED_5_0GT))
9530                 return;
9531 
9532         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9533         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9534                 LC_CURRENT_DATA_RATE_SHIFT;
9535         if (speed_cap == PCIE_SPEED_8_0GT) {
9536                 if (current_data_rate == 2) {
9537                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9538                         return;
9539                 }
9540                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9541         } else if (speed_cap == PCIE_SPEED_5_0GT) {
9542                 if (current_data_rate == 1) {
9543                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9544                         return;
9545                 }
9546                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9547         }
9548 
9549         bridge_pos = pci_pcie_cap(root);
9550         if (!bridge_pos)
9551                 return;
9552 
9553         gpu_pos = pci_pcie_cap(rdev->pdev);
9554         if (!gpu_pos)
9555                 return;
9556 
9557         if (speed_cap == PCIE_SPEED_8_0GT) {
9558                 /* re-try equalization if gen3 is not already enabled */
9559                 if (current_data_rate != 2) {
9560                         u16 bridge_cfg, gpu_cfg;
9561                         u16 bridge_cfg2, gpu_cfg2;
9562                         u32 max_lw, current_lw, tmp;
9563 
9564                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9565                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9566 
9567                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9568                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9569 
9570                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9571                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9572 
9573                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9574                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9575                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9576 
9577                         if (current_lw < max_lw) {
9578                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9579                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9580                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9581                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9582                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9583                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9584                                 }
9585                         }
9586 
9587                         for (i = 0; i < 10; i++) {
9588                                 /* check status */
9589                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9590                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9591                                         break;
9592 
9593                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9594                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9595 
9596                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9597                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9598 
9599                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9600                                 tmp |= LC_SET_QUIESCE;
9601                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9602 
9603                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9604                                 tmp |= LC_REDO_EQ;
9605                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9606 
9607                                 msleep(100);
9608 
9609                                 /* linkctl */
9610                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9611                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9612                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9613                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9614 
9615                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9616                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9617                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9618                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9619 
9620                                 /* linkctl2 */
9621                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9622                                 tmp16 &= ~((1 << 4) | (7 << 9));
9623                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9624                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9625 
9626                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9627                                 tmp16 &= ~((1 << 4) | (7 << 9));
9628                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9629                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9630 
9631                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9632                                 tmp &= ~LC_SET_QUIESCE;
9633                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9634                         }
9635                 }
9636         }
9637 
9638         /* set the link speed */
9639         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9640         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9641         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9642 
9643         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9644         tmp16 &= ~0xf;
9645         if (speed_cap == PCIE_SPEED_8_0GT)
9646                 tmp16 |= 3; /* gen3 */
9647         else if (speed_cap == PCIE_SPEED_5_0GT)
9648                 tmp16 |= 2; /* gen2 */
9649         else
9650                 tmp16 |= 1; /* gen1 */
9651         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9652 
9653         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9654         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9655         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9656 
9657         for (i = 0; i < rdev->usec_timeout; i++) {
9658                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9659                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9660                         break;
9661                 udelay(1);
9662         }
9663 }
9664 
/**
 * cik_program_aspm - program the ASPM related PCIE registers
 *
 * @rdev: radeon_device pointer
 *
 * Returns without doing anything when radeon_aspm is 0, on IGPs and on
 * non-PCIE parts.  Otherwise each register is read, modified and only
 * written back when its value actually changed.  The disable_* locals
 * are all initialized to false and never modified here, so every
 * "!disable_*" branch below is taken.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* override the transmitted N_FTS value with 0x24 */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /*
         * Build the new PCIE_LC_CNTL value: clear both inactivity fields,
         * then set them to 7 for each state that is not disabled.  The
         * write happens in whichever L1 branch below is taken.
         */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* set the PLL power state fields to 7 on PB0 and PB1 */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        /*
                         * clk_req_support is taken from the CLKPM bit in
                         * the link capabilities of the device owning the
                         * GPU's bus; it stays false on a root bus.
                         */
                        if (!disable_clkreq &&
                            !pci_is_root_bus(rdev->pdev->bus)) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        /* with clkreq available, also reprogram the SMC clock selects */
                        if (clk_req_support) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* L1 disabled: write PCIE_LC_CNTL with LC_PMI_TO_L1_DIS left set */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        /*
         * Clear the L0s inactivity field again when the LC_N_FTS field
         * reads back as all ones and both LC_REVERSE_XMIT and
         * LC_REVERSE_RCVR are set in PCIE_LC_STATUS1.
         */
        if (!disable_l0s) {
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}

/* [<][>][^][v][top][bottom][index][help] */