root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c


DEFINITIONS

This source file includes the following definitions.
  1. get_tile_config
  2. amdgpu_amdkfd_gfx_8_0_get_functions
  3. get_amdgpu_device
  4. lock_srbm
  5. unlock_srbm
  6. acquire_queue
  7. release_queue
  8. kgd_program_sh_mem_settings
  9. kgd_set_pasid_vmid_mapping
  10. kgd_init_interrupts
  11. get_sdma_base_addr
  12. get_mqd
  13. get_sdma_mqd
  14. kgd_hqd_load
  15. kgd_hqd_dump
  16. kgd_hqd_sdma_load
  17. kgd_hqd_sdma_dump
  18. kgd_hqd_is_occupied
  19. kgd_hqd_sdma_is_occupied
  20. kgd_hqd_destroy
  21. kgd_hqd_sdma_destroy
  22. get_atc_vmid_pasid_mapping_valid
  23. get_atc_vmid_pasid_mapping_pasid
  24. kgd_address_watch_disable
  25. kgd_address_watch_execute
  26. kgd_wave_control_execute
  27. kgd_address_watch_get_offset
  28. set_scratch_backing_va
  29. set_vm_context_page_table_base
  30. invalidate_tlbs
  31. invalidate_tlbs_vmid

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

enum hqd_dequeue_request_type {
        NO_ACTION = 0,
        DRAIN_PIPE,
        RESET_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t sh_mem_config,
                uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
                uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                uint8_t vmid);
static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
                struct tile_config *config)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

        config->gb_addr_config = adev->gfx.config.gb_addr_config;
        config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFBANK);
        config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFRANKS);

        config->tile_config_ptr = adev->gfx.config.tile_mode_array;
        config->num_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.tile_mode_array);
        config->macro_tile_config_ptr =
                        adev->gfx.config.macrotile_mode_array;
        config->num_macro_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

        return 0;
}
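
/* Illustrative note, added here and not part of the original file: the
 * REG_GET_FIELD() macro pastes together the per-ASIC <reg>__<field>_MASK and
 * <reg>__<field>__SHIFT defines, so the NOOFBANK lookup above expands to
 * roughly
 *
 *      (adev->gfx.config.mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFBANK_MASK) >>
 *                      MC_ARB_RAMCFG__NOOFBANK__SHIFT
 *
 * Those field defines come from the gmc_8_1 headers included above, which is
 * why this helper lives in the GFXv8-specific file rather than in common code.
 */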

static const struct kfd2kgd_calls kfd2kgd = {
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
        .address_watch_disable = kgd_address_watch_disable,
        .address_watch_execute = kgd_address_watch_execute,
        .wave_control_execute = kgd_wave_control_execute,
        .address_watch_get_offset = kgd_address_watch_get_offset,
        .get_atc_vmid_pasid_mapping_pasid =
                        get_atc_vmid_pasid_mapping_pasid,
        .get_atc_vmid_pasid_mapping_valid =
                        get_atc_vmid_pasid_mapping_valid,
        .set_scratch_backing_va = set_scratch_backing_va,
        .get_tile_config = get_tile_config,
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
        return (struct kfd2kgd_calls *)&kfd2kgd;
}
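
/* Illustrative sketch, not part of the original file: this exported helper is
 * the only way the rest of the stack reaches the static functions above. A
 * probe path along the following lines (the surrounding function and the set
 * of ASIC cases are assumptions for illustration, not taken from this file)
 * would select the GFXv8 table for VI-family parts and hand it to KFD, which
 * then calls back exclusively through the kfd2kgd_calls function pointers:
 *
 *      struct kfd2kgd_calls *kfd2kgd = NULL;
 *
 *      switch (adev->asic_type) {
 *      case CHIP_CARRIZO:
 *      case CHIP_TONGA:
 *      case CHIP_FIJI:
 *              kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 *              break;
 *      default:
 *              return;
 *      }
 */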

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
        return (struct amdgpu_device *)kgd;
}

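/* Added note, not in the original file: writing mmSRBM_GFX_CNTL selects which
 * MEC/pipe/queue instance and which VMID the subsequent banked register
 * accesses (SH_MEM_*, CP_MQD_*, CP_HQD_*) are routed to. The helpers below
 * hold adev->srbm_mutex across the whole select/program/deselect sequence so
 * that a concurrent caller cannot retarget the bank in the middle of an
 * update.
 */
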
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
                        uint32_t queue, uint32_t vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

        mutex_lock(&adev->srbm_mutex);
        WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        WREG32(mmSRBM_GFX_CNTL, 0);
        mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
        unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                                        uint32_t sh_mem_config,
                                        uint32_t sh_mem_ape1_base,
                                        uint32_t sh_mem_ape1_limit,
                                        uint32_t sh_mem_bases)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        lock_srbm(kgd, 0, 0, 0, vmid);

        WREG32(mmSH_MEM_CONFIG, sh_mem_config);
        WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
        WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
        WREG32(mmSH_MEM_BASES, sh_mem_bases);

        unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        /*
         * We have to assume that there is no outstanding mapping.
         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
         * a mapping is in progress or because a mapping finished
         * and the SW cleared it.
         * So the protocol is to always wait & clear.
         */
        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
                        ATC_VMID0_PASID_MAPPING__VALID_MASK;

        WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

        while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
                cpu_relax();
        WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

        /* Mapping vmid to pasid also for IH block */
        WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

        return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t mec;
        uint32_t pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, 0, 0);

        WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
                        CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

        unlock_srbm(kgd);

        return 0;
}

static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
        uint32_t retval;

        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
                m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
        pr_debug("sdma base address: 0x%x\n", retval);

        return retval;
}
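
/* Worked example, added for illustration and not in the original file: with
 * m->sdma_engine_id == 1 and m->sdma_queue_id == 2 the helper above returns
 * SDMA1_REGISTER_OFFSET + 2 * KFD_VI_SDMA_QUEUE_OFFSET, i.e. the per-queue
 * register block of RLC queue 2 on the second SDMA engine. Despite the
 * "base address" wording in the debug print, the value is a register offset
 * that callers add to the mmSDMA0_RLC0_* register indices, not a memory
 * address.
 */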

static inline struct vi_mqd *get_mqd(void *mqd)
{
        return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
        return (struct vi_sdma_mqd *)mqd;
}

static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_mqd *m;
        uint32_t *mqd_hqd;
        uint32_t reg, wptr_val, data;
        bool valid_wptr = false;

        m = get_mqd(mqd);

        acquire_queue(kgd, pipe_id, queue_id);

        /* HIQ is set during driver init period with vmid set to 0*/
        if (m->cp_hqd_vmid == 0) {
                uint32_t value, mec, pipe;

                mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
                pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

                pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
                        mec, pipe, queue_id);
                value = RREG32(mmRLC_CP_SCHEDULERS);
                value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
                        ((mec << 5) | (pipe << 3) | queue_id | 0x80));
                WREG32(mmRLC_CP_SCHEDULERS, value);
        }

        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
        }

        for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Copy userspace write pointer value to register.
         * Activate doorbell logic to monitor subsequent changes.
         */
        data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                             CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
        /* read_user_wptr may take the mm->mmap_sem.
         * release srbm_mutex to avoid circular dependency between
         * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
         */
        release_queue(kgd);
        valid_wptr = read_user_wptr(mm, wptr, wptr_val);
        acquire_queue(kgd, pipe_id, queue_id);
        if (valid_wptr)
                WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

        data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
        WREG32(mmCP_HQD_ACTIVE, data);

        release_queue(kgd);

        return 0;
}

static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {                             \
                if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
                        break;                          \
                (*dump)[i][0] = (addr) << 2;            \
                (*dump)[i++][1] = RREG32(addr);         \
        } while (0)

        *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        acquire_queue(kgd, pipe_id, queue_id);

        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
                DUMP_REG(reg);

        release_queue(kgd);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}
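
/* Added note, not in the original file: on success *dump points to a
 * kmalloc_array() buffer of HQD_N_REGS {register offset, value} pairs, where
 * the offset is the dword register index shifted left by 2 (a byte offset),
 * and *n_regs reports how many entries were filled in. Freeing the buffer is
 * left to the caller.
 */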

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_sdma_mqd *m;
        unsigned long end_jiffies;
        uint32_t sdma_base_addr;
        uint32_t data;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

        end_jiffies = msecs_to_jiffies(2000) + jiffies;
        while (true) {
                data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }
        if (m->sdma_engine_id) {
                data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
        } else {
                data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
        }

        data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
                             ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);

        if (read_user_wptr(mm, wptr, data))
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
        else
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
                       m->sdmax_rlcx_rb_rptr);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
                                m->sdmax_rlcx_virtual_addr);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
                        m->sdmax_rlcx_rb_base_hi);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
                        m->sdmax_rlcx_rb_rptr_addr_lo);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
                        m->sdmax_rlcx_rb_rptr_addr_hi);

        data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
                             RB_ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

        return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
                queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
        uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

        *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
             reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
             reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
             reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
             reg++)
                DUMP_REG(sdma_offset + reg);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t act;
        bool retval = false;
        uint32_t low, high;

        acquire_queue(kgd, pipe_id, queue_id);
        act = RREG32(mmCP_HQD_ACTIVE);
        if (act) {
                low = lower_32_bits(queue_address >> 8);
                high = upper_32_bits(queue_address >> 8);

                if (low == RREG32(mmCP_HQD_PQ_BASE) &&
                                high == RREG32(mmCP_HQD_PQ_BASE_HI))
                        retval = true;
        }
        release_queue(kgd);
        return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_sdma_mqd *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

        if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
                return true;

        return false;
}

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
        enum hqd_dequeue_request_type type;
        unsigned long flags, end_jiffies;
        int retry;
        struct vi_mqd *m = get_mqd(mqd);

        if (adev->in_gpu_reset)
                return -EIO;

        acquire_queue(kgd, pipe_id, queue_id);

        if (m->cp_hqd_vmid == 0)
                WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

        switch (reset_type) {
        case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
                type = DRAIN_PIPE;
                break;
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
        default:
                type = DRAIN_PIPE;
                break;
        }

        /* Workaround: If IQ timer is active and the wait time is close to or
         * equal to 0, dequeueing is not safe. Wait until either the wait time
         * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
         * cleared before continuing. Also, ensure wait times are set to at
         * least 0x3.
         */
        local_irq_save(flags);
        preempt_disable();
        retry = 5000; /* wait for 500 usecs at maximum */
        while (true) {
                temp = RREG32(mmCP_HQD_IQ_TIMER);
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
                        pr_debug("HW is processing IQ\n");
                        goto loop;
                }
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
                                        == 3) /* SEM-rearm is safe */
                                break;
                        /* Wait time 3 is safe for CP, but our MMIO read/write
                         * time is close to 1 microsecond, so check for 10 to
                         * leave more buffer room
                         */
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
                                        >= 10)
                                break;
                        pr_debug("IQ timer is active\n");
                } else
                        break;
loop:
                if (!retry) {
                        pr_err("CP HQD IQ timer status time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        retry = 1000;
        while (true) {
                temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
                        break;
                pr_debug("Dequeue request is pending\n");

                if (!retry) {
                        pr_err("CP HQD dequeue request time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        local_irq_restore(flags);
        preempt_enable();

        WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

        end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
                if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
                if (time_after(jiffies, end_jiffies)) {
                        pr_err("cp queue preemption time out.\n");
                        release_queue(kgd);
                        return -ETIME;
                }
                usleep_range(500, 1000);
        }

        release_queue(kgd);
        return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_sdma_mqd *m;
        uint32_t sdma_base_addr;
        uint32_t temp;
        unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
        temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

        while (true) {
                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }

        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
                SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

        m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

        return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                                                        uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                                uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
        return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo)
{
        return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t data = 0;

        mutex_lock(&adev->grbm_idx_mutex);

        WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
        WREG32(mmSQ_CMD, sq_cmd);

        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
                INSTANCE_BROADCAST_WRITES, 1);
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
                SH_BROADCAST_WRITES, 1);
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
                SE_BROADCAST_WRITES, 1);

        WREG32(mmGRBM_GFX_INDEX, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset)
{
        return 0;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        lock_srbm(kgd, 0, 0, 0, vmid);
        WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
        unlock_srbm(kgd);
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                uint64_t page_table_base)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
                pr_err("trying to set page table base for wrong VMID\n");
                return;
        }
        WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
                        lower_32_bits(page_table_base));
}

static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
        int vmid;
        unsigned int tmp;

        if (adev->in_gpu_reset)
                return -EIO;

        for (vmid = 0; vmid < 16; vmid++) {
                if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
                        continue;

                tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
                if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
                        (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
                        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
                        RREG32(mmVM_INVALIDATE_RESPONSE);
                        break;
                }
        }

        return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
                pr_err("non kfd vmid %d\n", vmid);
                return -EINVAL;
        }

        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
        RREG32(mmVM_INVALIDATE_RESPONSE);
        return 0;
}
