root/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. kfd_debugfs_init
  2. kfd_debugfs_fini

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  */
  22 
  23 #ifndef KFD_PRIV_H_INCLUDED
  24 #define KFD_PRIV_H_INCLUDED
  25 
  26 #include <linux/hashtable.h>
  27 #include <linux/mmu_notifier.h>
  28 #include <linux/mutex.h>
  29 #include <linux/types.h>
  30 #include <linux/atomic.h>
  31 #include <linux/workqueue.h>
  32 #include <linux/spinlock.h>
  33 #include <linux/kfd_ioctl.h>
  34 #include <linux/idr.h>
  35 #include <linux/kfifo.h>
  36 #include <linux/seq_file.h>
  37 #include <linux/kref.h>
  38 #include <linux/sysfs.h>
  39 #include <kgd_kfd_interface.h>
  40 
  41 #include "amd_shared.h"
  42 
  43 #define KFD_MAX_RING_ENTRY_SIZE 8
  44 
  45 #define KFD_SYSFS_FILE_MODE 0444
  46 
  47 /* GPU ID hash width in bits */
  48 #define KFD_GPU_ID_HASH_WIDTH 16
  49 
  50 /* Use upper bits of mmap offset to store KFD driver specific information.
  51  * BITS[63:62] - Encode MMAP type
  52  * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
  53  * BITS[45:0]  - MMAP offset value
  54  *
  55  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  56  *  defines are w.r.t to PAGE_SIZE
  57  */
  58 #define KFD_MMAP_TYPE_SHIFT     (62 - PAGE_SHIFT)
  59 #define KFD_MMAP_TYPE_MASK      (0x3ULL << KFD_MMAP_TYPE_SHIFT)
  60 #define KFD_MMAP_TYPE_DOORBELL  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
  61 #define KFD_MMAP_TYPE_EVENTS    (0x2ULL << KFD_MMAP_TYPE_SHIFT)
  62 #define KFD_MMAP_TYPE_RESERVED_MEM      (0x1ULL << KFD_MMAP_TYPE_SHIFT)
  63 #define KFD_MMAP_TYPE_MMIO      (0x0ULL << KFD_MMAP_TYPE_SHIFT)
  64 
  65 #define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
  66 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
  67                                 << KFD_MMAP_GPU_ID_SHIFT)
  68 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
  69                                 & KFD_MMAP_GPU_ID_MASK)
  70 #define KFD_MMAP_GPU_ID_GET(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
  71                                 >> KFD_MMAP_GPU_ID_SHIFT)
  72 
  73 #define KFD_MMAP_OFFSET_VALUE_MASK      (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
  74 #define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
  75 
  76 /*
  77  * When working with the cp scheduler we should assign the HIQ manually or
  78  * via the amdgpu driver to a fixed hqd slot. Here are the fixed HIQ hqd slot
  79  * definitions for Kaveri. In Kaveri only the queues of the first ME
  80  * participate in cp scheduling; taking that into account, we set the HIQ
  81  * slot in the second ME.
  82  */
  83 #define KFD_CIK_HIQ_PIPE 4
  84 #define KFD_CIK_HIQ_QUEUE 0
  85 
  86 /* Macro for allocating structures */
  87 #define kfd_alloc_struct(ptr_to_struct) \
  88         ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
  89 
  90 #define KFD_MAX_NUM_OF_PROCESSES 512
  91 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
  92 
  93 /*
  94  * Size of the per-process TBA+TMA buffer: 2 pages
  95  *
  96  * The first page is the TBA used for the CWSR ISA code. The second
  97  * page is used as TMA for daisy chaining a user-mode trap handler.
  98  */
  99 #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
 100 #define KFD_CWSR_TMA_OFFSET PAGE_SIZE
 101 
 102 #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE                \
 103         (KFD_MAX_NUM_OF_PROCESSES *                     \
 104                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 105 
 106 #define KFD_KERNEL_QUEUE_SIZE 2048
 107 
 108 #define KFD_UNMAP_LATENCY_MS    (4000)
 109 
 110 /*
 111  * 512 = 0x200
 112  * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the
 113  * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA.
 114  * 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC
 115  * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in
 116  * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE.
 117  */
 118 #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
 119 
 120 
 121 /*
 122  * Kernel module parameter to specify maximum number of supported queues per
 123  * device
 124  */
 125 extern int max_num_of_queues_per_device;
 126 
 127 
 128 /* Kernel module parameter to specify the scheduling policy */
 129 extern int sched_policy;
 130 
 131 /*
 132  * Kernel module parameter to specify the maximum process
 133  * number per HW scheduler
 134  */
 135 extern int hws_max_conc_proc;
 136 
 137 extern int cwsr_enable;
 138 
 139 /*
 140  * Kernel module parameter to specify whether to send sigterm to HSA process on
 141  * unhandled exception
 142  */
 143 extern int send_sigterm;
 144 
 145 /*
 146  * This kernel module parameter is used to simulate a large-BAR machine on
 147  * machines that do not have large BAR enabled.
 148  */
 149 extern int debug_largebar;
 150 
 151 /*
 152  * Ignore CRAT table during KFD initialization, can be used to work around
 153  * broken CRAT tables on some AMD systems
 154  */
 155 extern int ignore_crat;
 156 
 157 /*
 158  * Set sh_mem_config.retry_disable on Vega10
 159  */
 160 extern int amdgpu_noretry;
 161 
 162 /*
 163  * Halt if HWS hang is detected
 164  */
 165 extern int halt_if_hws_hang;
 166 
 167 /*
 168  * Whether MEC FW support GWS barriers
 169  */
 170 extern bool hws_gws_support;
 171 
 172 /*
 173  * Queue preemption timeout in ms
 174  */
 175 extern int queue_preemption_timeout_ms;
 176 
 177 enum cache_policy {
 178         cache_policy_coherent,
 179         cache_policy_noncoherent
 180 };
 181 
 182 #define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11)
 183 #define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \
 184                            (chip) <= CHIP_NAVI10) || \
 185                            (chip) == CHIP_HAWAII)
 186 #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
 187 
 188 struct kfd_event_interrupt_class {
             /*
              * Pair of interrupt callbacks. struct kfd_device_info refers to
              * one instance through its event_interrupt_class member.
              * NOTE(review): the isr/wq naming suggests interrupt-context
              * filtering followed by deferred work-queue processing - confirm
              * against the implementations.
              */
 189         bool (*interrupt_isr)(struct kfd_dev *dev,
 190                         const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
 191                         bool *patched_flag);
 192         void (*interrupt_wq)(struct kfd_dev *dev,
 193                         const uint32_t *ih_ring_entry);
 194 };
 195 
 196 struct kfd_device_info {
             /*
              * Per-ASIC description. struct kfd_dev holds a const pointer to
              * one of these in its device_info member.
              */
 197         enum amd_asic_type asic_family;
 198         const char *asic_name;
 199         const struct kfd_event_interrupt_class *event_interrupt_class;
 200         unsigned int max_pasid_bits;
 201         unsigned int max_no_of_hqd;
 202         unsigned int doorbell_size;
 203         size_t ih_ring_entry_size;
 204         uint8_t num_of_watch_points;
 205         uint16_t mqd_size_aligned;
 206         bool supports_cwsr;
 207         bool needs_iommu_device;
 208         bool needs_pci_atomics;
 209         unsigned int num_sdma_engines;
 210         unsigned int num_xgmi_sdma_engines;
 211         unsigned int num_sdma_queues_per_engine;
 212 };
 213 
 214 struct kfd_mem_obj {
             /*
              * Memory object type used by kfd_gtt_sa_allocate() and
              * kfd_gtt_sa_free(), and by struct queue for its MQD
              * (mqd_mem_obj).
              * NOTE(review): range_start/range_end look like chunk indices
              * within the GTT sub-allocator - confirm in the allocator code.
              */
 215         uint32_t range_start;
 216         uint32_t range_end;
 217         uint64_t gpu_addr;
 218         uint32_t *cpu_ptr;
 219         void *gtt_mem;
 220 };
 221 
 222 struct kfd_vmid_info {
             /*
              * VMID bookkeeping for KFD; embedded in struct kfd_dev as vm_info.
              * NOTE(review): presumably vmid_num_kfd is the size of the
              * [first_vmid_kfd, last_vmid_kfd] range - confirm where it is set.
              */
 223         uint32_t first_vmid_kfd;
 224         uint32_t last_vmid_kfd;
 225         uint32_t vmid_num_kfd;
 226 };
 227 
 228 struct kfd_dev {
             /*
              * Per-device KFD state. Passed as the device argument to most of
              * the device-level functions declared in this header (doorbells,
              * GTT sub-allocator, interrupts, topology).
              */
 229         struct kgd_dev *kgd;
 230 
 231         const struct kfd_device_info *device_info;
 232         struct pci_dev *pdev;
 233 
 234         unsigned int id;                /* topology stub index */
 235 
 236         phys_addr_t doorbell_base;      /* Start of actual doorbells used by
 237                                          * KFD. It is aligned for mapping
 238                                          * into user mode
 239                                          */
 240         size_t doorbell_id_offset;      /* Doorbell offset (from KFD doorbell
 241                                          * to HW doorbell, GFX reserved some
 242                                          * at the start)
 243                                          */
 244         u32 __iomem *doorbell_kernel_ptr; /* Pointer to the doorbell page
 245                                            * used by kernel queues
 246                                            */
 247 
 248         struct kgd2kfd_shared_resources shared_resources;
 249         struct kfd_vmid_info vm_info;
 250 
 251         const struct kfd2kgd_calls *kfd2kgd;
 252         struct mutex doorbell_mutex;
 253         DECLARE_BITMAP(doorbell_available_index,
 254                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 255 
             /* GTT sub-allocator state (see kfd_gtt_sa_allocate/free) */
 256         void *gtt_mem;
 257         uint64_t gtt_start_gpu_addr;
 258         void *gtt_start_cpu_ptr;
 259         void *gtt_sa_bitmap;
 260         struct mutex gtt_sa_lock;
 261         unsigned int gtt_sa_chunk_size;
 262         unsigned int gtt_sa_num_of_chunks;
 263 
 264         /* Interrupts */
 265         struct kfifo ih_fifo;
 266         struct workqueue_struct *ih_wq;
 267         struct work_struct interrupt_work;
 268         spinlock_t interrupt_lock;
 269 
 270         /* QCM Device instance */
 271         struct device_queue_manager *dqm;
 272 
 273         bool init_complete;
 274         /*
 275          * Interrupts of interest to KFD are copied
 276          * from the HW ring into a SW ring.
 277          */
 278         bool interrupts_active;
 279 
 280         /* Debug manager */
 281         struct kfd_dbgmgr *dbgmgr;
 282 
 283         /* Firmware versions */
 284         uint16_t mec_fw_version;
 285         uint16_t sdma_fw_version;
 286 
 287         /* Maximum process number mapped to HW scheduler */
 288         unsigned int max_proc_per_quantum;
 289 
 290         /* CWSR */
 291         bool cwsr_enabled;
 292         const void *cwsr_isa;
 293         unsigned int cwsr_isa_size;
 294 
 295         /* xGMI */
 296         uint64_t hive_id;
 297 
 298         bool pci_atomic_requested;
 299 
 300         /* SRAM ECC flag */
 301         atomic_t sram_ecc_flag;
 302 
 303         /* Compute Profile ref. count */
 304         atomic_t compute_profile;
 305 
 306         /* Global GWS resource shared between processes */
 307         void *gws;
 308 };
 309 
 310 enum kfd_mempool {
 311         KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
 312         KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
 313         KFD_MEMPOOL_FRAMEBUFFER = 3,
 314 };
 315 
 316 /* Character device interface */
 317 int kfd_chardev_init(void);
 318 void kfd_chardev_exit(void);
 319 struct device *kfd_chardev(void);
 320 
 321 /**
 322  * enum kfd_unmap_queues_filter
 323  *
 324  * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
 325  *
 326  * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 327  *                                              running queues list.
 328  *
 329  * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to
 330  *                                              specific process.
 331  *
 332  */
 333 enum kfd_unmap_queues_filter {
 334         KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
 335         KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
 336         KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
 337         KFD_UNMAP_QUEUES_FILTER_BY_PASID
 338 };
 339 
 340 /**
 341  * enum kfd_queue_type
 342  *
 343  * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 344  *
 345  * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type.
 346  *
 347  * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
 348  *
 349  * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
 350  */
 351 enum kfd_queue_type  {
 352         KFD_QUEUE_TYPE_COMPUTE,
 353         KFD_QUEUE_TYPE_SDMA,
 354         KFD_QUEUE_TYPE_HIQ,
 355         KFD_QUEUE_TYPE_DIQ,
 356         KFD_QUEUE_TYPE_SDMA_XGMI
 357 };
 358 
 359 enum kfd_queue_format {
 360         KFD_QUEUE_FORMAT_PM4,
 361         KFD_QUEUE_FORMAT_AQL
 362 };
 363 
 364 enum KFD_QUEUE_PRIORITY {
 365         KFD_QUEUE_PRIORITY_MINIMUM = 0,
 366         KFD_QUEUE_PRIORITY_MAXIMUM = 15
 367 };
 368 
 369 /**
 370  * struct queue_properties
 371  *
 372  * @type: The queue type.
 373  *
      * @format: The queue format (see enum kfd_queue_format).
      *
 374  * @queue_id: Queue identifier.
 375  *
 376  * @queue_address: Queue ring buffer address.
 377  *
 378  * @queue_size: Queue ring buffer size.
 379  *
 380  * @priority: Defines the queue priority relative to other queues in the
 381  * process.
 382  * This is just an indication and HW scheduling may override the priority as
 383  * necessary while keeping the relative prioritization.
 384  * The priority granularity is from 0 to 0xf, where 0xf is the highest priority.
 385  * Currently all queues are initialized with the highest priority.
 386  *
 387  * @queue_percent: This field is partially implemented and currently a zero in
 388  * this field defines that the queue is non active.
 389  *
 390  * @read_ptr: User space address which points to the number of dwords the
 391  * cp read from the ring buffer. This field is updated automatically by the H/W.
 392  *
 393  * @write_ptr: Defines the number of dwords written to the ring buffer.
 394  *
 395  * @doorbell_ptr: This field's aim is to notify the H/W of a new packet written
 396  * to the queue ring buffer. This field should be similar to write_ptr and the
 397  * user should update this field after updating write_ptr.
 398  *
 399  * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 400  *
 401  * @is_interop: Defines if this is an interop queue. Interop queue means that
 402  * the queue can access both graphics and compute resources.
 403  *
 404  * @is_evicted: Defines if the queue is evicted. Only active queues
 405  * are evicted, rendering them inactive.
 406  *
 407  * @is_active: Defines if the queue is active or not. @is_active and
 408  * @is_evicted are protected by the DQM lock.
 409  *
 410  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
 411  * of the queue.
 412  *
 413  * This structure represents the queue properties for each queue no matter if
 414  * it's user mode or kernel mode queue.
 415  *
 416  */
 417 struct queue_properties {
 418         enum kfd_queue_type type;
 419         enum kfd_queue_format format;
 420         unsigned int queue_id;
 421         uint64_t queue_address;
 422         uint64_t  queue_size;
 423         uint32_t priority;
 424         uint32_t queue_percent;
 425         uint32_t *read_ptr;
 426         uint32_t *write_ptr;
 427         void __iomem *doorbell_ptr;
 428         uint32_t doorbell_off;
 429         bool is_interop;
 430         bool is_evicted;
 431         bool is_active;
 432         /* Not relevant for user mode queues in cp scheduling */
 433         unsigned int vmid;
 434         /* Relevant only for sdma queues */
 435         uint32_t sdma_engine_id;
 436         uint32_t sdma_queue_id;
 437         uint32_t sdma_vm_addr;
 438         /* Relevant only for VI */
 439         uint64_t eop_ring_buffer_address;
 440         uint32_t eop_ring_buffer_size;
 441         uint64_t ctx_save_restore_area_address;
 442         uint32_t ctx_save_restore_area_size;
 443         uint32_t ctl_stack_size;
 444         uint64_t tba_addr;
 445         uint64_t tma_addr;
 446         /* Relevant for CU */
 447         uint32_t cu_mask_count; /* Must be a multiple of 32 */
 448         uint32_t *cu_mask;
 449 };
 450 
 451 #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&       \
 452                             (q).queue_address != 0 &&   \
 453                             (q).queue_percent > 0 &&    \
 454                             !(q).is_evicted)
 455 
 456 /**
 457  * struct queue
 458  *
 459  * @list: Queue linked list.
 460  *
 461  * @mqd: The queue MQD.
 462  *
 463  * @mqd_mem_obj: The MQD local gpu memory object.
 464  *
 465  * @gart_mqd_addr: The MQD gart mc address.
 466  *
 467  * @properties: The queue properties.
 468  *
 469  * @mec: Used only in no cp scheduling mode and identifies the micro engine id
 470  *       that the queue should be executed on.
 471  *
 472  * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
 473  *        id.
 474  *
 475  * @queue: Used only in no cp scheduling mode and identifies the queue's slot.
 476  *
      * @sdma_id: NOTE(review): undocumented in the original comment; presumably
      *           only meaningful for SDMA queues - confirm.
      *
      * @doorbell_id: NOTE(review): undocumented in the original comment; see
      *               kfd_doorbell_id_to_offset() for a consumer of doorbell ids.
      *
 477  * @process: The kfd process that created this queue.
 478  *
 479  * @device: The kfd device that created this queue.
 480  *
 481  * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
 482  * otherwise.
 483  *
 484  * This structure represents user mode compute queues.
 485  * It contains all the necessary data to handle such queues.
 486  *
 487  */
 488 
 489 struct queue {
 490         struct list_head list;
 491         void *mqd;
 492         struct kfd_mem_obj *mqd_mem_obj;
 493         uint64_t gart_mqd_addr;
 494         struct queue_properties properties;
 495 
 496         uint32_t mec;
 497         uint32_t pipe;
 498         uint32_t queue;
 499 
 500         unsigned int sdma_id;
 501         unsigned int doorbell_id;
 502 
 503         struct kfd_process      *process;
 504         struct kfd_dev          *device;
 505         void *gws;
 506 };
 507 
 508 /*
 509  * Please read the kfd_mqd_manager.h description.
 510  */
 511 enum KFD_MQD_TYPE {
 512         KFD_MQD_TYPE_COMPUTE = 0,       /* for no cp scheduling */
 513         KFD_MQD_TYPE_HIQ,               /* for hiq */
 514         KFD_MQD_TYPE_CP,                /* for cp queues and diq */
 515         KFD_MQD_TYPE_SDMA,              /* for sdma queues */
 516         KFD_MQD_TYPE_DIQ,               /* for diq */
 517         KFD_MQD_TYPE_MAX
 518 };
 519 
 520 enum KFD_PIPE_PRIORITY {
 521         KFD_PIPE_PRIORITY_CS_LOW = 0,
 522         KFD_PIPE_PRIORITY_CS_MEDIUM,
 523         KFD_PIPE_PRIORITY_CS_HIGH
 524 };
 525 
 526 struct scheduling_resources {
             /*
              * Masks and GDS heap range describing schedulable resources.
              * NOTE(review): presumably each set bit in a *_mask marks one
              * usable resource - confirm where this struct is filled in.
              */
 527         unsigned int vmid_mask;
 528         enum kfd_queue_type type;
 529         uint64_t queue_mask;
 530         uint64_t gws_mask;
 531         uint32_t oac_mask;
 532         uint32_t gds_heap_base;
 533         uint32_t gds_heap_size;
 534 };
 535 
 536 struct process_queue_manager {
             /*
              * Per-process queue bookkeeping; embedded in struct kfd_process
              * as pqm and pointed to by struct qcm_process_device.pqm.
              */
 537         /* data */
 538         struct kfd_process      *process;
 539         struct list_head        queues;
 540         unsigned long           *queue_slot_bitmap;
 541 };
 542 
 543 struct qcm_process_device {
             /*
              * Queue-manager state kept per process per device; embedded in
              * struct kfd_process_device as qpd (see qpd_to_pdd()).
              */
 544         /* The Device Queue Manager that owns this data */
 545         struct device_queue_manager *dqm;
 546         struct process_queue_manager *pqm;
 547         /* Queues list */
 548         struct list_head queues_list;
 549         struct list_head priv_queue_list;
 550 
 551         unsigned int queue_count;
 552         unsigned int vmid;
 553         bool is_debug;
 554         unsigned int evicted; /* eviction counter, 0=active */
 555 
 556         /* This flag tells if we should reset all wavefronts on
 557          * process termination
 558          */
 559         bool reset_wavefronts;
 560 
 561         /*
 562          * All the memory management data should be here too
 563          */
 564         uint64_t gds_context_area;
 565         /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
 566         uint64_t page_table_base;
 567         uint32_t sh_mem_config;
 568         uint32_t sh_mem_bases;
 569         uint32_t sh_mem_ape1_base;
 570         uint32_t sh_mem_ape1_limit;
 571         uint32_t gds_size;
 572         uint32_t num_gws;
 573         uint32_t num_oac;
 574         uint32_t sh_hidden_private_base;
 575 
 576         /* CWSR memory */
 577         void *cwsr_kaddr;
 578         uint64_t cwsr_base;
 579         uint64_t tba_addr;
 580         uint64_t tma_addr;
 581 
 582         /* IB memory */
 583         uint64_t ib_base;
 584         void *ib_kaddr;
 585 
 586         /* doorbell resources per process per device */
 587         unsigned long *doorbell_bitmap;
 588 };
 589 
 590 /* KFD Memory Eviction */
 591 
 592 /* Approx. wait time before attempting to restore evicted BOs */
 593 #define PROCESS_RESTORE_TIME_MS 100
 594 /* Approx. back off time if restore fails due to lack of memory */
 595 #define PROCESS_BACK_OFF_TIME_MS 100
 596 /* Approx. time before evicting the process again */
 597 #define PROCESS_ACTIVE_TIME_MS 10
 598 
 599 /* 8 byte handle containing GPU ID in the most significant 4 bytes and
 600  * idr_handle in the least significant 4 bytes
 601  */
 602 #define MAKE_HANDLE(gpu_id, idr_handle) \
 603         (((uint64_t)(gpu_id) << 32) + idr_handle)
 604 #define GET_GPU_ID(handle) (handle >> 32)
 605 #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
 606 
 607 enum kfd_pdd_bound {
 608         PDD_UNBOUND = 0,
 609         PDD_BOUND,
 610         PDD_BOUND_SUSPENDED,
 611 };
 612 
 613 /* Data that is kept per process, per device. */
 614 struct kfd_process_device {
 615         /*
 616          * List of all per-device data for a process.
 617          * Starts from kfd_process.per_device_data.
 618          */
 619         struct list_head per_device_list;
 620 
 621         /* The device that owns this data. */
 622         struct kfd_dev *dev;
 623 
 624         /* The process that owns this kfd_process_device. */
 625         struct kfd_process *process;
 626 
 627         /* per-process-per device QCM data structure */
 628         struct qcm_process_device qpd;
 629 
 630         /* Apertures */
 631         uint64_t lds_base;
 632         uint64_t lds_limit;
 633         uint64_t gpuvm_base;
 634         uint64_t gpuvm_limit;
 635         uint64_t scratch_base;
 636         uint64_t scratch_limit;
 637 
 638         /* VM context for GPUVM allocations */
 639         struct file *drm_file;
 640         void *vm;
 641 
 642         /* GPUVM allocations storage */
 643         struct idr alloc_idr;
 644 
 645         /* Flag used to tell the pdd has dequeued from the dqm.
 646          * This is used to prevent dev->dqm->ops.process_termination() from
 647          * being called twice when it is already called in IOMMU callback
 648          * function.
 649          */
 650         bool already_dequeued;
 651 
 652         /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 653         enum kfd_pdd_bound bound;
 654 };
 655 
     /*
      * Map a pointer to the embedded qpd member back to the struct
      * kfd_process_device that contains it.
      */
 656 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
 657 
 658 /* Per-process KFD data */
 659 struct kfd_process {
 660         /*
 661          * kfd_process are stored in an mm_struct*->kfd_process*
 662          * hash table (kfd_processes in kfd_process.c)
 663          */
 664         struct hlist_node kfd_processes;
 665 
 666         /*
 667          * Opaque pointer to mm_struct. We don't hold a reference to
 668          * it so it should never be dereferenced from here. This is
 669          * only used for looking up processes by their mm.
 670          */
 671         void *mm;
 672 
 673         struct kref ref;
 674         struct work_struct release_work;
 675 
 676         struct mutex mutex;
 677 
 678         /*
 679          * In any process, the thread that started main() is the lead
 680          * thread and outlives the rest.
 681          * It is here because amd_iommu_bind_pasid wants a task_struct.
 682          * It can also be used for safely getting a reference to the
 683          * mm_struct of the process.
 684          */
 685         struct task_struct *lead_thread;
 686 
 687         /* We want to receive a notification when the mm_struct is destroyed */
 688         struct mmu_notifier mmu_notifier;
 689 
 690         unsigned int pasid;
 691         unsigned int doorbell_index;
 692 
 693         /*
 694          * List of kfd_process_device structures,
 695          * one for each device the process is using.
 696          */
 697         struct list_head per_device_data;
 698 
 699         struct process_queue_manager pqm;
 700 
 701         /* Is the user space process 32 bit? */
 702         bool is_32bit_user_mode;
 703 
 704         /* Event-related data */
 705         struct mutex event_mutex;
 706         /* Event ID allocator and lookup */
 707         struct idr event_idr;
 708         /* Event page */
 709         struct kfd_signal_page *signal_page;
 710         size_t signal_mapped_size;
 711         size_t signal_event_count;
 712         bool signal_event_limit_reached;
 713 
 714         /* Information used for memory eviction */
 715         void *kgd_process_info;
 716         /* Eviction fence that is attached to all the BOs of this process. The
 717          * fence will be triggered during eviction and a new one will be
 718          * created during restore
 719          */
 720         struct dma_fence *ef;
 721 
 722         /* Work items for evicting and restoring BOs */
 723         struct delayed_work eviction_work;
 724         struct delayed_work restore_work;
 725         /* seqno of the last scheduled eviction */
 726         unsigned int last_eviction_seqno;
 727         /* Approx. the last timestamp (in jiffies) when the process was
 728          * restored after an eviction
 729          */
 730         unsigned long last_restore_timestamp;
 731 
 732         /* Kobj for our procfs */
 733         struct kobject *kobj;
 734         struct attribute attr_pasid;
 735 };
 736 
 737 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
 738 extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
 739 extern struct srcu_struct kfd_processes_srcu;
 740 
 741 /**
 742  * amdkfd_ioctl_t - Ioctl function type.
 743  *
 744  * @filep: pointer to file structure.
 745  * @p: amdkfd process pointer.
 746  * @data: pointer to arg that was copied from user.
 747  */
 748 typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
 749                                 void *data);
 750 
     /*
      * Descriptor tying an ioctl command number to its handler function and
      * name.
      * NOTE(review): the distinction between cmd and cmd_drv is not visible
      * in this header - confirm against the ioctl table.
      */
 751 struct amdkfd_ioctl_desc {
 752         unsigned int cmd;
 753         int flags;
 754         amdkfd_ioctl_t *func;
 755         unsigned int cmd_drv;
 756         const char *name;
 757 };
 758 bool kfd_dev_is_large_bar(struct kfd_dev *dev);
 759 
 760 int kfd_process_create_wq(void);
 761 void kfd_process_destroy_wq(void);
 762 struct kfd_process *kfd_create_process(struct file *filep);
 763 struct kfd_process *kfd_get_process(const struct task_struct *);
 764 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
 765 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 766 void kfd_unref_process(struct kfd_process *p);
 767 int kfd_process_evict_queues(struct kfd_process *p);
 768 int kfd_process_restore_queues(struct kfd_process *p);
 769 void kfd_suspend_all_processes(void);
 770 int kfd_resume_all_processes(void);
 771 
 772 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 773                                struct file *drm_file);
 774 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 775                                                 struct kfd_process *p);
 776 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
 777                                                         struct kfd_process *p);
 778 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 779                                                         struct kfd_process *p);
 780 
 781 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 782                           struct vm_area_struct *vma);
 783 
 784 /* KFD process API for creating and translating handles */
 785 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
 786                                         void *mem);
 787 void *kfd_process_device_translate_handle(struct kfd_process_device *p,
 788                                         int handle);
 789 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
 790                                         int handle);
 791 
 792 /* Process device data iterator */
 793 struct kfd_process_device *kfd_get_first_process_device_data(
 794                                                         struct kfd_process *p);
 795 struct kfd_process_device *kfd_get_next_process_device_data(
 796                                                 struct kfd_process *p,
 797                                                 struct kfd_process_device *pdd);
 798 bool kfd_has_process_device_data(struct kfd_process *p);
 799 
 800 /* PASIDs */
 801 int kfd_pasid_init(void);
 802 void kfd_pasid_exit(void);
 803 bool kfd_set_pasid_limit(unsigned int new_limit);
 804 unsigned int kfd_get_pasid_limit(void);
 805 unsigned int kfd_pasid_alloc(void);
 806 void kfd_pasid_free(unsigned int pasid);
 807 
 808 /* Doorbells */
 809 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
 810 int kfd_doorbell_init(struct kfd_dev *kfd);
 811 void kfd_doorbell_fini(struct kfd_dev *kfd);
 812 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
 813                       struct vm_area_struct *vma);
 814 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 815                                         unsigned int *doorbell_off);
 816 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
 817 u32 read_kernel_doorbell(u32 __iomem *db);
 818 void write_kernel_doorbell(void __iomem *db, u32 value);
 819 void write_kernel_doorbell64(void __iomem *db, u64 value);
 820 unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
 821                                         struct kfd_process *process,
 822                                         unsigned int doorbell_id);
 823 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
 824                                         struct kfd_process *process);
 825 int kfd_alloc_process_doorbells(struct kfd_process *process);
 826 void kfd_free_process_doorbells(struct kfd_process *process);
 827 
 828 /* GTT Sub-Allocator */
 829 
 830 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 831                         struct kfd_mem_obj **mem_obj);
 832 
 833 int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
 834 
 835 extern struct device *kfd_device;
 836 
 837 /* KFD's procfs */
 838 void kfd_procfs_init(void);
 839 void kfd_procfs_shutdown(void);
 840 
 841 /* Topology */
 842 int kfd_topology_init(void);
 843 void kfd_topology_shutdown(void);
 844 int kfd_topology_add_device(struct kfd_dev *gpu);
 845 int kfd_topology_remove_device(struct kfd_dev *gpu);
 846 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
 847                                                 uint32_t proximity_domain);
 848 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 849 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 850 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
 851 struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
 852 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
 853 int kfd_numa_node_to_apic_id(int numa_node_id);
 854 
 855 /* Interrupts */
 856 int kfd_interrupt_init(struct kfd_dev *dev);
 857 void kfd_interrupt_exit(struct kfd_dev *dev);
 858 bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
 859 bool interrupt_is_wanted(struct kfd_dev *dev,
 860                                 const uint32_t *ih_ring_entry,
 861                                 uint32_t *patched_ihre, bool *flag);
 862 
 863 /* amdkfd Apertures */
 864 int kfd_init_apertures(struct kfd_process *process);
 865 
 866 /* Queue Context Management */
 867 int init_queue(struct queue **q, const struct queue_properties *properties);
 868 void uninit_queue(struct queue *q);
 869 void print_queue_properties(struct queue_properties *q);
 870 void print_queue(struct queue *q);
 871 
 872 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 873                 struct kfd_dev *dev);
 874 struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
 875                 struct kfd_dev *dev);
 876 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 877                 struct kfd_dev *dev);
 878 struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
 879                 struct kfd_dev *dev);
 880 struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 881                 struct kfd_dev *dev);
 882 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 883                 struct kfd_dev *dev);
 884 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 885 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 886 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 887                                         enum kfd_queue_type type);
 888 void kernel_queue_uninit(struct kernel_queue *kq);
 889 int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
 890 
 891 /* Process Queue Manager */
 892 struct process_queue_node {
             /*
              * List node carrying a queue pointer and a kernel_queue pointer.
              * NOTE(review): presumably only one of q / kq is set for a given
              * node - confirm in the process queue manager implementation.
              */
 893         struct queue *q;
 894         struct kernel_queue *kq;
 895         struct list_head process_queue_list;   /* list linkage; owning list head is not visible in this chunk */
 896 };
 897 
 898 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
 899 void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
     /*
      * pqm_*() operate on a struct process_queue_manager; apart from
      * init/uninit/create, each one selects its target queue by qid.
      */
 900 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
 901 void pqm_uninit(struct process_queue_manager *pqm);
     /* qid is passed by pointer here - presumably receives the new queue id; confirm. */
 902 int pqm_create_queue(struct process_queue_manager *pqm,
 903                             struct kfd_dev *dev,
 904                             struct file *f,
 905                             struct queue_properties *properties,
 906                             unsigned int *qid);
 907 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 908 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
 909                         struct queue_properties *p);
 910 int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
 911                         struct queue_properties *p);
 912 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 913                         void *gws);
 914 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 915                                                 unsigned int qid);
     /*
      * ctl_stack is a __user pointer. The two u32 pointers are presumably
      * outputs reporting used sizes - verify against the definition.
      */
 916 int pqm_get_wave_state(struct process_queue_manager *pqm,
 917                        unsigned int qid,
 918                        void __user *ctl_stack,
 919                        u32 *ctl_stack_used_size,
 920                        u32 *save_area_used_size);
 921 
     /* Timeout unit is milliseconds per the parameter name; return convention not visible here. */
 922 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 923                               unsigned int fence_value,
 924                               unsigned int timeout_ms);
 925 
 926 /* Packet Manager */
 927 
 928 #define KFD_FENCE_COMPLETED (100)   /* NOTE(review): presumably the fence value meaning "done" - confirm against users */
 929 #define KFD_FENCE_INIT   (10)       /* NOTE(review): presumably the initial fence value before completion - confirm */
 930 
 931 struct packet_manager {
             /*
              * State object taken by the pm_*() functions declared in this
              * header. The notes below state only what the declarations show;
              * how each field is used lives in the packet manager source.
              */
 932         struct device_queue_manager *dqm;   /* pm_init() also takes a dqm; presumably stored here - confirm */
 933         struct kernel_queue *priv_queue;
 934         struct mutex lock;                  /* what it guards is not visible in this header */
 935         bool allocated;                     /* NOTE(review): presumably tracks ib_buffer_obj allocation - confirm */
 936         struct kfd_mem_obj *ib_buffer_obj;
 937         unsigned int ib_size_bytes;         /* size in bytes, per the field name */
 938         bool is_over_subscription;
 939 
 940         const struct packet_manager_funcs *pmf;   /* callback and packet-size table, see struct packet_manager_funcs */
 941 };
 942 
 943 struct packet_manager_funcs {
             /*
              * Table of callbacks plus one size member per callback.
              * Every callback returns int and takes a uint32_t *buffer;
              * all except release_mem also take the packet_manager.
              * NOTE(review): buffer is presumably where the packet is
              * written - confirm in the per-ASIC implementations.
              */
 944         /* Support ASIC-specific packet formats for PM4 packets */
 945         int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
 946                         struct qcm_process_device *qpd);
 947         int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
 948                         uint64_t ib, size_t ib_size_in_dwords, bool chain);
 949         int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
 950                         struct scheduling_resources *res);
 951         int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
 952                         struct queue *q, bool is_static);
 953         int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
 954                         enum kfd_queue_type type,
 955                         enum kfd_unmap_queues_filter mode,
 956                         uint32_t filter_param, bool reset,
 957                         unsigned int sdma_engine);
 958         int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
 959                         uint64_t fence_address, uint32_t fence_value);
 960         int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);   /* only callback without a pm argument */
 961 
 962         /* Packet sizes */
             /* One member per callback above; the unit is not stated here (presumably bytes - confirm). */
 963         int map_process_size;
 964         int runlist_size;
 965         int set_resources_size;
 966         int map_queues_size;
 967         int unmap_queues_size;
 968         int query_status_size;
 969         int release_mem_size;
 970 };
 971 
 972 extern const struct packet_manager_funcs kfd_vi_pm_funcs;
     /* Three const packet_manager_funcs tables, defined elsewhere; names suggest one per ASIC family. */
 973 extern const struct packet_manager_funcs kfd_v9_pm_funcs;
 974 extern const struct packet_manager_funcs kfd_v10_pm_funcs;
 975 
     /* Packet manager entry points; every one takes the packet_manager it operates on. */
 976 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
 977 void pm_uninit(struct packet_manager *pm);
 978 int pm_send_set_resources(struct packet_manager *pm,
 979                                 struct scheduling_resources *res);
 980 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
 981 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 982                                 uint32_t fence_value);
 983 
     /*
      * Takes the same type/mode/filter_param/reset/sdma_engine arguments as
      * the unmap_queues callback in struct packet_manager_funcs.
      */
 984 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 985                         enum kfd_unmap_queues_filter mode,
 986                         uint32_t filter_param, bool reset,
 987                         unsigned int sdma_engine);
 988 
 989 void pm_release_ib(struct packet_manager *pm);
 990 
 991 /* The following PM functions can be shared between VI and AI */
 992 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
     /* Same parameter list as the set_resources callback in struct packet_manager_funcs. */
 993 int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
 994                         struct scheduling_resources *res);
 995 
 996 
 997 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 998 
 999 /* Events */
1000 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
1001 extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
1002 
1003 extern const struct kfd_device_global_init_class device_global_init_class_cik;
1004 
1005 void kfd_event_init_process(struct kfd_process *p);
1006 void kfd_event_free_process(struct kfd_process *p);
1007 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
1008 int kfd_wait_on_events(struct kfd_process *p,
1009                        uint32_t num_events, void __user *data,
1010                        bool all, uint32_t user_timeout_ms,
1011                        uint32_t *wait_result);
1012 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
1013                                 uint32_t valid_id_bits);
1014 void kfd_signal_iommu_event(struct kfd_dev *dev,
1015                 unsigned int pasid, unsigned long address,
1016                 bool is_write_requested, bool is_execute_requested);
1017 void kfd_signal_hw_exception_event(unsigned int pasid);
1018 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
1019 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
1020 int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
1021                        uint64_t size);
1022 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
1023                      uint32_t event_type, bool auto_reset, uint32_t node_id,
1024                      uint32_t *event_id, uint32_t *event_trigger_data,
1025                      uint64_t *event_page_offset, uint32_t *event_slot_index);
1026 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
1027 
1028 void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
1029                                 struct kfd_vm_fault_info *info);
1030 
1031 void kfd_signal_reset_event(struct kfd_dev *dev);
1032 
1033 void kfd_flush_tlb(struct kfd_process_device *pdd);
1034 
1035 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
1036 
1037 bool kfd_is_locked(void);
1038 
1039 /* Compute profile */
1040 void kfd_inc_compute_active(struct kfd_dev *dev);
1041 void kfd_dec_compute_active(struct kfd_dev *dev);
1042 
1043 /* Debugfs */
1044 #if defined(CONFIG_DEBUG_FS)
     /*
      * With CONFIG_DEBUG_FS: declarations of the real debugfs functions,
      * which are defined elsewhere.
      */
1045 
1046 void kfd_debugfs_init(void);
1047 void kfd_debugfs_fini(void);
     /*
      * The following take (struct seq_file *, void *) - the shape of a
      * seq_file show callback; presumably used as such, confirm at the
      * registration site.
      */
1048 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
1049 int pqm_debugfs_mqds(struct seq_file *m, void *data);
1050 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
1051 int dqm_debugfs_hqds(struct seq_file *m, void *data);
1052 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
1053 int pm_debugfs_runlist(struct seq_file *m, void *data);
1054 
1055 int kfd_debugfs_hang_hws(struct kfd_dev *dev);
1056 int pm_debugfs_hang_hws(struct packet_manager *pm);
1057 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);
1058 
1059 #else /* !CONFIG_DEBUG_FS */
     /*
      * Without CONFIG_DEBUG_FS only init/fini get empty inline stubs.
      * None of the other functions declared above has a stub, so any
      * caller of those must itself be compiled only under CONFIG_DEBUG_FS.
      */
1060 
1061 static inline void kfd_debugfs_init(void) {}
1062 static inline void kfd_debugfs_fini(void) {}
1063 
1064 #endif /* CONFIG_DEBUG_FS */
1065 
1066 #endif

/* [<][>][^][v][top][bottom][index][help] */