root/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. kfd_doorbell_process_slice
  2. kfd_doorbell_init
  3. kfd_doorbell_fini
  4. kfd_doorbell_mmap
  5. kfd_get_kernel_doorbell
  6. kfd_release_kernel_doorbell
  7. write_kernel_doorbell
  8. write_kernel_doorbell64
  9. kfd_doorbell_id_to_offset
  10. kfd_get_number_elems
  11. kfd_get_process_doorbells
  12. kfd_alloc_process_doorbells
  13. kfd_free_process_doorbells

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  */
  22 #include "kfd_priv.h"
  23 #include <linux/mm.h>
  24 #include <linux/mman.h>
  25 #include <linux/slab.h>
  26 #include <linux/io.h>
  27 #include <linux/idr.h>
  28 
   29 /*
   30  * This extension supports kernel-level doorbell management for the
   31  * kernel queues, using the first doorbell page reserved for the kernel.
   32  */
  33 
   34 static DEFINE_IDA(doorbell_ida); /* hands out the per-process doorbell slice indices */
   35 static unsigned int max_doorbell_slices; /* exclusive upper bound for those indices; smallest per-device limit computed by kfd_doorbell_init() */
  36 
  37 /*
  38  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
  39  * receives 32-bit writes that are passed to queues as wptr values.
  40  * The doorbells are intended to be written by applications as part
  41  * of queueing work on user-mode queues.
  42  * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
  43  * We map the doorbell address space into user-mode when a process creates
  44  * its first queue on each device.
  45  * Although the mapping is done by KFD, it is equivalent to an mmap of
  46  * the /dev/kfd with the particular device encoded in the mmap offset.
  47  * There will be other uses for mmap of /dev/kfd, so only a range of
  48  * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
  49  */
  50 
  51 /* # of doorbell bytes allocated for each process. */
  52 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
  53 {
  54         return roundup(kfd->device_info->doorbell_size *
  55                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
  56                         PAGE_SIZE);
  57 }
  58 
  59 /* Doorbell calculations for device init. */
  60 int kfd_doorbell_init(struct kfd_dev *kfd)
  61 {
  62         size_t doorbell_start_offset;
  63         size_t doorbell_aperture_size;
  64         size_t doorbell_process_limit;
  65 
  66         /*
  67          * We start with calculations in bytes because the input data might
  68          * only be byte-aligned.
  69          * Only after we have done the rounding can we assume any alignment.
  70          */
  71 
  72         doorbell_start_offset =
  73                         roundup(kfd->shared_resources.doorbell_start_offset,
  74                                         kfd_doorbell_process_slice(kfd));
  75 
  76         doorbell_aperture_size =
  77                         rounddown(kfd->shared_resources.doorbell_aperture_size,
  78                                         kfd_doorbell_process_slice(kfd));
  79 
  80         if (doorbell_aperture_size > doorbell_start_offset)
  81                 doorbell_process_limit =
  82                         (doorbell_aperture_size - doorbell_start_offset) /
  83                                                 kfd_doorbell_process_slice(kfd);
  84         else
  85                 return -ENOSPC;
  86 
  87         if (!max_doorbell_slices ||
  88             doorbell_process_limit < max_doorbell_slices)
  89                 max_doorbell_slices = doorbell_process_limit;
  90 
  91         kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
  92                                 doorbell_start_offset;
  93 
  94         kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
  95 
  96         kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
  97                                            kfd_doorbell_process_slice(kfd));
  98 
  99         if (!kfd->doorbell_kernel_ptr)
 100                 return -ENOMEM;
 101 
 102         pr_debug("Doorbell initialization:\n");
 103         pr_debug("doorbell base           == 0x%08lX\n",
 104                         (uintptr_t)kfd->doorbell_base);
 105 
 106         pr_debug("doorbell_id_offset      == 0x%08lX\n",
 107                         kfd->doorbell_id_offset);
 108 
 109         pr_debug("doorbell_process_limit  == 0x%08lX\n",
 110                         doorbell_process_limit);
 111 
 112         pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
 113                         (uintptr_t)kfd->doorbell_base);
 114 
 115         pr_debug("doorbell aperture size  == 0x%08lX\n",
 116                         kfd->shared_resources.doorbell_aperture_size);
 117 
 118         pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
 119 
 120         return 0;
 121 }
 122 
 123 void kfd_doorbell_fini(struct kfd_dev *kfd)
 124 {
 125         if (kfd->doorbell_kernel_ptr)
 126                 iounmap(kfd->doorbell_kernel_ptr);
 127 }
 128 
 129 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
 130                       struct vm_area_struct *vma)
 131 {
 132         phys_addr_t address;
 133 
 134         /*
 135          * For simplicitly we only allow mapping of the entire doorbell
 136          * allocation of a single device & process.
 137          */
 138         if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
 139                 return -EINVAL;
 140 
 141         /* Calculate physical address of doorbell */
 142         address = kfd_get_process_doorbells(dev, process);
 143 
 144         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
 145                                 VM_DONTDUMP | VM_PFNMAP;
 146 
 147         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 148 
 149         pr_debug("Mapping doorbell page\n"
 150                  "     target user address == 0x%08llX\n"
 151                  "     physical address    == 0x%08llX\n"
 152                  "     vm_flags            == 0x%04lX\n"
 153                  "     size                == 0x%04lX\n",
 154                  (unsigned long long) vma->vm_start, address, vma->vm_flags,
 155                  kfd_doorbell_process_slice(dev));
 156 
 157 
 158         return io_remap_pfn_range(vma,
 159                                 vma->vm_start,
 160                                 address >> PAGE_SHIFT,
 161                                 kfd_doorbell_process_slice(dev),
 162                                 vma->vm_page_prot);
 163 }
 164 
 165 
 166 /* get kernel iomem pointer for a doorbell */
 167 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 168                                         unsigned int *doorbell_off)
 169 {
 170         u32 inx;
 171 
 172         mutex_lock(&kfd->doorbell_mutex);
 173         inx = find_first_zero_bit(kfd->doorbell_available_index,
 174                                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 175 
 176         __set_bit(inx, kfd->doorbell_available_index);
 177         mutex_unlock(&kfd->doorbell_mutex);
 178 
 179         if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 180                 return NULL;
 181 
 182         inx *= kfd->device_info->doorbell_size / sizeof(u32);
 183 
 184         /*
 185          * Calculating the kernel doorbell offset using the first
 186          * doorbell page.
 187          */
 188         *doorbell_off = kfd->doorbell_id_offset + inx;
 189 
 190         pr_debug("Get kernel queue doorbell\n"
 191                         "     doorbell offset   == 0x%08X\n"
 192                         "     doorbell index    == 0x%x\n",
 193                 *doorbell_off, inx);
 194 
 195         return kfd->doorbell_kernel_ptr + inx;
 196 }
 197 
 198 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
 199 {
 200         unsigned int inx;
 201 
 202         inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
 203                 * sizeof(u32) / kfd->device_info->doorbell_size;
 204 
 205         mutex_lock(&kfd->doorbell_mutex);
 206         __clear_bit(inx, kfd->doorbell_available_index);
 207         mutex_unlock(&kfd->doorbell_mutex);
 208 }
 209 
 210 void write_kernel_doorbell(void __iomem *db, u32 value)
 211 {
 212         if (db) {
 213                 writel(value, db);
 214                 pr_debug("Writing %d to doorbell address %p\n", value, db);
 215         }
 216 }
 217 
 218 void write_kernel_doorbell64(void __iomem *db, u64 value)
 219 {
 220         if (db) {
 221                 WARN(((unsigned long)db & 7) != 0,
 222                      "Unaligned 64-bit doorbell");
 223                 writeq(value, (u64 __iomem *)db);
 224                 pr_debug("writing %llu to doorbell address %p\n", value, db);
 225         }
 226 }
 227 
 228 unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
 229                                         struct kfd_process *process,
 230                                         unsigned int doorbell_id)
 231 {
 232         /*
 233          * doorbell_id_offset accounts for doorbells taken by KGD.
 234          * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
 235          * the process's doorbells. The offset returned is in dword
 236          * units regardless of the ASIC-dependent doorbell size.
 237          */
 238         return kfd->doorbell_id_offset +
 239                 process->doorbell_index
 240                 * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
 241                 doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
 242 }
 243 
 244 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
 245 {
 246         uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
 247                                 kfd->shared_resources.doorbell_start_offset) /
 248                                         kfd_doorbell_process_slice(kfd) + 1;
 249 
 250         return num_of_elems;
 251 
 252 }
 253 
 254 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
 255                                         struct kfd_process *process)
 256 {
 257         return dev->doorbell_base +
 258                 process->doorbell_index * kfd_doorbell_process_slice(dev);
 259 }
 260 
 261 int kfd_alloc_process_doorbells(struct kfd_process *process)
 262 {
 263         int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
 264                                 GFP_KERNEL);
 265         if (r > 0)
 266                 process->doorbell_index = r;
 267 
 268         return r;
 269 }
 270 
 271 void kfd_free_process_doorbells(struct kfd_process *process)
 272 {
 273         if (process->doorbell_index)
 274                 ida_simple_remove(&doorbell_ida, process->doorbell_index);
 275 }

/* [<][>][^][v][top][bottom][index][help] */