root/drivers/gpu/drm/i915/gem/i915_gem_mman.c

DEFINITIONS

This source file includes the following definitions:
  1. __vma_matches
  2. i915_gem_mmap_ioctl
  3. tile_row_pages
  4. i915_gem_mmap_gtt_version
  5. compute_partial_view
  6. i915_gem_fault
  7. __i915_gem_object_release_mmap
  8. i915_gem_object_release_mmap
  9. create_mmap_offset
  10. i915_gem_mmap_gtt
  11. i915_gem_mmap_gtt_ioctl

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/mman.h>
#include <linux/sizes.h>

#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_trace.h"
#include "i915_vma.h"

static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
	      unsigned long addr, unsigned long size)
{
	if (vma->vm_file != filp)
		return false;

	return vma->vm_start == addr &&
	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *                       it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on; hiding
 * the mmap call in a driver-private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		addr = -ENXIO;
		goto err;
	}

	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
		addr = -EINVAL;
		goto err;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (IS_ERR_VALUE(addr))
		goto err;

	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			addr = -EINTR;
			goto err;
		}
		vma = find_vma(mm, addr);
		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);
		if (IS_ERR_VALUE(addr))
			goto err;
	}
	i915_gem_object_put(obj);

	args->addr_ptr = (u64)addr;
	return 0;

err:
	i915_gem_object_put(obj);
	return addr;
}

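/*
 * Illustrative userspace sketch (not part of this driver): a minimal
 * caller of the legacy CPU mmap ioctl above. The fd and handle are
 * assumed to come from opening the DRM node and a prior GEM object
 * creation; the helper name is hypothetical.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static void *legacy_cpu_mmap(int fd, uint32_t handle, uint64_t size)
 *	{
 *		struct drm_i915_gem_mmap arg = {
 *			.handle = handle,
 *			.offset = 0,	// map from the start of the object
 *			.size	= size,
 *			.flags	= 0,	// or I915_MMAP_WC on PAT-capable CPUs
 *		};
 *
 *		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
 *			return NULL;
 *		return (void *)(uintptr_t)arg.addr_ptr;
 *	}
 *
 * As the IMPORTANT note above says, new drivers should prefer the
 * mmap-offset flow (see i915_gem_mmap_gtt_ioctl at the bottom of this
 * file), which tooling such as valgrind can observe.
 */
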
static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
 *     pagefault; swapin remains transparent.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. Doing so can cause
 *    machine hangs on some architectures, corruption on others. An attempt
 *    to service a GTT page fault from a snoopable object will generate a
 *    SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require a fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 3;
}

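/*
 * Illustrative userspace sketch (not part of this driver): the version
 * reported above reaches userspace through the GETPARAM ioctl; the
 * helper name is hypothetical.
 *
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int gtt_mmap_version(int fd)
 *	{
 *		int value = -1;
 *		struct drm_i915_getparam gp = {
 *			.param = I915_PARAM_MMAP_GTT_VERSION,
 *			.value = &value,
 *		};
 *
 *		if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
 *			return -1;
 *		return value;	// 3 for kernels matching this file
 *	}
 */
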
static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

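/*
 * Worked example (hypothetical object): a linear 16MiB object is 4096
 * pages; a fault at page_offset 1000 with chunk = MIN_CHUNK_PAGES (256
 * pages with 4KiB pages) yields partial.offset = rounddown(1000, 256) =
 * 768 and partial.size = min(256, 4096 - 768) = 256. As 256 < 4096, the
 * view stays I915_GGTT_VIEW_PARTIAL; for tiled objects the chunk is
 * first rounded up to a whole number of tile rows so fencing still
 * lines up.
 */
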
/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
vm_fault_t i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *i915 = to_i915(dev);
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	struct i915_ggtt *ggtt = &i915->ggtt;
	bool write = area->vm_flags & VM_WRITE;
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	pgoff_t page_offset;
	int srcu;
	int ret;

	/* Sanity check that we allow writing into this object */
	if (i915_gem_object_is_readonly(obj) && write)
		return VM_FAULT_SIGBUS;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	wakeref = intel_runtime_pm_get(rpm);

	ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
	if (ret)
		goto err_rpm;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err_reset;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
		ret = -EFAULT;
		goto err_unlock;
	}

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONBLOCK /* NOWARN */ |
				       PIN_NOEVICT);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
		unsigned int flags;

		flags = PIN_MAPPABLE | PIN_NOSEARCH;
		if (view.type == I915_GGTT_VIEW_NORMAL)
			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */

		/*
		 * Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		if (IS_ERR(vma)) {
			flags = PIN_MAPPABLE;
			view.type = I915_GGTT_VIEW_PARTIAL;
			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		}
	}
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_vma_pin_fence(vma);
	if (ret)
		goto err_unpin;

	/* Finally, remap it using the new GTT offset */
	ret = remap_io_mapping(area,
			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
			       min_t(u64, vma->size, area->vm_end - area->vm_start),
			       &ggtt->iomap);
	if (ret)
		goto err_fence;

	assert_rpm_wakelock_held(rpm);

	/* Mark as being mmapped into userspace for later revocation */
	mutex_lock(&i915->ggtt.vm.mutex);
	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));

	if (write) {
		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
		i915_vma_set_ggtt_write(vma);
		obj->mm.dirty = true;
	}

err_fence:
	i915_vma_unpin_fence(vma);
err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_reset:
	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
err_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!intel_gt_is_wedged(ggtt->vm.gt))
			return VM_FAULT_SIGBUS;
		/* else, fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
	case -EFAULT:
	case -ENODEV: /* bad object, how did you get here! */
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
		return VM_FAULT_SIGBUS;
	}
}

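/*
 * Worked example (hypothetical values) for the remap_io_mapping() call
 * above: with a partial view at partial.offset = 768 pages and the vma
 * bound at node.start = 1MiB inside the aperture, user addresses from
 * area->vm_start + (768 << PAGE_SHIFT) are wired to the io pages
 * starting at pfn (ggtt->gmadr.start + 1MiB) >> PAGE_SHIFT, covering
 * min(vma->size, area->vm_end - area->vm_start) bytes.
 */
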
void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	GEM_BUG_ON(!obj->userfault_count);

	obj->userfault_count = 0;
	list_del(&obj->userfault_link);
	drm_vma_node_unmap(&obj->base.vma_node,
			   obj->base.dev->anon_inode->i_mapping);

	for_each_ggtt_vma(vma, obj)
		i915_vma_unset_userfault(vma);
}

/**
 * i915_gem_object_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly, if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	intel_wakeref_t wakeref;

	/* Serialisation between user GTT access and our code depends upon
	 * revoking the CPU's PTE whilst the mutex is held. The next user
	 * pagefault then has to wait until we release the mutex.
	 *
	 * Note that RPM complicates somewhat by adding an additional
	 * requirement that operations to the GGTT be made holding the RPM
	 * wakeref.
	 */
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	mutex_lock(&i915->ggtt.vm.mutex);

	if (!obj->userfault_count)
		goto out;

	__i915_gem_object_release_mmap(obj);

	/* Ensure that the CPU's PTE are revoked and there are no outstanding
	 * memory transactions from userspace before we return. The TLB
	 * flushing implied by changing the PTE above *should* be
	 * sufficient; an extra barrier here just provides us with a bit
	 * of paranoid documentation about our requirement to serialise
	 * memory writes before touching registers / GSM.
	 */
	wmb();

out:
	mutex_unlock(&i915->ggtt.vm.mutex);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static int create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int err;

	err = drm_gem_create_mmap_offset(&obj->base);
	if (likely(!err))
		return 0;

	/* Attempt to reap some mmap space from dead objects */
	do {
		err = i915_gem_wait_for_idle(i915,
					     I915_WAIT_INTERRUPTIBLE,
					     MAX_SCHEDULE_TIMEOUT);
		if (err)
			break;

		i915_gem_drain_freed_objects(i915);
		err = drm_gem_create_mmap_offset(&obj->base);
		if (!err)
			break;

	} while (flush_delayed_work(&i915->gem.retire_work));

	return err;
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  u32 handle,
		  u64 *offset)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_object_lookup(file, handle);
	if (!obj)
		return -ENOENT;

	if (i915_gem_object_never_bind_ggtt(obj)) {
		ret = -ENODEV;
		goto out;
	}

	ret = create_mmap_offset(obj);
	if (ret == 0)
		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);

out:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

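/*
 * Illustrative userspace sketch (not part of this driver): the complete
 * mmap-offset flow described in the comment above, and the recommended
 * replacement for the legacy CPU mmap ioctl at the top of this file.
 * The helper name is hypothetical.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <drm/i915_drm.h>
 *
 *	static void *gtt_mmap(int fd, uint32_t handle, size_t size)
 *	{
 *		struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *		// Ask the kernel for the fake offset ...
 *		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *			return MAP_FAILED;
 *
 *		// ... then mmap the DRM fd at that offset; accesses fault
 *		// through i915_gem_fault() above.
 *		return mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			    MAP_SHARED, fd, arg.offset);
 *	}
 */
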
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_mman.c"
#endif
