drivers/dax/device.c


DEFINITIONS

This source file includes the following definitions.
  1. check_vma
  2. dax_pgoff_to_phys
  3. __dev_dax_pte_fault
  4. __dev_dax_pmd_fault
  5. __dev_dax_pud_fault (CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
  6. __dev_dax_pud_fault (fallback stub when PUD huge pages are unsupported)
  7. dev_dax_huge_fault
  8. dev_dax_fault
  9. dev_dax_split
  10. dev_dax_pagesize
  11. dax_mmap
  12. dax_get_unmapped_area
  13. dax_open
  14. dax_release
  15. dev_dax_cdev_del
  16. dev_dax_kill
  17. dev_dax_probe
  18. dev_dax_remove
  19. dax_init
  20. dax_exit
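
A brief usage sketch, for context: the operations below implement the open/mmap/fault path of a device-dax character device. The fragment that follows is illustrative only; the device node /dev/dax0.0 and the 2 MiB alignment are assumptions, not values taken from this file. The mapping must be shared and its offset and length multiples of the region alignment, or check_vma() will refuse it; MAP_SYNC is also accepted (see dax_fops.mmap_supported_flags).

      /* Illustrative only: device path and alignment are assumptions. */
      #include <fcntl.h>
      #include <stdio.h>
      #include <string.h>
      #include <sys/mman.h>
      #include <unistd.h>

      #define DAX_ALIGN (2UL << 20)   /* assumed region alignment: 2 MiB */

      int main(void)
      {
              size_t len = 2 * DAX_ALIGN;             /* multiple of the alignment */
              int fd = open("/dev/dax0.0", O_RDWR);   /* hypothetical device node */
              if (fd < 0) {
                      perror("open");
                      return 1;
              }

              /* Private mappings are rejected by check_vma(); use MAP_SHARED. */
              void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
              if (p == MAP_FAILED) {
                      perror("mmap");
                      close(fd);
                      return 1;
              }

              memset(p, 0, len);      /* faults are served by dev_dax_huge_fault() */
              munmap(p, len);
              close(fd);
              return 0;
      }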

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */
   3 #include <linux/memremap.h>
   4 #include <linux/pagemap.h>
   5 #include <linux/module.h>
   6 #include <linux/device.h>
   7 #include <linux/pfn_t.h>
   8 #include <linux/cdev.h>
   9 #include <linux/slab.h>
  10 #include <linux/dax.h>
  11 #include <linux/fs.h>
  12 #include <linux/mm.h>
  13 #include <linux/mman.h>
  14 #include "dax-private.h"
  15 #include "bus.h"
  16 
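      /*
       * check_vma() - validate that a VMA is suitable for device-dax: the
       * backing dax_device must still be alive, the mapping must be shared
       * (VM_MAYSHARE), vm_start/vm_end must be aligned to the region
       * alignment, regions without struct pages (PFN_DEV without PFN_MAP)
       * require VM_DONTCOPY, and the VMA must be DAX-capable.
       */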
  17 static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
  18                 const char *func)
  19 {
  20         struct dax_region *dax_region = dev_dax->region;
  21         struct device *dev = &dev_dax->dev;
  22         unsigned long mask;
  23 
  24         if (!dax_alive(dev_dax->dax_dev))
  25                 return -ENXIO;
  26 
  27         /* prevent private mappings from being established */
  28         if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
  29                 dev_info_ratelimited(dev,
  30                                 "%s: %s: fail, attempted private mapping\n",
  31                                 current->comm, func);
  32                 return -EINVAL;
  33         }
  34 
  35         mask = dax_region->align - 1;
  36         if (vma->vm_start & mask || vma->vm_end & mask) {
  37                 dev_info_ratelimited(dev,
  38                                 "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
  39                                 current->comm, func, vma->vm_start, vma->vm_end,
  40                                 mask);
  41                 return -EINVAL;
  42         }
  43 
  44         if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
  45                         && (vma->vm_flags & VM_DONTCOPY) == 0) {
  46                 dev_info_ratelimited(dev,
  47                                 "%s: %s: fail, dax range requires MADV_DONTFORK\n",
  48                                 current->comm, func);
  49                 return -EINVAL;
  50         }
  51 
  52         if (!vma_is_dax(vma)) {
  53                 dev_info_ratelimited(dev,
  54                                 "%s: %s: fail, vma is not DAX capable\n",
  55                                 current->comm, func);
  56                 return -EINVAL;
  57         }
  58 
  59         return 0;
  60 }
  61 
  62 /* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */
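      /*
       * Translate a linear page offset into a physical address within the
       * region's resource; returns -1 if the requested range falls outside
       * the resource.
       */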
  63 __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
  64                 unsigned long size)
  65 {
  66         struct resource *res = &dev_dax->region->res;
  67         phys_addr_t phys;
  68 
  69         phys = pgoff * PAGE_SIZE + res->start;
  70         if (phys >= res->start && phys <= res->end) {
  71                 if (phys + size - 1 <= res->end)
  72                         return phys;
  73         }
  74 
  75         return -1;
  76 }
  77 
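      /*
       * Handle a single-page (PTE) fault: reject regions whose alignment
       * exceeds PAGE_SIZE, translate the faulting offset to a physical
       * address, and install a PAGE_SIZE mapping with vmf_insert_mixed().
       */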
  78 static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
  79                                 struct vm_fault *vmf, pfn_t *pfn)
  80 {
  81         struct device *dev = &dev_dax->dev;
  82         struct dax_region *dax_region;
  83         phys_addr_t phys;
  84         unsigned int fault_size = PAGE_SIZE;
  85 
  86         if (check_vma(dev_dax, vmf->vma, __func__))
  87                 return VM_FAULT_SIGBUS;
  88 
  89         dax_region = dev_dax->region;
  90         if (dax_region->align > PAGE_SIZE) {
  91                 dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
  92                         dax_region->align, fault_size);
  93                 return VM_FAULT_SIGBUS;
  94         }
  95 
  96         if (fault_size != dax_region->align)
  97                 return VM_FAULT_SIGBUS;
  98 
  99         phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
 100         if (phys == -1) {
 101                 dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
 102                 return VM_FAULT_SIGBUS;
 103         }
 104 
 105         *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 106 
 107         return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
 108 }
 109 
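      /*
       * Handle a PMD-sized fault: the region must provide devmap pages
       * (PFN_DEV|PFN_MAP) and the PMD-aligned range must fit inside the
       * VMA; regions with an alignment smaller than PMD_SIZE fall back to
       * PTE faults. The mapping is installed with vmf_insert_pfn_pmd().
       */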
 110 static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 111                                 struct vm_fault *vmf, pfn_t *pfn)
 112 {
 113         unsigned long pmd_addr = vmf->address & PMD_MASK;
 114         struct device *dev = &dev_dax->dev;
 115         struct dax_region *dax_region;
 116         phys_addr_t phys;
 117         pgoff_t pgoff;
 118         unsigned int fault_size = PMD_SIZE;
 119 
 120         if (check_vma(dev_dax, vmf->vma, __func__))
 121                 return VM_FAULT_SIGBUS;
 122 
 123         dax_region = dev_dax->region;
 124         if (dax_region->align > PMD_SIZE) {
 125                 dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
 126                         dax_region->align, fault_size);
 127                 return VM_FAULT_SIGBUS;
 128         }
 129 
 130         /* dax pmd mappings require pfn_t_devmap() */
 131         if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
 132                 dev_dbg(dev, "region lacks devmap flags\n");
 133                 return VM_FAULT_SIGBUS;
 134         }
 135 
 136         if (fault_size < dax_region->align)
 137                 return VM_FAULT_SIGBUS;
 138         else if (fault_size > dax_region->align)
 139                 return VM_FAULT_FALLBACK;
 140 
 141         /* if we are outside of the VMA */
 142         if (pmd_addr < vmf->vma->vm_start ||
 143                         (pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
 144                 return VM_FAULT_SIGBUS;
 145 
 146         pgoff = linear_page_index(vmf->vma, pmd_addr);
 147         phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
 148         if (phys == -1) {
 149                 dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 150                 return VM_FAULT_SIGBUS;
 151         }
 152 
 153         *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 154 
 155         return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
 156 }
 157 
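      /*
       * PUD-sized faults mirror the PMD path when the architecture
       * supports transparent huge PUD mappings; otherwise the stub in the
       * #else branch simply asks the core to fall back to smaller
       * mappings.
       */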
 158 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 159 static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 160                                 struct vm_fault *vmf, pfn_t *pfn)
 161 {
 162         unsigned long pud_addr = vmf->address & PUD_MASK;
 163         struct device *dev = &dev_dax->dev;
 164         struct dax_region *dax_region;
 165         phys_addr_t phys;
 166         pgoff_t pgoff;
 167         unsigned int fault_size = PUD_SIZE;
 168 
 169 
 170         if (check_vma(dev_dax, vmf->vma, __func__))
 171                 return VM_FAULT_SIGBUS;
 172 
 173         dax_region = dev_dax->region;
 174         if (dax_region->align > PUD_SIZE) {
 175                 dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
 176                         dax_region->align, fault_size);
 177                 return VM_FAULT_SIGBUS;
 178         }
 179 
 180         /* dax pud mappings require pfn_t_devmap() */
 181         if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
 182                 dev_dbg(dev, "region lacks devmap flags\n");
 183                 return VM_FAULT_SIGBUS;
 184         }
 185 
 186         if (fault_size < dax_region->align)
 187                 return VM_FAULT_SIGBUS;
 188         else if (fault_size > dax_region->align)
 189                 return VM_FAULT_FALLBACK;
 190 
 191         /* if we are outside of the VMA */
 192         if (pud_addr < vmf->vma->vm_start ||
 193                         (pud_addr + PUD_SIZE) > vmf->vma->vm_end)
 194                 return VM_FAULT_SIGBUS;
 195 
 196         pgoff = linear_page_index(vmf->vma, pud_addr);
 197         phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
 198         if (phys == -1) {
 199                 dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 200                 return VM_FAULT_SIGBUS;
 201         }
 202 
 203         *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 204 
 205         return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
 206 }
 207 #else
 208 static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 209                                 struct vm_fault *vmf, pfn_t *pfn)
 210 {
 211         return VM_FAULT_FALLBACK;
 212 }
 213 #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 214 
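      /*
       * Common fault entry point: dispatch on the requested entry size
       * under dax_read_lock(), and on VM_FAULT_NOPAGE record
       * filp->f_mapping and the page offset in each struct page backing
       * the newly installed mapping.
       */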
 215 static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 216                 enum page_entry_size pe_size)
 217 {
 218         struct file *filp = vmf->vma->vm_file;
 219         unsigned long fault_size;
 220         vm_fault_t rc = VM_FAULT_SIGBUS;
 221         int id;
 222         pfn_t pfn;
 223         struct dev_dax *dev_dax = filp->private_data;
 224 
 225         dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
 226                         (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
 227                         vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 228 
 229         id = dax_read_lock();
 230         switch (pe_size) {
 231         case PE_SIZE_PTE:
 232                 fault_size = PAGE_SIZE;
 233                 rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
 234                 break;
 235         case PE_SIZE_PMD:
 236                 fault_size = PMD_SIZE;
 237                 rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
 238                 break;
 239         case PE_SIZE_PUD:
 240                 fault_size = PUD_SIZE;
 241                 rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
 242                 break;
 243         default:
 244                 rc = VM_FAULT_SIGBUS;
 245         }
 246 
 247         if (rc == VM_FAULT_NOPAGE) {
 248                 unsigned long i;
 249                 pgoff_t pgoff;
 250 
 251                 /*
 252                  * In the device-dax case the only possibility for a
 253                  * VM_FAULT_NOPAGE result is when device-dax capacity is
 254                  * mapped. No need to consider the zero page, or racing
 255                  * conflicting mappings.
 256                  */
 257                 pgoff = linear_page_index(vmf->vma, vmf->address
 258                                 & ~(fault_size - 1));
 259                 for (i = 0; i < fault_size / PAGE_SIZE; i++) {
 260                         struct page *page;
 261 
 262                         page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
 263                         if (page->mapping)
 264                                 continue;
 265                         page->mapping = filp->f_mapping;
 266                         page->index = pgoff + i;
 267                 }
 268         }
 269         dax_read_unlock(id);
 270 
 271         return rc;
 272 }
 273 
 274 static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
 275 {
 276         return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
 277 }
 278 
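      /*
       * dev_dax_split() refuses VMA splits at addresses that are not
       * aligned to the region alignment; dev_dax_pagesize() reports that
       * alignment as the effective page size of the mapping.
       */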
 279 static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
 280 {
 281         struct file *filp = vma->vm_file;
 282         struct dev_dax *dev_dax = filp->private_data;
 283         struct dax_region *dax_region = dev_dax->region;
 284 
 285         if (!IS_ALIGNED(addr, dax_region->align))
 286                 return -EINVAL;
 287         return 0;
 288 }
 289 
 290 static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
 291 {
 292         struct file *filp = vma->vm_file;
 293         struct dev_dax *dev_dax = filp->private_data;
 294         struct dax_region *dax_region = dev_dax->region;
 295 
 296         return dax_region->align;
 297 }
 298 
 299 static const struct vm_operations_struct dax_vm_ops = {
 300         .fault = dev_dax_fault,
 301         .huge_fault = dev_dax_huge_fault,
 302         .split = dev_dax_split,
 303         .pagesize = dev_dax_pagesize,
 304 };
 305 
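      /*
       * mmap entry point: validate the VMA under dax_read_lock() and, on
       * success, install dax_vm_ops and hint VM_HUGEPAGE.
       */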
 306 static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 307 {
 308         struct dev_dax *dev_dax = filp->private_data;
 309         int rc, id;
 310 
 311         dev_dbg(&dev_dax->dev, "trace\n");
 312 
 313         /*
 314          * We lock to check dax_dev liveness and will re-check at
 315          * fault time.
 316          */
 317         id = dax_read_lock();
 318         rc = check_vma(dev_dax, vma, __func__);
 319         dax_read_unlock(id);
 320         if (rc)
 321                 return rc;
 322 
 323         vma->vm_ops = &dax_vm_ops;
 324         vma->vm_flags |= VM_HUGEPAGE;
 325         return 0;
 326 }
 327 
 328 /* return an unmapped area aligned to the dax region specified alignment */
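      /*
       * Strategy: request len + align bytes, then advance the returned
       * address so it is congruent with the file offset modulo the
       * alignment, which lets huge-page faults line up. An illustrative
       * case (assuming align = 2 MiB, off = 0): a returned address of
       * 0x7f1234100000 is bumped by (0 - addr) & (align - 1) = 0x100000
       * to 0x7f1234200000, a 2 MiB-aligned address.
       */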
 329 static unsigned long dax_get_unmapped_area(struct file *filp,
 330                 unsigned long addr, unsigned long len, unsigned long pgoff,
 331                 unsigned long flags)
 332 {
 333         unsigned long off, off_end, off_align, len_align, addr_align, align;
 334         struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
 335         struct dax_region *dax_region;
 336 
 337         if (!dev_dax || addr)
 338                 goto out;
 339 
 340         dax_region = dev_dax->region;
 341         align = dax_region->align;
 342         off = pgoff << PAGE_SHIFT;
 343         off_end = off + len;
 344         off_align = round_up(off, align);
 345 
 346         if ((off_end <= off_align) || ((off_end - off_align) < align))
 347                 goto out;
 348 
 349         len_align = len + align;
 350         if ((off + len_align) < off)
 351                 goto out;
 352 
 353         addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
 354                         pgoff, flags);
 355         if (!IS_ERR_VALUE(addr_align)) {
 356                 addr_align += (off - addr_align) & (align - 1);
 357                 return addr_align;
 358         }
 359  out:
 360         return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
 361 }
 362 
 363 static const struct address_space_operations dev_dax_aops = {
 364         .set_page_dirty         = noop_set_page_dirty,
 365         .invalidatepage         = noop_invalidatepage,
 366 };
 367 
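      /*
       * open(): alias this inode's mapping to the dax_device inode so all
       * openers share one address_space, point it at dev_dax_aops, mark
       * the inode S_DAX, and stash the dev_dax in file->private_data for
       * the mmap and fault paths above.
       */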
 368 static int dax_open(struct inode *inode, struct file *filp)
 369 {
 370         struct dax_device *dax_dev = inode_dax(inode);
 371         struct inode *__dax_inode = dax_inode(dax_dev);
 372         struct dev_dax *dev_dax = dax_get_private(dax_dev);
 373 
 374         dev_dbg(&dev_dax->dev, "trace\n");
 375         inode->i_mapping = __dax_inode->i_mapping;
 376         inode->i_mapping->host = __dax_inode;
 377         inode->i_mapping->a_ops = &dev_dax_aops;
 378         filp->f_mapping = inode->i_mapping;
 379         filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
 380         filp->private_data = dev_dax;
 381         inode->i_flags = S_DAX;
 382 
 383         return 0;
 384 }
 385 
 386 static int dax_release(struct inode *inode, struct file *filp)
 387 {
 388         struct dev_dax *dev_dax = filp->private_data;
 389 
 390         dev_dbg(&dev_dax->dev, "trace\n");
 391         return 0;
 392 }
 393 
 394 static const struct file_operations dax_fops = {
 395         .llseek = noop_llseek,
 396         .owner = THIS_MODULE,
 397         .open = dax_open,
 398         .release = dax_release,
 399         .get_unmapped_area = dax_get_unmapped_area,
 400         .mmap = dax_mmap,
 401         .mmap_supported_flags = MAP_SYNC,
 402 };
 403 
 404 static void dev_dax_cdev_del(void *cdev)
 405 {
 406         cdev_del(cdev);
 407 }
 408 
 409 static void dev_dax_kill(void *dev_dax)
 410 {
 411         kill_dev_dax(dev_dax);
 412 }
 413 
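      /*
       * Probe: reserve the region's physical range, create struct pages
       * for it with devm_memremap_pages(), initialize and add the cdev
       * associated with the dax_device inode, and register devm actions
       * so cdev_del() and kill_dev_dax() run automatically on unbind.
       */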
 414 int dev_dax_probe(struct device *dev)
 415 {
 416         struct dev_dax *dev_dax = to_dev_dax(dev);
 417         struct dax_device *dax_dev = dev_dax->dax_dev;
 418         struct resource *res = &dev_dax->region->res;
 419         struct inode *inode;
 420         struct cdev *cdev;
 421         void *addr;
 422         int rc;
 423 
 424         /* 1:1 map region resource range to device-dax instance range */
 425         if (!devm_request_mem_region(dev, res->start, resource_size(res),
 426                                 dev_name(dev))) {
 427                 dev_warn(dev, "could not reserve region %pR\n", res);
 428                 return -EBUSY;
 429         }
 430 
 431         dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
 432         addr = devm_memremap_pages(dev, &dev_dax->pgmap);
 433         if (IS_ERR(addr))
 434                 return PTR_ERR(addr);
 435 
 436         inode = dax_inode(dax_dev);
 437         cdev = inode->i_cdev;
 438         cdev_init(cdev, &dax_fops);
 439         if (dev->class) {
 440                 /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
 441                 cdev->owner = dev->parent->driver->owner;
 442         } else
 443                 cdev->owner = dev->driver->owner;
 444         cdev_set_parent(cdev, &dev->kobj);
 445         rc = cdev_add(cdev, dev->devt, 1);
 446         if (rc)
 447                 return rc;
 448 
 449         rc = devm_add_action_or_reset(dev, dev_dax_cdev_del, cdev);
 450         if (rc)
 451                 return rc;
 452 
 453         run_dax(dax_dev);
 454         return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax);
 455 }
 456 EXPORT_SYMBOL_GPL(dev_dax_probe);
 457 
 458 static int dev_dax_remove(struct device *dev)
 459 {
 460         /* all probe actions are unwound by devm */
 461         return 0;
 462 }
 463 
 464 static struct dax_device_driver device_dax_driver = {
 465         .drv = {
 466                 .probe = dev_dax_probe,
 467                 .remove = dev_dax_remove,
 468         },
 469         .match_always = 1,
 470 };
 471 
 472 static int __init dax_init(void)
 473 {
 474         return dax_driver_register(&device_dax_driver);
 475 }
 476 
 477 static void __exit dax_exit(void)
 478 {
 479         dax_driver_unregister(&device_dax_driver);
 480 }
 481 
 482 MODULE_AUTHOR("Intel Corporation");
 483 MODULE_LICENSE("GPL v2");
 484 module_init(dax_init);
 485 module_exit(dax_exit);
 486 MODULE_ALIAS_DAX_DEVICE(0);
