/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class		*class;
	struct list_head	iommu_drivers_list;
	struct mutex		iommu_drivers_lock;
	struct list_head	group_list;
	struct idr		group_idr;
	struct mutex		group_lock;
	struct cdev		group_cdev;
	dev_t			group_devt;
	wait_queue_head_t	release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
};

struct vfio_unbound_dev {
	struct device		*dev;
	struct list_head	unbound_next;
};

struct vfio_group {
	struct kref		kref;
	int			minor;
	atomic_t		container_users;
	struct iommu_group	*iommu_group;
	struct vfio_container	*container;
	struct list_head	device_list;
	struct mutex		device_lock;
	struct device		*dev;
	struct notifier_block	nb;
	struct list_head	vfio_next;
	struct list_head	container_next;
	struct list_head	unbound_list;
	struct mutex		unbound_lock;
	atomic_t		opened;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
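
/*
 * Example (not part of this file): a minimal sketch of how an IOMMU
 * backend module would plug into the registration interface above.
 * The "foo" names are hypothetical; in-tree backends such as
 * vfio_iommu_type1 additionally implement read/write/mmap as needed.
 *
 *	static const struct vfio_iommu_driver_ops foo_ops = {
 *		.name		= "foo",
 *		.owner		= THIS_MODULE,
 *		.open		= foo_open,
 *		.release	= foo_release,
 *		.ioctl		= foo_ioctl,
 *		.attach_group	= foo_attach_group,
 *		.detach_group	= foo_detach_group,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return vfio_register_iommu_driver(&foo_ops);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&foo_ops);
 *	}
 */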

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%d", iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	struct iommu_group *iommu_group = group->iommu_group;

	WARN_ON(!list_empty(&group->device_list));

	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep for mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	int i;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = ACCESS_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed.  Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group.  A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference.  The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	device = vfio_group_get_device(group, dev);
	if (!device)
		return false;

	vfio_device_put(device);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	long ret;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group.  This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference.  To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver.  The driver may in turn need to request the
	 * device from the user.  We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			ret = wait_event_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
		} else {
			ret = wait_event_interruptible_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
			if (ret == -ERESTARTSYS) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	} while (ret <= 0);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
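
/*
 * Example (not part of this file): a hedged sketch of how a vfio bus
 * driver is expected to use the driver API above.  The device is
 * registered from probe() and its private data is retrieved in
 * remove(); vfio_del_group_dev() blocks until all open file
 * descriptors for the device have been released.  The "foo" names
 * below are hypothetical; vfio-pci is the in-tree reference.
 *
 *	static int foo_probe(struct pci_dev *pdev,
 *			     const struct pci_device_id *id)
 *	{
 *		struct foo_device *fdev;
 *		int ret;
 *
 *		fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
 *		if (!fdev)
 *			return -ENOMEM;
 *
 *		ret = vfio_add_group_dev(&pdev->dev, &foo_vfio_ops, fdev);
 *		if (ret)
 *			kfree(fdev);
 *		return ret;
 *	}
 *
 *	static void foo_remove(struct pci_dev *pdev)
 *	{
 *		struct foo_device *fdev = vfio_del_group_dev(&pdev->dev);
 *
 *		kfree(fdev);
 *	}
 */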

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		/* module reference holds the driver we're working on */
		mutex_unlock(&vfio.iommu_drivers_lock);

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			goto skip_drivers_unlock;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (!ret) {
			container->iommu_driver = driver;
			container->iommu_data = data;
		} else {
			driver->ops->release(data);
			module_put(driver->ops->owner);
		}

		goto skip_drivers_unlock;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
skip_drivers_unlock:
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	up_read(&container->group_lock);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_group_fops_compat_ioctl,
#endif
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};
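
/*
 * Example (not part of this file): a sketch of the userspace sequence
 * that the container, group and device file operations above implement,
 * loosely following Documentation/vfio.txt.  The group number and
 * device name are made up and error handling is omitted for brevity.
 *
 *	int container, group, device;
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return -1;	// unknown API version
 *
 *	group = open("/dev/vfio/26", O_RDWR);
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		return -1;	// not all devices bound to vfio/stub drivers
 *
 *	// attach the group to the container, then enable an IOMMU model
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 *	// finally get a device fd by name within the group
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */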

/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. do normal VFIO init operation:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- IOMMU is set for it.
 * If both checks passed, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from disposal before KVM exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to know an IOMMU ID.
 *
 * 4. When the external KVM finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	if (!atomic_inc_not_zero(&group->container_users))
		return ERR_PTR(-EINVAL);

	if (!group->container->iommu_driver ||
	    !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EINVAL);
	}

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_put(group);
	vfio_group_try_dissolve_container(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
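
/*
 * Example (not part of this file): a hedged sketch of an external user
 * (KVM-style) consuming the API above from another kernel module.  The
 * group file arrives from userspace as an fd; everything other than the
 * vfio_* exports below is hypothetical.
 *
 *	struct vfio_group *grp;
 *	int iommu_id;
 *
 *	grp = vfio_group_get_external_user(group_file);
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);
 *
 *	iommu_id = vfio_external_user_iommu_id(grp);
 *	// ... associate the group/iommu_id with the external driver ...
 *
 *	// when done, drop the container user and group references
 *	vfio_group_put_external_user(grp);
 */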

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	/*
	 * Attempt to load known iommu-drivers.  This gives us a working
	 * environment without the user needing to explicitly load iommu
	 * drivers.
	 */
	request_module_nowait("vfio_iommu_type1");
	request_module_nowait("vfio_iommu_spapr_tce");

	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");