/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"

struct kvm_assigned_dev_kernel {
        struct kvm_irq_ack_notifier ack_notifier;
        struct list_head list;
        int assigned_dev_id;
        int host_segnr;
        int host_busnr;
        int host_devfn;
        unsigned int entries_nr;
        int host_irq;
        bool host_irq_disabled;
        bool pci_2_3;
        struct msix_entry *host_msix_entries;
        int guest_irq;
        struct msix_entry *guest_msix_entries;
        unsigned long irq_requested_type;
        int irq_source_id;
        int flags;
        struct pci_dev *dev;
        struct kvm *kvm;
        spinlock_t intx_lock;
        spinlock_t intx_mask_lock;
        char irq_name[32];
        struct pci_saved_state *pci_saved_state;
};

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                             int assigned_dev_id)
{
        struct list_head *ptr;
        struct kvm_assigned_dev_kernel *match;

        list_for_each(ptr, head) {
                match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
                if (match->assigned_dev_id == assigned_dev_id)
                        return match;
        }
        return NULL;
}

static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
                                    *assigned_dev, int irq)
{
        int i, index;
        struct msix_entry *host_msix_entries;

        host_msix_entries = assigned_dev->host_msix_entries;

        index = -1;
        for (i = 0; i < assigned_dev->entries_nr; i++)
                if (irq == host_msix_entries[i].vector) {
                        index = i;
                        break;
                }
        if (index < 0)
                printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");

        return index;
}

static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret;

        spin_lock(&assigned_dev->intx_lock);
        if (pci_check_and_mask_intx(assigned_dev->dev)) {
                assigned_dev->host_irq_disabled = true;
                ret = IRQ_WAKE_THREAD;
        } else
                ret = IRQ_NONE;
        spin_unlock(&assigned_dev->intx_lock);

        return ret;
}

static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
                                 int vector)
{
        if (unlikely(assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_GUEST_INTX)) {
                spin_lock(&assigned_dev->intx_mask_lock);
                if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
                        kvm_set_irq(assigned_dev->kvm,
                                    assigned_dev->irq_source_id, vector, 1,
                                    false);
                spin_unlock(&assigned_dev->intx_mask_lock);
        } else
                kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                            vector, 1, false);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                spin_lock_irq(&assigned_dev->intx_lock);
                disable_irq_nosync(irq);
                assigned_dev->host_irq_disabled = true;
                spin_unlock_irq(&assigned_dev->intx_lock);
        }

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}
#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
                                       assigned_dev->irq_source_id,
                                       assigned_dev->guest_irq, 1);
        return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;
        int ret = 0;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                ret = kvm_set_irq_inatomic(assigned_dev->kvm,
                                           assigned_dev->irq_source_id,
                                           vector, 1);
        }

        return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
        }

        return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_assigned_dev_kernel *dev =
                container_of(kian, struct kvm_assigned_dev_kernel,
                             ack_notifier);

        kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

        spin_lock(&dev->intx_mask_lock);

        if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
                bool reassert = false;

                spin_lock_irq(&dev->intx_lock);
                /*
                 * The guest IRQ may be shared so this ack can come from an
                 * IRQ for another guest device.
                 */
                if (dev->host_irq_disabled) {
                        if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
                                enable_irq(dev->host_irq);
                        else if (!pci_check_and_unmask_intx(dev->dev))
                                reassert = true;
                        dev->host_irq_disabled = reassert;
                }
                spin_unlock_irq(&dev->intx_lock);

                if (reassert)
                        kvm_set_irq(dev->kvm, dev->irq_source_id,
                                    dev->guest_irq, 1, false);
        }

        spin_unlock(&dev->intx_mask_lock);
}

static void deassign_guest_irq(struct kvm *kvm,
                               struct kvm_assigned_dev_kernel *assigned_dev)
{
        if (assigned_dev->ack_notifier.gsi != -1)
                kvm_unregister_irq_ack_notifier(kvm,
                                                &assigned_dev->ack_notifier);

        kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 0, false);

        if (assigned_dev->irq_source_id != -1)
                kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
        assigned_dev->irq_source_id = -1;
        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
                              struct kvm_assigned_dev_kernel *assigned_dev)
{
        /*
         * We disable the irq here to prevent further events.
         *
         * Note that this may result in a nested disable if the interrupt type
         * is INTx, but that is fine since we are going to free it anyway.
         *
         * If this function is called as part of VM destruction, make sure the
         * kvm state is still valid at this point, since we may also have to
         * wait for a currently running IRQ handler.
         */
        if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
                int i;
                for (i = 0; i < assigned_dev->entries_nr; i++)
                        disable_irq(assigned_dev->host_msix_entries[i].vector);

                for (i = 0; i < assigned_dev->entries_nr; i++)
                        free_irq(assigned_dev->host_msix_entries[i].vector,
                                 assigned_dev);

                assigned_dev->entries_nr = 0;
                kfree(assigned_dev->host_msix_entries);
                kfree(assigned_dev->guest_msix_entries);
                pci_disable_msix(assigned_dev->dev);
        } else {
                /* Deal with MSI and INTx */
                if ((assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_HOST_INTX) &&
                    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        spin_lock_irq(&assigned_dev->intx_lock);
                        pci_intx(assigned_dev->dev, false);
                        spin_unlock_irq(&assigned_dev->intx_lock);
                        synchronize_irq(assigned_dev->host_irq);
                } else
                        disable_irq(assigned_dev->host_irq);

                free_irq(assigned_dev->host_irq, assigned_dev);

                if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
                        pci_disable_msi(assigned_dev->dev);
        }

        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *assigned_dev,
                            unsigned long irq_requested_type)
{
        unsigned long guest_irq_type, host_irq_type;

        if (!irqchip_in_kernel(kvm))
                return -EINVAL;
        /* no irq assignment to deassign */
        if (!assigned_dev->irq_requested_type)
                return -ENXIO;

        host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
        guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

        if (host_irq_type)
                deassign_host_irq(kvm, assigned_dev);
        if (guest_irq_type)
                deassign_guest_irq(kvm, assigned_dev);

        return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
                                  struct kvm_assigned_dev_kernel *assigned_dev)
{
        kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
                                     struct kvm_assigned_dev_kernel
                                     *assigned_dev)
{
        kvm_free_assigned_irq(kvm, assigned_dev);

        pci_reset_function(assigned_dev->dev);
        if (pci_load_and_free_saved_state(assigned_dev->dev,
                                          &assigned_dev->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&assigned_dev->dev->dev));
        else
                pci_restore_state(assigned_dev->dev);

        pci_clear_dev_assigned(assigned_dev->dev);

        pci_release_regions(assigned_dev->dev);
        pci_disable_device(assigned_dev->dev);
        pci_dev_put(assigned_dev->dev);

        list_del(&assigned_dev->list);
        kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
        struct list_head *ptr, *ptr2;
        struct kvm_assigned_dev_kernel *assigned_dev;

        list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
                assigned_dev = list_entry(ptr,
                                          struct kvm_assigned_dev_kernel,
                                          list);

                kvm_free_assigned_device(kvm, assigned_dev);
        }
}

static int assigned_device_enable_host_intx(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        irq_handler_t irq_handler;
        unsigned long flags;

        dev->host_irq = dev->dev->irq;

        /*
         * We can only share the IRQ line with other host devices if we are
         * able to disable the IRQ source at device-level - independently of
         * the guest driver. Otherwise host devices may suffer from unbounded
         * IRQ latencies when the guest keeps the line asserted.
         */
        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                irq_handler = kvm_assigned_dev_intx;
                flags = IRQF_SHARED;
        } else {
                irq_handler = NULL;
                flags = IRQF_ONESHOT;
        }
        if (request_threaded_irq(dev->host_irq, irq_handler,
                                 kvm_assigned_dev_thread_intx, flags,
                                 dev->irq_name, dev))
                return -EIO;

        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                spin_lock_irq(&dev->intx_lock);
                pci_intx(dev->dev, true);
                spin_unlock_irq(&dev->intx_lock);
        }
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
{
        int r;

        if (!dev->dev->msi_enabled) {
                r = pci_enable_msi(dev->dev);
                if (r)
                        return r;
        }

        dev->host_irq = dev->dev->irq;
        if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
                                 kvm_assigned_dev_thread_msi, 0,
                                 dev->irq_name, dev)) {
                pci_disable_msi(dev->dev);
                return -EIO;
        }

        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        int i, r = -EINVAL;

        /* host_msix_entries and guest_msix_entries should have been
         * initialized */
        if (dev->entries_nr == 0)
                return r;

        r = pci_enable_msix_exact(dev->dev,
                                  dev->host_msix_entries, dev->entries_nr);
        if (r)
                return r;

        for (i = 0; i < dev->entries_nr; i++) {
                r = request_threaded_irq(dev->host_msix_entries[i].vector,
                                         kvm_assigned_dev_msix,
                                         kvm_assigned_dev_thread_msix,
                                         0, dev->irq_name, dev);
                if (r)
                        goto err;
        }

        return 0;
err:
        for (i -= 1; i >= 0; i--)
                free_irq(dev->host_msix_entries[i].vector, dev);
        pci_disable_msix(dev->dev);
        return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
                                struct kvm_assigned_dev_kernel *dev,
                                struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = irq->guest_irq;
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
                        struct kvm_assigned_dev_kernel *dev,
                        struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
                        struct kvm_assigned_dev_kernel *dev,
                        struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

static int assign_host_irq(struct kvm *kvm,
                           struct kvm_assigned_dev_kernel *dev,
                           __u32 host_irq_type)
{
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
                return r;

        snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
                 pci_name(dev->dev));

        switch (host_irq_type) {
        case KVM_DEV_IRQ_HOST_INTX:
                r = assigned_device_enable_host_intx(kvm, dev);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_HOST_MSI:
                r = assigned_device_enable_host_msi(kvm, dev);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_HOST_MSIX:
                r = assigned_device_enable_host_msix(kvm, dev);
                break;
#endif
        default:
                r = -EINVAL;
        }
        dev->host_irq_disabled = false;

        if (!r)
                dev->irq_requested_type |= host_irq_type;

        return r;
}
static int assign_guest_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *dev,
                            struct kvm_assigned_irq *irq,
                            unsigned long guest_irq_type)
{
        int id;
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
                return r;

        id = kvm_request_irq_source_id(kvm);
        if (id < 0)
                return id;

        dev->irq_source_id = id;

        switch (guest_irq_type) {
        case KVM_DEV_IRQ_GUEST_INTX:
                r = assigned_device_enable_guest_intx(kvm, dev, irq);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_GUEST_MSI:
                r = assigned_device_enable_guest_msi(kvm, dev, irq);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_GUEST_MSIX:
                r = assigned_device_enable_guest_msix(kvm, dev, irq);
                break;
#endif
        default:
                r = -EINVAL;
        }

        if (!r) {
                dev->irq_requested_type |= guest_irq_type;
                if (dev->ack_notifier.gsi != -1)
                        kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
        } else {
                kvm_free_irq_source_id(kvm, dev->irq_source_id);
                dev->irq_source_id = -1;
        }

        return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                                   struct kvm_assigned_irq *assigned_irq)
{
        int r = -EINVAL;
        struct kvm_assigned_dev_kernel *match;
        unsigned long host_irq_type, guest_irq_type;

        if (!irqchip_in_kernel(kvm))
                return r;

        mutex_lock(&kvm->lock);
        r = -ENODEV;
        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
        guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

        r = -EINVAL;
        /* can only assign one type at a time */
        if (hweight_long(host_irq_type) > 1)
                goto out;
        if (hweight_long(guest_irq_type) > 1)
                goto out;
        if (host_irq_type == 0 && guest_irq_type == 0)
                goto out;

        r = 0;
        if (host_irq_type)
                r = assign_host_irq(kvm, match, host_irq_type);
        if (r)
                goto out;

        if (guest_irq_type)
                r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
                                         struct kvm_assigned_irq
                                         *assigned_irq)
{
        int r = -ENODEV;
        struct kvm_assigned_dev_kernel *match;
        unsigned long irq_type;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
                                          KVM_DEV_IRQ_GUEST_MASK);
        r = kvm_deassign_irq(kvm, match, irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device.  To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs.  PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file.  We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
        int i;
        bool bar_found = false;

        for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
                char *kpath, *syspath;
                struct path path;
                struct inode *inode;
                int r;

                if (!pci_resource_len(dev, i))
                        continue;

                kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
                if (!kpath)
                        return -ENOMEM;

                /* Per sysfs-rules, sysfs is always at /sys */
                syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
                kfree(kpath);
                if (!syspath)
                        return -ENOMEM;

                r = kern_path(syspath, LOOKUP_FOLLOW, &path);
                kfree(syspath);
                if (r)
                        return r;

                inode = d_backing_inode(path.dentry);

                r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                path_put(&path);
                if (r)
                        return r;

                bar_found = true;
        }

        /* If no resources, probably something special */
        if (!bar_found)
                return -EPERM;

        return 0;
#else
        return -EINVAL; /* No way to control the device without sysfs */
#endif
}

static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                                      struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0, idx;
        struct kvm_assigned_dev_kernel *match;
        struct pci_dev *dev;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        idx = srcu_read_lock(&kvm->srcu);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (match) {
                /* device already assigned */
                r = -EEXIST;
                goto out;
        }

        match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
        if (match == NULL) {
                printk(KERN_INFO "%s: Couldn't allocate memory\n",
                       __func__);
                r = -ENOMEM;
                goto out;
        }
        dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
                                          assigned_dev->busnr,
                                          assigned_dev->devfn);
        if (!dev) {
                printk(KERN_INFO "%s: host device not found\n", __func__);
                r = -EINVAL;
                goto out_free;
        }

        /* Don't allow bridges to be assigned */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
                r = -EPERM;
                goto out_put;
        }

        r = probe_sysfs_permissions(dev);
        if (r)
                goto out_put;

        if (pci_enable_device(dev)) {
                printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
                r = -EBUSY;
                goto out_put;
        }
        r = pci_request_regions(dev, "kvm_assigned_device");
        if (r) {
                printk(KERN_INFO "%s: Could not get access to device regions\n",
                       __func__);
                goto out_disable;
        }

        pci_reset_function(dev);
        pci_save_state(dev);
        match->pci_saved_state = pci_store_saved_state(dev);
        if (!match->pci_saved_state)
                printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
                       __func__, dev_name(&dev->dev));

        if (!pci_intx_mask_supported(dev))
                assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

        match->assigned_dev_id = assigned_dev->assigned_dev_id;
        match->host_segnr = assigned_dev->segnr;
        match->host_busnr = assigned_dev->busnr;
        match->host_devfn = assigned_dev->devfn;
        match->flags = assigned_dev->flags;
        match->dev = dev;
        spin_lock_init(&match->intx_lock);
        spin_lock_init(&match->intx_mask_lock);
        match->irq_source_id = -1;
        match->kvm = kvm;
        match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

        list_add(&match->list, &kvm->arch.assigned_dev_head);

        if (!kvm->arch.iommu_domain) {
                r = kvm_iommu_map_guest(kvm);
                if (r)
                        goto out_list_del;
        }
        r = kvm_assign_device(kvm, match->dev);
        if (r)
                goto out_list_del;

out:
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
out_list_del:
        if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&dev->dev));
        list_del(&match->list);
        pci_release_regions(dev);
out_disable:
        pci_disable_device(dev);
out_put:
        pci_dev_put(dev);
out_free:
        kfree(match);
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                                        struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                printk(KERN_INFO "%s: device hasn't been assigned before, "
                       "so cannot be deassigned\n", __func__);
                r = -EINVAL;
                goto out;
        }

        kvm_deassign_device(kvm, match->dev);

        kvm_free_assigned_device(kvm, match);

out:
        mutex_unlock(&kvm->lock);
        return r;
}


#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
                                    struct kvm_assigned_msix_nr *entry_nr)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry_nr->assigned_dev_id);
        if (!adev) {
                r = -EINVAL;
                goto msix_nr_out;
        }

        if (adev->entries_nr == 0) {
                adev->entries_nr = entry_nr->entry_nr;
                if (adev->entries_nr == 0 ||
                    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
                        r = -EINVAL;
                        goto msix_nr_out;
                }

                adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
                                                  entry_nr->entry_nr,
                                                  GFP_KERNEL);
                if (!adev->host_msix_entries) {
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
                adev->guest_msix_entries =
                        kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
                                GFP_KERNEL);
                if (!adev->guest_msix_entries) {
                        kfree(adev->host_msix_entries);
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
        } else /* Not allowed to set the MSI-X entry count twice */
                r = -EINVAL;
msix_nr_out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
                                       struct kvm_assigned_msix_entry *entry)
{
        int r = 0, i;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry->assigned_dev_id);

        if (!adev) {
                r = -EINVAL;
                goto msix_entry_out;
        }

        for (i = 0; i < adev->entries_nr; i++)
                if (adev->guest_msix_entries[i].vector == 0 ||
                    adev->guest_msix_entries[i].entry == entry->entry) {
                        adev->guest_msix_entries[i].entry = entry->entry;
                        adev->guest_msix_entries[i].vector = entry->gsi;
                        adev->host_msix_entries[i].entry = entry->entry;
                        break;
                }
        if (i == adev->entries_nr) {
                r = -ENOSPC;
                goto msix_entry_out;
        }

msix_entry_out:
        mutex_unlock(&kvm->lock);

        return r;
}
#endif
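
/*
 * A minimal usage sketch for the two MSI-X ioctls above, assuming vm_fd is a
 * KVM VM file descriptor and dev_id, nvec and gsi_base are placeholder values
 * chosen by userspace.  The entry count must be set once (and only once)
 * before the entries are filled in and before KVM_ASSIGN_DEV_IRQ enables
 * MSI-X on the host:
 *
 *      struct kvm_assigned_msix_nr nr = {
 *              .assigned_dev_id = dev_id,
 *              .entry_nr        = nvec,        // <= KVM_MAX_MSIX_PER_DEV
 *      };
 *      ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_NR, &nr);
 *
 *      for (i = 0; i < nvec; i++) {
 *              struct kvm_assigned_msix_entry e = {
 *                      .assigned_dev_id = dev_id,
 *                      .entry           = i,            // MSI-X table entry
 *                      .gsi             = gsi_base + i, // guest GSI to raise
 *              };
 *              ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, &e);
 *      }
 *
 *      struct kvm_assigned_irq irq = {
 *              .assigned_dev_id = dev_id,
 *              .flags = KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX,
 *      };
 *      ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
 */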
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
                                struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                r = -ENODEV;
                goto out;
        }

        spin_lock(&match->intx_mask_lock);

        match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
        match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

        if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
                if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
                        kvm_set_irq(match->kvm, match->irq_source_id,
                                    match->guest_irq, 0, false);
                        /*
                         * Masking at hardware-level is performed on demand,
                         * i.e. when an IRQ actually arrives at the host.
                         */
                } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        /*
                         * Unmask the IRQ line if required. Unmasking at
                         * device level will be performed by user space.
                         */
                        spin_lock_irq(&match->intx_lock);
                        if (match->host_irq_disabled) {
                                enable_irq(match->host_irq);
                                match->host_irq_disabled = false;
                        }
                        spin_unlock_irq(&match->intx_lock);
                }
        }

        spin_unlock(&match->intx_mask_lock);

out:
        mutex_unlock(&kvm->lock);
        return r;
}

long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
                                  unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_ASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_IRQ: {
                r = -EOPNOTSUPP;
                break;
        }
        case KVM_ASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
#ifdef __KVM_HAVE_MSIX
        case KVM_ASSIGN_SET_MSIX_NR: {
                struct kvm_assigned_msix_nr entry_nr;
                r = -EFAULT;
                if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
                        goto out;
                r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_SET_MSIX_ENTRY: {
                struct kvm_assigned_msix_entry entry;
                r = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof entry))
                        goto out;
                r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
                if (r)
                        goto out;
                break;
        }
#endif
        case KVM_ASSIGN_SET_INTX_MASK: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
                break;
        }
        default:
                r = -ENOTTY;
                break;
        }
out:
        return r;
}
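
/*
 * A minimal sketch of the full assignment flow dispatched by
 * kvm_vm_ioctl_assigned_device() above, assuming vm_fd is a VM file
 * descriptor obtained via KVM_CREATE_VM; dev_id, the PCI address and
 * guest_gsi are placeholder values.  KVM_DEV_ASSIGN_ENABLE_IOMMU is
 * mandatory; KVM_DEV_ASSIGN_PCI_2_3 is dropped by the kernel if the
 * device does not support INTx masking.
 *
 *      struct kvm_assigned_pci_dev dev = {
 *              .assigned_dev_id = dev_id,
 *              .segnr = 0,
 *              .busnr = 0x01,
 *              .devfn = PCI_DEVFN(0x00, 0),
 *              .flags = KVM_DEV_ASSIGN_ENABLE_IOMMU |  // required
 *                       KVM_DEV_ASSIGN_PCI_2_3,        // shared INTx if possible
 *      };
 *      ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
 *
 *      struct kvm_assigned_irq irq = {
 *              .assigned_dev_id = dev_id,
 *              .guest_irq = guest_gsi,
 *              .flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *      };
 *      ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
 *
 *      // ... run the guest ...
 *
 *      ioctl(vm_fd, KVM_DEASSIGN_DEV_IRQ, &irq);
 *      ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev);
 */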