/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"
#include "trace/events/kvm.h"

struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
	int host_segnr;
	int host_busnr;
	int host_devfn;
	unsigned int entries_nr;
	int host_irq;
	bool host_irq_disabled;
	bool pci_2_3;
	struct msix_entry *host_msix_entries;
	int guest_irq;
	struct msix_entry *guest_msix_entries;
	unsigned long irq_requested_type;
	int irq_source_id;
	int flags;
	struct pci_dev *dev;
	struct kvm *kvm;
	spinlock_t intx_lock;
	spinlock_t intx_mask_lock;
	char irq_name[32];
	struct pci_saved_state *pci_saved_state;
};

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find the correlated MSI-X entry!\n");

	return index;
}

static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1,
				    false);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1, false);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
/*
 * Deliver an IRQ in an atomic context if we can, or return a failure so that
 * the caller can retry in a process context.
 * Return value:
 *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
 *  Other values - No need to retry.
 */
static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
				int level)
{
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	struct kvm_kernel_irq_routing_entry *e;
	int ret = -EINVAL;
	int idx;

	trace_kvm_set_irq(irq, level, irq_source_id);

	/*
	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
	 * which would need to be retried from thread context; when the same
	 * GSI is connected to both PIC and IOAPIC, we'd have to report a
	 * partial failure here.
	 * Since there's no easy way to do this, we only support injecting MSI,
	 * which is limited to a 1:1 GSI mapping.
	 */
	idx = srcu_read_lock(&kvm->irq_srcu);
	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
		e = &entries[0];
		ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
						irq, level);
	}
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
				       assigned_dev->irq_source_id,
				       assigned_dev->guest_irq, 1);
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;
	int ret = 0;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
					   assigned_dev->irq_source_id,
					   vector, 1);
	}

	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1, false);
	}

	spin_unlock(&dev->intx_mask_lock);
}

static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0, false);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the irq here to prevent further events.
	 *
	 * Note this may result in a nested disable if the interrupt type is
	 * INTx, but that's OK since we are going to free it anyway.
	 *
	 * If this function is called as part of VM destruction, make sure the
	 * kvm state is still valid at this point, since we may also have to
	 * wait on a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	pci_clear_dev_assigned(assigned_dev->dev);

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}

static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix_exact(dev->dev,
				  dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}

static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}
static int assigned_device_enable_guest_msi(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}

static int assigned_device_enable_guest_msix(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}

static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}

static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else {
		kvm_free_irq_source_id(kvm, dev->irq_source_id);
		dev->irq_source_id = -1;
	}

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}
static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = d_backing_inode(path.dentry);

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}

static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
					  assigned_dev->busnr,
					  assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match->dev);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
					struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		       "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match->dev);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						  entry_nr->entry_nr,
						  GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* setting the MSI-X entry count twice is not allowed */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}

static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0, false);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}
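
/*
 * Illustrative sketch only (not compiled as part of this file): the userspace
 * call sequence expected by the ioctl dispatcher above, for an INTx device.
 * It assumes a VM fd obtained via KVM_CREATE_VM, a hypothetical device at
 * 0000:01:00.0 with assigned_dev_id 1, and guest GSI 10; error handling and
 * the MSI-X path (KVM_ASSIGN_SET_MSIX_NR / KVM_ASSIGN_SET_MSIX_ENTRY before
 * KVM_ASSIGN_DEV_IRQ) are omitted.
 *
 *	struct kvm_assigned_pci_dev dev = {
 *		.assigned_dev_id = 1,
 *		.segnr = 0, .busnr = 1, .devfn = 0,
 *		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU | KVM_DEV_ASSIGN_PCI_2_3,
 *	};
 *	struct kvm_assigned_irq irq = {
 *		.assigned_dev_id = 1,
 *		.guest_irq = 10,
 *		.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *	};
 *
 *	ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);   // kvm_vm_ioctl_assign_device()
 *	ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);      // assign_host_irq() + assign_guest_irq()
 *	...
 *	ioctl(vm_fd, KVM_DEASSIGN_DEV_IRQ, &irq);    // kvm_deassign_irq()
 *	ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev); // kvm_vm_ioctl_deassign_device()
 */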