1/* 2 * VFIO: IOMMU DMA mapping support for TCE on POWER 3 * 4 * Copyright (C) 2013 IBM Corp. All rights reserved. 5 * Author: Alexey Kardashevskiy <aik@ozlabs.ru> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 * 11 * Derived from original vfio_iommu_type1.c: 12 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 13 * Author: Alex Williamson <alex.williamson@redhat.com> 14 */ 15 16#include <linux/module.h> 17#include <linux/pci.h> 18#include <linux/slab.h> 19#include <linux/uaccess.h> 20#include <linux/err.h> 21#include <linux/vfio.h> 22#include <linux/vmalloc.h> 23#include <asm/iommu.h> 24#include <asm/tce.h> 25#include <asm/mmu_context.h> 26 27#define DRIVER_VERSION "0.1" 28#define DRIVER_AUTHOR "aik@ozlabs.ru" 29#define DRIVER_DESC "VFIO IOMMU SPAPR TCE" 30 31static void tce_iommu_detach_group(void *iommu_data, 32 struct iommu_group *iommu_group); 33 34static long try_increment_locked_vm(long npages) 35{ 36 long ret = 0, locked, lock_limit; 37 38 if (!current || !current->mm) 39 return -ESRCH; /* process exited */ 40 41 if (!npages) 42 return 0; 43 44 down_write(¤t->mm->mmap_sem); 45 locked = current->mm->locked_vm + npages; 46 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 47 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 48 ret = -ENOMEM; 49 else 50 current->mm->locked_vm += npages; 51 52 pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid, 53 npages << PAGE_SHIFT, 54 current->mm->locked_vm << PAGE_SHIFT, 55 rlimit(RLIMIT_MEMLOCK), 56 ret ? " - exceeded" : ""); 57 58 up_write(¤t->mm->mmap_sem); 59 60 return ret; 61} 62 63static void decrement_locked_vm(long npages) 64{ 65 if (!current || !current->mm || !npages) 66 return; /* process exited */ 67 68 down_write(¤t->mm->mmap_sem); 69 if (WARN_ON_ONCE(npages > current->mm->locked_vm)) 70 npages = current->mm->locked_vm; 71 current->mm->locked_vm -= npages; 72 pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid, 73 npages << PAGE_SHIFT, 74 current->mm->locked_vm << PAGE_SHIFT, 75 rlimit(RLIMIT_MEMLOCK)); 76 up_write(¤t->mm->mmap_sem); 77} 78 79/* 80 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation 81 * 82 * This code handles mapping and unmapping of user data buffers 83 * into DMA'ble space using the IOMMU 84 */ 85 86struct tce_iommu_group { 87 struct list_head next; 88 struct iommu_group *grp; 89}; 90 91/* 92 * The container descriptor supports only a single group per container. 93 * Required by the API as the container is not supplied with the IOMMU group 94 * at the moment of initialization. 95 */ 96struct tce_container { 97 struct mutex lock; 98 bool enabled; 99 bool v2; 100 unsigned long locked_pages; 101 struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; 102 struct list_head group_list; 103}; 104 105static long tce_iommu_unregister_pages(struct tce_container *container, 106 __u64 vaddr, __u64 size) 107{ 108 struct mm_iommu_table_group_mem_t *mem; 109 110 if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) 111 return -EINVAL; 112 113 mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); 114 if (!mem) 115 return -ENOENT; 116 117 return mm_iommu_put(mem); 118} 119 120static long tce_iommu_register_pages(struct tce_container *container, 121 __u64 vaddr, __u64 size) 122{ 123 long ret = 0; 124 struct mm_iommu_table_group_mem_t *mem = NULL; 125 unsigned long entries = size >> PAGE_SHIFT; 126 127 if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || 128 ((vaddr + size) < vaddr)) 129 return -EINVAL; 130 131 ret = mm_iommu_get(vaddr, entries, &mem); 132 if (ret) 133 return ret; 134 135 container->enabled = true; 136 137 return 0; 138} 139 140static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) 141{ 142 unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * 143 tbl->it_size, PAGE_SIZE); 144 unsigned long *uas; 145 long ret; 146 147 BUG_ON(tbl->it_userspace); 148 149 ret = try_increment_locked_vm(cb >> PAGE_SHIFT); 150 if (ret) 151 return ret; 152 153 uas = vzalloc(cb); 154 if (!uas) { 155 decrement_locked_vm(cb >> PAGE_SHIFT); 156 return -ENOMEM; 157 } 158 tbl->it_userspace = uas; 159 160 return 0; 161} 162 163static void tce_iommu_userspace_view_free(struct iommu_table *tbl) 164{ 165 unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * 166 tbl->it_size, PAGE_SIZE); 167 168 if (!tbl->it_userspace) 169 return; 170 171 vfree(tbl->it_userspace); 172 tbl->it_userspace = NULL; 173 decrement_locked_vm(cb >> PAGE_SHIFT); 174} 175 176static bool tce_page_is_contained(struct page *page, unsigned page_shift) 177{ 178 /* 179 * Check that the TCE table granularity is not bigger than the size of 180 * a page we just found. Otherwise the hardware can get access to 181 * a bigger memory chunk that it should. 182 */ 183 return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; 184} 185 186static inline bool tce_groups_attached(struct tce_container *container) 187{ 188 return !list_empty(&container->group_list); 189} 190 191static long tce_iommu_find_table(struct tce_container *container, 192 phys_addr_t ioba, struct iommu_table **ptbl) 193{ 194 long i; 195 196 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 197 struct iommu_table *tbl = container->tables[i]; 198 199 if (tbl) { 200 unsigned long entry = ioba >> tbl->it_page_shift; 201 unsigned long start = tbl->it_offset; 202 unsigned long end = start + tbl->it_size; 203 204 if ((start <= entry) && (entry < end)) { 205 *ptbl = tbl; 206 return i; 207 } 208 } 209 } 210 211 return -1; 212} 213 214static int tce_iommu_find_free_table(struct tce_container *container) 215{ 216 int i; 217 218 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 219 if (!container->tables[i]) 220 return i; 221 } 222 223 return -ENOSPC; 224} 225 226static int tce_iommu_enable(struct tce_container *container) 227{ 228 int ret = 0; 229 unsigned long locked; 230 struct iommu_table_group *table_group; 231 struct tce_iommu_group *tcegrp; 232 233 if (!current->mm) 234 return -ESRCH; /* process exited */ 235 236 if (container->enabled) 237 return -EBUSY; 238 239 /* 240 * When userspace pages are mapped into the IOMMU, they are effectively 241 * locked memory, so, theoretically, we need to update the accounting 242 * of locked pages on each map and unmap. For powerpc, the map unmap 243 * paths can be very hot, though, and the accounting would kill 244 * performance, especially since it would be difficult to impossible 245 * to handle the accounting in real mode only. 246 * 247 * To address that, rather than precisely accounting every page, we 248 * instead account for a worst case on locked memory when the iommu is 249 * enabled and disabled. The worst case upper bound on locked memory 250 * is the size of the whole iommu window, which is usually relatively 251 * small (compared to total memory sizes) on POWER hardware. 252 * 253 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, 254 * that would effectively kill the guest at random points, much better 255 * enforcing the limit based on the max that the guest can map. 256 * 257 * Unfortunately at the moment it counts whole tables, no matter how 258 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups 259 * each with 2GB DMA window, 8GB will be counted here. The reason for 260 * this is that we cannot tell here the amount of RAM used by the guest 261 * as this information is only available from KVM and VFIO is 262 * KVM agnostic. 263 * 264 * So we do not allow enabling a container without a group attached 265 * as there is no way to know how much we should increment 266 * the locked_vm counter. 267 */ 268 if (!tce_groups_attached(container)) 269 return -ENODEV; 270 271 tcegrp = list_first_entry(&container->group_list, 272 struct tce_iommu_group, next); 273 table_group = iommu_group_get_iommudata(tcegrp->grp); 274 if (!table_group) 275 return -ENODEV; 276 277 if (!table_group->tce32_size) 278 return -EPERM; 279 280 locked = table_group->tce32_size >> PAGE_SHIFT; 281 ret = try_increment_locked_vm(locked); 282 if (ret) 283 return ret; 284 285 container->locked_pages = locked; 286 287 container->enabled = true; 288 289 return ret; 290} 291 292static void tce_iommu_disable(struct tce_container *container) 293{ 294 if (!container->enabled) 295 return; 296 297 container->enabled = false; 298 299 if (!current->mm) 300 return; 301 302 decrement_locked_vm(container->locked_pages); 303} 304 305static void *tce_iommu_open(unsigned long arg) 306{ 307 struct tce_container *container; 308 309 if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) { 310 pr_err("tce_vfio: Wrong IOMMU type\n"); 311 return ERR_PTR(-EINVAL); 312 } 313 314 container = kzalloc(sizeof(*container), GFP_KERNEL); 315 if (!container) 316 return ERR_PTR(-ENOMEM); 317 318 mutex_init(&container->lock); 319 INIT_LIST_HEAD_RCU(&container->group_list); 320 321 container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; 322 323 return container; 324} 325 326static int tce_iommu_clear(struct tce_container *container, 327 struct iommu_table *tbl, 328 unsigned long entry, unsigned long pages); 329static void tce_iommu_free_table(struct iommu_table *tbl); 330 331static void tce_iommu_release(void *iommu_data) 332{ 333 struct tce_container *container = iommu_data; 334 struct iommu_table_group *table_group; 335 struct tce_iommu_group *tcegrp; 336 long i; 337 338 while (tce_groups_attached(container)) { 339 tcegrp = list_first_entry(&container->group_list, 340 struct tce_iommu_group, next); 341 table_group = iommu_group_get_iommudata(tcegrp->grp); 342 tce_iommu_detach_group(iommu_data, tcegrp->grp); 343 } 344 345 /* 346 * If VFIO created a table, it was not disposed 347 * by tce_iommu_detach_group() so do it now. 348 */ 349 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 350 struct iommu_table *tbl = container->tables[i]; 351 352 if (!tbl) 353 continue; 354 355 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); 356 tce_iommu_free_table(tbl); 357 } 358 359 tce_iommu_disable(container); 360 mutex_destroy(&container->lock); 361 362 kfree(container); 363} 364 365static void tce_iommu_unuse_page(struct tce_container *container, 366 unsigned long hpa) 367{ 368 struct page *page; 369 370 page = pfn_to_page(hpa >> PAGE_SHIFT); 371 put_page(page); 372} 373 374static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, 375 unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) 376{ 377 long ret = 0; 378 struct mm_iommu_table_group_mem_t *mem; 379 380 mem = mm_iommu_lookup(tce, size); 381 if (!mem) 382 return -EINVAL; 383 384 ret = mm_iommu_ua_to_hpa(mem, tce, phpa); 385 if (ret) 386 return -EINVAL; 387 388 *pmem = mem; 389 390 return 0; 391} 392 393static void tce_iommu_unuse_page_v2(struct iommu_table *tbl, 394 unsigned long entry) 395{ 396 struct mm_iommu_table_group_mem_t *mem = NULL; 397 int ret; 398 unsigned long hpa = 0; 399 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 400 401 if (!pua || !current || !current->mm) 402 return; 403 404 ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl), 405 &hpa, &mem); 406 if (ret) 407 pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", 408 __func__, *pua, entry, ret); 409 if (mem) 410 mm_iommu_mapped_dec(mem); 411 412 *pua = 0; 413} 414 415static int tce_iommu_clear(struct tce_container *container, 416 struct iommu_table *tbl, 417 unsigned long entry, unsigned long pages) 418{ 419 unsigned long oldhpa; 420 long ret; 421 enum dma_data_direction direction; 422 423 for ( ; pages; --pages, ++entry) { 424 direction = DMA_NONE; 425 oldhpa = 0; 426 ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction); 427 if (ret) 428 continue; 429 430 if (direction == DMA_NONE) 431 continue; 432 433 if (container->v2) { 434 tce_iommu_unuse_page_v2(tbl, entry); 435 continue; 436 } 437 438 tce_iommu_unuse_page(container, oldhpa); 439 } 440 441 return 0; 442} 443 444static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) 445{ 446 struct page *page = NULL; 447 enum dma_data_direction direction = iommu_tce_direction(tce); 448 449 if (get_user_pages_fast(tce & PAGE_MASK, 1, 450 direction != DMA_TO_DEVICE, &page) != 1) 451 return -EFAULT; 452 453 *hpa = __pa((unsigned long) page_address(page)); 454 455 return 0; 456} 457 458static long tce_iommu_build(struct tce_container *container, 459 struct iommu_table *tbl, 460 unsigned long entry, unsigned long tce, unsigned long pages, 461 enum dma_data_direction direction) 462{ 463 long i, ret = 0; 464 struct page *page; 465 unsigned long hpa; 466 enum dma_data_direction dirtmp; 467 468 for (i = 0; i < pages; ++i) { 469 unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; 470 471 ret = tce_iommu_use_page(tce, &hpa); 472 if (ret) 473 break; 474 475 page = pfn_to_page(hpa >> PAGE_SHIFT); 476 if (!tce_page_is_contained(page, tbl->it_page_shift)) { 477 ret = -EPERM; 478 break; 479 } 480 481 hpa |= offset; 482 dirtmp = direction; 483 ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); 484 if (ret) { 485 tce_iommu_unuse_page(container, hpa); 486 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", 487 __func__, entry << tbl->it_page_shift, 488 tce, ret); 489 break; 490 } 491 492 if (dirtmp != DMA_NONE) 493 tce_iommu_unuse_page(container, hpa); 494 495 tce += IOMMU_PAGE_SIZE(tbl); 496 } 497 498 if (ret) 499 tce_iommu_clear(container, tbl, entry, i); 500 501 return ret; 502} 503 504static long tce_iommu_build_v2(struct tce_container *container, 505 struct iommu_table *tbl, 506 unsigned long entry, unsigned long tce, unsigned long pages, 507 enum dma_data_direction direction) 508{ 509 long i, ret = 0; 510 struct page *page; 511 unsigned long hpa; 512 enum dma_data_direction dirtmp; 513 514 for (i = 0; i < pages; ++i) { 515 struct mm_iommu_table_group_mem_t *mem = NULL; 516 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, 517 entry + i); 518 519 ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl), 520 &hpa, &mem); 521 if (ret) 522 break; 523 524 page = pfn_to_page(hpa >> PAGE_SHIFT); 525 if (!tce_page_is_contained(page, tbl->it_page_shift)) { 526 ret = -EPERM; 527 break; 528 } 529 530 /* Preserve offset within IOMMU page */ 531 hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; 532 dirtmp = direction; 533 534 /* The registered region is being unregistered */ 535 if (mm_iommu_mapped_inc(mem)) 536 break; 537 538 ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); 539 if (ret) { 540 /* dirtmp cannot be DMA_NONE here */ 541 tce_iommu_unuse_page_v2(tbl, entry + i); 542 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", 543 __func__, entry << tbl->it_page_shift, 544 tce, ret); 545 break; 546 } 547 548 if (dirtmp != DMA_NONE) 549 tce_iommu_unuse_page_v2(tbl, entry + i); 550 551 *pua = tce; 552 553 tce += IOMMU_PAGE_SIZE(tbl); 554 } 555 556 if (ret) 557 tce_iommu_clear(container, tbl, entry, i); 558 559 return ret; 560} 561 562static long tce_iommu_create_table(struct tce_container *container, 563 struct iommu_table_group *table_group, 564 int num, 565 __u32 page_shift, 566 __u64 window_size, 567 __u32 levels, 568 struct iommu_table **ptbl) 569{ 570 long ret, table_size; 571 572 table_size = table_group->ops->get_table_size(page_shift, window_size, 573 levels); 574 if (!table_size) 575 return -EINVAL; 576 577 ret = try_increment_locked_vm(table_size >> PAGE_SHIFT); 578 if (ret) 579 return ret; 580 581 ret = table_group->ops->create_table(table_group, num, 582 page_shift, window_size, levels, ptbl); 583 584 WARN_ON(!ret && !(*ptbl)->it_ops->free); 585 WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); 586 587 if (!ret && container->v2) { 588 ret = tce_iommu_userspace_view_alloc(*ptbl); 589 if (ret) 590 (*ptbl)->it_ops->free(*ptbl); 591 } 592 593 if (ret) 594 decrement_locked_vm(table_size >> PAGE_SHIFT); 595 596 return ret; 597} 598 599static void tce_iommu_free_table(struct iommu_table *tbl) 600{ 601 unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; 602 603 tce_iommu_userspace_view_free(tbl); 604 tbl->it_ops->free(tbl); 605 decrement_locked_vm(pages); 606} 607 608static long tce_iommu_create_window(struct tce_container *container, 609 __u32 page_shift, __u64 window_size, __u32 levels, 610 __u64 *start_addr) 611{ 612 struct tce_iommu_group *tcegrp; 613 struct iommu_table_group *table_group; 614 struct iommu_table *tbl = NULL; 615 long ret, num; 616 617 num = tce_iommu_find_free_table(container); 618 if (num < 0) 619 return num; 620 621 /* Get the first group for ops::create_table */ 622 tcegrp = list_first_entry(&container->group_list, 623 struct tce_iommu_group, next); 624 table_group = iommu_group_get_iommudata(tcegrp->grp); 625 if (!table_group) 626 return -EFAULT; 627 628 if (!(table_group->pgsizes & (1ULL << page_shift))) 629 return -EINVAL; 630 631 if (!table_group->ops->set_window || !table_group->ops->unset_window || 632 !table_group->ops->get_table_size || 633 !table_group->ops->create_table) 634 return -EPERM; 635 636 /* Create TCE table */ 637 ret = tce_iommu_create_table(container, table_group, num, 638 page_shift, window_size, levels, &tbl); 639 if (ret) 640 return ret; 641 642 BUG_ON(!tbl->it_ops->free); 643 644 /* 645 * Program the table to every group. 646 * Groups have been tested for compatibility at the attach time. 647 */ 648 list_for_each_entry(tcegrp, &container->group_list, next) { 649 table_group = iommu_group_get_iommudata(tcegrp->grp); 650 651 ret = table_group->ops->set_window(table_group, num, tbl); 652 if (ret) 653 goto unset_exit; 654 } 655 656 container->tables[num] = tbl; 657 658 /* Return start address assigned by platform in create_table() */ 659 *start_addr = tbl->it_offset << tbl->it_page_shift; 660 661 return 0; 662 663unset_exit: 664 list_for_each_entry(tcegrp, &container->group_list, next) { 665 table_group = iommu_group_get_iommudata(tcegrp->grp); 666 table_group->ops->unset_window(table_group, num); 667 } 668 tce_iommu_free_table(tbl); 669 670 return ret; 671} 672 673static long tce_iommu_remove_window(struct tce_container *container, 674 __u64 start_addr) 675{ 676 struct iommu_table_group *table_group = NULL; 677 struct iommu_table *tbl; 678 struct tce_iommu_group *tcegrp; 679 int num; 680 681 num = tce_iommu_find_table(container, start_addr, &tbl); 682 if (num < 0) 683 return -EINVAL; 684 685 BUG_ON(!tbl->it_size); 686 687 /* Detach groups from IOMMUs */ 688 list_for_each_entry(tcegrp, &container->group_list, next) { 689 table_group = iommu_group_get_iommudata(tcegrp->grp); 690 691 /* 692 * SPAPR TCE IOMMU exposes the default DMA window to 693 * the guest via dma32_window_start/size of 694 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow 695 * the userspace to remove this window, some do not so 696 * here we check for the platform capability. 697 */ 698 if (!table_group->ops || !table_group->ops->unset_window) 699 return -EPERM; 700 701 table_group->ops->unset_window(table_group, num); 702 } 703 704 /* Free table */ 705 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); 706 tce_iommu_free_table(tbl); 707 container->tables[num] = NULL; 708 709 return 0; 710} 711 712static long tce_iommu_ioctl(void *iommu_data, 713 unsigned int cmd, unsigned long arg) 714{ 715 struct tce_container *container = iommu_data; 716 unsigned long minsz, ddwsz; 717 long ret; 718 719 switch (cmd) { 720 case VFIO_CHECK_EXTENSION: 721 switch (arg) { 722 case VFIO_SPAPR_TCE_IOMMU: 723 case VFIO_SPAPR_TCE_v2_IOMMU: 724 ret = 1; 725 break; 726 default: 727 ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); 728 break; 729 } 730 731 return (ret < 0) ? 0 : ret; 732 733 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { 734 struct vfio_iommu_spapr_tce_info info; 735 struct tce_iommu_group *tcegrp; 736 struct iommu_table_group *table_group; 737 738 if (!tce_groups_attached(container)) 739 return -ENXIO; 740 741 tcegrp = list_first_entry(&container->group_list, 742 struct tce_iommu_group, next); 743 table_group = iommu_group_get_iommudata(tcegrp->grp); 744 745 if (!table_group) 746 return -ENXIO; 747 748 minsz = offsetofend(struct vfio_iommu_spapr_tce_info, 749 dma32_window_size); 750 751 if (copy_from_user(&info, (void __user *)arg, minsz)) 752 return -EFAULT; 753 754 if (info.argsz < minsz) 755 return -EINVAL; 756 757 info.dma32_window_start = table_group->tce32_start; 758 info.dma32_window_size = table_group->tce32_size; 759 info.flags = 0; 760 memset(&info.ddw, 0, sizeof(info.ddw)); 761 762 if (table_group->max_dynamic_windows_supported && 763 container->v2) { 764 info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW; 765 info.ddw.pgsizes = table_group->pgsizes; 766 info.ddw.max_dynamic_windows_supported = 767 table_group->max_dynamic_windows_supported; 768 info.ddw.levels = table_group->max_levels; 769 } 770 771 ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw); 772 773 if (info.argsz >= ddwsz) 774 minsz = ddwsz; 775 776 if (copy_to_user((void __user *)arg, &info, minsz)) 777 return -EFAULT; 778 779 return 0; 780 } 781 case VFIO_IOMMU_MAP_DMA: { 782 struct vfio_iommu_type1_dma_map param; 783 struct iommu_table *tbl = NULL; 784 long num; 785 enum dma_data_direction direction; 786 787 if (!container->enabled) 788 return -EPERM; 789 790 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); 791 792 if (copy_from_user(¶m, (void __user *)arg, minsz)) 793 return -EFAULT; 794 795 if (param.argsz < minsz) 796 return -EINVAL; 797 798 if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | 799 VFIO_DMA_MAP_FLAG_WRITE)) 800 return -EINVAL; 801 802 num = tce_iommu_find_table(container, param.iova, &tbl); 803 if (num < 0) 804 return -ENXIO; 805 806 if ((param.size & ~IOMMU_PAGE_MASK(tbl)) || 807 (param.vaddr & ~IOMMU_PAGE_MASK(tbl))) 808 return -EINVAL; 809 810 /* iova is checked by the IOMMU API */ 811 if (param.flags & VFIO_DMA_MAP_FLAG_READ) { 812 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) 813 direction = DMA_BIDIRECTIONAL; 814 else 815 direction = DMA_TO_DEVICE; 816 } else { 817 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) 818 direction = DMA_FROM_DEVICE; 819 else 820 return -EINVAL; 821 } 822 823 ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr); 824 if (ret) 825 return ret; 826 827 if (container->v2) 828 ret = tce_iommu_build_v2(container, tbl, 829 param.iova >> tbl->it_page_shift, 830 param.vaddr, 831 param.size >> tbl->it_page_shift, 832 direction); 833 else 834 ret = tce_iommu_build(container, tbl, 835 param.iova >> tbl->it_page_shift, 836 param.vaddr, 837 param.size >> tbl->it_page_shift, 838 direction); 839 840 iommu_flush_tce(tbl); 841 842 return ret; 843 } 844 case VFIO_IOMMU_UNMAP_DMA: { 845 struct vfio_iommu_type1_dma_unmap param; 846 struct iommu_table *tbl = NULL; 847 long num; 848 849 if (!container->enabled) 850 return -EPERM; 851 852 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, 853 size); 854 855 if (copy_from_user(¶m, (void __user *)arg, minsz)) 856 return -EFAULT; 857 858 if (param.argsz < minsz) 859 return -EINVAL; 860 861 /* No flag is supported now */ 862 if (param.flags) 863 return -EINVAL; 864 865 num = tce_iommu_find_table(container, param.iova, &tbl); 866 if (num < 0) 867 return -ENXIO; 868 869 if (param.size & ~IOMMU_PAGE_MASK(tbl)) 870 return -EINVAL; 871 872 ret = iommu_tce_clear_param_check(tbl, param.iova, 0, 873 param.size >> tbl->it_page_shift); 874 if (ret) 875 return ret; 876 877 ret = tce_iommu_clear(container, tbl, 878 param.iova >> tbl->it_page_shift, 879 param.size >> tbl->it_page_shift); 880 iommu_flush_tce(tbl); 881 882 return ret; 883 } 884 case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: { 885 struct vfio_iommu_spapr_register_memory param; 886 887 if (!container->v2) 888 break; 889 890 minsz = offsetofend(struct vfio_iommu_spapr_register_memory, 891 size); 892 893 if (copy_from_user(¶m, (void __user *)arg, minsz)) 894 return -EFAULT; 895 896 if (param.argsz < minsz) 897 return -EINVAL; 898 899 /* No flag is supported now */ 900 if (param.flags) 901 return -EINVAL; 902 903 mutex_lock(&container->lock); 904 ret = tce_iommu_register_pages(container, param.vaddr, 905 param.size); 906 mutex_unlock(&container->lock); 907 908 return ret; 909 } 910 case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: { 911 struct vfio_iommu_spapr_register_memory param; 912 913 if (!container->v2) 914 break; 915 916 minsz = offsetofend(struct vfio_iommu_spapr_register_memory, 917 size); 918 919 if (copy_from_user(¶m, (void __user *)arg, minsz)) 920 return -EFAULT; 921 922 if (param.argsz < minsz) 923 return -EINVAL; 924 925 /* No flag is supported now */ 926 if (param.flags) 927 return -EINVAL; 928 929 mutex_lock(&container->lock); 930 ret = tce_iommu_unregister_pages(container, param.vaddr, 931 param.size); 932 mutex_unlock(&container->lock); 933 934 return ret; 935 } 936 case VFIO_IOMMU_ENABLE: 937 if (container->v2) 938 break; 939 940 mutex_lock(&container->lock); 941 ret = tce_iommu_enable(container); 942 mutex_unlock(&container->lock); 943 return ret; 944 945 946 case VFIO_IOMMU_DISABLE: 947 if (container->v2) 948 break; 949 950 mutex_lock(&container->lock); 951 tce_iommu_disable(container); 952 mutex_unlock(&container->lock); 953 return 0; 954 955 case VFIO_EEH_PE_OP: { 956 struct tce_iommu_group *tcegrp; 957 958 ret = 0; 959 list_for_each_entry(tcegrp, &container->group_list, next) { 960 ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, 961 cmd, arg); 962 if (ret) 963 return ret; 964 } 965 return ret; 966 } 967 968 case VFIO_IOMMU_SPAPR_TCE_CREATE: { 969 struct vfio_iommu_spapr_tce_create create; 970 971 if (!container->v2) 972 break; 973 974 if (!tce_groups_attached(container)) 975 return -ENXIO; 976 977 minsz = offsetofend(struct vfio_iommu_spapr_tce_create, 978 start_addr); 979 980 if (copy_from_user(&create, (void __user *)arg, minsz)) 981 return -EFAULT; 982 983 if (create.argsz < minsz) 984 return -EINVAL; 985 986 if (create.flags) 987 return -EINVAL; 988 989 mutex_lock(&container->lock); 990 991 ret = tce_iommu_create_window(container, create.page_shift, 992 create.window_size, create.levels, 993 &create.start_addr); 994 995 mutex_unlock(&container->lock); 996 997 if (!ret && copy_to_user((void __user *)arg, &create, minsz)) 998 ret = -EFAULT; 999 1000 return ret; 1001 } 1002 case VFIO_IOMMU_SPAPR_TCE_REMOVE: { 1003 struct vfio_iommu_spapr_tce_remove remove; 1004 1005 if (!container->v2) 1006 break; 1007 1008 if (!tce_groups_attached(container)) 1009 return -ENXIO; 1010 1011 minsz = offsetofend(struct vfio_iommu_spapr_tce_remove, 1012 start_addr); 1013 1014 if (copy_from_user(&remove, (void __user *)arg, minsz)) 1015 return -EFAULT; 1016 1017 if (remove.argsz < minsz) 1018 return -EINVAL; 1019 1020 if (remove.flags) 1021 return -EINVAL; 1022 1023 mutex_lock(&container->lock); 1024 1025 ret = tce_iommu_remove_window(container, remove.start_addr); 1026 1027 mutex_unlock(&container->lock); 1028 1029 return ret; 1030 } 1031 } 1032 1033 return -ENOTTY; 1034} 1035 1036static void tce_iommu_release_ownership(struct tce_container *container, 1037 struct iommu_table_group *table_group) 1038{ 1039 int i; 1040 1041 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 1042 struct iommu_table *tbl = container->tables[i]; 1043 1044 if (!tbl) 1045 continue; 1046 1047 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); 1048 tce_iommu_userspace_view_free(tbl); 1049 if (tbl->it_map) 1050 iommu_release_ownership(tbl); 1051 1052 container->tables[i] = NULL; 1053 } 1054} 1055 1056static int tce_iommu_take_ownership(struct tce_container *container, 1057 struct iommu_table_group *table_group) 1058{ 1059 int i, j, rc = 0; 1060 1061 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 1062 struct iommu_table *tbl = table_group->tables[i]; 1063 1064 if (!tbl || !tbl->it_map) 1065 continue; 1066 1067 rc = tce_iommu_userspace_view_alloc(tbl); 1068 if (!rc) 1069 rc = iommu_take_ownership(tbl); 1070 1071 if (rc) { 1072 for (j = 0; j < i; ++j) 1073 iommu_release_ownership( 1074 table_group->tables[j]); 1075 1076 return rc; 1077 } 1078 } 1079 1080 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) 1081 container->tables[i] = table_group->tables[i]; 1082 1083 return 0; 1084} 1085 1086static void tce_iommu_release_ownership_ddw(struct tce_container *container, 1087 struct iommu_table_group *table_group) 1088{ 1089 long i; 1090 1091 if (!table_group->ops->unset_window) { 1092 WARN_ON_ONCE(1); 1093 return; 1094 } 1095 1096 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) 1097 table_group->ops->unset_window(table_group, i); 1098 1099 table_group->ops->release_ownership(table_group); 1100} 1101 1102static long tce_iommu_take_ownership_ddw(struct tce_container *container, 1103 struct iommu_table_group *table_group) 1104{ 1105 long i, ret = 0; 1106 struct iommu_table *tbl = NULL; 1107 1108 if (!table_group->ops->create_table || !table_group->ops->set_window || 1109 !table_group->ops->release_ownership) { 1110 WARN_ON_ONCE(1); 1111 return -EFAULT; 1112 } 1113 1114 table_group->ops->take_ownership(table_group); 1115 1116 /* 1117 * If it the first group attached, check if there is 1118 * a default DMA window and create one if none as 1119 * the userspace expects it to exist. 1120 */ 1121 if (!tce_groups_attached(container) && !container->tables[0]) { 1122 ret = tce_iommu_create_table(container, 1123 table_group, 1124 0, /* window number */ 1125 IOMMU_PAGE_SHIFT_4K, 1126 table_group->tce32_size, 1127 1, /* default levels */ 1128 &tbl); 1129 if (ret) 1130 goto release_exit; 1131 else 1132 container->tables[0] = tbl; 1133 } 1134 1135 /* Set all windows to the new group */ 1136 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 1137 tbl = container->tables[i]; 1138 1139 if (!tbl) 1140 continue; 1141 1142 /* Set the default window to a new group */ 1143 ret = table_group->ops->set_window(table_group, i, tbl); 1144 if (ret) 1145 goto release_exit; 1146 } 1147 1148 return 0; 1149 1150release_exit: 1151 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) 1152 table_group->ops->unset_window(table_group, i); 1153 1154 table_group->ops->release_ownership(table_group); 1155 1156 return ret; 1157} 1158 1159static int tce_iommu_attach_group(void *iommu_data, 1160 struct iommu_group *iommu_group) 1161{ 1162 int ret; 1163 struct tce_container *container = iommu_data; 1164 struct iommu_table_group *table_group; 1165 struct tce_iommu_group *tcegrp = NULL; 1166 1167 mutex_lock(&container->lock); 1168 1169 /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", 1170 iommu_group_id(iommu_group), iommu_group); */ 1171 table_group = iommu_group_get_iommudata(iommu_group); 1172 1173 if (tce_groups_attached(container) && (!table_group->ops || 1174 !table_group->ops->take_ownership || 1175 !table_group->ops->release_ownership)) { 1176 ret = -EBUSY; 1177 goto unlock_exit; 1178 } 1179 1180 /* Check if new group has the same iommu_ops (i.e. compatible) */ 1181 list_for_each_entry(tcegrp, &container->group_list, next) { 1182 struct iommu_table_group *table_group_tmp; 1183 1184 if (tcegrp->grp == iommu_group) { 1185 pr_warn("tce_vfio: Group %d is already attached\n", 1186 iommu_group_id(iommu_group)); 1187 ret = -EBUSY; 1188 goto unlock_exit; 1189 } 1190 table_group_tmp = iommu_group_get_iommudata(tcegrp->grp); 1191 if (table_group_tmp->ops != table_group->ops) { 1192 pr_warn("tce_vfio: Group %d is incompatible with group %d\n", 1193 iommu_group_id(iommu_group), 1194 iommu_group_id(tcegrp->grp)); 1195 ret = -EPERM; 1196 goto unlock_exit; 1197 } 1198 } 1199 1200 tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL); 1201 if (!tcegrp) { 1202 ret = -ENOMEM; 1203 goto unlock_exit; 1204 } 1205 1206 if (!table_group->ops || !table_group->ops->take_ownership || 1207 !table_group->ops->release_ownership) 1208 ret = tce_iommu_take_ownership(container, table_group); 1209 else 1210 ret = tce_iommu_take_ownership_ddw(container, table_group); 1211 1212 if (!ret) { 1213 tcegrp->grp = iommu_group; 1214 list_add(&tcegrp->next, &container->group_list); 1215 } 1216 1217unlock_exit: 1218 if (ret && tcegrp) 1219 kfree(tcegrp); 1220 1221 mutex_unlock(&container->lock); 1222 1223 return ret; 1224} 1225 1226static void tce_iommu_detach_group(void *iommu_data, 1227 struct iommu_group *iommu_group) 1228{ 1229 struct tce_container *container = iommu_data; 1230 struct iommu_table_group *table_group; 1231 bool found = false; 1232 struct tce_iommu_group *tcegrp; 1233 1234 mutex_lock(&container->lock); 1235 1236 list_for_each_entry(tcegrp, &container->group_list, next) { 1237 if (tcegrp->grp == iommu_group) { 1238 found = true; 1239 break; 1240 } 1241 } 1242 1243 if (!found) { 1244 pr_warn("tce_vfio: detaching unattached group #%u\n", 1245 iommu_group_id(iommu_group)); 1246 goto unlock_exit; 1247 } 1248 1249 list_del(&tcegrp->next); 1250 kfree(tcegrp); 1251 1252 table_group = iommu_group_get_iommudata(iommu_group); 1253 BUG_ON(!table_group); 1254 1255 if (!table_group->ops || !table_group->ops->release_ownership) 1256 tce_iommu_release_ownership(container, table_group); 1257 else 1258 tce_iommu_release_ownership_ddw(container, table_group); 1259 1260unlock_exit: 1261 mutex_unlock(&container->lock); 1262} 1263 1264const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { 1265 .name = "iommu-vfio-powerpc", 1266 .owner = THIS_MODULE, 1267 .open = tce_iommu_open, 1268 .release = tce_iommu_release, 1269 .ioctl = tce_iommu_ioctl, 1270 .attach_group = tce_iommu_attach_group, 1271 .detach_group = tce_iommu_detach_group, 1272}; 1273 1274static int __init tce_iommu_init(void) 1275{ 1276 return vfio_register_iommu_driver(&tce_iommu_driver_ops); 1277} 1278 1279static void __exit tce_iommu_cleanup(void) 1280{ 1281 vfio_unregister_iommu_driver(&tce_iommu_driver_ops); 1282} 1283 1284module_init(tce_iommu_init); 1285module_exit(tce_iommu_cleanup); 1286 1287MODULE_VERSION(DRIVER_VERSION); 1288MODULE_LICENSE("GPL v2"); 1289MODULE_AUTHOR(DRIVER_AUTHOR); 1290MODULE_DESCRIPTION(DRIVER_DESC); 1291 1292