/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/dma_remapping.h>
#include <linux/pagemap.h>
#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

bool scif_ulimit_check = true;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
	struct scif_endpt_rma_info *rma = &ep->rma_info;

	mutex_init(&rma->rma_lock);
	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
			 SCIF_DMA_64BIT_PFN);
	spin_lock_init(&rma->tc_lock);
	mutex_init(&rma->mmn_lock);
	INIT_LIST_HEAD(&rma->reg_list);
	INIT_LIST_HEAD(&rma->remote_reg_list);
	atomic_set(&rma->tw_refcount, 0);
	atomic_set(&rma->tcw_refcount, 0);
	atomic_set(&rma->tcw_total_pages, 0);
	atomic_set(&rma->fence_refcount, 0);

	rma->async_list_del = 0;
	rma->dma_chan = NULL;
	INIT_LIST_HEAD(&rma->mmn_list);
	INIT_LIST_HEAD(&rma->vma_list);
	init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
	int ret = 0;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Destroy RMA info only if all lists are empty and no refs are held */
	if (list_empty(&ep->rma_info.reg_list) &&
	    list_empty(&ep->rma_info.remote_reg_list) &&
	    list_empty(&ep->rma_info.mmn_list) &&
	    !atomic_read(&ep->rma_info.tw_refcount) &&
	    !atomic_read(&ep->rma_info.tcw_refcount) &&
	    !atomic_read(&ep->rma_info.fence_refcount))
		ret = 1;
	mutex_unlock(&ep->rma_info.rma_lock);
	return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
	struct scif_pinned_pages *pin;

	might_sleep();
	pin = scif_zalloc(sizeof(*pin));
	if (!pin)
		goto error;

	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
	if (!pin->pages)
		goto error_free_pinned_pages;

	pin->prot = prot;
	pin->magic = SCIFEP_MAGIC;
	return pin;

error_free_pinned_pages:
	scif_free(pin, sizeof(*pin));
error:
	return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
	int j;
	int writeable = pin->prot & SCIF_PROT_WRITE;
	int kernel = SCIF_MAP_KERNEL & pin->map_flags;

	for (j = 0; j < pin->nr_pages; j++) {
		if (pin->pages[j] && !kernel) {
			if (writeable)
				SetPageDirty(pin->pages[j]);
			put_page(pin->pages[j]);
		}
	}

	scif_free(pin->pages,
		  pin->nr_pages * sizeof(*pin->pages));
	scif_free(pin, sizeof(*pin));
	return 0;
}

/**
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
				       s64 offset, bool temp)
{
	struct scif_window *window;

	might_sleep();
	window = scif_zalloc(sizeof(*window));
	if (!window)
		goto error;

	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
	if (!window->dma_addr)
		goto error_free_window;

	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
	if (!window->num_pages)
		goto error_free_window;

	window->offset = offset;
	window->ep = (u64)ep;
	window->magic = SCIFEP_MAGIC;
	window->reg_state = OP_IDLE;
	init_waitqueue_head(&window->regwq);
	window->unreg_state = OP_IDLE;
	init_waitqueue_head(&window->unregwq);
	INIT_LIST_HEAD(&window->list);
	window->type = SCIF_WINDOW_SELF;
	window->temp = temp;
	return window;

error_free_window:
	scif_free(window->dma_addr,
		  nr_pages * sizeof(*window->dma_addr));
	scif_free(window, sizeof(*window));
error:
	return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
					   struct scif_window *window)
{
	int err;
	int nr_pages = window->nr_pages;
	struct scif_allocmsg *alloc = &window->alloc_handle;
	struct scifmsg msg;

retry:
	/* Wait for a SCIF_ALLOC_GNT/REJ message */
	err = wait_event_timeout(alloc->allocwq,
				 alloc->state != OP_IN_PROGRESS,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err && scifdev_alive(ep))
		goto retry;

	mutex_lock(&ep->rma_info.rma_lock);
	if (alloc->state == OP_COMPLETED) {
		msg.uop = SCIF_FREE_VIRT;
		msg.src = ep->port;
		msg.payload[0] = ep->remote_ep;
		msg.payload[1] = window->alloc_handle.vaddr;
		msg.payload[2] = (u64)window;
		msg.payload[3] = SCIF_REGISTER;
		_scif_nodeqp_send(ep->remote_dev, &msg);
	}
	mutex_unlock(&ep->rma_info.rma_lock);

	scif_free_window_offset(ep, window, window->offset);
	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
	scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window.
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
	int j;

	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
		if (window->st) {
			dma_unmap_sg(&remote_dev->sdev->dev,
				     window->st->sgl, window->st->nents,
				     DMA_BIDIRECTIONAL);
			sg_free_table(window->st);
			kfree(window->st);
			window->st = NULL;
		}
	} else {
		for (j = 0; j < window->nr_contig_chunks; j++) {
			if (window->dma_addr[j]) {
				scif_unmap_single(window->dma_addr[j],
						  remote_dev,
						  window->num_pages[j] <<
						  PAGE_SHIFT);
				window->dma_addr[j] = 0x0;
			}
		}
	}
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
	if (scif_ulimit_check)
		return get_task_mm(current);
	return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
	if (mm)
		mmput(mm);
}

static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
			  int nr_pages, bool try_lock)
{
	if (!mm || !nr_pages || !scif_ulimit_check)
		return 0;
	if (try_lock) {
		if (!down_write_trylock(&mm->mmap_sem)) {
			dev_err(scif_info.mdev.this_device,
				"%s %d err\n", __func__, __LINE__);
			return -1;
		}
	} else {
		down_write(&mm->mmap_sem);
	}
	mm->pinned_vm -= nr_pages;
	up_write(&mm->mmap_sem);
	return 0;
}

static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
					     int nr_pages)
{
	unsigned long locked, lock_limit;

	if (!mm || !nr_pages || !scif_ulimit_check)
		return 0;

	locked = nr_pages;
	locked += mm->pinned_vm;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
		dev_err(scif_info.mdev.this_device,
			"locked(%lu) > lock_limit(%lu)\n",
			locked, lock_limit);
		return -ENOMEM;
	}
	mm->pinned_vm = locked;
	return 0;
}

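/*
 * A worked example of the accounting in __scif_check_inc_pinned_vm()
 * (the numbers are illustrative only), assuming 4K pages and a
 * RLIMIT_MEMLOCK of 64MB:
 *
 *	lock_limit = (64 << 20) >> PAGE_SHIFT = 16384 pages
 *
 * With mm->pinned_vm already at 16000 pages, a request to pin another
 * 512 pages computes locked = 16512 > 16384 and fails with -ENOMEM
 * unless the caller has CAP_IPC_LOCK; on success mm->pinned_vm is
 * raised to the new total and later lowered again by
 * __scif_dec_pinned_vm_lock().
 */
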
/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
	int j;
	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
	int nr_pages = window->nr_pages;

	might_sleep();
	if (!window->temp && window->mm) {
		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
		__scif_release_mm(window->mm);
		window->mm = NULL;
	}

	scif_free_window_offset(ep, window, window->offset);
	scif_unmap_window(ep->remote_dev, window);
	/*
	 * Decrement references for this set of pinned pages from
	 * this window.
	 */
	j = atomic_sub_return(1, &pinned_pages->ref_count);
	if (j < 0)
		dev_err(scif_info.mdev.this_device,
			"%s %d incorrect ref count %d\n",
			__func__, __LINE__, j);
	/*
	 * If the ref count for pinned_pages is zero then someone
	 * has already called scif_unpin_pages() for it and we should
	 * destroy the page cache.
	 */
	if (!j)
		scif_destroy_pinned_pages(window->pinned_pages);
	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
	window->magic = 0;
	scif_free(window, sizeof(*window));
	return 0;
}

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
				     struct scif_window *window)
{
	int i, j, err = 0;
	int nr_pages = window->nr_pages;
	bool vmalloc_dma_phys, vmalloc_num_pages;

	might_sleep();
	/* Map window */
	err = scif_map_single(&window->mapped_offset,
			      window, remote_dev, sizeof(*window));
	if (err)
		goto error_window;

	/* Compute the number of lookup entries. 21 == 2MB Shift */
	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
				  (2 * 1024 * 1024)) >> 21;

	window->dma_addr_lookup.lookup =
		scif_alloc_coherent(&window->dma_addr_lookup.offset,
				    remote_dev, window->nr_lookup *
				    sizeof(*window->dma_addr_lookup.lookup),
				    GFP_KERNEL | __GFP_ZERO);
	if (!window->dma_addr_lookup.lookup) {
		err = -ENOMEM;
		goto error_window;
	}

	window->num_pages_lookup.lookup =
		scif_alloc_coherent(&window->num_pages_lookup.offset,
				    remote_dev, window->nr_lookup *
				    sizeof(*window->num_pages_lookup.lookup),
				    GFP_KERNEL | __GFP_ZERO);
	if (!window->num_pages_lookup.lookup) {
		err = -ENOMEM;
		goto error_window;
	}

	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);

	/* Now map each of the pages containing physical addresses */
	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
				    vmalloc_dma_phys ?
				    vmalloc_to_page(&window->dma_addr[i]) :
				    virt_to_page(&window->dma_addr[i]),
				    remote_dev);
		if (err)
			goto error_window;
		err = scif_map_page(&window->num_pages_lookup.lookup[j],
				    vmalloc_num_pages ?
				    vmalloc_to_page(&window->num_pages[i]) :
				    virt_to_page(&window->num_pages[i]),
				    remote_dev);
		if (err)
			goto error_window;
	}
	return 0;
error_window:
	return err;
}

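/*
 * Sizing sketch for the lookup tables above (illustrative numbers,
 * assuming 4K pages and 64-bit addresses, i.e. SCIF_NR_ADDR_IN_PAGE
 * == 512): one lookup page holds 512 addresses, which covers
 * 512 * 4K == 2MB of registered memory -- hence the 2MB alignment and
 * the shift by 21 when computing nr_lookup. A 5MB (1280 page) window
 * therefore needs ALIGN(5MB, 2MB) >> 21 == 3 lookup entries.
 */
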
/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
				       struct scif_window *window)
{
	int i, j;

	if (window->nr_lookup) {
		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
		struct scif_rma_lookup *npup = &window->num_pages_lookup;

		for (i = 0, j = 0; i < window->nr_pages;
			i += SCIF_NR_ADDR_IN_PAGE, j++) {
			if (lup->lookup && lup->lookup[j])
				scif_unmap_single(lup->lookup[j],
						  remote_dev,
						  PAGE_SIZE);
			if (npup->lookup && npup->lookup[j])
				scif_unmap_single(npup->lookup[j],
						  remote_dev,
						  PAGE_SIZE);
		}
		if (lup->lookup)
			scif_free_coherent(lup->lookup, lup->offset,
					   remote_dev, window->nr_lookup *
					   sizeof(*lup->lookup));
		if (npup->lookup)
			scif_free_coherent(npup->lookup, npup->offset,
					   remote_dev, window->nr_lookup *
					   sizeof(*npup->lookup));
		if (window->mapped_offset)
			scif_unmap_single(window->mapped_offset,
					  remote_dev, sizeof(*window));
		window->nr_lookup = 0;
	}
}

/**
 * scif_create_remote_window:
 * @scifdev: SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
	struct scif_window *window;

	might_sleep();
	window = scif_zalloc(sizeof(*window));
	if (!window)
		goto error_ret;

	window->magic = SCIFEP_MAGIC;
	window->nr_pages = nr_pages;

	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
	if (!window->dma_addr)
		goto error_window;

	window->num_pages = scif_zalloc(nr_pages *
					sizeof(*window->num_pages));
	if (!window->num_pages)
		goto error_window;

	if (scif_create_remote_lookup(scifdev, window))
		goto error_window;

	window->type = SCIF_WINDOW_PEER;
	window->unreg_state = OP_IDLE;
	INIT_LIST_HEAD(&window->list);
	return window;
error_window:
	scif_destroy_remote_window(window);
error_ret:
	return NULL;
}

/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
	scif_free(window->dma_addr, window->nr_pages *
		  sizeof(*window->dma_addr));
	scif_free(window->num_pages, window->nr_pages *
		  sizeof(*window->num_pages));
	window->magic = 0;
	scif_free(window, sizeof(*window));
}

/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg(..) and then detect the number
 * of contiguous DMA mappings allocated.
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
			  struct scif_window *window)
{
	struct scatterlist *sg;
	int i, err;
	scif_pinned_pages_t pin = window->pinned_pages;

	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
	if (!window->st)
		return -ENOMEM;

	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
	if (err)
		return err;

	for_each_sg(window->st->sgl, sg, window->st->nents, i)
		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
			 window->st->nents, DMA_BIDIRECTIONAL);
	if (!err)
		return -ENOMEM;
	/* Detect contiguous ranges of DMA mappings */
	sg = window->st->sgl;
	for (i = 0; sg; i++) {
		dma_addr_t last_da;

		window->dma_addr[i] = sg_dma_address(sg);
		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
		last_da = sg_dma_address(sg) + sg_dma_len(sg);
		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
			window->num_pages[i] +=
				(sg_dma_len(sg) >> PAGE_SHIFT);
			last_da = sg_dma_address(sg) + sg_dma_len(sg);
		}
		window->nr_contig_chunks++;
	}
	return 0;
}

/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
	int i, j, k, err = 0, nr_contig_pages;
	scif_pinned_pages_t pin;
	phys_addr_t phys_prev, phys_curr;

	might_sleep();

	pin = window->pinned_pages;

	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev))
		return scif_iommu_map(remote_dev, window);

	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
		phys_prev = page_to_phys(pin->pages[i]);
		nr_contig_pages = 1;

		/* Detect physically contiguous chunks */
		for (k = i + 1; k < window->nr_pages; k++) {
			phys_curr = page_to_phys(pin->pages[k]);
			if (phys_curr != (phys_prev + PAGE_SIZE))
				break;
			phys_prev = phys_curr;
			nr_contig_pages++;
		}
		window->num_pages[j] = nr_contig_pages;
		window->nr_contig_chunks++;
		if (scif_is_mgmt_node()) {
			/*
			 * Management node has to deal with SMPT on X100 and
			 * hence the DMA mapping is required
			 */
			err = scif_map_single(&window->dma_addr[j],
					      phys_to_virt(page_to_phys(
							   pin->pages[i])),
					      remote_dev,
					      nr_contig_pages << PAGE_SHIFT);
			if (err)
				return err;
		} else {
			window->dma_addr[j] = page_to_phys(pin->pages[i]);
		}
	}
	return err;
}

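/*
 * Illustration of the chunk detection above (hypothetical pin of six
 * pages, 4K pages assumed) at physical addresses:
 *
 *	0x1000, 0x2000, 0x3000, 0x8000, 0x9000, 0xf000
 *
 * scif_map_window() on a non-mgmt node would record three contiguous
 * chunks:
 *
 *	window->dma_addr[0] = 0x1000, window->num_pages[0] = 3
 *	window->dma_addr[1] = 0x8000, window->num_pages[1] = 2
 *	window->dma_addr[2] = 0xf000, window->num_pages[2] = 1
 *
 * so window->nr_contig_chunks == 3. The IOMMU path arrives at the same
 * shape by coalescing adjacent entries of the dma_map_sg() output.
 */
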
/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
				     struct scif_window *window)
{
	struct scifmsg msg;

	msg.uop = SCIF_UNREGISTER;
	msg.src = ep->port;
	msg.payload[0] = window->alloc_handle.vaddr;
	msg.payload[1] = (u64)window;
	return scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 */
int scif_unregister_window(struct scif_window *window)
{
	int err = 0;
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
	bool send_msg = false;

	might_sleep();
	switch (window->unreg_state) {
	case OP_IDLE:
	{
		window->unreg_state = OP_IN_PROGRESS;
		send_msg = true;
		/* fall through */
	}
	case OP_IN_PROGRESS:
	{
		scif_get_window(window, 1);
		mutex_unlock(&ep->rma_info.rma_lock);
		if (send_msg) {
			err = scif_send_scif_unregister(ep, window);
			if (err) {
				window->unreg_state = OP_COMPLETED;
				goto done;
			}
		} else {
			/* Return ENXIO since unregistration is in progress */
			mutex_lock(&ep->rma_info.rma_lock);
			return -ENXIO;
		}
retry:
		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
		err = wait_event_timeout(window->unregwq,
					 window->unreg_state != OP_IN_PROGRESS,
					 SCIF_NODE_ALIVE_TIMEOUT);
		if (!err && scifdev_alive(ep))
			goto retry;
		if (!err) {
			err = -ENODEV;
			window->unreg_state = OP_COMPLETED;
			dev_err(scif_info.mdev.this_device,
				"%s %d err %d\n", __func__, __LINE__, err);
		}
		if (err > 0)
			err = 0;
done:
		mutex_lock(&ep->rma_info.rma_lock);
		scif_put_window(window, 1);
		break;
	}
	case OP_FAILED:
	{
		if (!scifdev_alive(ep)) {
			err = -ENODEV;
			window->unreg_state = OP_COMPLETED;
		}
		break;
	}
	case OP_COMPLETED:
		break;
	default:
		err = -ENODEV;
	}

	if (window->unreg_state == OP_COMPLETED && window->ref_count)
		scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
		mutex_unlock(&ep->rma_info.rma_lock);
		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
		    scifdev_alive(ep)) {
			scif_drain_dma_intr(ep->remote_dev->sdev,
					    ep->rma_info.dma_chan);
		} else {
			if (!__scif_dec_pinned_vm_lock(window->mm,
						       window->nr_pages, 1)) {
				__scif_release_mm(window->mm);
				window->mm = NULL;
			}
		}
		scif_queue_for_cleanup(window, &scif_info.rma);
		mutex_lock(&ep->rma_info.rma_lock);
	}
	return err;
}

/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request.
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
				   struct scif_window *window)
{
	struct scifmsg msg;
	struct scif_allocmsg *alloc = &window->alloc_handle;

	/* Set up the Alloc Handle */
	alloc->state = OP_IN_PROGRESS;
	init_waitqueue_head(&alloc->allocwq);

	/* Send out an allocation request */
	msg.uop = SCIF_ALLOC_REQ;
	msg.payload[1] = window->nr_pages;
	msg.payload[2] = (u64)&window->alloc_handle;
	return _scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
				   struct scif_window *window)
{
	struct scifmsg msg;
	struct scif_window *remote_window;
	struct scif_allocmsg *alloc = &window->alloc_handle;
	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
	int i = 0, j = 0;
	int nr_contig_chunks, loop_nr_contig_chunks;
	int remaining_nr_contig_chunks, nr_lookup;
	int err, map_err;

	map_err = scif_map_window(ep->remote_dev, window);
	if (map_err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d map_err %d\n", __func__, __LINE__, map_err);
	remaining_nr_contig_chunks = window->nr_contig_chunks;
	nr_contig_chunks = window->nr_contig_chunks;
retry:
	/* Wait for a SCIF_ALLOC_GNT/REJ message */
	err = wait_event_timeout(alloc->allocwq,
				 alloc->state != OP_IN_PROGRESS,
				 SCIF_NODE_ALIVE_TIMEOUT);
	mutex_lock(&ep->rma_info.rma_lock);
	/* Synchronize with the thread waking up allocwq */
	mutex_unlock(&ep->rma_info.rma_lock);
	if (!err && scifdev_alive(ep))
		goto retry;

	if (!err)
		err = -ENODEV;

	if (err > 0)
		err = 0;
	else
		return err;

	/* Bail out. The remote end rejected this request */
	if (alloc->state == OP_FAILED)
		return -ENOMEM;

	if (map_err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, map_err);
		msg.uop = SCIF_FREE_VIRT;
		msg.src = ep->port;
		msg.payload[0] = ep->remote_ep;
		msg.payload[1] = window->alloc_handle.vaddr;
		msg.payload[2] = (u64)window;
		msg.payload[3] = SCIF_REGISTER;
		spin_lock(&ep->lock);
		if (ep->state == SCIFEP_CONNECTED)
			err = _scif_nodeqp_send(ep->remote_dev, &msg);
		else
			err = -ENOTCONN;
		spin_unlock(&ep->lock);
		return err;
	}

	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
				     ep->remote_dev);

	/* Compute the number of lookup entries; one lookup page holds
	 * SCIF_NR_ADDR_IN_PAGE addresses.
	 */
	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);

	dma_phys_lookup =
		scif_ioremap(remote_window->dma_addr_lookup.offset,
			     nr_lookup *
			     sizeof(*remote_window->dma_addr_lookup.lookup),
			     ep->remote_dev);
	num_pages_lookup =
		scif_ioremap(remote_window->num_pages_lookup.offset,
			     nr_lookup *
			     sizeof(*remote_window->num_pages_lookup.lookup),
			     ep->remote_dev);

	while (remaining_nr_contig_chunks) {
		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
					      (int)SCIF_NR_ADDR_IN_PAGE);
		/* #1/2 - Copy physical addresses over to the remote side */

		/* #2/2 - Copy DMA addresses (addresses that are fed into the
		 * DMA engine). We transfer bus addresses which are then
		 * converted into MIC physical addresses on the remote side
		 * if it is a MIC; if the remote node is the mgmt node we
		 * transfer the MIC physical addresses directly.
		 */
		tmp = scif_ioremap(dma_phys_lookup[j],
				   loop_nr_contig_chunks *
				   sizeof(*window->dma_addr),
				   ep->remote_dev);
		tmp1 = scif_ioremap(num_pages_lookup[j],
				    loop_nr_contig_chunks *
				    sizeof(*window->num_pages),
				    ep->remote_dev);
		if (scif_is_mgmt_node()) {
			memcpy_toio((void __force __iomem *)tmp,
				    &window->dma_addr[i], loop_nr_contig_chunks
				    * sizeof(*window->dma_addr));
			memcpy_toio((void __force __iomem *)tmp1,
				    &window->num_pages[i], loop_nr_contig_chunks
				    * sizeof(*window->num_pages));
		} else {
			if (scifdev_is_p2p(ep->remote_dev)) {
				/*
				 * add remote node's base address for this node
				 * to convert it into a MIC address
				 */
				int m;
				dma_addr_t dma_addr;

				for (m = 0; m < loop_nr_contig_chunks; m++) {
					dma_addr = window->dma_addr[i + m] +
						ep->remote_dev->base_addr;
					writeq(dma_addr,
					       (void __force __iomem *)&tmp[m]);
				}
				memcpy_toio((void __force __iomem *)tmp1,
					    &window->num_pages[i],
					    loop_nr_contig_chunks
					    * sizeof(*window->num_pages));
			} else {
				/* Mgmt node or loopback - transfer DMA
				 * addresses as is, this is the same as a
				 * MIC physical address (we use the dma_addr
				 * and not the phys_addr array since the
				 * phys_addr is only setup if there is a mmap()
				 * request from the mgmt node)
				 */
				memcpy_toio((void __force __iomem *)tmp,
					    &window->dma_addr[i],
					    loop_nr_contig_chunks *
					    sizeof(*window->dma_addr));
				memcpy_toio((void __force __iomem *)tmp1,
					    &window->num_pages[i],
					    loop_nr_contig_chunks *
					    sizeof(*window->num_pages));
			}
		}
		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
		i += loop_nr_contig_chunks;
		j++;
		scif_iounmap(tmp, loop_nr_contig_chunks *
			     sizeof(*window->dma_addr), ep->remote_dev);
		scif_iounmap(tmp1, loop_nr_contig_chunks *
			     sizeof(*window->num_pages), ep->remote_dev);
	}

	/* Prepare the remote window for the peer */
	remote_window->peer_window = (u64)window;
	remote_window->offset = window->offset;
	remote_window->prot = window->prot;
	remote_window->nr_contig_chunks = nr_contig_chunks;
	remote_window->ep = ep->remote_ep;
	scif_iounmap(num_pages_lookup,
		     nr_lookup *
		     sizeof(*remote_window->num_pages_lookup.lookup),
		     ep->remote_dev);
	scif_iounmap(dma_phys_lookup,
		     nr_lookup *
		     sizeof(*remote_window->dma_addr_lookup.lookup),
		     ep->remote_dev);
	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
	window->peer_window = alloc->vaddr;
	return err;
}

/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if the EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message; otherwise send a SCIF_FREE_VIRT
 * message so that the peer can free its remote window allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
				   struct scif_window *window)
{
	int err = 0;
	struct scifmsg msg;

	msg.src = ep->port;
	msg.payload[0] = ep->remote_ep;
	msg.payload[1] = window->alloc_handle.vaddr;
	msg.payload[2] = (u64)window;
	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_CONNECTED) {
		msg.uop = SCIF_REGISTER;
		window->reg_state = OP_IN_PROGRESS;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		spin_unlock(&ep->lock);
		if (!err) {
retry:
			/* Wait for a SCIF_REGISTER_(N)ACK message */
			err = wait_event_timeout(window->regwq,
						 window->reg_state !=
						 OP_IN_PROGRESS,
						 SCIF_NODE_ALIVE_TIMEOUT);
			if (!err && scifdev_alive(ep))
				goto retry;
			err = !err ? -ENODEV : 0;
			if (window->reg_state == OP_FAILED)
				err = -ENOTCONN;
		}
	} else {
		msg.uop = SCIF_FREE_VIRT;
		msg.payload[3] = SCIF_REGISTER;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		spin_unlock(&ep->lock);
		if (!err)
			err = -ENOTCONN;
	}
	return err;
}

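/*
 * Overall registration handshake implemented by the helpers above
 * (self side on the left, peer side on the right):
 *
 *	scif_send_alloc_request()  --SCIF_ALLOC_REQ-->      scif_alloc_req()
 *	scif_prep_remote_window()  <--SCIF_ALLOC_GNT/REJ--
 *	scif_send_scif_register()  --SCIF_REGISTER-->       scif_recv_reg()
 *	                           <--SCIF_REGISTER_(N)ACK--
 *
 * A SCIF_FREE_VIRT message is sent instead when a step fails after the
 * peer has already allocated its remote window.
 */
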
/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
			   int num_pages, s64 *out_offset)
{
	s64 page_index;
	struct iova *iova_ptr;
	int err = 0;

	if (flags & SCIF_MAP_FIXED) {
		page_index = SCIF_IOVA_PFN(offset);
		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
					page_index + num_pages - 1);
		if (!iova_ptr)
			err = -EADDRINUSE;
	} else {
		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
				      SCIF_DMA_63BIT_PFN - 1, 0);
		if (!iova_ptr)
			err = -ENOMEM;
	}
	if (!err)
		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
	return err;
}

/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: offset to be freed
 *
 * Free offset for this EP. The caller is supposed to grab
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
			     struct scif_window *window, s64 offset)
{
	if (!window || !window->offset_freed) {
		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
		if (window)
			window->offset_freed = true;
	}
}

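/*
 * A minimal sketch of how an offset is claimed and released (the values
 * are illustrative): with SCIF_MAP_FIXED the caller's offset is reserved
 * as-is via reserve_iova(), otherwise alloc_iova() picks a free range.
 *
 *	s64 off;
 *	int err = scif_get_window_offset(ep, SCIF_MAP_FIXED, 0x100000,
 *					 16, &off);
 *	if (!err) {
 *		...
 *		scif_free_window_offset(ep, NULL, off);
 *	}
 */
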
/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
	int err;
	struct scif_window *window = NULL;
	int nr_pages = msg->payload[1];

	window = scif_create_remote_window(scifdev, nr_pages);
	if (!window) {
		err = -ENOMEM;
		goto error;
	}

	/* The peer's allocation request is granted */
	msg->uop = SCIF_ALLOC_GNT;
	msg->payload[0] = (u64)window;
	msg->payload[1] = window->mapped_offset;
	err = scif_nodeqp_send(scifdev, msg);
	if (err)
		scif_destroy_remote_window(window);
	return;
error:
	/* The peer's allocation request is rejected */
	dev_err(&scifdev->sdev->dev,
		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
		__func__, __LINE__, err, window, nr_pages);
	msg->uop = SCIF_ALLOC_REJ;
	scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
	struct scif_window *window = container_of(handle, struct scif_window,
						  alloc_handle);
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	handle->vaddr = msg->payload[0];
	handle->phys_addr = msg->payload[1];
	if (msg->uop == SCIF_ALLOC_GNT)
		handle->state = OP_COMPLETED;
	else
		handle->state = OP_FAILED;
	wake_up(&handle->allocwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Free up memory kmalloc'd earlier.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window = (struct scif_window *)msg->payload[1];

	scif_destroy_remote_window(window);
}

static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
	int j;
	struct scif_hw_dev *sdev = dev->sdev;
	phys_addr_t apt_base = 0;

	/*
	 * Add the aperture base if the DMA address is not card relative
	 * since the DMA addresses need to be an offset into the bar
	 */
	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
	    sdev->aper && !sdev->card_rel_da)
		apt_base = sdev->aper->pa;
	else
		return;

	for (j = 0; j < window->nr_contig_chunks; j++) {
		if (window->num_pages[j])
			window->dma_addr[j] += apt_base;
		else
			break;
	}
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];

	mutex_lock(&ep->rma_info.rma_lock);
	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_CONNECTED) {
		msg->uop = SCIF_REGISTER_ACK;
		scif_nodeqp_send(ep->remote_dev, msg);
		scif_fixup_aper_base(ep->remote_dev, window);
		/* No further failures expected. Insert new window */
		scif_insert_window(window, &ep->rma_info.remote_reg_list);
	} else {
		msg->uop = SCIF_REGISTER_NACK;
		scif_nodeqp_send(ep->remote_dev, msg);
	}
	spin_unlock(&ep->lock);
	mutex_unlock(&ep->rma_info.rma_lock);
	/* free up any lookup resources now that page lists are transferred */
	scif_destroy_remote_lookup(ep->remote_dev, window);
	/*
	 * If the window could not be inserted it still needs to be
	 * destroyed here.
	 */
	if (msg->uop == SCIF_REGISTER_NACK)
		scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;
	int del_window = 0;

	ep = (struct scif_endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = 0;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.remote_reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}
	if (window) {
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(&scifdev->sdev->dev,
				"%s %d ref count should be +ve\n",
				__func__, __LINE__);
		window->unreg_state = OP_COMPLETED;
		if (!window->ref_count) {
			msg->uop = SCIF_UNREGISTER_ACK;
			atomic_inc(&ep->rma_info.tw_refcount);
			ep->rma_info.async_list_del = 1;
			list_del_init(&window->list);
			del_window = 1;
		} else {
			/* NACK! There are valid references to this window */
			msg->uop = SCIF_UNREGISTER_NACK;
		}
	} else {
		/* The window did not make its way to the list at all. ACK */
		msg->uop = SCIF_UNREGISTER_ACK;
		scif_destroy_remote_window(recv_window);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (del_window)
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
	scif_nodeqp_send(ep->remote_dev, msg);
	if (del_window)
		scif_queue_for_cleanup(window, &scif_info.rma);
}

/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[2];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->reg_state = OP_COMPLETED;
	wake_up(&window->regwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to inform it that registration
 * cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[2];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->reg_state = OP_FAILED;
	wake_up(&window->regwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->unreg_state = OP_COMPLETED;
	wake_up(&window->unregwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to inform it that unregistration
 * cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->unreg_state = OP_FAILED;
	wake_up(&window->unregwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

int __scif_pin_pages(void *addr, size_t len, int *out_prot,
		     int map_flags, scif_pinned_pages_t *pages)
{
	struct scif_pinned_pages *pinned_pages;
	int nr_pages, err = 0, i;
	bool vmalloc_addr = false;
	bool try_upgrade = false;
	int prot = *out_prot;
	int ulimit = 0;
	struct mm_struct *mm = NULL;

	/* Unsupported flags */
	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
		return -EINVAL;
	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);

	/* Unsupported protection requested */
	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
		return -EINVAL;

	/* addr/len must be page aligned. len should be non zero */
	if (!len ||
	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
		return -EINVAL;

	might_sleep();

	nr_pages = len >> PAGE_SHIFT;

	/* Allocate a set of pinned pages */
	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
	if (!pinned_pages)
		return -ENOMEM;

	if (map_flags & SCIF_MAP_KERNEL) {
		if (is_vmalloc_addr(addr))
			vmalloc_addr = true;

		for (i = 0; i < nr_pages; i++) {
			if (vmalloc_addr)
				pinned_pages->pages[i] =
					vmalloc_to_page(addr + (i * PAGE_SIZE));
			else
				pinned_pages->pages[i] =
					virt_to_page(addr + (i * PAGE_SIZE));
		}
		pinned_pages->nr_pages = nr_pages;
		pinned_pages->map_flags = SCIF_MAP_KERNEL;
	} else {
		/*
		 * SCIF supports registration caching. If a registration has
		 * been requested with read only permissions, then we try
		 * to pin the pages with RW permissions so that a subsequent
		 * transfer with RW permission can hit the cache instead of
		 * invalidating it. If the upgrade to RW fails, then we
		 * revert back to R permission and retry.
		 */
		if (prot == SCIF_PROT_READ)
			try_upgrade = true;
		prot |= SCIF_PROT_WRITE;
retry:
		mm = current->mm;
		down_write(&mm->mmap_sem);
		if (ulimit) {
			err = __scif_check_inc_pinned_vm(mm, nr_pages);
			if (err) {
				up_write(&mm->mmap_sem);
				pinned_pages->nr_pages = 0;
				goto error_unmap;
			}
		}

		pinned_pages->nr_pages = get_user_pages(
				current,
				mm,
				(u64)addr,
				nr_pages,
				!!(prot & SCIF_PROT_WRITE),
				0,
				pinned_pages->pages,
				NULL);
		up_write(&mm->mmap_sem);
		if (nr_pages != pinned_pages->nr_pages) {
			if (try_upgrade) {
				if (ulimit)
					__scif_dec_pinned_vm_lock(mm,
								  nr_pages, 0);
				/* Roll back any pinned pages */
				for (i = 0; i < pinned_pages->nr_pages; i++) {
					if (pinned_pages->pages[i])
						put_page(
						pinned_pages->pages[i]);
				}
				prot &= ~SCIF_PROT_WRITE;
				try_upgrade = false;
				goto retry;
			}
		}
		pinned_pages->map_flags = 0;
	}

	if (pinned_pages->nr_pages < nr_pages) {
		err = -EFAULT;
		pinned_pages->nr_pages = nr_pages;
		goto dec_pinned;
	}

	*out_prot = prot;
	atomic_set(&pinned_pages->ref_count, 1);
	*pages = pinned_pages;
	return err;
dec_pinned:
	if (ulimit)
		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
	/* Something went wrong! Rollback */
error_unmap:
	pinned_pages->nr_pages = nr_pages;
	scif_destroy_pinned_pages(pinned_pages);
	*pages = NULL;
	dev_dbg(scif_info.mdev.this_device,
		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
	return err;
}

int scif_pin_pages(void *addr, size_t len, int prot,
		   int map_flags, scif_pinned_pages_t *pages)
{
	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
}
EXPORT_SYMBOL_GPL(scif_pin_pages);

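/*
 * A minimal usage sketch for the pinning API (hypothetical kernel
 * client; buf/len are assumed to be page aligned as required above):
 *
 *	scif_pinned_pages_t pin;
 *	int err = scif_pin_pages(buf, len,
 *				 SCIF_PROT_READ | SCIF_PROT_WRITE,
 *				 SCIF_MAP_KERNEL, &pin);
 *	if (err)
 *		return err;
 *	... use the pages, e.g. via scif_register_pinned_pages() ...
 *	err = scif_unpin_pages(pin);
 */
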
int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
{
	int err = 0, ret;

	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
		return -EINVAL;

	ret = atomic_sub_return(1, &pinned_pages->ref_count);
	if (ret < 0) {
		dev_err(scif_info.mdev.this_device,
			"%s %d scif_unpin_pages called without pinning? rc %d\n",
			__func__, __LINE__, ret);
		return -EINVAL;
	}
	/*
	 * Destroy the window if the ref count for this set of pinned
	 * pages has dropped to zero. If it is positive then there is
	 * a valid registered window which is backed by these pages and
	 * it will be destroyed once all such windows are unregistered.
	 */
	if (!ret)
		err = scif_destroy_pinned_pages(pinned_pages);

	return err;
}
EXPORT_SYMBOL_GPL(scif_unpin_pages);

static inline void
scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
{
	mutex_lock(&ep->rma_info.rma_lock);
	scif_insert_window(window, &ep->rma_info.reg_list);
	mutex_unlock(&ep->rma_info.rma_lock);
}

off_t scif_register_pinned_pages(scif_epd_t epd,
				 scif_pinned_pages_t pinned_pages,
				 off_t offset, int map_flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 computed_offset;
	struct scif_window *window;
	int err;
	size_t len;
	struct device *spdev;

	/* Unsupported flags */
	if (map_flags & ~SCIF_MAP_FIXED)
		return -EINVAL;

	len = pinned_pages->nr_pages << PAGE_SHIFT;

	/*
	 * Offset is not page aligned/negative or offset+len
	 * wraps around with SCIF_MAP_FIXED.
	 */
	if ((map_flags & SCIF_MAP_FIXED) &&
	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset)))
		return -EINVAL;

	might_sleep();

	err = scif_verify_epd(ep);
	if (err)
		return err;
	/*
	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
	 * after calling scif_unpin_pages().
	 */
	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
		return -EINVAL;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, map_flags, offset,
				     len >> PAGE_SHIFT, &computed_offset);
	if (err) {
		atomic_sub(1, &pinned_pages->ref_count);
		return err;
	}

	/* Allocate and prepare self registration window */
	window = scif_create_window(ep, pinned_pages->nr_pages,
				    computed_offset, false);
	if (!window) {
		atomic_sub(1, &pinned_pages->ref_count);
		scif_free_window_offset(ep, NULL, computed_offset);
		return -ENOMEM;
	}

	window->pinned_pages = pinned_pages;
	window->nr_pages = pinned_pages->nr_pages;
	window->prot = pinned_pages->prot;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		scif_destroy_window(ep, window);
		return err;
	}
	err = scif_send_alloc_request(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Prepare the remote registration window */
	err = scif_prep_remote_window(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Tell the peer about the new window */
	err = scif_send_scif_register(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	scif_put_peer_dev(spdev);
	/* No further failures expected. Insert new window */
	scif_insert_local_window(window, ep);
	return computed_offset;
error_unmap:
	scif_destroy_window(ep, window);
	scif_put_peer_dev(spdev);
	dev_err(&ep->remote_dev->sdev->dev,
		"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}
EXPORT_SYMBOL_GPL(scif_register_pinned_pages);

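/*
 * Sketch of pairing scif_pin_pages() with the registration call above
 * (hypothetical endpoint epd; the offset is chosen by SCIF since
 * SCIF_MAP_FIXED is not passed):
 *
 *	off_t off = scif_register_pinned_pages(epd, pin, 0, 0);
 *	if (off < 0)
 *		return off;
 *	... RMA operations against the window ...
 *	err = scif_unregister(epd, off, pin->nr_pages << PAGE_SHIFT);
 */
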
off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
		    int prot, int map_flags)
{
	scif_pinned_pages_t pinned_pages;
	off_t err;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 computed_offset;
	struct scif_window *window;
	struct mm_struct *mm = NULL;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
		epd, addr, len, offset, prot, map_flags);
	/* Unsupported flags */
	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
		return -EINVAL;

	/*
	 * Offset is not page aligned/negative or offset+len
	 * wraps around with SCIF_MAP_FIXED.
	 */
	if ((map_flags & SCIF_MAP_FIXED) &&
	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset)))
		return -EINVAL;

	/* Unsupported protection requested */
	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
		return -EINVAL;

	/* addr/len must be page aligned. len should be non zero */
	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	might_sleep();

	err = scif_verify_epd(ep);
	if (err)
		return err;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, map_flags, offset,
				     len >> PAGE_SHIFT, &computed_offset);
	if (err)
		return err;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		scif_free_window_offset(ep, NULL, computed_offset);
		return err;
	}
	/* Allocate and prepare self registration window */
	window = scif_create_window(ep, len >> PAGE_SHIFT,
				    computed_offset, false);
	if (!window) {
		scif_free_window_offset(ep, NULL, computed_offset);
		scif_put_peer_dev(spdev);
		return -ENOMEM;
	}

	window->nr_pages = len >> PAGE_SHIFT;

	err = scif_send_alloc_request(ep, window);
	if (err) {
		scif_destroy_incomplete_window(ep, window);
		scif_put_peer_dev(spdev);
		return err;
	}

	if (!(map_flags & SCIF_MAP_KERNEL)) {
		mm = __scif_acquire_mm();
		map_flags |= SCIF_MAP_ULIMIT;
	}
	/* Pin down the pages */
	err = __scif_pin_pages(addr, len, &prot,
			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
			       &pinned_pages);
	if (err) {
		scif_destroy_incomplete_window(ep, window);
		__scif_release_mm(mm);
		goto error;
	}

	window->pinned_pages = pinned_pages;
	window->prot = pinned_pages->prot;
	window->mm = mm;

	/* Prepare the remote registration window */
	err = scif_prep_remote_window(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %ld\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Tell the peer about the new window */
	err = scif_send_scif_register(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %ld\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	scif_put_peer_dev(spdev);
	/* No further failures expected. Insert new window */
	scif_insert_local_window(window, ep);
	dev_dbg(&ep->remote_dev->sdev->dev,
		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
		epd, addr, len, computed_offset);
	return computed_offset;
error_unmap:
	scif_destroy_window(ep, window);
error:
	scif_put_peer_dev(spdev);
	dev_err(&ep->remote_dev->sdev->dev,
		"%s %d err %ld\n", __func__, __LINE__, err);
	return err;
}
EXPORT_SYMBOL_GPL(scif_register);

int
scif_unregister(scif_epd_t epd, off_t offset, size_t len)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_window *window = NULL;
	struct scif_rma_req req;
	int nr_pages, err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	/* len must be page aligned. len should be non zero */
	if (!len ||
	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
		return -EINVAL;

	/* Offset is not page aligned or offset+len wraps around */
	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset))
		return -EINVAL;

	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();
	nr_pages = len >> PAGE_SHIFT;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}
	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}
	/* Unregister all the windows in this range */
	err = scif_rma_list_unregister(window, offset, nr_pages);
	if (err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	scif_put_peer_dev(spdev);
	return err;
}
EXPORT_SYMBOL_GPL(scif_unregister);
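
/*
 * End-to-end usage sketch for the windowed registration API above
 * (hypothetical kernel client; epd is a connected endpoint and buf/len
 * are page aligned):
 *
 *	off_t off = scif_register(epd, buf, len, 0,
 *				  SCIF_PROT_READ | SCIF_PROT_WRITE,
 *				  SCIF_MAP_KERNEL);
 *	if (off < 0)
 *		return off;
 *	... RMA operations against [off, off + len) ...
 *	err = scif_unregister(epd, off, len);
 */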