drivers/infiniband/sw/rdmavt/mr.c


DEFINITIONS

This source file includes the following definitions:
  1. rvt_driver_mr_init
  2. rvt_mr_exit
  3. rvt_deinit_mregion
  4. __rvt_mregion_complete
  5. rvt_init_mregion
  6. rvt_alloc_lkey
  7. rvt_free_lkey
  8. __rvt_alloc_mr
  9. __rvt_free_mr
  10. rvt_get_dma_mr
  11. rvt_reg_user_mr
  12. rvt_dereg_clean_qp_cb
  13. rvt_dereg_clean_qps
  14. rvt_check_refs
  15. rvt_mr_has_lkey
  16. rvt_ss_has_lkey
  17. rvt_dereg_mr
  18. rvt_alloc_mr
  19. rvt_set_page
  20. rvt_map_mr_sg
  21. rvt_fast_reg_mr
  22. rvt_invalidate_rkey
  23. rvt_alloc_fmr
  24. rvt_map_phys_fmr
  25. rvt_unmap_fmr
  26. rvt_dealloc_fmr
  27. rvt_sge_adjacent
  28. rvt_lkey_ok
  29. rvt_rkey_ok

   1 /*
   2  * Copyright(c) 2016 Intel Corporation.
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of version 2 of the GNU General Public License as
  11  * published by the Free Software Foundation.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * General Public License for more details.
  17  *
  18  * BSD LICENSE
  19  *
  20  * Redistribution and use in source and binary forms, with or without
  21  * modification, are permitted provided that the following conditions
  22  * are met:
  23  *
  24  *  - Redistributions of source code must retain the above copyright
  25  *    notice, this list of conditions and the following disclaimer.
  26  *  - Redistributions in binary form must reproduce the above copyright
  27  *    notice, this list of conditions and the following disclaimer in
  28  *    the documentation and/or other materials provided with the
  29  *    distribution.
  30  *  - Neither the name of Intel Corporation nor the names of its
  31  *    contributors may be used to endorse or promote products derived
  32  *    from this software without specific prior written permission.
  33  *
  34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45  *
  46  */
  47 
  48 #include <linux/slab.h>
  49 #include <linux/vmalloc.h>
  50 #include <rdma/ib_umem.h>
  51 #include <rdma/rdma_vt.h>
  52 #include "vt.h"
  53 #include "mr.h"
  54 #include "trace.h"
  55 
  56 /**
  57  * rvt_driver_mr_init - Init MR resources per driver
  58  * @rdi: rvt dev struct
  59  *
   60  * Do any initialization needed when a driver registers with rdmavt.
  61  *
  62  * Return: 0 on success or errno on failure
  63  */
  64 int rvt_driver_mr_init(struct rvt_dev_info *rdi)
  65 {
  66         unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
  67         unsigned lk_tab_size;
  68         int i;
  69 
  70         /*
   71          * The top lkey_table_size bits are used to index the
  72          * table.  The lower 8 bits can be owned by the user (copied from
  73          * the LKEY).  The remaining bits act as a generation number or tag.
  74          */
  75         if (!lkey_table_size)
  76                 return -EINVAL;
  77 
  78         spin_lock_init(&rdi->lkey_table.lock);
  79 
  80         /* ensure generation is at least 4 bits */
  81         if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
  82                 rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
  83                             lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
  84                 rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
  85                 lkey_table_size = rdi->dparms.lkey_table_size;
  86         }
  87         rdi->lkey_table.max = 1 << lkey_table_size;
  88         rdi->lkey_table.shift = 32 - lkey_table_size;
  89         lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
  90         rdi->lkey_table.table = (struct rvt_mregion __rcu **)
  91                                vmalloc_node(lk_tab_size, rdi->dparms.node);
  92         if (!rdi->lkey_table.table)
  93                 return -ENOMEM;
  94 
  95         RCU_INIT_POINTER(rdi->dma_mr, NULL);
  96         for (i = 0; i < rdi->lkey_table.max; i++)
  97                 RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
  98 
  99         rdi->dparms.props.max_mr = rdi->lkey_table.max;
 100         rdi->dparms.props.max_fmr = rdi->lkey_table.max;
 101         return 0;
 102 }
 103 
 104 /**
  105  * rvt_mr_exit - clean up MR
  106  * @rdi: rvt dev structure
  107  *
  108  * Called when drivers have unregistered or perhaps failed to register with us.
 109  */
 110 void rvt_mr_exit(struct rvt_dev_info *rdi)
 111 {
 112         if (rdi->dma_mr)
 113                 rvt_pr_err(rdi, "DMA MR not null!\n");
 114 
 115         vfree(rdi->lkey_table.table);
 116 }
 117 
 118 static void rvt_deinit_mregion(struct rvt_mregion *mr)
 119 {
 120         int i = mr->mapsz;
 121 
 122         mr->mapsz = 0;
 123         while (i)
 124                 kfree(mr->map[--i]);
 125         percpu_ref_exit(&mr->refcount);
 126 }
 127 
 128 static void __rvt_mregion_complete(struct percpu_ref *ref)
 129 {
 130         struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
 131                                               refcount);
 132 
 133         complete(&mr->comp);
 134 }
 135 
 136 static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
 137                             int count, unsigned int percpu_flags)
 138 {
 139         int m, i = 0;
 140         struct rvt_dev_info *dev = ib_to_rvt(pd->device);
 141 
 142         mr->mapsz = 0;
 143         m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
 144         for (; i < m; i++) {
 145                 mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
 146                                           dev->dparms.node);
 147                 if (!mr->map[i])
 148                         goto bail;
 149                 mr->mapsz++;
 150         }
 151         init_completion(&mr->comp);
 152         /* count returning the ptr to user */
 153         if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
 154                             percpu_flags, GFP_KERNEL))
 155                 goto bail;
 156 
 157         atomic_set(&mr->lkey_invalid, 0);
 158         mr->pd = pd;
 159         mr->max_segs = count;
 160         return 0;
 161 bail:
 162         rvt_deinit_mregion(mr);
 163         return -ENOMEM;
 164 }
 165 
 166 /**
 167  * rvt_alloc_lkey - allocate an lkey
 168  * @mr: memory region that this lkey protects
 169  * @dma_region: 0->normal key, 1->restricted DMA key
 170  *
 171  * Returns 0 if successful, otherwise returns -errno.
 172  *
 173  * Increments mr reference count as required.
 174  *
  175  * Sets the lkey field of mr for non-dma regions.
 176  *
 177  */
 178 static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 179 {
 180         unsigned long flags;
 181         u32 r;
 182         u32 n;
 183         int ret = 0;
 184         struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
 185         struct rvt_lkey_table *rkt = &dev->lkey_table;
 186 
 187         rvt_get_mr(mr);
 188         spin_lock_irqsave(&rkt->lock, flags);
 189 
 190         /* special case for dma_mr lkey == 0 */
 191         if (dma_region) {
 192                 struct rvt_mregion *tmr;
 193 
 194                 tmr = rcu_access_pointer(dev->dma_mr);
 195                 if (!tmr) {
 196                         mr->lkey_published = 1;
  197                         /* Ensure published is written first */
 198                         rcu_assign_pointer(dev->dma_mr, mr);
 199                         rvt_get_mr(mr);
 200                 }
 201                 goto success;
 202         }
 203 
 204         /* Find the next available LKEY */
 205         r = rkt->next;
 206         n = r;
 207         for (;;) {
 208                 if (!rcu_access_pointer(rkt->table[r]))
 209                         break;
 210                 r = (r + 1) & (rkt->max - 1);
 211                 if (r == n)
 212                         goto bail;
 213         }
 214         rkt->next = (r + 1) & (rkt->max - 1);
 215         /*
 216          * Make sure lkey is never zero which is reserved to indicate an
 217          * unrestricted LKEY.
 218          */
 219         rkt->gen++;
 220         /*
 221          * bits are capped to ensure enough bits for generation number
 222          */
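              /*
               * Illustrative layout (assuming lkey_table_size == 16):
               *   bits 31..16  table index r
               *   bits 15..8   generation, masked so index plus generation
               *                fit within the top 24 bits
               *   bits  7..0   left for the user-owned portion of the LKEY
               */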
 223         mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
 224                 ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
 225                  << 8);
 226         if (mr->lkey == 0) {
 227                 mr->lkey |= 1 << 8;
 228                 rkt->gen++;
 229         }
 230         mr->lkey_published = 1;
  231         /* Ensure published is written first */
 232         rcu_assign_pointer(rkt->table[r], mr);
 233 success:
 234         spin_unlock_irqrestore(&rkt->lock, flags);
 235 out:
 236         return ret;
 237 bail:
 238         rvt_put_mr(mr);
 239         spin_unlock_irqrestore(&rkt->lock, flags);
 240         ret = -ENOMEM;
 241         goto out;
 242 }
 243 
 244 /**
 245  * rvt_free_lkey - free an lkey
 246  * @mr: mr to free from tables
 247  */
 248 static void rvt_free_lkey(struct rvt_mregion *mr)
 249 {
 250         unsigned long flags;
 251         u32 lkey = mr->lkey;
 252         u32 r;
 253         struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
 254         struct rvt_lkey_table *rkt = &dev->lkey_table;
 255         int freed = 0;
 256 
 257         spin_lock_irqsave(&rkt->lock, flags);
 258         if (!lkey) {
 259                 if (mr->lkey_published) {
 260                         mr->lkey_published = 0;
  261                         /* ensure published is written before pointer */
 262                         rcu_assign_pointer(dev->dma_mr, NULL);
 263                         rvt_put_mr(mr);
 264                 }
 265         } else {
 266                 if (!mr->lkey_published)
 267                         goto out;
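                      /* recover the table index from the top lkey_table_size bits */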
 268                 r = lkey >> (32 - dev->dparms.lkey_table_size);
 269                 mr->lkey_published = 0;
  270                 /* ensure published is written before pointer */
 271                 rcu_assign_pointer(rkt->table[r], NULL);
 272         }
 273         freed++;
 274 out:
 275         spin_unlock_irqrestore(&rkt->lock, flags);
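              /*
               * percpu_ref_kill() switches the refcount to atomic mode and drops
               * the initial reference taken in percpu_ref_init(); once the
               * remaining holders call rvt_put_mr(), __rvt_mregion_complete()
               * completes mr->comp, which rvt_check_refs() waits on.
               */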
 276         if (freed)
 277                 percpu_ref_kill(&mr->refcount);
 278 }
 279 
 280 static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
 281 {
 282         struct rvt_mr *mr;
 283         int rval = -ENOMEM;
 284         int m;
 285 
 286         /* Allocate struct plus pointers to first level page tables. */
 287         m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
 288         mr = kzalloc(struct_size(mr, mr.map, m), GFP_KERNEL);
 289         if (!mr)
 290                 goto bail;
 291 
 292         rval = rvt_init_mregion(&mr->mr, pd, count, 0);
 293         if (rval)
 294                 goto bail;
 295         /*
 296          * ib_reg_phys_mr() will initialize mr->ibmr except for
 297          * lkey and rkey.
 298          */
 299         rval = rvt_alloc_lkey(&mr->mr, 0);
 300         if (rval)
 301                 goto bail_mregion;
 302         mr->ibmr.lkey = mr->mr.lkey;
 303         mr->ibmr.rkey = mr->mr.lkey;
 304 done:
 305         return mr;
 306 
 307 bail_mregion:
 308         rvt_deinit_mregion(&mr->mr);
 309 bail:
 310         kfree(mr);
 311         mr = ERR_PTR(rval);
 312         goto done;
 313 }
 314 
 315 static void __rvt_free_mr(struct rvt_mr *mr)
 316 {
 317         rvt_free_lkey(&mr->mr);
 318         rvt_deinit_mregion(&mr->mr);
 319         kfree(mr);
 320 }
 321 
 322 /**
 323  * rvt_get_dma_mr - get a DMA memory region
 324  * @pd: protection domain for this memory region
 325  * @acc: access flags
 326  *
 327  * Return: the memory region on success, otherwise returns an errno.
 328  * Note that all DMA addresses should be created via the functions in
 329  * struct dma_virt_ops.
 330  */
 331 struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
 332 {
 333         struct rvt_mr *mr;
 334         struct ib_mr *ret;
 335         int rval;
 336 
 337         if (ibpd_to_rvtpd(pd)->user)
 338                 return ERR_PTR(-EPERM);
 339 
 340         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 341         if (!mr) {
 342                 ret = ERR_PTR(-ENOMEM);
 343                 goto bail;
 344         }
 345 
 346         rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
 347         if (rval) {
 348                 ret = ERR_PTR(rval);
 349                 goto bail;
 350         }
 351 
 352         rval = rvt_alloc_lkey(&mr->mr, 1);
 353         if (rval) {
 354                 ret = ERR_PTR(rval);
 355                 goto bail_mregion;
 356         }
 357 
 358         mr->mr.access_flags = acc;
 359         ret = &mr->ibmr;
 360 done:
 361         return ret;
 362 
 363 bail_mregion:
 364         rvt_deinit_mregion(&mr->mr);
 365 bail:
 366         kfree(mr);
 367         goto done;
 368 }
 369 
 370 /**
 371  * rvt_reg_user_mr - register a userspace memory region
 372  * @pd: protection domain for this memory region
 373  * @start: starting userspace address
 374  * @length: length of region to register
 375  * @mr_access_flags: access flags for this memory region
 376  * @udata: unused by the driver
 377  *
 378  * Return: the memory region on success, otherwise returns an errno.
 379  */
 380 struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 381                               u64 virt_addr, int mr_access_flags,
 382                               struct ib_udata *udata)
 383 {
 384         struct rvt_mr *mr;
 385         struct ib_umem *umem;
 386         struct sg_page_iter sg_iter;
 387         int n, m;
 388         struct ib_mr *ret;
 389 
 390         if (length == 0)
 391                 return ERR_PTR(-EINVAL);
 392 
 393         umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
 394         if (IS_ERR(umem))
 395                 return (void *)umem;
 396 
 397         n = ib_umem_num_pages(umem);
 398 
 399         mr = __rvt_alloc_mr(n, pd);
 400         if (IS_ERR(mr)) {
 401                 ret = (struct ib_mr *)mr;
 402                 goto bail_umem;
 403         }
 404 
 405         mr->mr.user_base = start;
 406         mr->mr.iova = virt_addr;
 407         mr->mr.length = length;
 408         mr->mr.offset = ib_umem_offset(umem);
 409         mr->mr.access_flags = mr_access_flags;
 410         mr->umem = umem;
 411 
 412         mr->mr.page_shift = PAGE_SHIFT;
 413         m = 0;
 414         n = 0;
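              /* walk the umem a page at a time, filling RVT_SEGSZ segments per map chunk */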
 415         for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
 416                 void *vaddr;
 417 
 418                 vaddr = page_address(sg_page_iter_page(&sg_iter));
 419                 if (!vaddr) {
 420                         ret = ERR_PTR(-EINVAL);
 421                         goto bail_inval;
 422                 }
 423                 mr->mr.map[m]->segs[n].vaddr = vaddr;
 424                 mr->mr.map[m]->segs[n].length = PAGE_SIZE;
 425                 trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE);
 426                 if (++n == RVT_SEGSZ) {
 427                         m++;
 428                         n = 0;
 429                 }
 430         }
 431         return &mr->ibmr;
 432 
 433 bail_inval:
 434         __rvt_free_mr(mr);
 435 
 436 bail_umem:
 437         ib_umem_release(umem);
 438 
 439         return ret;
 440 }
 441 
 442 /**
 443  * rvt_dereg_clean_qp_cb - callback from iterator
  444  * @qp: the qp
  445  * @v: the mregion (as u64)
  446  *
  447  * This routine receives the callback for every QP; for QPs in the same
  448  * PD as the MR it calls rvt_qp_mr_clean() to potentially clean up
  449  * references.
 450  */
 451 static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
 452 {
 453         struct rvt_mregion *mr = (struct rvt_mregion *)v;
 454 
 455         /* skip PDs that are not ours */
 456         if (mr->pd != qp->ibqp.pd)
 457                 return;
 458         rvt_qp_mr_clean(qp, mr->lkey);
 459 }
 460 
 461 /**
 462  * rvt_dereg_clean_qps - find QPs for reference cleanup
  463  * @mr: the MR that is being deregistered
 464  *
 465  * This routine iterates RC QPs looking for references
 466  * to the lkey noted in mr.
 467  */
 468 static void rvt_dereg_clean_qps(struct rvt_mregion *mr)
 469 {
 470         struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
 471 
 472         rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
 473 }
 474 
 475 /**
 476  * rvt_check_refs - check references
  477  * @mr: the mregion
  478  * @t: the caller identification
  479  *
  480  * This routine checks whether anything still holds a reference to the
  481  * MR while it is being deregistered.
  482  *
  483  * If the count is non-zero, the code calls a clean routine and then
  484  * waits up to a timeout for the count to drop to zero.
 485  */
 486 static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
 487 {
 488         unsigned long timeout;
 489         struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
 490 
 491         if (mr->lkey) {
 492                 /* avoid dma mr */
 493                 rvt_dereg_clean_qps(mr);
 494                 /* @mr was indexed on rcu protected @lkey_table */
 495                 synchronize_rcu();
 496         }
 497 
 498         timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
 499         if (!timeout) {
 500                 rvt_pr_err(rdi,
 501                            "%s timeout mr %p pd %p lkey %x refcount %ld\n",
 502                            t, mr, mr->pd, mr->lkey,
 503                            atomic_long_read(&mr->refcount.count));
 504                 rvt_get_mr(mr);
 505                 return -EBUSY;
 506         }
 507         return 0;
 508 }
 509 
 510 /**
  511  * rvt_mr_has_lkey - does the mregion match the lkey
  512  * @mr: the mregion
  513  * @lkey: the lkey to check against
 514  */
 515 bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
 516 {
 517         return mr && lkey == mr->lkey;
 518 }
 519 
 520 /**
  521  * rvt_ss_has_lkey - is the lkey referenced by the sge state
  522  * @ss: the sge state
  523  * @lkey: the lkey to look for
  524  *
  525  * This code tests whether an MR with the given lkey appears in the
  526  * indicated sge state.
 527  */
 528 bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
 529 {
 530         int i;
 531         bool rval = false;
 532 
 533         if (!ss->num_sge)
 534                 return rval;
 535         /* first one */
 536         rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
 537         /* any others */
 538         for (i = 0; !rval && i < ss->num_sge - 1; i++)
 539                 rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
 540         return rval;
 541 }
 542 
 543 /**
 544  * rvt_dereg_mr - unregister and free a memory region
 545  * @ibmr: the memory region to free
 546  *
 547  *
 548  * Note that this is called to free MRs created by rvt_get_dma_mr()
 549  * or rvt_reg_user_mr().
 550  *
 551  * Returns 0 on success.
 552  */
 553 int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 554 {
 555         struct rvt_mr *mr = to_imr(ibmr);
 556         int ret;
 557 
 558         rvt_free_lkey(&mr->mr);
 559 
 560         rvt_put_mr(&mr->mr); /* will set completion if last */
 561         ret = rvt_check_refs(&mr->mr, __func__);
 562         if (ret)
 563                 goto out;
 564         rvt_deinit_mregion(&mr->mr);
 565         ib_umem_release(mr->umem);
 566         kfree(mr);
 567 out:
 568         return ret;
 569 }
 570 
 571 /**
  572  * rvt_alloc_mr - Allocate a memory region usable for fast registration
 573  * @pd: protection domain for this memory region
 574  * @mr_type: mem region type
 575  * @max_num_sg: Max number of segments allowed
 576  *
 577  * Return: the memory region on success, otherwise return an errno.
 578  */
 579 struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 580                            u32 max_num_sg, struct ib_udata *udata)
 581 {
 582         struct rvt_mr *mr;
 583 
 584         if (mr_type != IB_MR_TYPE_MEM_REG)
 585                 return ERR_PTR(-EINVAL);
 586 
 587         mr = __rvt_alloc_mr(max_num_sg, pd);
 588         if (IS_ERR(mr))
 589                 return (struct ib_mr *)mr;
 590 
 591         return &mr->ibmr;
 592 }
 593 
 594 /**
 595  * rvt_set_page - page assignment function called by ib_sg_to_pages
 596  * @ibmr: memory region
 597  * @addr: dma address of mapped page
 598  *
 599  * Return: 0 on success
 600  */
 601 static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
 602 {
 603         struct rvt_mr *mr = to_imr(ibmr);
 604         u32 ps = 1 << mr->mr.page_shift;
 605         u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
 606         int m, n;
 607 
 608         if (unlikely(mapped_segs == mr->mr.max_segs))
 609                 return -ENOMEM;
 610 
 611         m = mapped_segs / RVT_SEGSZ;
 612         n = mapped_segs % RVT_SEGSZ;
 613         mr->mr.map[m]->segs[n].vaddr = (void *)addr;
 614         mr->mr.map[m]->segs[n].length = ps;
 615         mr->mr.length += ps;
 616         trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
 617 
 618         return 0;
 619 }
 620 
 621 /**
  622  * rvt_map_mr_sg - map an sg list into the memory region
 623  * @ibmr: memory region
 624  * @sg: dma mapped scatterlist
 625  * @sg_nents: number of entries in sg
 626  * @sg_offset: offset in bytes into sg
 627  *
  628  * Overwrites the rvt_mr length with the length calculated by ib_sg_to_pages.
 629  *
 630  * Return: number of sg elements mapped to the memory region
 631  */
 632 int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 633                   int sg_nents, unsigned int *sg_offset)
 634 {
 635         struct rvt_mr *mr = to_imr(ibmr);
 636         int ret;
 637 
 638         mr->mr.length = 0;
 639         mr->mr.page_shift = PAGE_SHIFT;
 640         ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
 641         mr->mr.user_base = ibmr->iova;
 642         mr->mr.iova = ibmr->iova;
 643         mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
 644         mr->mr.length = (size_t)ibmr->length;
 645         trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset);
 646         return ret;
 647 }
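
      /*
       * Usage note (illustrative): this is not called directly by consumers;
       * it is wired up as the device's map_mr_sg op and reached through
       * ib_map_mr_sg(), e.g. ib_map_mr_sg(ibmr, sg, sg_nents, &sg_offset,
       * PAGE_SIZE).
       */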
 648 
 649 /**
 650  * rvt_fast_reg_mr - fast register physical MR
 651  * @qp: the queue pair where the work request comes from
 652  * @ibmr: the memory region to be registered
 653  * @key: updated key for this memory region
 654  * @access: access flags for this memory region
 655  *
 656  * Returns 0 on success.
 657  */
 658 int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
 659                     int access)
 660 {
 661         struct rvt_mr *mr = to_imr(ibmr);
 662 
 663         if (qp->ibqp.pd != mr->mr.pd)
 664                 return -EACCES;
 665 
 666         /* not applicable to dma MR or user MR */
 667         if (!mr->mr.lkey || mr->umem)
 668                 return -EINVAL;
 669 
 670         if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
 671                 return -EINVAL;
 672 
 673         ibmr->lkey = key;
 674         ibmr->rkey = key;
 675         mr->mr.lkey = key;
 676         mr->mr.access_flags = access;
 677         mr->mr.iova = ibmr->iova;
 678         atomic_set(&mr->mr.lkey_invalid, 0);
 679 
 680         return 0;
 681 }
 682 EXPORT_SYMBOL(rvt_fast_reg_mr);
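
      /*
       * Usage note (illustrative): exported so that a fast-register
       * (IB_WR_REG_MR) work request can be completed on behalf of the posting
       * QP from the rdmavt/driver send path.
       */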
 683 
 684 /**
 685  * rvt_invalidate_rkey - invalidate an MR rkey
 686  * @qp: queue pair associated with the invalidate op
 687  * @rkey: rkey to invalidate
 688  *
 689  * Returns 0 on success.
 690  */
 691 int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
 692 {
 693         struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
 694         struct rvt_lkey_table *rkt = &dev->lkey_table;
 695         struct rvt_mregion *mr;
 696 
 697         if (rkey == 0)
 698                 return -EINVAL;
 699 
 700         rcu_read_lock();
 701         mr = rcu_dereference(
 702                 rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
 703         if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 704                 goto bail;
 705 
 706         atomic_set(&mr->lkey_invalid, 1);
 707         rcu_read_unlock();
 708         return 0;
 709 
 710 bail:
 711         rcu_read_unlock();
 712         return -EINVAL;
 713 }
 714 EXPORT_SYMBOL(rvt_invalidate_rkey);
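
      /*
       * Usage note (illustrative): typically called while processing an
       * IB_WR_LOCAL_INV work request or an incoming Send with Invalidate.
       */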
 715 
 716 /**
 717  * rvt_alloc_fmr - allocate a fast memory region
 718  * @pd: the protection domain for this memory region
 719  * @mr_access_flags: access flags for this memory region
 720  * @fmr_attr: fast memory region attributes
 721  *
 722  * Return: the memory region on success, otherwise returns an errno.
 723  */
 724 struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
 725                              struct ib_fmr_attr *fmr_attr)
 726 {
 727         struct rvt_fmr *fmr;
 728         int m;
 729         struct ib_fmr *ret;
 730         int rval = -ENOMEM;
 731 
 732         /* Allocate struct plus pointers to first level page tables. */
 733         m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
 734         fmr = kzalloc(struct_size(fmr, mr.map, m), GFP_KERNEL);
 735         if (!fmr)
 736                 goto bail;
 737 
 738         rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
 739                                 PERCPU_REF_INIT_ATOMIC);
 740         if (rval)
 741                 goto bail;
 742 
 743         /*
 744          * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
 745          * rkey.
 746          */
 747         rval = rvt_alloc_lkey(&fmr->mr, 0);
 748         if (rval)
 749                 goto bail_mregion;
 750         fmr->ibfmr.rkey = fmr->mr.lkey;
 751         fmr->ibfmr.lkey = fmr->mr.lkey;
 752         /*
 753          * Resources are allocated but no valid mapping (RKEY can't be
 754          * used).
 755          */
 756         fmr->mr.access_flags = mr_access_flags;
 757         fmr->mr.max_segs = fmr_attr->max_pages;
 758         fmr->mr.page_shift = fmr_attr->page_shift;
 759 
 760         ret = &fmr->ibfmr;
 761 done:
 762         return ret;
 763 
 764 bail_mregion:
 765         rvt_deinit_mregion(&fmr->mr);
 766 bail:
 767         kfree(fmr);
 768         ret = ERR_PTR(rval);
 769         goto done;
 770 }
 771 
 772 /**
 773  * rvt_map_phys_fmr - set up a fast memory region
 774  * @ibfmr: the fast memory region to set up
 775  * @page_list: the list of pages to associate with the fast memory region
 776  * @list_len: the number of pages to associate with the fast memory region
 777  * @iova: the virtual address of the start of the fast memory region
 778  *
 779  * This may be called from interrupt context.
 780  *
 781  * Return: 0 on success
 782  */
 783 
 784 int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 785                      int list_len, u64 iova)
 786 {
 787         struct rvt_fmr *fmr = to_ifmr(ibfmr);
 788         struct rvt_lkey_table *rkt;
 789         unsigned long flags;
 790         int m, n;
 791         unsigned long i;
 792         u32 ps;
 793         struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
 794 
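              /*
               * Two references are expected at this point: the initial one taken
               * by percpu_ref_init() and the one taken when the lkey was
               * published in rvt_alloc_lkey(); more than that means the FMR is
               * still in use.
               */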
 795         i = atomic_long_read(&fmr->mr.refcount.count);
 796         if (i > 2)
 797                 return -EBUSY;
 798 
 799         if (list_len > fmr->mr.max_segs)
 800                 return -EINVAL;
 801 
 802         rkt = &rdi->lkey_table;
 803         spin_lock_irqsave(&rkt->lock, flags);
 804         fmr->mr.user_base = iova;
 805         fmr->mr.iova = iova;
 806         ps = 1 << fmr->mr.page_shift;
 807         fmr->mr.length = list_len * ps;
 808         m = 0;
 809         n = 0;
 810         for (i = 0; i < list_len; i++) {
 811                 fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
 812                 fmr->mr.map[m]->segs[n].length = ps;
 813                 trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
 814                 if (++n == RVT_SEGSZ) {
 815                         m++;
 816                         n = 0;
 817                 }
 818         }
 819         spin_unlock_irqrestore(&rkt->lock, flags);
 820         return 0;
 821 }
 822 
 823 /**
 824  * rvt_unmap_fmr - unmap fast memory regions
 825  * @fmr_list: the list of fast memory regions to unmap
 826  *
 827  * Return: 0 on success.
 828  */
 829 int rvt_unmap_fmr(struct list_head *fmr_list)
 830 {
 831         struct rvt_fmr *fmr;
 832         struct rvt_lkey_table *rkt;
 833         unsigned long flags;
 834         struct rvt_dev_info *rdi;
 835 
 836         list_for_each_entry(fmr, fmr_list, ibfmr.list) {
 837                 rdi = ib_to_rvt(fmr->ibfmr.device);
 838                 rkt = &rdi->lkey_table;
 839                 spin_lock_irqsave(&rkt->lock, flags);
 840                 fmr->mr.user_base = 0;
 841                 fmr->mr.iova = 0;
 842                 fmr->mr.length = 0;
 843                 spin_unlock_irqrestore(&rkt->lock, flags);
 844         }
 845         return 0;
 846 }
 847 
 848 /**
 849  * rvt_dealloc_fmr - deallocate a fast memory region
 850  * @ibfmr: the fast memory region to deallocate
 851  *
 852  * Return: 0 on success.
 853  */
 854 int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
 855 {
 856         struct rvt_fmr *fmr = to_ifmr(ibfmr);
 857         int ret = 0;
 858 
 859         rvt_free_lkey(&fmr->mr);
 860         rvt_put_mr(&fmr->mr); /* will set completion if last */
 861         ret = rvt_check_refs(&fmr->mr, __func__);
 862         if (ret)
 863                 goto out;
 864         rvt_deinit_mregion(&fmr->mr);
 865         kfree(fmr);
 866 out:
 867         return ret;
 868 }
 869 
 870 /**
  871  * rvt_sge_adjacent - can the SGE be compressed into the last SGE
 872  * @last_sge: last outgoing SGE written
 873  * @sge: SGE to check
 874  *
  875  * If adjacent, last_sge is updated to add the new length.
  876  *
  877  * Return: true if sge is adjacent to last_sge
 878  */
 879 static inline bool rvt_sge_adjacent(struct rvt_sge *last_sge,
 880                                     struct ib_sge *sge)
 881 {
 882         if (last_sge && sge->lkey == last_sge->mr->lkey &&
 883             ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
 884                 if (sge->lkey) {
 885                         if (unlikely((sge->addr - last_sge->mr->user_base +
 886                               sge->length > last_sge->mr->length)))
 887                                 return false; /* overrun, caller will catch */
 888                 } else {
 889                         last_sge->length += sge->length;
 890                 }
 891                 last_sge->sge_length += sge->length;
 892                 trace_rvt_sge_adjacent(last_sge, sge);
 893                 return true;
 894         }
 895         return false;
 896 }
 897 
 898 /**
 899  * rvt_lkey_ok - check IB SGE for validity and initialize
 900  * @rkt: table containing lkey to check SGE against
 901  * @pd: protection domain
 902  * @isge: outgoing internal SGE
 903  * @last_sge: last outgoing SGE written
 904  * @sge: SGE to check
 905  * @acc: access flags
 906  *
 907  * Check the IB SGE for validity and initialize our internal version
 908  * of it.
 909  *
 910  * Increments the reference count when a new sge is stored.
 911  *
  912  * Return: 0 if compressed, 1 if added, otherwise returns -errno.
 913  */
 914 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 915                 struct rvt_sge *isge, struct rvt_sge *last_sge,
 916                 struct ib_sge *sge, int acc)
 917 {
 918         struct rvt_mregion *mr;
 919         unsigned n, m;
 920         size_t off;
 921 
 922         /*
 923          * We use LKEY == zero for kernel virtual addresses
 924          * (see rvt_get_dma_mr() and dma_virt_ops).
 925          */
 926         if (sge->lkey == 0) {
 927                 struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
 928 
 929                 if (pd->user)
 930                         return -EINVAL;
 931                 if (rvt_sge_adjacent(last_sge, sge))
 932                         return 0;
 933                 rcu_read_lock();
 934                 mr = rcu_dereference(dev->dma_mr);
 935                 if (!mr)
 936                         goto bail;
 937                 rvt_get_mr(mr);
 938                 rcu_read_unlock();
 939 
 940                 isge->mr = mr;
 941                 isge->vaddr = (void *)sge->addr;
 942                 isge->length = sge->length;
 943                 isge->sge_length = sge->length;
 944                 isge->m = 0;
 945                 isge->n = 0;
 946                 goto ok;
 947         }
 948         if (rvt_sge_adjacent(last_sge, sge))
 949                 return 0;
 950         rcu_read_lock();
 951         mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
 952         if (!mr)
 953                 goto bail;
 954         rvt_get_mr(mr);
 955         if (!READ_ONCE(mr->lkey_published))
 956                 goto bail_unref;
 957 
 958         if (unlikely(atomic_read(&mr->lkey_invalid) ||
 959                      mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
 960                 goto bail_unref;
 961 
 962         off = sge->addr - mr->user_base;
 963         if (unlikely(sge->addr < mr->user_base ||
 964                      off + sge->length > mr->length ||
 965                      (mr->access_flags & acc) != acc))
 966                 goto bail_unref;
 967         rcu_read_unlock();
 968 
 969         off += mr->offset;
 970         if (mr->page_shift) {
 971                 /*
  972                  * page sizes are a uniform power of 2, so no loop is necessary.
  973                  * entries_spanned_by_off is the number of times the loop below
  974                  * would have executed.
  975                  */
 976                 size_t entries_spanned_by_off;
 977 
 978                 entries_spanned_by_off = off >> mr->page_shift;
 979                 off -= (entries_spanned_by_off << mr->page_shift);
 980                 m = entries_spanned_by_off / RVT_SEGSZ;
 981                 n = entries_spanned_by_off % RVT_SEGSZ;
 982         } else {
 983                 m = 0;
 984                 n = 0;
 985                 while (off >= mr->map[m]->segs[n].length) {
 986                         off -= mr->map[m]->segs[n].length;
 987                         n++;
 988                         if (n >= RVT_SEGSZ) {
 989                                 m++;
 990                                 n = 0;
 991                         }
 992                 }
 993         }
 994         isge->mr = mr;
 995         isge->vaddr = mr->map[m]->segs[n].vaddr + off;
 996         isge->length = mr->map[m]->segs[n].length - off;
 997         isge->sge_length = sge->length;
 998         isge->m = m;
 999         isge->n = n;
1000 ok:
1001         trace_rvt_sge_new(isge, sge);
1002         return 1;
1003 bail_unref:
1004         rvt_put_mr(mr);
1005 bail:
1006         rcu_read_unlock();
1007         return -EINVAL;
1008 }
1009 EXPORT_SYMBOL(rvt_lkey_ok);
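
      /*
       * Illustrative sketch (not from the original source): a caller turning a
       * posted work request into internal SGEs might use rvt_lkey_ok() roughly
       * as follows, where isge_list[], j, last_sge and acc are hypothetical
       * caller-side state:
       *
       *	ret = rvt_lkey_ok(rkt, pd, &isge_list[j], last_sge, &wr->sg_list[i], acc);
       *	if (ret < 0)
       *		return ret;
       *	if (ret == 1)
       *		last_sge = &isge_list[j++];
       *
       * A return of 0 means the SGE was compressed into last_sge; 1 means a new
       * SGE, holding an MR reference, was stored.
       */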
1010 
1011 /**
1012  * rvt_rkey_ok - check the IB virtual address, length, and RKEY
1013  * @qp: qp for validation
1014  * @sge: SGE state
1015  * @len: length of data
1016  * @vaddr: virtual address to place data
1017  * @rkey: rkey to check
1018  * @acc: access flags
1019  *
1020  * Return: 1 if successful, otherwise 0.
1021  *
1022  * increments the reference count upon success
1023  */
1024 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
1025                 u32 len, u64 vaddr, u32 rkey, int acc)
1026 {
1027         struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
1028         struct rvt_lkey_table *rkt = &dev->lkey_table;
1029         struct rvt_mregion *mr;
1030         unsigned n, m;
1031         size_t off;
1032 
1033         /*
1034          * We use RKEY == zero for kernel virtual addresses
1035          * (see rvt_get_dma_mr() and dma_virt_ops).
1036          */
1037         rcu_read_lock();
1038         if (rkey == 0) {
1039                 struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
1040                 struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
1041 
1042                 if (pd->user)
1043                         goto bail;
1044                 mr = rcu_dereference(rdi->dma_mr);
1045                 if (!mr)
1046                         goto bail;
1047                 rvt_get_mr(mr);
1048                 rcu_read_unlock();
1049 
1050                 sge->mr = mr;
1051                 sge->vaddr = (void *)vaddr;
1052                 sge->length = len;
1053                 sge->sge_length = len;
1054                 sge->m = 0;
1055                 sge->n = 0;
1056                 goto ok;
1057         }
1058 
1059         mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
1060         if (!mr)
1061                 goto bail;
1062         rvt_get_mr(mr);
 1063         /* ensure mr read is before test */
1064         if (!READ_ONCE(mr->lkey_published))
1065                 goto bail_unref;
1066         if (unlikely(atomic_read(&mr->lkey_invalid) ||
1067                      mr->lkey != rkey || qp->ibqp.pd != mr->pd))
1068                 goto bail_unref;
1069 
1070         off = vaddr - mr->iova;
1071         if (unlikely(vaddr < mr->iova || off + len > mr->length ||
1072                      (mr->access_flags & acc) == 0))
1073                 goto bail_unref;
1074         rcu_read_unlock();
1075 
1076         off += mr->offset;
1077         if (mr->page_shift) {
1078                 /*
 1079                  * page sizes are a uniform power of 2, so no loop is necessary.
 1080                  * entries_spanned_by_off is the number of times the loop below
 1081                  * would have executed.
 1082                  */
1083                 size_t entries_spanned_by_off;
1084 
1085                 entries_spanned_by_off = off >> mr->page_shift;
1086                 off -= (entries_spanned_by_off << mr->page_shift);
1087                 m = entries_spanned_by_off / RVT_SEGSZ;
1088                 n = entries_spanned_by_off % RVT_SEGSZ;
1089         } else {
1090                 m = 0;
1091                 n = 0;
1092                 while (off >= mr->map[m]->segs[n].length) {
1093                         off -= mr->map[m]->segs[n].length;
1094                         n++;
1095                         if (n >= RVT_SEGSZ) {
1096                                 m++;
1097                                 n = 0;
1098                         }
1099                 }
1100         }
1101         sge->mr = mr;
1102         sge->vaddr = mr->map[m]->segs[n].vaddr + off;
1103         sge->length = mr->map[m]->segs[n].length - off;
1104         sge->sge_length = len;
1105         sge->m = m;
1106         sge->n = n;
1107 ok:
1108         return 1;
1109 bail_unref:
1110         rvt_put_mr(mr);
1111 bail:
1112         rcu_read_unlock();
1113         return 0;
1114 }
1115 EXPORT_SYMBOL(rvt_rkey_ok);
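
      /*
       * Usage note (illustrative): drivers typically call rvt_rkey_ok() on the
       * responder side to validate the rkey, virtual address and length of an
       * incoming RDMA or atomic request before touching memory.
       */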
