drivers/infiniband/core/rw.c

DEFINITIONS

This source file includes the following definitions:
  1. rdma_rw_can_use_mr
  2. rdma_rw_io_needs_mr
  3. rdma_rw_fr_page_list_len
  4. rdma_rw_inv_key
  5. rdma_rw_init_one_mr
  6. rdma_rw_init_mr_wrs
  7. rdma_rw_init_map_wrs
  8. rdma_rw_init_single_wr
  9. rdma_rw_unmap_sg
  10. rdma_rw_map_sg
  11. rdma_rw_ctx_init
  12. rdma_rw_ctx_signature_init
  13. rdma_rw_update_lkey
  14. rdma_rw_ctx_wrs
  15. rdma_rw_ctx_post
  16. rdma_rw_ctx_destroy
  17. rdma_rw_ctx_destroy_signature
  18. rdma_rw_mr_factor
  19. rdma_rw_init_qp
  20. rdma_rw_init_mrs
  21. rdma_rw_cleanup_mrs

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 */
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>

enum {
	RDMA_RW_SINGLE_WR,
	RDMA_RW_MULTI_WR,
	RDMA_RW_MR,
	RDMA_RW_SIG_MR,
};

static bool rdma_rw_force_mr;
module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
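
/*
 * Usage note for the debug knob above: since this file is normally built
 * into the ib_core module, the parameter can typically be set with
 * "ib_core.force_mr=1" on the kernel command line, or with
 * "modprobe ib_core force_mr=1" when the module is loaded manually.  The
 * module name here is an assumption based on the usual build layout of
 * drivers/infiniband/core/.
 */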

/*
 * Check if the device might use memory registration.  This is currently only
 * true for iWarp devices. In the future we can hopefully fine tune this based
 * on HCA driver input.
 */
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
{
	if (rdma_protocol_iwarp(dev, port_num))
		return true;
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

/*
 * Check if the device will use memory registration for this RW operation.
 * We currently always use memory registrations for iWarp RDMA READs, and
 * have a debug option to force usage of MRs.
 *
 * XXX: In the future we can hopefully fine tune this based on HCA driver
 * input.
 */
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
		enum dma_data_direction dir, int dma_nents)
{
	if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
		return true;
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
					   bool pi_support)
{
	u32 max_pages;

	if (pi_support)
		max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
	else
		max_pages = dev->attrs.max_fast_reg_page_list_len;

	/* arbitrary limit to avoid allocating gigantic resources */
	return min_t(u32, max_pages, 256);
}
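
/*
 * Worked example of the clamp above, assuming 4 KiB pages and one page per
 * scatterlist entry: a device that reports max_fast_reg_page_list_len = 512
 * is still limited to 256 pages (1 MiB of payload) per MR here, so an 8 MiB
 * I/O (2048 entries) needs (2048 + 255) / 256 = 8 registrations in
 * rdma_rw_init_mr_wrs() below.
 */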

static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
{
	int count = 0;

	if (reg->mr->need_inval) {
		reg->inv_wr.opcode = IB_WR_LOCAL_INV;
		reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
		reg->inv_wr.next = &reg->reg_wr.wr;
		count++;
	} else {
		reg->inv_wr.next = NULL;
	}

	return count;
}

/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
		struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
		u32 sg_cnt, u32 offset)
{
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	u32 nents = min(sg_cnt, pages_per_mr);
	int count = 0, ret;

	reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
	if (!reg->mr)
		return -EAGAIN;

	count += rdma_rw_inv_key(reg);

	ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
	if (ret < 0 || ret < nents) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
		return -EINVAL;
	}

	reg->reg_wr.wr.opcode = IB_WR_REG_MR;
	reg->reg_wr.mr = reg->mr;
	reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	count++;

	reg->sge.addr = reg->mr->iova;
	reg->sge.length = reg->mr->length;
	return count;
}

static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct rdma_rw_reg_ctx *prev = NULL;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	int i, j, ret = 0, count = 0;

	ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
	ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < ctx->nr_ops; i++) {
		struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
		u32 nents = min(sg_cnt, pages_per_mr);

		ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
				offset);
		if (ret < 0)
			goto out_free;
		count += ret;

		if (prev) {
			if (reg->mr->need_inval)
				prev->wr.wr.next = &reg->inv_wr;
			else
				prev->wr.wr.next = &reg->reg_wr.wr;
		}

		reg->reg_wr.wr.next = &reg->wr.wr;

		reg->wr.wr.sg_list = &reg->sge;
		reg->wr.wr.num_sge = 1;
		reg->wr.remote_addr = remote_addr;
		reg->wr.rkey = rkey;
		if (dir == DMA_TO_DEVICE) {
			reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
		} else if (!rdma_cap_read_inv(qp->device, port_num)) {
			reg->wr.wr.opcode = IB_WR_RDMA_READ;
		} else {
			reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
			reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
		}
		count++;

		remote_addr += reg->sge.length;
		sg_cnt -= nents;
		for (j = 0; j < nents; j++)
			sg = sg_next(sg);
		prev = reg;
		offset = 0;
	}

	if (prev)
		prev->wr.wr.next = NULL;

	ctx->type = RDMA_RW_MR;
	return count;

out_free:
	while (--i >= 0)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
	kfree(ctx->reg);
out:
	return ret;
}
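
/*
 * For reference, the chain built above for each registration context looks
 * roughly like this (the LOCAL_INV WR is only present when the MR still
 * needs invalidation from a previous use):
 *
 *	[IB_WR_LOCAL_INV] -> IB_WR_REG_MR -> IB_WR_RDMA_READ/WRITE
 *		-> first WR of the next reg ctx -> ... -> NULL
 */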

static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
		      qp->max_read_sge;
	struct ib_sge *sge;
	u32 total_len = 0, i, j;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

	ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
	if (!ctx->map.sges)
		goto out;

	ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
	if (!ctx->map.wrs)
		goto out_free_sges;

	for (i = 0; i < ctx->nr_ops; i++) {
		struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
		u32 nr_sge = min(sg_cnt, max_sge);

		if (dir == DMA_TO_DEVICE)
			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
		else
			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
		rdma_wr->remote_addr = remote_addr + total_len;
		rdma_wr->rkey = rkey;
		rdma_wr->wr.num_sge = nr_sge;
		rdma_wr->wr.sg_list = sge;

		for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
			sge->addr = sg_dma_address(sg) + offset;
			sge->length = sg_dma_len(sg) - offset;
			sge->lkey = qp->pd->local_dma_lkey;

			total_len += sge->length;
			sge++;
			sg_cnt--;
			offset = 0;
		}

		rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
			&ctx->map.wrs[i + 1].wr : NULL;
	}

	ctx->type = RDMA_RW_MULTI_WR;
	return ctx->nr_ops;

out_free_sges:
	kfree(ctx->map.sges);
out:
	return -ENOMEM;
}
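
/*
 * Worked example for the multi-WR path: with sg_cnt = 70 mapped entries and
 * max_sge = 30, DIV_ROUND_UP(70, 30) = 3 RDMA WRs are built, carrying 30, 30
 * and 10 SGEs respectively and chained via wr.next in that order.
 */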

static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

	ctx->nr_ops = 1;

	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
	ctx->single.sge.addr = sg_dma_address(sg) + offset;
	ctx->single.sge.length = sg_dma_len(sg) - offset;

	memset(rdma_wr, 0, sizeof(*rdma_wr));
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	rdma_wr->wr.sg_list = &ctx->single.sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;

	ctx->type = RDMA_RW_SINGLE_WR;
	return 1;
}

static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
			     u32 sg_cnt, enum dma_data_direction dir)
{
	if (is_pci_p2pdma_page(sg_page(sg)))
		pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
	else
		ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
}

static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
			  u32 sg_cnt, enum dma_data_direction dir)
{
	if (is_pci_p2pdma_page(sg_page(sg)))
		return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
	return ib_dma_map_sg(dev, sg, sg_cnt, dir);
}

/**
 * rdma_rw_ctx_init - initialize an RDMA READ/WRITE context
 * @ctx:	context to initialize
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist to READ/WRITE from/to
 * @sg_cnt:	number of entries in @sg
 * @sg_offset:	current byte offset into @sg
 * @remote_addr:remote address to read/write (relative to @rkey)
 * @rkey:	remote key to operate on
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the send queue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	int ret;

	ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	/*
	 * Skip to the S/G entry that sg_offset falls into:
	 */
	for (;;) {
		u32 len = sg_dma_len(sg);

		if (sg_offset < len)
			break;

		sg = sg_next(sg);
		sg_offset -= len;
		sg_cnt--;
	}

	ret = -EIO;
	if (WARN_ON_ONCE(sg_cnt == 0))
		goto out_unmap_sg;

	if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
		ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
				sg_offset, remote_addr, rkey, dir);
	} else if (sg_cnt > 1) {
		ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
				remote_addr, rkey, dir);
	} else {
		ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
				remote_addr, rkey, dir);
	}

	if (ret < 0)
		goto out_unmap_sg;
	return ret;

out_unmap_sg:
	rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
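
/*
 * Minimal usage sketch, assuming a ULP that already owns a connected QP and
 * a DMA-capable scatterlist; "ulp_io", "ulp_read_done" and the fields shown
 * are hypothetical and only illustrate the intended call sequence:
 *
 *	ulp_io->cqe.done = ulp_read_done;
 *	ret = rdma_rw_ctx_init(&ulp_io->rw, qp, port_num, ulp_io->sgl,
 *			       ulp_io->sg_cnt, 0, remote_addr, rkey,
 *			       DMA_FROM_DEVICE);
 *	if (ret < 0)
 *		goto err;
 *	ret = rdma_rw_ctx_post(&ulp_io->rw, qp, port_num, &ulp_io->cqe, NULL);
 *	...
 *	// and from ulp_read_done(), once the completion fires:
 *	rdma_rw_ctx_destroy(&ulp_io->rw, qp, port_num, ulp_io->sgl,
 *			    ulp_io->sg_cnt, DMA_FROM_DEVICE);
 */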

/**
 * rdma_rw_ctx_signature_init - initialize an RW context with signature offload
 * @ctx:	context to initialize
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist to READ/WRITE from/to
 * @sg_cnt:	number of entries in @sg
 * @prot_sg:	scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs:	signature offloading algorithms
 * @remote_addr:remote address to read/write (relative to @rkey)
 * @rkey:	remote key to operate on
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the send queue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		struct ib_sig_attrs *sig_attrs,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	struct ib_rdma_wr *rdma_wr;
	int count = 0, ret;

	if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
		pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n",
		       sg_cnt, prot_sg_cnt, pages_per_mr);
		return -EINVAL;
	}

	ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	if (prot_sg_cnt) {
		ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
		if (!ret) {
			ret = -ENOMEM;
			goto out_unmap_sg;
		}
		prot_sg_cnt = ret;
	}

	ctx->type = RDMA_RW_SIG_MR;
	ctx->nr_ops = 1;
	ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out_unmap_prot_sg;
	}

	ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
	if (!ctx->reg->mr) {
		ret = -EAGAIN;
		goto out_free_ctx;
	}

	count += rdma_rw_inv_key(ctx->reg);

	memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));

	ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg,
			      prot_sg_cnt, NULL, SZ_4K);
	if (unlikely(ret)) {
		pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt);
		goto out_destroy_sig_mr;
	}

	ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
	ctx->reg->reg_wr.wr.wr_cqe = NULL;
	ctx->reg->reg_wr.wr.num_sge = 0;
	ctx->reg->reg_wr.wr.send_flags = 0;
	ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	ctx->reg->reg_wr.mr = ctx->reg->mr;
	ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
	count++;

	ctx->reg->sge.addr = ctx->reg->mr->iova;
	ctx->reg->sge.length = ctx->reg->mr->length;
	if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
		ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;

	rdma_wr = &ctx->reg->wr;
	rdma_wr->wr.sg_list = &ctx->reg->sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
	count++;

	return count;

out_destroy_sig_mr:
	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
	kfree(ctx->reg);
out_unmap_prot_sg:
	if (prot_sg_cnt)
		ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
out_unmap_sg:
	ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
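
/*
 * Rough sketch of the @sig_attrs a caller might pass in for T10-DIF style
 * protection generated on the wire side only; the field names follow
 * struct ib_sig_attrs, but the exact policy (block size, tags, checks) is
 * entirely up to the ULP, and "lba" is just a placeholder for the caller's
 * starting block:
 *
 *	struct ib_sig_attrs sig_attrs = {
 *		.mem.sig_type		= IB_SIG_TYPE_NONE,
 *		.wire.sig_type		= IB_SIG_TYPE_T10_DIF,
 *		.wire.sig.dif.bg_type	= IB_T10DIF_CRC,
 *		.wire.sig.dif.pi_interval = 512,
 *		.wire.sig.dif.ref_tag	= lba & 0xffffffff,
 *		.check_mask		= IB_SIG_CHECK_GUARD | IB_SIG_CHECK_REFTAG,
 *	};
 */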

/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs.  If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
	reg->mr->need_inval = need_inval;
	ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
	reg->reg_wr.key = reg->mr->lkey;
	reg->sge.lkey = reg->mr->lkey;
}

/**
 * rdma_rw_ctx_wrs - return chain of WRs for an RDMA READ or WRITE operation
 * @ctx:	context to operate on
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @cqe:	completion queue entry for the last WR
 * @chain_wr:	WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed.  If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr, *last_wr;
	int i;

	switch (ctx->type) {
	case RDMA_RW_SIG_MR:
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++) {
			rdma_rw_update_lkey(&ctx->reg[i],
				ctx->reg[i].wr.wr.opcode !=
					IB_WR_RDMA_READ_WITH_INV);
		}

		if (ctx->reg[0].inv_wr.next)
			first_wr = &ctx->reg[0].inv_wr;
		else
			first_wr = &ctx->reg[0].reg_wr.wr;
		last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
		break;
	case RDMA_RW_MULTI_WR:
		first_wr = &ctx->map.wrs[0].wr;
		last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
		break;
	case RDMA_RW_SINGLE_WR:
		first_wr = &ctx->single.wr.wr;
		last_wr = &ctx->single.wr.wr;
		break;
	default:
		BUG();
	}

	if (chain_wr) {
		last_wr->next = chain_wr;
	} else {
		last_wr->wr_cqe = cqe;
		last_wr->send_flags |= IB_SEND_SIGNALED;
	}

	return first_wr;
}
EXPORT_SYMBOL(rdma_rw_ctx_wrs);
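
/*
 * Sketch of the @chain_wr usage described above: a hypothetical target-side
 * caller that wants its response SEND to follow the RDMA WRITEs in a single
 * post ("rsp_wr" and "rsp_cqe" are placeholders for the ULP's own WR state):
 *
 *	struct ib_send_wr *first_wr;
 *
 *	rsp_wr->wr_cqe = &rsp_cqe;		// completion on the final SEND
 *	rsp_wr->send_flags = IB_SEND_SIGNALED;
 *	first_wr = rdma_rw_ctx_wrs(&ctx, qp, port_num, NULL, rsp_wr);
 *	ret = ib_post_send(qp, first_wr, NULL);
 */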

/**
 * rdma_rw_ctx_post - post an RDMA READ or RDMA WRITE operation
 * @ctx:	context to operate on
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @cqe:	completion queue entry for the last WR
 * @chain_wr:	WR to append to the posted chain
 *
 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 * any memory registration operations needed.  If @chain_wr is non-NULL the
 * WR it points to will be appended to the chain of WRs posted.  If @chain_wr
 * is not set @cqe must be set so that the caller gets a completion
 * notification.
 */
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr;

	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
	return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);

/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx:	context to release
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist that was used for the READ/WRITE
 * @sg_cnt:	number of entries in @sg
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
{
	int i;

	switch (ctx->type) {
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++)
			ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
		kfree(ctx->reg);
		break;
	case RDMA_RW_MULTI_WR:
		kfree(ctx->map.wrs);
		kfree(ctx->map.sges);
		break;
	case RDMA_RW_SINGLE_WR:
		break;
	default:
		BUG();
		break;
	}

	rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);

/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *	rdma_rw_ctx_signature_init
 * @ctx:	context to release
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist that was used for the READ/WRITE
 * @sg_cnt:	number of entries in @sg
 * @prot_sg:	scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt: number of entries in @prot_sg
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		enum dma_data_direction dir)
{
	if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
		return;

	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
	kfree(ctx->reg);

	ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
	if (prot_sg_cnt)
		ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);

/**
 * rdma_rw_mr_factor - return number of MRs required for a payload
 * @device:	device handling the connection
 * @port_num:	port num to which the connection is bound
 * @maxpages:	maximum payload pages per rdma_rw_ctx
 *
 * Returns the number of MRs the device requires to move @maxpages worth of
 * payload. The returned value is used during transport creation to compute
 * max_rdma_ctxs and the size of the transport's Send and Send Completion
 * Queues.
 */
unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
			       unsigned int maxpages)
{
	unsigned int mr_pages;

	if (rdma_rw_can_use_mr(device, port_num))
		mr_pages = rdma_rw_fr_page_list_len(device, false);
	else
		mr_pages = device->attrs.max_sge_rd;
	return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);
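
/*
 * Example: for an iWarp device whose fast-registration page list is clamped
 * to 256 pages by rdma_rw_fr_page_list_len(), a transport that allows
 * maxpages = 1024 per I/O gets a factor of DIV_ROUND_UP(1024, 256) = 4 MRs
 * per rdma_rw_ctx.
 */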

void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
	u32 factor;

	WARN_ON_ONCE(attr->port_num == 0);

	/*
	 * Each context needs at least one RDMA READ or WRITE WR.
	 *
	 * For some hardware we might need more, eventually we should ask the
	 * HCA driver for a multiplier here.
	 */
	factor = 1;

	/*
	 * If the device needs MRs to perform RDMA READ or WRITE operations,
	 * we'll need two additional WRs per context for the registration and
	 * the invalidation.
	 */
	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
	    rdma_rw_can_use_mr(dev, attr->port_num))
		factor += 2;	/* inv + reg */

	attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

	/*
	 * But maybe we were just too high in the sky and the device doesn't
	 * even support all we need, and we'll have to live with what we get...
	 */
	attr->cap.max_send_wr =
		min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
}
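
/*
 * Worked example for the sizing above: a ULP asking for max_rdma_ctxs = 128
 * on an iWarp port gets factor = 3 (READ/WRITE + reg + inv), so 384 entries
 * are added to max_send_wr on top of whatever the ULP requested, subject to
 * the final clamp against dev->attrs.max_qp_wr.
 */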

int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
	struct ib_device *dev = qp->pd->device;
	u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
	int ret = 0;

	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
		nr_sig_mrs = attr->cap.max_rdma_ctxs;
		nr_mrs = attr->cap.max_rdma_ctxs;
		max_num_sg = rdma_rw_fr_page_list_len(dev, true);
	} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
		nr_mrs = attr->cap.max_rdma_ctxs;
		max_num_sg = rdma_rw_fr_page_list_len(dev, false);
	}

	if (nr_mrs) {
		ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
				IB_MR_TYPE_MEM_REG,
				max_num_sg, 0);
		if (ret) {
			pr_err("%s: failed to allocate %d MRs\n",
				__func__, nr_mrs);
			return ret;
		}
	}

	if (nr_sig_mrs) {
		ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
				IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
		if (ret) {
			pr_err("%s: failed to allocate %d SIG MRs\n",
				__func__, nr_sig_mrs);
			goto out_free_rdma_mrs;
		}
	}

	return 0;

out_free_rdma_mrs:
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
	return ret;
}

void rdma_rw_cleanup_mrs(struct ib_qp *qp)
{
	ib_mr_pool_destroy(qp, &qp->sig_mrs);
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}
