root/drivers/infiniband/core/verbs.c


DEFINITIONS

This source file includes the following definitions.
  1. ib_event_msg
  2. ib_wc_status_msg
  3. ib_rate_to_mult
  4. mult_to_ib_rate
  5. ib_rate_to_mbps
  6. rdma_node_get_transport
  7. rdma_port_get_link_layer
  8. __ib_alloc_pd
  9. ib_dealloc_pd_user
  10. rdma_copy_ah_attr
  11. rdma_replace_ah_attr
  12. rdma_move_ah_attr
  13. rdma_check_ah_attr
  14. rdma_fill_sgid_attr
  15. rdma_unfill_sgid_attr
  16. rdma_update_sgid_attr
  17. _rdma_create_ah
  18. rdma_create_ah
  19. rdma_create_user_ah
  20. ib_get_rdma_header_version
  21. ib_get_net_type_by_grh
  22. find_gid_index
  23. get_sgid_attr_from_eth
  24. ib_get_gids_from_rdma_hdr
  25. ib_resolve_unicast_gid_dmac
  26. ib_init_ah_attr_from_wc
  27. rdma_move_grh_sgid_attr
  28. rdma_destroy_ah_attr
  29. ib_create_ah_from_wc
  30. rdma_modify_ah
  31. rdma_query_ah
  32. rdma_destroy_ah_user
  33. ib_create_srq
  34. ib_modify_srq
  35. ib_query_srq
  36. ib_destroy_srq_user
  37. __ib_shared_qp_event_handler
  38. __ib_insert_xrcd_qp
  39. __ib_open_qp
  40. ib_open_qp
  41. create_xrc_qp_user
  42. ib_create_qp_user
  43. ib_modify_qp_is_ok
  44. ib_resolve_eth_dmac
  45. is_qp_type_connected
  46. _ib_modify_qp
  47. ib_modify_qp_with_udata
  48. ib_get_eth_speed
  49. ib_modify_qp
  50. ib_query_qp
  51. ib_close_qp
  52. __ib_destroy_shared_qp
  53. ib_destroy_qp_user
  54. __ib_create_cq
  55. rdma_set_cq_moderation
  56. ib_destroy_cq_user
  57. ib_resize_cq
  58. ib_dereg_mr_user
  59. ib_alloc_mr_user
  60. ib_alloc_mr_integrity
  61. ib_alloc_fmr
  62. ib_unmap_fmr
  63. ib_dealloc_fmr
  64. is_valid_mcast_lid
  65. ib_attach_mcast
  66. ib_detach_mcast
  67. __ib_alloc_xrcd
  68. ib_dealloc_xrcd
  69. ib_create_wq
  70. ib_destroy_wq
  71. ib_modify_wq
  72. ib_create_rwq_ind_table
  73. ib_destroy_rwq_ind_table
  74. ib_check_mr_status
  75. ib_set_vf_link_state
  76. ib_get_vf_config
  77. ib_get_vf_stats
  78. ib_set_vf_guid
  79. ib_map_mr_sg_pi
  80. ib_map_mr_sg
  81. ib_sg_to_pages
  82. ib_drain_qp_done
  83. __ib_drain_sq
  84. __ib_drain_rq
  85. ib_drain_sq
  86. ib_drain_rq
  87. ib_drain_qp
  88. rdma_alloc_netdev
  89. rdma_init_netdev
  90. __rdma_block_iter_start
  91. __rdma_block_iter_next

   1 /*
   2  * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
   3  * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
   4  * Copyright (c) 2004 Intel Corporation.  All rights reserved.
   5  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
   6  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
   7  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   8  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
   9  *
  10  * This software is available to you under a choice of one of two
  11  * licenses.  You may choose to be licensed under the terms of the GNU
  12  * General Public License (GPL) Version 2, available from the file
  13  * COPYING in the main directory of this source tree, or the
  14  * OpenIB.org BSD license below:
  15  *
  16  *     Redistribution and use in source and binary forms, with or
  17  *     without modification, are permitted provided that the following
  18  *     conditions are met:
  19  *
  20  *      - Redistributions of source code must retain the above
  21  *        copyright notice, this list of conditions and the following
  22  *        disclaimer.
  23  *
  24  *      - Redistributions in binary form must reproduce the above
  25  *        copyright notice, this list of conditions and the following
  26  *        disclaimer in the documentation and/or other materials
  27  *        provided with the distribution.
  28  *
  29  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  30  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  31  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  32  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  33  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  34  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  35  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  36  * SOFTWARE.
  37  */
  38 
  39 #include <linux/errno.h>
  40 #include <linux/err.h>
  41 #include <linux/export.h>
  42 #include <linux/string.h>
  43 #include <linux/slab.h>
  44 #include <linux/in.h>
  45 #include <linux/in6.h>
  46 #include <net/addrconf.h>
  47 #include <linux/security.h>
  48 
  49 #include <rdma/ib_verbs.h>
  50 #include <rdma/ib_cache.h>
  51 #include <rdma/ib_addr.h>
  52 #include <rdma/rw.h>
  53 
  54 #include "core_priv.h"
  55 
  56 static int ib_resolve_eth_dmac(struct ib_device *device,
  57                                struct rdma_ah_attr *ah_attr);
  58 
  59 static const char * const ib_events[] = {
  60         [IB_EVENT_CQ_ERR]               = "CQ error",
  61         [IB_EVENT_QP_FATAL]             = "QP fatal error",
  62         [IB_EVENT_QP_REQ_ERR]           = "QP request error",
  63         [IB_EVENT_QP_ACCESS_ERR]        = "QP access error",
  64         [IB_EVENT_COMM_EST]             = "communication established",
  65         [IB_EVENT_SQ_DRAINED]           = "send queue drained",
  66         [IB_EVENT_PATH_MIG]             = "path migration successful",
  67         [IB_EVENT_PATH_MIG_ERR]         = "path migration error",
  68         [IB_EVENT_DEVICE_FATAL]         = "device fatal error",
  69         [IB_EVENT_PORT_ACTIVE]          = "port active",
  70         [IB_EVENT_PORT_ERR]             = "port error",
  71         [IB_EVENT_LID_CHANGE]           = "LID change",
  72         [IB_EVENT_PKEY_CHANGE]          = "P_key change",
  73         [IB_EVENT_SM_CHANGE]            = "SM change",
  74         [IB_EVENT_SRQ_ERR]              = "SRQ error",
  75         [IB_EVENT_SRQ_LIMIT_REACHED]    = "SRQ limit reached",
  76         [IB_EVENT_QP_LAST_WQE_REACHED]  = "last WQE reached",
  77         [IB_EVENT_CLIENT_REREGISTER]    = "client reregister",
  78         [IB_EVENT_GID_CHANGE]           = "GID changed",
  79 };
  80 
  81 const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
  82 {
  83         size_t index = event;
  84 
  85         return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
  86                         ib_events[index] : "unrecognized event";
  87 }
  88 EXPORT_SYMBOL(ib_event_msg);
  89 
  90 static const char * const wc_statuses[] = {
  91         [IB_WC_SUCCESS]                 = "success",
  92         [IB_WC_LOC_LEN_ERR]             = "local length error",
  93         [IB_WC_LOC_QP_OP_ERR]           = "local QP operation error",
  94         [IB_WC_LOC_EEC_OP_ERR]          = "local EE context operation error",
  95         [IB_WC_LOC_PROT_ERR]            = "local protection error",
  96         [IB_WC_WR_FLUSH_ERR]            = "WR flushed",
  97         [IB_WC_MW_BIND_ERR]             = "memory management operation error",
  98         [IB_WC_BAD_RESP_ERR]            = "bad response error",
  99         [IB_WC_LOC_ACCESS_ERR]          = "local access error",
 100         [IB_WC_REM_INV_REQ_ERR]         = "invalid request error",
 101         [IB_WC_REM_ACCESS_ERR]          = "remote access error",
 102         [IB_WC_REM_OP_ERR]              = "remote operation error",
 103         [IB_WC_RETRY_EXC_ERR]           = "transport retry counter exceeded",
 104         [IB_WC_RNR_RETRY_EXC_ERR]       = "RNR retry counter exceeded",
 105         [IB_WC_LOC_RDD_VIOL_ERR]        = "local RDD violation error",
 106         [IB_WC_REM_INV_RD_REQ_ERR]      = "remote invalid RD request",
 107         [IB_WC_REM_ABORT_ERR]           = "operation aborted",
 108         [IB_WC_INV_EECN_ERR]            = "invalid EE context number",
 109         [IB_WC_INV_EEC_STATE_ERR]       = "invalid EE context state",
 110         [IB_WC_FATAL_ERR]               = "fatal error",
 111         [IB_WC_RESP_TIMEOUT_ERR]        = "response timeout error",
 112         [IB_WC_GENERAL_ERR]             = "general error",
 113 };
 114 
 115 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
 116 {
 117         size_t index = status;
 118 
 119         return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
 120                         wc_statuses[index] : "unrecognized status";
 121 }
 122 EXPORT_SYMBOL(ib_wc_status_msg);
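
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a hypothetical completion callback and async event handler that use
 * ib_wc_status_msg() and ib_event_msg() purely for readable logging.
 * The function names are made up for the example.
 */
static void example_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
        if (wc->status != IB_WC_SUCCESS)
                pr_err("send failed with status %d (%s)\n",
                       wc->status, ib_wc_status_msg(wc->status));
}

static void example_event_handler(struct ib_event *event, void *context)
{
        pr_warn("async event %d (%s) on %s\n",
                event->event, ib_event_msg(event->event),
                dev_name(&event->device->dev));
}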
 123 
 124 __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
 125 {
 126         switch (rate) {
 127         case IB_RATE_2_5_GBPS: return   1;
 128         case IB_RATE_5_GBPS:   return   2;
 129         case IB_RATE_10_GBPS:  return   4;
 130         case IB_RATE_20_GBPS:  return   8;
 131         case IB_RATE_30_GBPS:  return  12;
 132         case IB_RATE_40_GBPS:  return  16;
 133         case IB_RATE_60_GBPS:  return  24;
 134         case IB_RATE_80_GBPS:  return  32;
 135         case IB_RATE_120_GBPS: return  48;
 136         case IB_RATE_14_GBPS:  return   6;
 137         case IB_RATE_56_GBPS:  return  22;
 138         case IB_RATE_112_GBPS: return  45;
 139         case IB_RATE_168_GBPS: return  67;
 140         case IB_RATE_25_GBPS:  return  10;
 141         case IB_RATE_100_GBPS: return  40;
 142         case IB_RATE_200_GBPS: return  80;
 143         case IB_RATE_300_GBPS: return 120;
 144         case IB_RATE_28_GBPS:  return  11;
 145         case IB_RATE_50_GBPS:  return  20;
 146         case IB_RATE_400_GBPS: return 160;
 147         case IB_RATE_600_GBPS: return 240;
 148         default:               return  -1;
 149         }
 150 }
 151 EXPORT_SYMBOL(ib_rate_to_mult);
 152 
 153 __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
 154 {
 155         switch (mult) {
 156         case 1:   return IB_RATE_2_5_GBPS;
 157         case 2:   return IB_RATE_5_GBPS;
 158         case 4:   return IB_RATE_10_GBPS;
 159         case 8:   return IB_RATE_20_GBPS;
 160         case 12:  return IB_RATE_30_GBPS;
 161         case 16:  return IB_RATE_40_GBPS;
 162         case 24:  return IB_RATE_60_GBPS;
 163         case 32:  return IB_RATE_80_GBPS;
 164         case 48:  return IB_RATE_120_GBPS;
 165         case 6:   return IB_RATE_14_GBPS;
 166         case 22:  return IB_RATE_56_GBPS;
 167         case 45:  return IB_RATE_112_GBPS;
 168         case 67:  return IB_RATE_168_GBPS;
 169         case 10:  return IB_RATE_25_GBPS;
 170         case 40:  return IB_RATE_100_GBPS;
 171         case 80:  return IB_RATE_200_GBPS;
 172         case 120: return IB_RATE_300_GBPS;
 173         case 11:  return IB_RATE_28_GBPS;
 174         case 20:  return IB_RATE_50_GBPS;
 175         case 160: return IB_RATE_400_GBPS;
 176         case 240: return IB_RATE_600_GBPS;
 177         default:  return IB_RATE_PORT_CURRENT;
 178         }
 179 }
 180 EXPORT_SYMBOL(mult_to_ib_rate);
 181 
 182 __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
 183 {
 184         switch (rate) {
 185         case IB_RATE_2_5_GBPS: return 2500;
 186         case IB_RATE_5_GBPS:   return 5000;
 187         case IB_RATE_10_GBPS:  return 10000;
 188         case IB_RATE_20_GBPS:  return 20000;
 189         case IB_RATE_30_GBPS:  return 30000;
 190         case IB_RATE_40_GBPS:  return 40000;
 191         case IB_RATE_60_GBPS:  return 60000;
 192         case IB_RATE_80_GBPS:  return 80000;
 193         case IB_RATE_120_GBPS: return 120000;
 194         case IB_RATE_14_GBPS:  return 14062;
 195         case IB_RATE_56_GBPS:  return 56250;
 196         case IB_RATE_112_GBPS: return 112500;
 197         case IB_RATE_168_GBPS: return 168750;
 198         case IB_RATE_25_GBPS:  return 25781;
 199         case IB_RATE_100_GBPS: return 103125;
 200         case IB_RATE_200_GBPS: return 206250;
 201         case IB_RATE_300_GBPS: return 309375;
 202         case IB_RATE_28_GBPS:  return 28125;
 203         case IB_RATE_50_GBPS:  return 53125;
 204         case IB_RATE_400_GBPS: return 425000;
 205         case IB_RATE_600_GBPS: return 637500;
 206         default:               return -1;
 207         }
 208 }
 209 EXPORT_SYMBOL(ib_rate_to_mbps);
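
/*
 * Illustrative sketch (editor's addition): how a caller might combine the
 * rate helpers above. ib_rate_to_mult() counts 2.5 Gb/s units, while
 * ib_rate_to_mbps() reports the actual signalling rate; mult_to_ib_rate()
 * is the inverse of ib_rate_to_mult() over the same table. The function
 * name is hypothetical.
 */
static int example_print_rate(enum ib_rate rate)
{
        int mult = ib_rate_to_mult(rate);
        int mbps = ib_rate_to_mbps(rate);

        if (mult < 0 || mbps < 0)
                return -EINVAL;         /* unknown rate value */

        pr_info("rate %d: %d x 2.5 Gb/s units, %d Mb/s\n", rate, mult, mbps);
        return 0;
}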
 210 
 211 __attribute_const__ enum rdma_transport_type
 212 rdma_node_get_transport(unsigned int node_type)
 213 {
 214 
 215         if (node_type == RDMA_NODE_USNIC)
 216                 return RDMA_TRANSPORT_USNIC;
 217         if (node_type == RDMA_NODE_USNIC_UDP)
 218                 return RDMA_TRANSPORT_USNIC_UDP;
 219         if (node_type == RDMA_NODE_RNIC)
 220                 return RDMA_TRANSPORT_IWARP;
 221         if (node_type == RDMA_NODE_UNSPECIFIED)
 222                 return RDMA_TRANSPORT_UNSPECIFIED;
 223 
 224         return RDMA_TRANSPORT_IB;
 225 }
 226 EXPORT_SYMBOL(rdma_node_get_transport);
 227 
 228 enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
 229 {
 230         enum rdma_transport_type lt;
 231         if (device->ops.get_link_layer)
 232                 return device->ops.get_link_layer(device, port_num);
 233 
 234         lt = rdma_node_get_transport(device->node_type);
 235         if (lt == RDMA_TRANSPORT_IB)
 236                 return IB_LINK_LAYER_INFINIBAND;
 237 
 238         return IB_LINK_LAYER_ETHERNET;
 239 }
 240 EXPORT_SYMBOL(rdma_port_get_link_layer);
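
/*
 * Illustrative sketch (editor's addition): callers commonly branch on the
 * link layer to pick InfiniBand- or RoCE-specific address handling. The
 * helper name is hypothetical.
 */
static bool example_port_is_roce(struct ib_device *device, u8 port_num)
{
        return rdma_port_get_link_layer(device, port_num) ==
               IB_LINK_LAYER_ETHERNET;
}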
 241 
 242 /* Protection domains */
 243 
 244 /**
 245  * ib_alloc_pd - Allocates an unused protection domain.
 246  * @device: The device on which to allocate the protection domain.
 247  *
 248  * A protection domain object provides an association between QPs, shared
 249  * receive queues, address handles, memory regions, and memory windows.
 250  *
 251  * Every PD has a local_dma_lkey which can be used as the lkey value for local
 252  * memory operations.
 253  */
 254 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 255                 const char *caller)
 256 {
 257         struct ib_pd *pd;
 258         int mr_access_flags = 0;
 259         int ret;
 260 
 261         pd = rdma_zalloc_drv_obj(device, ib_pd);
 262         if (!pd)
 263                 return ERR_PTR(-ENOMEM);
 264 
 265         pd->device = device;
 266         pd->uobject = NULL;
 267         pd->__internal_mr = NULL;
 268         atomic_set(&pd->usecnt, 0);
 269         pd->flags = flags;
 270 
 271         pd->res.type = RDMA_RESTRACK_PD;
 272         rdma_restrack_set_task(&pd->res, caller);
 273 
 274         ret = device->ops.alloc_pd(pd, NULL);
 275         if (ret) {
 276                 kfree(pd);
 277                 return ERR_PTR(ret);
 278         }
 279         rdma_restrack_kadd(&pd->res);
 280 
 281         if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
 282                 pd->local_dma_lkey = device->local_dma_lkey;
 283         else
 284                 mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
 285 
 286         if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
 287                 pr_warn("%s: enabling unsafe global rkey\n", caller);
 288                 mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
 289         }
 290 
 291         if (mr_access_flags) {
 292                 struct ib_mr *mr;
 293 
 294                 mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
 295                 if (IS_ERR(mr)) {
 296                         ib_dealloc_pd(pd);
 297                         return ERR_CAST(mr);
 298                 }
 299 
 300                 mr->device      = pd->device;
 301                 mr->pd          = pd;
 302                 mr->type        = IB_MR_TYPE_DMA;
 303                 mr->uobject     = NULL;
 304                 mr->need_inval  = false;
 305 
 306                 pd->__internal_mr = mr;
 307 
 308                 if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
 309                         pd->local_dma_lkey = pd->__internal_mr->lkey;
 310 
 311                 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
 312                         pd->unsafe_global_rkey = pd->__internal_mr->rkey;
 313         }
 314 
 315         return pd;
 316 }
 317 EXPORT_SYMBOL(__ib_alloc_pd);
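
/*
 * Illustrative sketch (editor's addition): kernel ULPs do not call
 * __ib_alloc_pd() directly; they use the ib_alloc_pd() wrapper, which
 * supplies the module name as @caller. Failure is reported as an ERR_PTR,
 * so IS_ERR() checking is required. The function names are hypothetical.
 */
static struct ib_pd *example_setup_pd(struct ib_device *device)
{
        struct ib_pd *pd;

        pd = ib_alloc_pd(device, 0);    /* no IB_PD_UNSAFE_GLOBAL_RKEY */
        if (IS_ERR(pd))
                return pd;

        /* pd->local_dma_lkey may now be used as the lkey in local SGEs */
        return pd;
}

static void example_teardown_pd(struct ib_pd *pd)
{
        /* only after every QP/MR/AH/SRQ on the PD has been destroyed */
        ib_dealloc_pd(pd);
}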
 318 
 319 /**
 320  * ib_dealloc_pd_user - Deallocates a protection domain.
 321  * @pd: The protection domain to deallocate.
 322  * @udata: Valid user data or NULL for kernel object
 323  *
 324  * It is an error to call this function while any resources in the pd still
 325  * exist.  The caller is responsible to synchronously destroy them and
 326  * guarantee no new allocations will happen.
 327  */
 328 void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
 329 {
 330         int ret;
 331 
 332         if (pd->__internal_mr) {
 333                 ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL);
 334                 WARN_ON(ret);
 335                 pd->__internal_mr = NULL;
 336         }
 337 
 338         /* uverbs manipulates usecnt with proper locking, while the kabi
 339            requires the caller to guarantee we can't race here. */
 340         WARN_ON(atomic_read(&pd->usecnt));
 341 
 342         rdma_restrack_del(&pd->res);
 343         pd->device->ops.dealloc_pd(pd, udata);
 344         kfree(pd);
 345 }
 346 EXPORT_SYMBOL(ib_dealloc_pd_user);
 347 
 348 /* Address handles */
 349 
 350 /**
 351  * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
 352  * @dest:       Pointer to destination ah_attr. Contents of the destination
  353  *              pointer are assumed to be invalid and the attributes are overwritten.
 354  * @src:        Pointer to source ah_attr.
 355  */
 356 void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
 357                        const struct rdma_ah_attr *src)
 358 {
 359         *dest = *src;
 360         if (dest->grh.sgid_attr)
 361                 rdma_hold_gid_attr(dest->grh.sgid_attr);
 362 }
 363 EXPORT_SYMBOL(rdma_copy_ah_attr);
 364 
 365 /**
  366  * rdma_replace_ah_attr - Replace a valid ah_attr with a new one.
 367  * @old:        Pointer to existing ah_attr which needs to be replaced.
 368  *              old is assumed to be valid or zero'd
 369  * @new:        Pointer to the new ah_attr.
 370  *
 371  * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
  372  * the old ah_attr is valid; after that it copies the new attribute and holds
 373  * the reference to the replaced ah_attr.
 374  */
 375 void rdma_replace_ah_attr(struct rdma_ah_attr *old,
 376                           const struct rdma_ah_attr *new)
 377 {
 378         rdma_destroy_ah_attr(old);
 379         *old = *new;
 380         if (old->grh.sgid_attr)
 381                 rdma_hold_gid_attr(old->grh.sgid_attr);
 382 }
 383 EXPORT_SYMBOL(rdma_replace_ah_attr);
 384 
 385 /**
 386  * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
 387  * @dest:       Pointer to destination ah_attr to copy to.
 388  *              dest is assumed to be valid or zero'd
 389  * @src:        Pointer to the new ah_attr.
 390  *
 391  * rdma_move_ah_attr() first releases any reference in the destination ah_attr
 392  * if it is valid. This also transfers ownership of internal references from
 393  * src to dest, making src invalid in the process. No new reference of the src
 394  * ah_attr is taken.
 395  */
 396 void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
 397 {
 398         rdma_destroy_ah_attr(dest);
 399         *dest = *src;
 400         src->grh.sgid_attr = NULL;
 401 }
 402 EXPORT_SYMBOL(rdma_move_ah_attr);
 403 
 404 /*
 405  * Validate that the rdma_ah_attr is valid for the device before passing it
 406  * off to the driver.
 407  */
 408 static int rdma_check_ah_attr(struct ib_device *device,
 409                               struct rdma_ah_attr *ah_attr)
 410 {
 411         if (!rdma_is_port_valid(device, ah_attr->port_num))
 412                 return -EINVAL;
 413 
 414         if ((rdma_is_grh_required(device, ah_attr->port_num) ||
 415              ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
 416             !(ah_attr->ah_flags & IB_AH_GRH))
 417                 return -EINVAL;
 418 
 419         if (ah_attr->grh.sgid_attr) {
 420                 /*
 421                  * Make sure the passed sgid_attr is consistent with the
 422                  * parameters
 423                  */
 424                 if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
 425                     ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
 426                         return -EINVAL;
 427         }
 428         return 0;
 429 }
 430 
 431 /*
 432  * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
 433  * On success the caller is responsible to call rdma_unfill_sgid_attr().
 434  */
 435 static int rdma_fill_sgid_attr(struct ib_device *device,
 436                                struct rdma_ah_attr *ah_attr,
 437                                const struct ib_gid_attr **old_sgid_attr)
 438 {
 439         const struct ib_gid_attr *sgid_attr;
 440         struct ib_global_route *grh;
 441         int ret;
 442 
 443         *old_sgid_attr = ah_attr->grh.sgid_attr;
 444 
 445         ret = rdma_check_ah_attr(device, ah_attr);
 446         if (ret)
 447                 return ret;
 448 
 449         if (!(ah_attr->ah_flags & IB_AH_GRH))
 450                 return 0;
 451 
 452         grh = rdma_ah_retrieve_grh(ah_attr);
 453         if (grh->sgid_attr)
 454                 return 0;
 455 
 456         sgid_attr =
 457                 rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
 458         if (IS_ERR(sgid_attr))
 459                 return PTR_ERR(sgid_attr);
 460 
  461         /* Move ownership of the kref into the ah_attr */
 462         grh->sgid_attr = sgid_attr;
 463         return 0;
 464 }
 465 
 466 static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
 467                                   const struct ib_gid_attr *old_sgid_attr)
 468 {
 469         /*
 470          * Fill didn't change anything, the caller retains ownership of
 471          * whatever it passed
 472          */
 473         if (ah_attr->grh.sgid_attr == old_sgid_attr)
 474                 return;
 475 
 476         /*
 477          * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller
 478          * doesn't see any change in the rdma_ah_attr. If we get here
 479          * old_sgid_attr is NULL.
 480          */
 481         rdma_destroy_ah_attr(ah_attr);
 482 }
 483 
 484 static const struct ib_gid_attr *
 485 rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
 486                       const struct ib_gid_attr *old_attr)
 487 {
 488         if (old_attr)
 489                 rdma_put_gid_attr(old_attr);
 490         if (ah_attr->ah_flags & IB_AH_GRH) {
 491                 rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
 492                 return ah_attr->grh.sgid_attr;
 493         }
 494         return NULL;
 495 }
 496 
 497 static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 498                                      struct rdma_ah_attr *ah_attr,
 499                                      u32 flags,
 500                                      struct ib_udata *udata)
 501 {
 502         struct ib_device *device = pd->device;
 503         struct ib_ah *ah;
 504         int ret;
 505 
 506         might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
 507 
 508         if (!device->ops.create_ah)
 509                 return ERR_PTR(-EOPNOTSUPP);
 510 
 511         ah = rdma_zalloc_drv_obj_gfp(
 512                 device, ib_ah,
 513                 (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
 514         if (!ah)
 515                 return ERR_PTR(-ENOMEM);
 516 
 517         ah->device = device;
 518         ah->pd = pd;
 519         ah->type = ah_attr->type;
 520         ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
 521 
 522         ret = device->ops.create_ah(ah, ah_attr, flags, udata);
 523         if (ret) {
 524                 kfree(ah);
 525                 return ERR_PTR(ret);
 526         }
 527 
 528         atomic_inc(&pd->usecnt);
 529         return ah;
 530 }
 531 
 532 /**
 533  * rdma_create_ah - Creates an address handle for the
 534  * given address vector.
 535  * @pd: The protection domain associated with the address handle.
 536  * @ah_attr: The attributes of the address vector.
 537  * @flags: Create address handle flags (see enum rdma_create_ah_flags).
 538  *
  539  * It returns a valid address handle on success or an ERR_PTR on failure.
 540  * The address handle is used to reference a local or global destination
 541  * in all UD QP post sends.
 542  */
 543 struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
 544                              u32 flags)
 545 {
 546         const struct ib_gid_attr *old_sgid_attr;
 547         struct ib_ah *ah;
 548         int ret;
 549 
 550         ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
 551         if (ret)
 552                 return ERR_PTR(ret);
 553 
 554         ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
 555 
 556         rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 557         return ah;
 558 }
 559 EXPORT_SYMBOL(rdma_create_ah);
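
/*
 * Illustrative sketch (editor's addition): building a minimal non-GRH IB
 * address vector and creating an address handle from it. The dlid/sl
 * parameters and the function name are made up; the caller later releases
 * the AH with rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE).
 */
static struct ib_ah *example_create_ib_ah(struct ib_pd *pd, u8 port_num,
                                          u32 dlid, u8 sl)
{
        struct rdma_ah_attr ah_attr = {};

        ah_attr.type = rdma_ah_find_type(pd->device, port_num);
        rdma_ah_set_port_num(&ah_attr, port_num);
        rdma_ah_set_dlid(&ah_attr, dlid);
        rdma_ah_set_sl(&ah_attr, sl);

        /* may sleep because of RDMA_CREATE_AH_SLEEPABLE */
        return rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
}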
 560 
 561 /**
 562  * rdma_create_user_ah - Creates an address handle for the
 563  * given address vector.
  564  * It resolves the destination mac address for an ah attribute of RoCE type.
 565  * @pd: The protection domain associated with the address handle.
 566  * @ah_attr: The attributes of the address vector.
 567  * @udata: pointer to user's input output buffer information need by
 568  *         provider driver.
 569  *
  570  * It returns a valid address handle on success or an ERR_PTR on failure.
 571  * The address handle is used to reference a local or global destination
 572  * in all UD QP post sends.
 573  */
 574 struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
 575                                   struct rdma_ah_attr *ah_attr,
 576                                   struct ib_udata *udata)
 577 {
 578         const struct ib_gid_attr *old_sgid_attr;
 579         struct ib_ah *ah;
 580         int err;
 581 
 582         err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
 583         if (err)
 584                 return ERR_PTR(err);
 585 
 586         if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
 587                 err = ib_resolve_eth_dmac(pd->device, ah_attr);
 588                 if (err) {
 589                         ah = ERR_PTR(err);
 590                         goto out;
 591                 }
 592         }
 593 
 594         ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
 595 
 596 out:
 597         rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 598         return ah;
 599 }
 600 EXPORT_SYMBOL(rdma_create_user_ah);
 601 
 602 int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
 603 {
 604         const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
 605         struct iphdr ip4h_checked;
 606         const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
 607 
 608         /* If it's IPv6, the version must be 6, otherwise, the first
 609          * 20 bytes (before the IPv4 header) are garbled.
 610          */
 611         if (ip6h->version != 6)
 612                 return (ip4h->version == 4) ? 4 : 0;
 613         /* version may be 6 or 4 because the first 20 bytes could be garbled */
 614 
 615         /* RoCE v2 requires no options, thus header length
 616          * must be 5 words
 617          */
 618         if (ip4h->ihl != 5)
 619                 return 6;
 620 
 621         /* Verify checksum.
 622          * We can't write on scattered buffers so we need to copy to
 623          * temp buffer.
 624          */
 625         memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
 626         ip4h_checked.check = 0;
 627         ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
 628         /* if IPv4 header checksum is OK, believe it */
 629         if (ip4h->check == ip4h_checked.check)
 630                 return 4;
 631         return 6;
 632 }
 633 EXPORT_SYMBOL(ib_get_rdma_header_version);
 634 
 635 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
 636                                                      u8 port_num,
 637                                                      const struct ib_grh *grh)
 638 {
 639         int grh_version;
 640 
 641         if (rdma_protocol_ib(device, port_num))
 642                 return RDMA_NETWORK_IB;
 643 
 644         grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
 645 
 646         if (grh_version == 4)
 647                 return RDMA_NETWORK_IPV4;
 648 
 649         if (grh->next_hdr == IPPROTO_UDP)
 650                 return RDMA_NETWORK_IPV6;
 651 
 652         return RDMA_NETWORK_ROCE_V1;
 653 }
 654 
 655 struct find_gid_index_context {
 656         u16 vlan_id;
 657         enum ib_gid_type gid_type;
 658 };
 659 
 660 static bool find_gid_index(const union ib_gid *gid,
 661                            const struct ib_gid_attr *gid_attr,
 662                            void *context)
 663 {
 664         struct find_gid_index_context *ctx = context;
 665         u16 vlan_id = 0xffff;
 666         int ret;
 667 
 668         if (ctx->gid_type != gid_attr->gid_type)
 669                 return false;
 670 
 671         ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
 672         if (ret)
 673                 return false;
 674 
 675         return ctx->vlan_id == vlan_id;
 676 }
 677 
 678 static const struct ib_gid_attr *
 679 get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
 680                        u16 vlan_id, const union ib_gid *sgid,
 681                        enum ib_gid_type gid_type)
 682 {
 683         struct find_gid_index_context context = {.vlan_id = vlan_id,
 684                                                  .gid_type = gid_type};
 685 
 686         return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
 687                                        &context);
 688 }
 689 
 690 int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
 691                               enum rdma_network_type net_type,
 692                               union ib_gid *sgid, union ib_gid *dgid)
 693 {
 694         struct sockaddr_in  src_in;
 695         struct sockaddr_in  dst_in;
 696         __be32 src_saddr, dst_saddr;
 697 
 698         if (!sgid || !dgid)
 699                 return -EINVAL;
 700 
 701         if (net_type == RDMA_NETWORK_IPV4) {
 702                 memcpy(&src_in.sin_addr.s_addr,
 703                        &hdr->roce4grh.saddr, 4);
 704                 memcpy(&dst_in.sin_addr.s_addr,
 705                        &hdr->roce4grh.daddr, 4);
 706                 src_saddr = src_in.sin_addr.s_addr;
 707                 dst_saddr = dst_in.sin_addr.s_addr;
 708                 ipv6_addr_set_v4mapped(src_saddr,
 709                                        (struct in6_addr *)sgid);
 710                 ipv6_addr_set_v4mapped(dst_saddr,
 711                                        (struct in6_addr *)dgid);
 712                 return 0;
 713         } else if (net_type == RDMA_NETWORK_IPV6 ||
 714                    net_type == RDMA_NETWORK_IB) {
 715                 *dgid = hdr->ibgrh.dgid;
 716                 *sgid = hdr->ibgrh.sgid;
 717                 return 0;
 718         } else {
 719                 return -EINVAL;
 720         }
 721 }
 722 EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
 723 
 724 /* Resolve destination mac address and hop limit for unicast destination
 725  * GID entry, considering the source GID entry as well.
  726  * The ah_attr must have a valid port_num and sgid_index.
 727  */
 728 static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
 729                                        struct rdma_ah_attr *ah_attr)
 730 {
 731         struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
 732         const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
 733         int hop_limit = 0xff;
 734         int ret = 0;
 735 
 736         /* If destination is link local and source GID is RoCEv1,
 737          * IP stack is not used.
 738          */
 739         if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
 740             sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
 741                 rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
 742                                 ah_attr->roce.dmac);
 743                 return ret;
 744         }
 745 
 746         ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
 747                                            ah_attr->roce.dmac,
 748                                            sgid_attr, &hop_limit);
 749 
 750         grh->hop_limit = hop_limit;
 751         return ret;
 752 }
 753 
 754 /*
 755  * This function initializes address handle attributes from the incoming packet.
  756  * The incoming packet carries the dgid of the receiving node on which this
  757  * code is executing, and the sgid contains the GID of the sender.
  758  *
  759  * When resolving the destination mac address, the received dgid is used as
  760  * the sgid and the received sgid is used as the dgid, because the sgid
  761  * contains the destination GID to which the response is sent.
 762  *
 763  * On success the caller is responsible to call rdma_destroy_ah_attr on the
 764  * attr.
 765  */
 766 int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 767                             const struct ib_wc *wc, const struct ib_grh *grh,
 768                             struct rdma_ah_attr *ah_attr)
 769 {
 770         u32 flow_class;
 771         int ret;
 772         enum rdma_network_type net_type = RDMA_NETWORK_IB;
 773         enum ib_gid_type gid_type = IB_GID_TYPE_IB;
 774         const struct ib_gid_attr *sgid_attr;
 775         int hoplimit = 0xff;
 776         union ib_gid dgid;
 777         union ib_gid sgid;
 778 
 779         might_sleep();
 780 
 781         memset(ah_attr, 0, sizeof *ah_attr);
 782         ah_attr->type = rdma_ah_find_type(device, port_num);
 783         if (rdma_cap_eth_ah(device, port_num)) {
 784                 if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
 785                         net_type = wc->network_hdr_type;
 786                 else
 787                         net_type = ib_get_net_type_by_grh(device, port_num, grh);
 788                 gid_type = ib_network_to_gid_type(net_type);
 789         }
 790         ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
 791                                         &sgid, &dgid);
 792         if (ret)
 793                 return ret;
 794 
 795         rdma_ah_set_sl(ah_attr, wc->sl);
 796         rdma_ah_set_port_num(ah_attr, port_num);
 797 
 798         if (rdma_protocol_roce(device, port_num)) {
 799                 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
 800                                 wc->vlan_id : 0xffff;
 801 
 802                 if (!(wc->wc_flags & IB_WC_GRH))
 803                         return -EPROTOTYPE;
 804 
 805                 sgid_attr = get_sgid_attr_from_eth(device, port_num,
 806                                                    vlan_id, &dgid,
 807                                                    gid_type);
 808                 if (IS_ERR(sgid_attr))
 809                         return PTR_ERR(sgid_attr);
 810 
 811                 flow_class = be32_to_cpu(grh->version_tclass_flow);
 812                 rdma_move_grh_sgid_attr(ah_attr,
 813                                         &sgid,
 814                                         flow_class & 0xFFFFF,
 815                                         hoplimit,
 816                                         (flow_class >> 20) & 0xFF,
 817                                         sgid_attr);
 818 
 819                 ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
 820                 if (ret)
 821                         rdma_destroy_ah_attr(ah_attr);
 822 
 823                 return ret;
 824         } else {
 825                 rdma_ah_set_dlid(ah_attr, wc->slid);
 826                 rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
 827 
 828                 if ((wc->wc_flags & IB_WC_GRH) == 0)
 829                         return 0;
 830 
 831                 if (dgid.global.interface_id !=
 832                                         cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
 833                         sgid_attr = rdma_find_gid_by_port(
 834                                 device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
 835                 } else
 836                         sgid_attr = rdma_get_gid_attr(device, port_num, 0);
 837 
 838                 if (IS_ERR(sgid_attr))
 839                         return PTR_ERR(sgid_attr);
 840                 flow_class = be32_to_cpu(grh->version_tclass_flow);
 841                 rdma_move_grh_sgid_attr(ah_attr,
 842                                         &sgid,
 843                                         flow_class & 0xFFFFF,
 844                                         hoplimit,
 845                                         (flow_class >> 20) & 0xFF,
 846                                         sgid_attr);
 847 
 848                 return 0;
 849         }
 850 }
 851 EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
 852 
 853 /**
 854  * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
 855  * of the reference
 856  *
 857  * @attr:       Pointer to AH attribute structure
 858  * @dgid:       Destination GID
 859  * @flow_label: Flow label
 860  * @hop_limit:  Hop limit
 861  * @traffic_class: traffic class
 862  * @sgid_attr:  Pointer to SGID attribute
 863  *
 864  * This takes ownership of the sgid_attr reference. The caller must ensure
 865  * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
 866  * calling this function.
 867  */
 868 void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
 869                              u32 flow_label, u8 hop_limit, u8 traffic_class,
 870                              const struct ib_gid_attr *sgid_attr)
 871 {
 872         rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
 873                         traffic_class);
 874         attr->grh.sgid_attr = sgid_attr;
 875 }
 876 EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
 877 
 878 /**
 879  * rdma_destroy_ah_attr - Release reference to SGID attribute of
 880  * ah attribute.
 881  * @ah_attr: Pointer to ah attribute
 882  *
 883  * Release reference to the SGID attribute of the ah attribute if it is
 884  * non NULL. It is safe to call this multiple times, and safe to call it on
 885  * a zero initialized ah_attr.
 886  */
 887 void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
 888 {
 889         if (ah_attr->grh.sgid_attr) {
 890                 rdma_put_gid_attr(ah_attr->grh.sgid_attr);
 891                 ah_attr->grh.sgid_attr = NULL;
 892         }
 893 }
 894 EXPORT_SYMBOL(rdma_destroy_ah_attr);
 895 
 896 struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
 897                                    const struct ib_grh *grh, u8 port_num)
 898 {
 899         struct rdma_ah_attr ah_attr;
 900         struct ib_ah *ah;
 901         int ret;
 902 
 903         ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
 904         if (ret)
 905                 return ERR_PTR(ret);
 906 
 907         ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
 908 
 909         rdma_destroy_ah_attr(&ah_attr);
 910         return ah;
 911 }
 912 EXPORT_SYMBOL(ib_create_ah_from_wc);
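
/*
 * Illustrative sketch (editor's addition): a UD responder typically builds
 * a reply AH straight from the completed receive and drops it once the
 * response has been posted. The function name is hypothetical.
 */
static int example_reply_via_wc(struct ib_pd *pd, const struct ib_wc *wc,
                                const struct ib_grh *grh, u8 port_num)
{
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(pd, wc, grh, port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        /* ... post the UD response using this AH ... */

        return rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}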
 913 
 914 int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 915 {
 916         const struct ib_gid_attr *old_sgid_attr;
 917         int ret;
 918 
 919         if (ah->type != ah_attr->type)
 920                 return -EINVAL;
 921 
 922         ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
 923         if (ret)
 924                 return ret;
 925 
 926         ret = ah->device->ops.modify_ah ?
 927                 ah->device->ops.modify_ah(ah, ah_attr) :
 928                 -EOPNOTSUPP;
 929 
 930         ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
 931         rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 932         return ret;
 933 }
 934 EXPORT_SYMBOL(rdma_modify_ah);
 935 
 936 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 937 {
 938         ah_attr->grh.sgid_attr = NULL;
 939 
 940         return ah->device->ops.query_ah ?
 941                 ah->device->ops.query_ah(ah, ah_attr) :
 942                 -EOPNOTSUPP;
 943 }
 944 EXPORT_SYMBOL(rdma_query_ah);
 945 
 946 int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
 947 {
 948         const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
 949         struct ib_pd *pd;
 950 
 951         might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
 952 
 953         pd = ah->pd;
 954 
 955         ah->device->ops.destroy_ah(ah, flags);
 956         atomic_dec(&pd->usecnt);
 957         if (sgid_attr)
 958                 rdma_put_gid_attr(sgid_attr);
 959 
 960         kfree(ah);
 961         return 0;
 962 }
 963 EXPORT_SYMBOL(rdma_destroy_ah_user);
 964 
 965 /* Shared receive queues */
 966 
 967 struct ib_srq *ib_create_srq(struct ib_pd *pd,
 968                              struct ib_srq_init_attr *srq_init_attr)
 969 {
 970         struct ib_srq *srq;
 971         int ret;
 972 
 973         if (!pd->device->ops.create_srq)
 974                 return ERR_PTR(-EOPNOTSUPP);
 975 
 976         srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
 977         if (!srq)
 978                 return ERR_PTR(-ENOMEM);
 979 
 980         srq->device = pd->device;
 981         srq->pd = pd;
 982         srq->event_handler = srq_init_attr->event_handler;
 983         srq->srq_context = srq_init_attr->srq_context;
 984         srq->srq_type = srq_init_attr->srq_type;
 985 
 986         if (ib_srq_has_cq(srq->srq_type)) {
 987                 srq->ext.cq = srq_init_attr->ext.cq;
 988                 atomic_inc(&srq->ext.cq->usecnt);
 989         }
 990         if (srq->srq_type == IB_SRQT_XRC) {
 991                 srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
 992                 atomic_inc(&srq->ext.xrc.xrcd->usecnt);
 993         }
 994         atomic_inc(&pd->usecnt);
 995 
 996         ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL);
 997         if (ret) {
 998                 atomic_dec(&srq->pd->usecnt);
 999                 if (srq->srq_type == IB_SRQT_XRC)
1000                         atomic_dec(&srq->ext.xrc.xrcd->usecnt);
1001                 if (ib_srq_has_cq(srq->srq_type))
1002                         atomic_dec(&srq->ext.cq->usecnt);
1003                 kfree(srq);
1004                 return ERR_PTR(ret);
1005         }
1006 
1007         return srq;
1008 }
1009 EXPORT_SYMBOL(ib_create_srq);
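
/*
 * Illustrative sketch (editor's addition): creating a basic (non-XRC)
 * shared receive queue. The queue depth and SGE count are arbitrary, and
 * the function name is hypothetical; pair with ib_destroy_srq().
 */
static struct ib_srq *example_create_srq(struct ib_pd *pd)
{
        struct ib_srq_init_attr init_attr = {
                .attr = {
                        .max_wr    = 256,
                        .max_sge   = 1,
                        .srq_limit = 0,
                },
                .srq_type = IB_SRQT_BASIC,
        };

        return ib_create_srq(pd, &init_attr);
}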
1010 
1011 int ib_modify_srq(struct ib_srq *srq,
1012                   struct ib_srq_attr *srq_attr,
1013                   enum ib_srq_attr_mask srq_attr_mask)
1014 {
1015         return srq->device->ops.modify_srq ?
1016                 srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask,
1017                                             NULL) : -EOPNOTSUPP;
1018 }
1019 EXPORT_SYMBOL(ib_modify_srq);
1020 
1021 int ib_query_srq(struct ib_srq *srq,
1022                  struct ib_srq_attr *srq_attr)
1023 {
1024         return srq->device->ops.query_srq ?
1025                 srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
1026 }
1027 EXPORT_SYMBOL(ib_query_srq);
1028 
1029 int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
1030 {
1031         if (atomic_read(&srq->usecnt))
1032                 return -EBUSY;
1033 
1034         srq->device->ops.destroy_srq(srq, udata);
1035 
1036         atomic_dec(&srq->pd->usecnt);
1037         if (srq->srq_type == IB_SRQT_XRC)
1038                 atomic_dec(&srq->ext.xrc.xrcd->usecnt);
1039         if (ib_srq_has_cq(srq->srq_type))
1040                 atomic_dec(&srq->ext.cq->usecnt);
1041         kfree(srq);
1042 
1043         return 0;
1044 }
1045 EXPORT_SYMBOL(ib_destroy_srq_user);
1046 
1047 /* Queue pairs */
1048 
1049 static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
1050 {
1051         struct ib_qp *qp = context;
1052         unsigned long flags;
1053 
1054         spin_lock_irqsave(&qp->device->event_handler_lock, flags);
1055         list_for_each_entry(event->element.qp, &qp->open_list, open_list)
1056                 if (event->element.qp->event_handler)
1057                         event->element.qp->event_handler(event, event->element.qp->qp_context);
1058         spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
1059 }
1060 
1061 static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
1062 {
1063         mutex_lock(&xrcd->tgt_qp_mutex);
1064         list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
1065         mutex_unlock(&xrcd->tgt_qp_mutex);
1066 }
1067 
1068 static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
1069                                   void (*event_handler)(struct ib_event *, void *),
1070                                   void *qp_context)
1071 {
1072         struct ib_qp *qp;
1073         unsigned long flags;
1074         int err;
1075 
1076         qp = kzalloc(sizeof *qp, GFP_KERNEL);
1077         if (!qp)
1078                 return ERR_PTR(-ENOMEM);
1079 
1080         qp->real_qp = real_qp;
1081         err = ib_open_shared_qp_security(qp, real_qp->device);
1082         if (err) {
1083                 kfree(qp);
1084                 return ERR_PTR(err);
1085         }
1086 
1087         qp->real_qp = real_qp;
1088         atomic_inc(&real_qp->usecnt);
1089         qp->device = real_qp->device;
1090         qp->event_handler = event_handler;
1091         qp->qp_context = qp_context;
1092         qp->qp_num = real_qp->qp_num;
1093         qp->qp_type = real_qp->qp_type;
1094 
1095         spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
1096         list_add(&qp->open_list, &real_qp->open_list);
1097         spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
1098 
1099         return qp;
1100 }
1101 
1102 struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
1103                          struct ib_qp_open_attr *qp_open_attr)
1104 {
1105         struct ib_qp *qp, *real_qp;
1106 
1107         if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
1108                 return ERR_PTR(-EINVAL);
1109 
1110         qp = ERR_PTR(-EINVAL);
1111         mutex_lock(&xrcd->tgt_qp_mutex);
1112         list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
1113                 if (real_qp->qp_num == qp_open_attr->qp_num) {
1114                         qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
1115                                           qp_open_attr->qp_context);
1116                         break;
1117                 }
1118         }
1119         mutex_unlock(&xrcd->tgt_qp_mutex);
1120         return qp;
1121 }
1122 EXPORT_SYMBOL(ib_open_qp);
1123 
1124 static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
1125                                         struct ib_qp_init_attr *qp_init_attr,
1126                                         struct ib_udata *udata)
1127 {
1128         struct ib_qp *real_qp = qp;
1129 
1130         qp->event_handler = __ib_shared_qp_event_handler;
1131         qp->qp_context = qp;
1132         qp->pd = NULL;
1133         qp->send_cq = qp->recv_cq = NULL;
1134         qp->srq = NULL;
1135         qp->xrcd = qp_init_attr->xrcd;
1136         atomic_inc(&qp_init_attr->xrcd->usecnt);
1137         INIT_LIST_HEAD(&qp->open_list);
1138 
1139         qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
1140                           qp_init_attr->qp_context);
1141         if (IS_ERR(qp))
1142                 return qp;
1143 
1144         __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
1145         return qp;
1146 }
1147 
1148 struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
1149                                 struct ib_qp_init_attr *qp_init_attr,
1150                                 struct ib_udata *udata)
1151 {
1152         struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
1153         struct ib_qp *qp;
1154         int ret;
1155 
1156         if (qp_init_attr->rwq_ind_tbl &&
1157             (qp_init_attr->recv_cq ||
1158             qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
1159             qp_init_attr->cap.max_recv_sge))
1160                 return ERR_PTR(-EINVAL);
1161 
1162         if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) &&
1163             !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER))
1164                 return ERR_PTR(-EINVAL);
1165 
 1166          * If the caller is using the RDMA API, calculate the resources
1167          * If the callers is using the RDMA API calculate the resources
1168          * needed for the RDMA READ/WRITE operations.
1169          *
1170          * Note that these callers need to pass in a port number.
1171          */
1172         if (qp_init_attr->cap.max_rdma_ctxs)
1173                 rdma_rw_init_qp(device, qp_init_attr);
1174 
1175         qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
1176         if (IS_ERR(qp))
1177                 return qp;
1178 
1179         ret = ib_create_qp_security(qp, device);
1180         if (ret)
1181                 goto err;
1182 
1183         if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
1184                 struct ib_qp *xrc_qp =
1185                         create_xrc_qp_user(qp, qp_init_attr, udata);
1186 
1187                 if (IS_ERR(xrc_qp)) {
1188                         ret = PTR_ERR(xrc_qp);
1189                         goto err;
1190                 }
1191                 return xrc_qp;
1192         }
1193 
1194         qp->event_handler = qp_init_attr->event_handler;
1195         qp->qp_context = qp_init_attr->qp_context;
1196         if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
1197                 qp->recv_cq = NULL;
1198                 qp->srq = NULL;
1199         } else {
1200                 qp->recv_cq = qp_init_attr->recv_cq;
1201                 if (qp_init_attr->recv_cq)
1202                         atomic_inc(&qp_init_attr->recv_cq->usecnt);
1203                 qp->srq = qp_init_attr->srq;
1204                 if (qp->srq)
1205                         atomic_inc(&qp_init_attr->srq->usecnt);
1206         }
1207 
1208         qp->send_cq = qp_init_attr->send_cq;
1209         qp->xrcd    = NULL;
1210 
1211         atomic_inc(&pd->usecnt);
1212         if (qp_init_attr->send_cq)
1213                 atomic_inc(&qp_init_attr->send_cq->usecnt);
1214         if (qp_init_attr->rwq_ind_tbl)
1215                 atomic_inc(&qp->rwq_ind_tbl->usecnt);
1216 
1217         if (qp_init_attr->cap.max_rdma_ctxs) {
1218                 ret = rdma_rw_init_mrs(qp, qp_init_attr);
1219                 if (ret)
1220                         goto err;
1221         }
1222 
1223         /*
1224          * Note: all hw drivers guarantee that max_send_sge is lower than
1225          * the device RDMA WRITE SGE limit but not all hw drivers ensure that
1226          * max_send_sge <= max_sge_rd.
1227          */
1228         qp->max_write_sge = qp_init_attr->cap.max_send_sge;
1229         qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
1230                                  device->attrs.max_sge_rd);
1231         if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN)
1232                 qp->integrity_en = true;
1233 
1234         return qp;
1235 
1236 err:
1237         ib_destroy_qp(qp);
1238         return ERR_PTR(ret);
1239 
1240 }
1241 EXPORT_SYMBOL(ib_create_qp_user);
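
/*
 * Illustrative sketch (editor's addition): kernel consumers normally reach
 * ib_create_qp_user() through the ib_create_qp() wrapper. The capacities
 * below are arbitrary and the function name is hypothetical; the CQs must
 * already exist, and the QP is released with ib_destroy_qp().
 */
static struct ib_qp *example_create_rc_qp(struct ib_pd *pd,
                                          struct ib_cq *send_cq,
                                          struct ib_cq *recv_cq)
{
        struct ib_qp_init_attr init_attr = {
                .send_cq = send_cq,
                .recv_cq = recv_cq,
                .cap = {
                        .max_send_wr  = 64,
                        .max_recv_wr  = 64,
                        .max_send_sge = 1,
                        .max_recv_sge = 1,
                },
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .qp_type     = IB_QPT_RC,
        };

        return ib_create_qp(pd, &init_attr);
}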
1242 
1243 static const struct {
1244         int                     valid;
1245         enum ib_qp_attr_mask    req_param[IB_QPT_MAX];
1246         enum ib_qp_attr_mask    opt_param[IB_QPT_MAX];
1247 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
1248         [IB_QPS_RESET] = {
1249                 [IB_QPS_RESET] = { .valid = 1 },
1250                 [IB_QPS_INIT]  = {
1251                         .valid = 1,
1252                         .req_param = {
1253                                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX                |
1254                                                 IB_QP_PORT                      |
1255                                                 IB_QP_QKEY),
1256                                 [IB_QPT_RAW_PACKET] = IB_QP_PORT,
1257                                 [IB_QPT_UC]  = (IB_QP_PKEY_INDEX                |
1258                                                 IB_QP_PORT                      |
1259                                                 IB_QP_ACCESS_FLAGS),
1260                                 [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
1261                                                 IB_QP_PORT                      |
1262                                                 IB_QP_ACCESS_FLAGS),
1263                                 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX            |
1264                                                 IB_QP_PORT                      |
1265                                                 IB_QP_ACCESS_FLAGS),
1266                                 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX            |
1267                                                 IB_QP_PORT                      |
1268                                                 IB_QP_ACCESS_FLAGS),
1269                                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
1270                                                 IB_QP_QKEY),
1271                                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
1272                                                 IB_QP_QKEY),
1273                         }
1274                 },
1275         },
1276         [IB_QPS_INIT]  = {
1277                 [IB_QPS_RESET] = { .valid = 1 },
1278                 [IB_QPS_ERR] =   { .valid = 1 },
1279                 [IB_QPS_INIT]  = {
1280                         .valid = 1,
1281                         .opt_param = {
1282                                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX                |
1283                                                 IB_QP_PORT                      |
1284                                                 IB_QP_QKEY),
1285                                 [IB_QPT_UC]  = (IB_QP_PKEY_INDEX                |
1286                                                 IB_QP_PORT                      |
1287                                                 IB_QP_ACCESS_FLAGS),
1288                                 [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
1289                                                 IB_QP_PORT                      |
1290                                                 IB_QP_ACCESS_FLAGS),
1291                                 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX            |
1292                                                 IB_QP_PORT                      |
1293                                                 IB_QP_ACCESS_FLAGS),
1294                                 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX            |
1295                                                 IB_QP_PORT                      |
1296                                                 IB_QP_ACCESS_FLAGS),
1297                                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
1298                                                 IB_QP_QKEY),
1299                                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
1300                                                 IB_QP_QKEY),
1301                         }
1302                 },
1303                 [IB_QPS_RTR]   = {
1304                         .valid = 1,
1305                         .req_param = {
1306                                 [IB_QPT_UC]  = (IB_QP_AV                        |
1307                                                 IB_QP_PATH_MTU                  |
1308                                                 IB_QP_DEST_QPN                  |
1309                                                 IB_QP_RQ_PSN),
1310                                 [IB_QPT_RC]  = (IB_QP_AV                        |
1311                                                 IB_QP_PATH_MTU                  |
1312                                                 IB_QP_DEST_QPN                  |
1313                                                 IB_QP_RQ_PSN                    |
1314                                                 IB_QP_MAX_DEST_RD_ATOMIC        |
1315                                                 IB_QP_MIN_RNR_TIMER),
1316                                 [IB_QPT_XRC_INI] = (IB_QP_AV                    |
1317                                                 IB_QP_PATH_MTU                  |
1318                                                 IB_QP_DEST_QPN                  |
1319                                                 IB_QP_RQ_PSN),
1320                                 [IB_QPT_XRC_TGT] = (IB_QP_AV                    |
1321                                                 IB_QP_PATH_MTU                  |
1322                                                 IB_QP_DEST_QPN                  |
1323                                                 IB_QP_RQ_PSN                    |
1324                                                 IB_QP_MAX_DEST_RD_ATOMIC        |
1325                                                 IB_QP_MIN_RNR_TIMER),
1326                         },
1327                         .opt_param = {
1328                                  [IB_QPT_UD]  = (IB_QP_PKEY_INDEX               |
1329                                                  IB_QP_QKEY),
1330                                  [IB_QPT_UC]  = (IB_QP_ALT_PATH                 |
1331                                                  IB_QP_ACCESS_FLAGS             |
1332                                                  IB_QP_PKEY_INDEX),
1333                                  [IB_QPT_RC]  = (IB_QP_ALT_PATH                 |
1334                                                  IB_QP_ACCESS_FLAGS             |
1335                                                  IB_QP_PKEY_INDEX),
1336                                  [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH             |
1337                                                  IB_QP_ACCESS_FLAGS             |
1338                                                  IB_QP_PKEY_INDEX),
1339                                  [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH             |
1340                                                  IB_QP_ACCESS_FLAGS             |
1341                                                  IB_QP_PKEY_INDEX),
1342                                  [IB_QPT_SMI] = (IB_QP_PKEY_INDEX               |
1343                                                  IB_QP_QKEY),
1344                                  [IB_QPT_GSI] = (IB_QP_PKEY_INDEX               |
1345                                                  IB_QP_QKEY),
1346                          },
1347                 },
1348         },
1349         [IB_QPS_RTR]   = {
1350                 [IB_QPS_RESET] = { .valid = 1 },
1351                 [IB_QPS_ERR] =   { .valid = 1 },
1352                 [IB_QPS_RTS]   = {
1353                         .valid = 1,
1354                         .req_param = {
1355                                 [IB_QPT_UD]  = IB_QP_SQ_PSN,
1356                                 [IB_QPT_UC]  = IB_QP_SQ_PSN,
1357                                 [IB_QPT_RC]  = (IB_QP_TIMEOUT                   |
1358                                                 IB_QP_RETRY_CNT                 |
1359                                                 IB_QP_RNR_RETRY                 |
1360                                                 IB_QP_SQ_PSN                    |
1361                                                 IB_QP_MAX_QP_RD_ATOMIC),
1362                                 [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT               |
1363                                                 IB_QP_RETRY_CNT                 |
1364                                                 IB_QP_RNR_RETRY                 |
1365                                                 IB_QP_SQ_PSN                    |
1366                                                 IB_QP_MAX_QP_RD_ATOMIC),
1367                                 [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT               |
1368                                                 IB_QP_SQ_PSN),
1369                                 [IB_QPT_SMI] = IB_QP_SQ_PSN,
1370                                 [IB_QPT_GSI] = IB_QP_SQ_PSN,
1371                         },
1372                         .opt_param = {
1373                                  [IB_QPT_UD]  = (IB_QP_CUR_STATE                |
1374                                                  IB_QP_QKEY),
1375                                  [IB_QPT_UC]  = (IB_QP_CUR_STATE                |
1376                                                  IB_QP_ALT_PATH                 |
1377                                                  IB_QP_ACCESS_FLAGS             |
1378                                                  IB_QP_PATH_MIG_STATE),
1379                                  [IB_QPT_RC]  = (IB_QP_CUR_STATE                |
1380                                                  IB_QP_ALT_PATH                 |
1381                                                  IB_QP_ACCESS_FLAGS             |
1382                                                  IB_QP_MIN_RNR_TIMER            |
1383                                                  IB_QP_PATH_MIG_STATE),
1384                                  [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE            |
1385                                                  IB_QP_ALT_PATH                 |
1386                                                  IB_QP_ACCESS_FLAGS             |
1387                                                  IB_QP_PATH_MIG_STATE),
1388                                  [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE            |
1389                                                  IB_QP_ALT_PATH                 |
1390                                                  IB_QP_ACCESS_FLAGS             |
1391                                                  IB_QP_MIN_RNR_TIMER            |
1392                                                  IB_QP_PATH_MIG_STATE),
1393                                  [IB_QPT_SMI] = (IB_QP_CUR_STATE                |
1394                                                  IB_QP_QKEY),
1395                                  [IB_QPT_GSI] = (IB_QP_CUR_STATE                |
1396                                                  IB_QP_QKEY),
1397                                  [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1398                          }
1399                 }
1400         },
1401         [IB_QPS_RTS]   = {
1402                 [IB_QPS_RESET] = { .valid = 1 },
1403                 [IB_QPS_ERR] =   { .valid = 1 },
1404                 [IB_QPS_RTS]   = {
1405                         .valid = 1,
1406                         .opt_param = {
1407                                 [IB_QPT_UD]  = (IB_QP_CUR_STATE                 |
1408                                                 IB_QP_QKEY),
1409                                 [IB_QPT_UC]  = (IB_QP_CUR_STATE                 |
1410                                                 IB_QP_ACCESS_FLAGS              |
1411                                                 IB_QP_ALT_PATH                  |
1412                                                 IB_QP_PATH_MIG_STATE),
1413                                 [IB_QPT_RC]  = (IB_QP_CUR_STATE                 |
1414                                                 IB_QP_ACCESS_FLAGS              |
1415                                                 IB_QP_ALT_PATH                  |
1416                                                 IB_QP_PATH_MIG_STATE            |
1417                                                 IB_QP_MIN_RNR_TIMER),
1418                                 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE             |
1419                                                 IB_QP_ACCESS_FLAGS              |
1420                                                 IB_QP_ALT_PATH                  |
1421                                                 IB_QP_PATH_MIG_STATE),
1422                                 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE             |
1423                                                 IB_QP_ACCESS_FLAGS              |
1424                                                 IB_QP_ALT_PATH                  |
1425                                                 IB_QP_PATH_MIG_STATE            |
1426                                                 IB_QP_MIN_RNR_TIMER),
1427                                 [IB_QPT_SMI] = (IB_QP_CUR_STATE                 |
1428                                                 IB_QP_QKEY),
1429                                 [IB_QPT_GSI] = (IB_QP_CUR_STATE                 |
1430                                                 IB_QP_QKEY),
1431                                 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1432                         }
1433                 },
1434                 [IB_QPS_SQD]   = {
1435                         .valid = 1,
1436                         .opt_param = {
1437                                 [IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1438                                 [IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1439                                 [IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1440                                 [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1441                                 [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
1442                                 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1443                                 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
1444                         }
1445                 },
1446         },
1447         [IB_QPS_SQD]   = {
1448                 [IB_QPS_RESET] = { .valid = 1 },
1449                 [IB_QPS_ERR] =   { .valid = 1 },
1450                 [IB_QPS_RTS]   = {
1451                         .valid = 1,
1452                         .opt_param = {
1453                                 [IB_QPT_UD]  = (IB_QP_CUR_STATE                 |
1454                                                 IB_QP_QKEY),
1455                                 [IB_QPT_UC]  = (IB_QP_CUR_STATE                 |
1456                                                 IB_QP_ALT_PATH                  |
1457                                                 IB_QP_ACCESS_FLAGS              |
1458                                                 IB_QP_PATH_MIG_STATE),
1459                                 [IB_QPT_RC]  = (IB_QP_CUR_STATE                 |
1460                                                 IB_QP_ALT_PATH                  |
1461                                                 IB_QP_ACCESS_FLAGS              |
1462                                                 IB_QP_MIN_RNR_TIMER             |
1463                                                 IB_QP_PATH_MIG_STATE),
1464                                 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE             |
1465                                                 IB_QP_ALT_PATH                  |
1466                                                 IB_QP_ACCESS_FLAGS              |
1467                                                 IB_QP_PATH_MIG_STATE),
1468                                 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE             |
1469                                                 IB_QP_ALT_PATH                  |
1470                                                 IB_QP_ACCESS_FLAGS              |
1471                                                 IB_QP_MIN_RNR_TIMER             |
1472                                                 IB_QP_PATH_MIG_STATE),
1473                                 [IB_QPT_SMI] = (IB_QP_CUR_STATE                 |
1474                                                 IB_QP_QKEY),
1475                                 [IB_QPT_GSI] = (IB_QP_CUR_STATE                 |
1476                                                 IB_QP_QKEY),
1477                         }
1478                 },
1479                 [IB_QPS_SQD]   = {
1480                         .valid = 1,
1481                         .opt_param = {
1482                                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX                |
1483                                                 IB_QP_QKEY),
1484                                 [IB_QPT_UC]  = (IB_QP_AV                        |
1485                                                 IB_QP_ALT_PATH                  |
1486                                                 IB_QP_ACCESS_FLAGS              |
1487                                                 IB_QP_PKEY_INDEX                |
1488                                                 IB_QP_PATH_MIG_STATE),
1489                                 [IB_QPT_RC]  = (IB_QP_PORT                      |
1490                                                 IB_QP_AV                        |
1491                                                 IB_QP_TIMEOUT                   |
1492                                                 IB_QP_RETRY_CNT                 |
1493                                                 IB_QP_RNR_RETRY                 |
1494                                                 IB_QP_MAX_QP_RD_ATOMIC          |
1495                                                 IB_QP_MAX_DEST_RD_ATOMIC        |
1496                                                 IB_QP_ALT_PATH                  |
1497                                                 IB_QP_ACCESS_FLAGS              |
1498                                                 IB_QP_PKEY_INDEX                |
1499                                                 IB_QP_MIN_RNR_TIMER             |
1500                                                 IB_QP_PATH_MIG_STATE),
1501                                 [IB_QPT_XRC_INI] = (IB_QP_PORT                  |
1502                                                 IB_QP_AV                        |
1503                                                 IB_QP_TIMEOUT                   |
1504                                                 IB_QP_RETRY_CNT                 |
1505                                                 IB_QP_RNR_RETRY                 |
1506                                                 IB_QP_MAX_QP_RD_ATOMIC          |
1507                                                 IB_QP_ALT_PATH                  |
1508                                                 IB_QP_ACCESS_FLAGS              |
1509                                                 IB_QP_PKEY_INDEX                |
1510                                                 IB_QP_PATH_MIG_STATE),
1511                                 [IB_QPT_XRC_TGT] = (IB_QP_PORT                  |
1512                                                 IB_QP_AV                        |
1513                                                 IB_QP_TIMEOUT                   |
1514                                                 IB_QP_MAX_DEST_RD_ATOMIC        |
1515                                                 IB_QP_ALT_PATH                  |
1516                                                 IB_QP_ACCESS_FLAGS              |
1517                                                 IB_QP_PKEY_INDEX                |
1518                                                 IB_QP_MIN_RNR_TIMER             |
1519                                                 IB_QP_PATH_MIG_STATE),
1520                                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
1521                                                 IB_QP_QKEY),
1522                                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
1523                                                 IB_QP_QKEY),
1524                         }
1525                 }
1526         },
1527         [IB_QPS_SQE]   = {
1528                 [IB_QPS_RESET] = { .valid = 1 },
1529                 [IB_QPS_ERR] =   { .valid = 1 },
1530                 [IB_QPS_RTS]   = {
1531                         .valid = 1,
1532                         .opt_param = {
1533                                 [IB_QPT_UD]  = (IB_QP_CUR_STATE                 |
1534                                                 IB_QP_QKEY),
1535                                 [IB_QPT_UC]  = (IB_QP_CUR_STATE                 |
1536                                                 IB_QP_ACCESS_FLAGS),
1537                                 [IB_QPT_SMI] = (IB_QP_CUR_STATE                 |
1538                                                 IB_QP_QKEY),
1539                                 [IB_QPT_GSI] = (IB_QP_CUR_STATE                 |
1540                                                 IB_QP_QKEY),
1541                         }
1542                 }
1543         },
1544         [IB_QPS_ERR] = {
1545                 [IB_QPS_RESET] = { .valid = 1 },
1546                 [IB_QPS_ERR] =   { .valid = 1 }
1547         }
1548 };
1549 
1550 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1551                         enum ib_qp_type type, enum ib_qp_attr_mask mask)
1552 {
1553         enum ib_qp_attr_mask req_param, opt_param;
1554 
1555         if (mask & IB_QP_CUR_STATE  &&
1556             cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1557             cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
1558                 return false;
1559 
1560         if (!qp_state_table[cur_state][next_state].valid)
1561                 return false;
1562 
1563         req_param = qp_state_table[cur_state][next_state].req_param[type];
1564         opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1565 
1566         if ((mask & req_param) != req_param)
1567                 return false;
1568 
1569         if (mask & ~(req_param | opt_param | IB_QP_STATE))
1570                 return false;
1571 
1572         return true;
1573 }
1574 EXPORT_SYMBOL(ib_modify_qp_is_ok);
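
/*
 * Example (editorial sketch, not part of the original source): a driver can
 * use ib_modify_qp_is_ok() to validate a user supplied mask before touching
 * hardware.  For an RC QP moving from INIT to RTR, the table above requires
 * AV, PATH_MTU, DEST_QPN, RQ_PSN, MAX_DEST_RD_ATOMIC and MIN_RNR_TIMER:
 *
 *	int attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
 *			IB_QP_DEST_QPN | IB_QP_RQ_PSN |
 *			IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
 *
 *	if (!ib_modify_qp_is_ok(IB_QPS_INIT, IB_QPS_RTR, IB_QPT_RC, attr_mask))
 *		return -EINVAL;
 */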
1575 
1576 /**
1577  * ib_resolve_eth_dmac - Resolve destination mac address
1578  * @device:             Device to consider
1579  * @ah_attr:            address handle attribute which describes the
1580  *                      source and destination parameters
1581  * ib_resolve_eth_dmac() resolves the destination mac address and the L3 hop
1582  * limit. It returns 0 on success or an appropriate error code, and it
1583  * initializes the necessary ah_attr fields when the call is successful.
1584  */
1585 static int ib_resolve_eth_dmac(struct ib_device *device,
1586                                struct rdma_ah_attr *ah_attr)
1587 {
1588         int ret = 0;
1589 
1590         if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1591                 if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1592                         __be32 addr = 0;
1593 
1594                         memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
1595                         ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
1596                 } else {
1597                         ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
1598                                         (char *)ah_attr->roce.dmac);
1599                 }
1600         } else {
1601                 ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
1602         }
1603         return ret;
1604 }
1605 
1606 static bool is_qp_type_connected(const struct ib_qp *qp)
1607 {
1608         return (qp->qp_type == IB_QPT_UC ||
1609                 qp->qp_type == IB_QPT_RC ||
1610                 qp->qp_type == IB_QPT_XRC_INI ||
1611                 qp->qp_type == IB_QPT_XRC_TGT);
1612 }
1613 
1614 /**
1615  * _ib_modify_qp - IB core internal function to perform QP attribute modification.
1616  */
1617 static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1618                          int attr_mask, struct ib_udata *udata)
1619 {
1620         u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1621         const struct ib_gid_attr *old_sgid_attr_av;
1622         const struct ib_gid_attr *old_sgid_attr_alt_av;
1623         int ret;
1624 
1625         if (attr_mask & IB_QP_AV) {
1626                 ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
1627                                           &old_sgid_attr_av);
1628                 if (ret)
1629                         return ret;
1630         }
1631         if (attr_mask & IB_QP_ALT_PATH) {
1632                 /*
1633                  * FIXME: This does not track the migration state, so if the
1634                  * user loads a new alternate path after the HW has migrated
1635                  * from primary->alternate we will keep the wrong
1636                  * references. This is OK for IB because the reference
1637                  * counting does not serve any functional purpose.
1638                  */
1639                 ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
1640                                           &old_sgid_attr_alt_av);
1641                 if (ret)
1642                         goto out_av;
1643 
1644                 /*
1645                  * Today the core code can only handle alternate paths and APM
1646                  * for IB. Ban them in roce mode.
1647                  */
1648                 if (!(rdma_protocol_ib(qp->device,
1649                                        attr->alt_ah_attr.port_num) &&
1650                       rdma_protocol_ib(qp->device, port))) {
1651                         ret = -EINVAL;
1652                         goto out;
1653                 }
1654         }
1655 
1656         /*
1657          * If the user provided the qp_attr then we have to resolve it. Kernel
1658          * users have to provide already resolved rdma_ah_attr's
1659          */
1660         if (udata && (attr_mask & IB_QP_AV) &&
1661             attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
1662             is_qp_type_connected(qp)) {
1663                 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
1664                 if (ret)
1665                         goto out;
1666         }
1667 
1668         if (rdma_ib_or_roce(qp->device, port)) {
1669                 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1670                         dev_warn(&qp->device->dev,
1671                                  "%s rq_psn overflow, masking to 24 bits\n",
1672                                  __func__);
1673                         attr->rq_psn &= 0xffffff;
1674                 }
1675 
1676                 if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
1677                         dev_warn(&qp->device->dev,
1678                                  "%s sq_psn overflow, masking to 24 bits\n",
1679                                  __func__);
1680                         attr->sq_psn &= 0xffffff;
1681                 }
1682         }
1683 
1684         /*
1685          * Bind this qp to a counter automatically based on the rdma counter
1686          * rules. This is only done in RST2INIT when a port is specified.
1687          */
1688         if (!qp->counter && (attr_mask & IB_QP_PORT) &&
1689             ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
1690                 rdma_counter_bind_qp_auto(qp, attr->port_num);
1691 
1692         ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
1693         if (ret)
1694                 goto out;
1695 
1696         if (attr_mask & IB_QP_PORT)
1697                 qp->port = attr->port_num;
1698         if (attr_mask & IB_QP_AV)
1699                 qp->av_sgid_attr =
1700                         rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
1701         if (attr_mask & IB_QP_ALT_PATH)
1702                 qp->alt_path_sgid_attr = rdma_update_sgid_attr(
1703                         &attr->alt_ah_attr, qp->alt_path_sgid_attr);
1704 
1705 out:
1706         if (attr_mask & IB_QP_ALT_PATH)
1707                 rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
1708 out_av:
1709         if (attr_mask & IB_QP_AV)
1710                 rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
1711         return ret;
1712 }
1713 
1714 /**
1715  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
1716  * @ib_qp: The QP to modify.
1717  * @attr: On input, specifies the QP attributes to modify.  On output,
1718  *   the current values of selected QP attributes are returned.
1719  * @attr_mask: A bit-mask used to specify which attributes of the QP
1720  *   are being modified.
1721  * @udata: pointer to the user's input/output buffer information.
1722  *
1723  * It returns 0 on success and an appropriate error code on error.
1724  */
1725 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
1726                             int attr_mask, struct ib_udata *udata)
1727 {
1728         return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
1729 }
1730 EXPORT_SYMBOL(ib_modify_qp_with_udata);
1731 
1732 int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
1733 {
1734         int rc;
1735         u32 netdev_speed;
1736         struct net_device *netdev;
1737         struct ethtool_link_ksettings lksettings;
1738 
1739         if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
1740                 return -EINVAL;
1741 
1742         netdev = ib_device_get_netdev(dev, port_num);
1743         if (!netdev)
1744                 return -ENODEV;
1745 
1746         rtnl_lock();
1747         rc = __ethtool_get_link_ksettings(netdev, &lksettings);
1748         rtnl_unlock();
1749 
1750         if (!rc) {
1751                 netdev_speed = lksettings.base.speed;
1752         } else {
1753                 netdev_speed = SPEED_1000;
1754                 pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name,
1755                         netdev_speed);
1756         }
1757 
1758         dev_put(netdev);
1759 
1760         if (netdev_speed <= SPEED_1000) {
1761                 *width = IB_WIDTH_1X;
1762                 *speed = IB_SPEED_SDR;
1763         } else if (netdev_speed <= SPEED_10000) {
1764                 *width = IB_WIDTH_1X;
1765                 *speed = IB_SPEED_FDR10;
1766         } else if (netdev_speed <= SPEED_20000) {
1767                 *width = IB_WIDTH_4X;
1768                 *speed = IB_SPEED_DDR;
1769         } else if (netdev_speed <= SPEED_25000) {
1770                 *width = IB_WIDTH_1X;
1771                 *speed = IB_SPEED_EDR;
1772         } else if (netdev_speed <= SPEED_40000) {
1773                 *width = IB_WIDTH_4X;
1774                 *speed = IB_SPEED_FDR10;
1775         } else {
1776                 *width = IB_WIDTH_4X;
1777                 *speed = IB_SPEED_EDR;
1778         }
1779 
1780         return 0;
1781 }
1782 EXPORT_SYMBOL(ib_get_eth_speed);
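
/*
 * Example (editorial sketch): RoCE drivers commonly call ib_get_eth_speed()
 * from their query_port() handler so that an IB-style speed/width pair is
 * reported for the underlying Ethernet link (port_attr is assumed to be a
 * struct ib_port_attr the driver is filling in):
 *
 *	err = ib_get_eth_speed(ibdev, port_num,
 *			       &port_attr->active_speed,
 *			       &port_attr->active_width);
 *	if (err)
 *		return err;
 */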
1783 
1784 int ib_modify_qp(struct ib_qp *qp,
1785                  struct ib_qp_attr *qp_attr,
1786                  int qp_attr_mask)
1787 {
1788         return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
1789 }
1790 EXPORT_SYMBOL(ib_modify_qp);
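
/*
 * Example (editorial sketch): a kernel ULP drives the RESET to INIT
 * transition through ib_modify_qp().  The mask below is the usual RST2INIT
 * attribute set for an RC QP (values are illustrative):
 *
 *	struct ib_qp_attr attr = {
 *		.qp_state	 = IB_QPS_INIT,
 *		.pkey_index	 = 0,
 *		.port_num	 = 1,
 *		.qp_access_flags = IB_ACCESS_REMOTE_READ |
 *				   IB_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	ret = ib_modify_qp(qp, &attr,
 *			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT |
 *			   IB_QP_ACCESS_FLAGS);
 */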
1791 
1792 int ib_query_qp(struct ib_qp *qp,
1793                 struct ib_qp_attr *qp_attr,
1794                 int qp_attr_mask,
1795                 struct ib_qp_init_attr *qp_init_attr)
1796 {
1797         qp_attr->ah_attr.grh.sgid_attr = NULL;
1798         qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
1799 
1800         return qp->device->ops.query_qp ?
1801                 qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask,
1802                                          qp_init_attr) : -EOPNOTSUPP;
1803 }
1804 EXPORT_SYMBOL(ib_query_qp);
1805 
1806 int ib_close_qp(struct ib_qp *qp)
1807 {
1808         struct ib_qp *real_qp;
1809         unsigned long flags;
1810 
1811         real_qp = qp->real_qp;
1812         if (real_qp == qp)
1813                 return -EINVAL;
1814 
1815         spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
1816         list_del(&qp->open_list);
1817         spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
1818 
1819         atomic_dec(&real_qp->usecnt);
1820         if (qp->qp_sec)
1821                 ib_close_shared_qp_security(qp->qp_sec);
1822         kfree(qp);
1823 
1824         return 0;
1825 }
1826 EXPORT_SYMBOL(ib_close_qp);
1827 
1828 static int __ib_destroy_shared_qp(struct ib_qp *qp)
1829 {
1830         struct ib_xrcd *xrcd;
1831         struct ib_qp *real_qp;
1832         int ret;
1833 
1834         real_qp = qp->real_qp;
1835         xrcd = real_qp->xrcd;
1836 
1837         mutex_lock(&xrcd->tgt_qp_mutex);
1838         ib_close_qp(qp);
1839         if (atomic_read(&real_qp->usecnt) == 0)
1840                 list_del(&real_qp->xrcd_list);
1841         else
1842                 real_qp = NULL;
1843         mutex_unlock(&xrcd->tgt_qp_mutex);
1844 
1845         if (real_qp) {
1846                 ret = ib_destroy_qp(real_qp);
1847                 if (!ret)
1848                         atomic_dec(&xrcd->usecnt);
1849                 else
1850                         __ib_insert_xrcd_qp(xrcd, real_qp);
1851         }
1852 
1853         return 0;
1854 }
1855 
1856 int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
1857 {
1858         const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
1859         const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
1860         struct ib_pd *pd;
1861         struct ib_cq *scq, *rcq;
1862         struct ib_srq *srq;
1863         struct ib_rwq_ind_table *ind_tbl;
1864         struct ib_qp_security *sec;
1865         int ret;
1866 
1867         WARN_ON_ONCE(qp->mrs_used > 0);
1868 
1869         if (atomic_read(&qp->usecnt))
1870                 return -EBUSY;
1871 
1872         if (qp->real_qp != qp)
1873                 return __ib_destroy_shared_qp(qp);
1874 
1875         pd   = qp->pd;
1876         scq  = qp->send_cq;
1877         rcq  = qp->recv_cq;
1878         srq  = qp->srq;
1879         ind_tbl = qp->rwq_ind_tbl;
1880         sec  = qp->qp_sec;
1881         if (sec)
1882                 ib_destroy_qp_security_begin(sec);
1883 
1884         if (!qp->uobject)
1885                 rdma_rw_cleanup_mrs(qp);
1886 
1887         rdma_counter_unbind_qp(qp, true);
1888         rdma_restrack_del(&qp->res);
1889         ret = qp->device->ops.destroy_qp(qp, udata);
1890         if (!ret) {
1891                 if (alt_path_sgid_attr)
1892                         rdma_put_gid_attr(alt_path_sgid_attr);
1893                 if (av_sgid_attr)
1894                         rdma_put_gid_attr(av_sgid_attr);
1895                 if (pd)
1896                         atomic_dec(&pd->usecnt);
1897                 if (scq)
1898                         atomic_dec(&scq->usecnt);
1899                 if (rcq)
1900                         atomic_dec(&rcq->usecnt);
1901                 if (srq)
1902                         atomic_dec(&srq->usecnt);
1903                 if (ind_tbl)
1904                         atomic_dec(&ind_tbl->usecnt);
1905                 if (sec)
1906                         ib_destroy_qp_security_end(sec);
1907         } else {
1908                 if (sec)
1909                         ib_destroy_qp_security_abort(sec);
1910         }
1911 
1912         return ret;
1913 }
1914 EXPORT_SYMBOL(ib_destroy_qp_user);
1915 
1916 /* Completion queues */
1917 
1918 struct ib_cq *__ib_create_cq(struct ib_device *device,
1919                              ib_comp_handler comp_handler,
1920                              void (*event_handler)(struct ib_event *, void *),
1921                              void *cq_context,
1922                              const struct ib_cq_init_attr *cq_attr,
1923                              const char *caller)
1924 {
1925         struct ib_cq *cq;
1926         int ret;
1927 
1928         cq = rdma_zalloc_drv_obj(device, ib_cq);
1929         if (!cq)
1930                 return ERR_PTR(-ENOMEM);
1931 
1932         cq->device = device;
1933         cq->uobject = NULL;
1934         cq->comp_handler = comp_handler;
1935         cq->event_handler = event_handler;
1936         cq->cq_context = cq_context;
1937         atomic_set(&cq->usecnt, 0);
1938         cq->res.type = RDMA_RESTRACK_CQ;
1939         rdma_restrack_set_task(&cq->res, caller);
1940 
1941         ret = device->ops.create_cq(cq, cq_attr, NULL);
1942         if (ret) {
1943                 kfree(cq);
1944                 return ERR_PTR(ret);
1945         }
1946 
1947         rdma_restrack_kadd(&cq->res);
1948         return cq;
1949 }
1950 EXPORT_SYMBOL(__ib_create_cq);
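
/*
 * Example (editorial sketch): in-kernel consumers normally reach this through
 * the ib_create_cq() wrapper macro, which supplies KBUILD_MODNAME as @caller
 * (many also use the higher level ib_alloc_cq() helper instead):
 *
 *	struct ib_cq_init_attr cq_attr = { .cqe = 128, .comp_vector = 0 };
 *	struct ib_cq *cq;
 *
 *	cq = ib_create_cq(device, comp_handler, event_handler, ctx, &cq_attr);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 */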
1951 
1952 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
1953 {
1954         return cq->device->ops.modify_cq ?
1955                 cq->device->ops.modify_cq(cq, cq_count,
1956                                           cq_period) : -EOPNOTSUPP;
1957 }
1958 EXPORT_SYMBOL(rdma_set_cq_moderation);
1959 
1960 int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
1961 {
1962         if (atomic_read(&cq->usecnt))
1963                 return -EBUSY;
1964 
1965         rdma_restrack_del(&cq->res);
1966         cq->device->ops.destroy_cq(cq, udata);
1967         kfree(cq);
1968         return 0;
1969 }
1970 EXPORT_SYMBOL(ib_destroy_cq_user);
1971 
1972 int ib_resize_cq(struct ib_cq *cq, int cqe)
1973 {
1974         return cq->device->ops.resize_cq ?
1975                 cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
1976 }
1977 EXPORT_SYMBOL(ib_resize_cq);
1978 
1979 /* Memory regions */
1980 
1981 int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
1982 {
1983         struct ib_pd *pd = mr->pd;
1984         struct ib_dm *dm = mr->dm;
1985         struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
1986         int ret;
1987 
1988         rdma_restrack_del(&mr->res);
1989         ret = mr->device->ops.dereg_mr(mr, udata);
1990         if (!ret) {
1991                 atomic_dec(&pd->usecnt);
1992                 if (dm)
1993                         atomic_dec(&dm->usecnt);
1994                 kfree(sig_attrs);
1995         }
1996 
1997         return ret;
1998 }
1999 EXPORT_SYMBOL(ib_dereg_mr_user);
2000 
2001 /**
2002  * ib_alloc_mr_user() - Allocates a memory region
2003  * @pd:            protection domain associated with the region
2004  * @mr_type:       memory region type
2005  * @max_num_sg:    maximum sg entries available for registration.
2006  * @udata:         user data or null for kernel objects
2007  *
2008  * Notes:
2009  * Memory registration page/sg lists must not exceed max_num_sg.
2010  * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
2011  * max_num_sg * used_page_size.
2012  *
2013  */
2014 struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
2015                                u32 max_num_sg, struct ib_udata *udata)
2016 {
2017         struct ib_mr *mr;
2018 
2019         if (!pd->device->ops.alloc_mr)
2020                 return ERR_PTR(-EOPNOTSUPP);
2021 
2022         if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY))
2023                 return ERR_PTR(-EINVAL);
2024 
2025         mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
2026         if (!IS_ERR(mr)) {
2027                 mr->device  = pd->device;
2028                 mr->pd      = pd;
2029                 mr->dm      = NULL;
2030                 mr->uobject = NULL;
2031                 atomic_inc(&pd->usecnt);
2032                 mr->need_inval = false;
2033                 mr->res.type = RDMA_RESTRACK_MR;
2034                 rdma_restrack_kadd(&mr->res);
2035                 mr->type = mr_type;
2036                 mr->sig_attrs = NULL;
2037         }
2038 
2039         return mr;
2040 }
2041 EXPORT_SYMBOL(ib_alloc_mr_user);
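
/*
 * Example (editorial sketch): kernel callers normally use the ib_alloc_mr()
 * wrapper (which passes a NULL udata) to get a fast registration MR sized
 * for a bounded scatterlist; 32 is an arbitrary illustrative limit:
 *
 *	struct ib_mr *mr;
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
 *	if (IS_ERR(mr))
 *		return PTR_ERR(mr);
 */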
2042 
2043 /**
2044  * ib_alloc_mr_integrity() - Allocates an integrity memory region
2045  * @pd:                      protection domain associated with the region
2046  * @max_num_data_sg:         maximum data sg entries available for registration
2047  * @max_num_meta_sg:         maximum metadata sg entries available for
2048  *                           registration
2049  *
2050  * Notes:
2051  * Memory registration page/sg lists must not exceed max_num_data_sg,
2052  * and the integrity page/sg lists must not exceed max_num_meta_sg.
2053  *
2054  */
2055 struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
2056                                     u32 max_num_data_sg,
2057                                     u32 max_num_meta_sg)
2058 {
2059         struct ib_mr *mr;
2060         struct ib_sig_attrs *sig_attrs;
2061 
2062         if (!pd->device->ops.alloc_mr_integrity ||
2063             !pd->device->ops.map_mr_sg_pi)
2064                 return ERR_PTR(-EOPNOTSUPP);
2065 
2066         if (!max_num_meta_sg)
2067                 return ERR_PTR(-EINVAL);
2068 
2069         sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
2070         if (!sig_attrs)
2071                 return ERR_PTR(-ENOMEM);
2072 
2073         mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
2074                                                 max_num_meta_sg);
2075         if (IS_ERR(mr)) {
2076                 kfree(sig_attrs);
2077                 return mr;
2078         }
2079 
2080         mr->device = pd->device;
2081         mr->pd = pd;
2082         mr->dm = NULL;
2083         mr->uobject = NULL;
2084         atomic_inc(&pd->usecnt);
2085         mr->need_inval = false;
2086         mr->res.type = RDMA_RESTRACK_MR;
2087         rdma_restrack_kadd(&mr->res);
2088         mr->type = IB_MR_TYPE_INTEGRITY;
2089         mr->sig_attrs = sig_attrs;
2090 
2091         return mr;
2092 }
2093 EXPORT_SYMBOL(ib_alloc_mr_integrity);
2094 
2095 /* "Fast" memory regions */
2096 
2097 struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
2098                             int mr_access_flags,
2099                             struct ib_fmr_attr *fmr_attr)
2100 {
2101         struct ib_fmr *fmr;
2102 
2103         if (!pd->device->ops.alloc_fmr)
2104                 return ERR_PTR(-EOPNOTSUPP);
2105 
2106         fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr);
2107         if (!IS_ERR(fmr)) {
2108                 fmr->device = pd->device;
2109                 fmr->pd     = pd;
2110                 atomic_inc(&pd->usecnt);
2111         }
2112 
2113         return fmr;
2114 }
2115 EXPORT_SYMBOL(ib_alloc_fmr);
2116 
2117 int ib_unmap_fmr(struct list_head *fmr_list)
2118 {
2119         struct ib_fmr *fmr;
2120 
2121         if (list_empty(fmr_list))
2122                 return 0;
2123 
2124         fmr = list_entry(fmr_list->next, struct ib_fmr, list);
2125         return fmr->device->ops.unmap_fmr(fmr_list);
2126 }
2127 EXPORT_SYMBOL(ib_unmap_fmr);
2128 
2129 int ib_dealloc_fmr(struct ib_fmr *fmr)
2130 {
2131         struct ib_pd *pd;
2132         int ret;
2133 
2134         pd = fmr->pd;
2135         ret = fmr->device->ops.dealloc_fmr(fmr);
2136         if (!ret)
2137                 atomic_dec(&pd->usecnt);
2138 
2139         return ret;
2140 }
2141 EXPORT_SYMBOL(ib_dealloc_fmr);
2142 
2143 /* Multicast groups */
2144 
2145 static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
2146 {
2147         struct ib_qp_init_attr init_attr = {};
2148         struct ib_qp_attr attr = {};
2149         int num_eth_ports = 0;
2150         int port;
2151 
2152         /* If QP state >= init, it is assigned to a port and we can check this
2153          * port only.
2154          */
2155         if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
2156                 if (attr.qp_state >= IB_QPS_INIT) {
2157                         if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
2158                             IB_LINK_LAYER_INFINIBAND)
2159                                 return true;
2160                         goto lid_check;
2161                 }
2162         }
2163 
2164         /* Can't get a quick answer, iterate over all ports */
2165         for (port = 0; port < qp->device->phys_port_cnt; port++)
2166                 if (rdma_port_get_link_layer(qp->device, port) !=
2167                     IB_LINK_LAYER_INFINIBAND)
2168                         num_eth_ports++;
2169 
2170         /* If we have at least one Ethernet port, the RoCE annex declares that
2171          * multicast LID should be ignored. We can't tell at this step if the
2172          * QP belongs to an IB or Ethernet port.
2173          */
2174         if (num_eth_ports)
2175                 return true;
2176 
2177         /* If all the ports are IB, we can check according to IB spec. */
2178 lid_check:
2179         return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
2180                  lid == be16_to_cpu(IB_LID_PERMISSIVE));
2181 }
2182 
2183 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
2184 {
2185         int ret;
2186 
2187         if (!qp->device->ops.attach_mcast)
2188                 return -EOPNOTSUPP;
2189 
2190         if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
2191             qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
2192                 return -EINVAL;
2193 
2194         ret = qp->device->ops.attach_mcast(qp, gid, lid);
2195         if (!ret)
2196                 atomic_inc(&qp->usecnt);
2197         return ret;
2198 }
2199 EXPORT_SYMBOL(ib_attach_mcast);
2200 
2201 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
2202 {
2203         int ret;
2204 
2205         if (!qp->device->ops.detach_mcast)
2206                 return -EOPNOTSUPP;
2207 
2208         if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
2209             qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
2210                 return -EINVAL;
2211 
2212         ret = qp->device->ops.detach_mcast(qp, gid, lid);
2213         if (!ret)
2214                 atomic_dec(&qp->usecnt);
2215         return ret;
2216 }
2217 EXPORT_SYMBOL(ib_detach_mcast);
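
/*
 * Example (editorial sketch): only UD QPs may join multicast groups.  A ULP
 * that has resolved a multicast GID/LID pair (mgid and mlid below are
 * caller-provided values) attaches and later detaches symmetrically:
 *
 *	ret = ib_attach_mcast(qp, &mgid, mlid);
 *	if (ret)
 *		return ret;
 *	...
 *	ret = ib_detach_mcast(qp, &mgid, mlid);
 */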
2218 
2219 struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
2220 {
2221         struct ib_xrcd *xrcd;
2222 
2223         if (!device->ops.alloc_xrcd)
2224                 return ERR_PTR(-EOPNOTSUPP);
2225 
2226         xrcd = device->ops.alloc_xrcd(device, NULL);
2227         if (!IS_ERR(xrcd)) {
2228                 xrcd->device = device;
2229                 xrcd->inode = NULL;
2230                 atomic_set(&xrcd->usecnt, 0);
2231                 mutex_init(&xrcd->tgt_qp_mutex);
2232                 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
2233         }
2234 
2235         return xrcd;
2236 }
2237 EXPORT_SYMBOL(__ib_alloc_xrcd);
2238 
2239 int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
2240 {
2241         struct ib_qp *qp;
2242         int ret;
2243 
2244         if (atomic_read(&xrcd->usecnt))
2245                 return -EBUSY;
2246 
2247         while (!list_empty(&xrcd->tgt_qp_list)) {
2248                 qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
2249                 ret = ib_destroy_qp(qp);
2250                 if (ret)
2251                         return ret;
2252         }
2253         mutex_destroy(&xrcd->tgt_qp_mutex);
2254 
2255         return xrcd->device->ops.dealloc_xrcd(xrcd, udata);
2256 }
2257 EXPORT_SYMBOL(ib_dealloc_xrcd);
2258 
2259 /**
2260  * ib_create_wq - Creates a WQ associated with the specified protection
2261  * domain.
2262  * @pd: The protection domain associated with the WQ.
2263  * @wq_attr: A list of initial attributes required to create the
2264  * WQ. If WQ creation succeeds, then the attributes are updated to
2265  * the actual capabilities of the created WQ.
2266  *
2267  * wq_attr->max_wr and wq_attr->max_sge determine
2268  * the requested size of the WQ, and are set to the actual values allocated
2269  * on return.
2270  * If ib_create_wq() succeeds, then max_wr and max_sge will always be
2271  * at least as large as the requested values.
2272  */
2273 struct ib_wq *ib_create_wq(struct ib_pd *pd,
2274                            struct ib_wq_init_attr *wq_attr)
2275 {
2276         struct ib_wq *wq;
2277 
2278         if (!pd->device->ops.create_wq)
2279                 return ERR_PTR(-EOPNOTSUPP);
2280 
2281         wq = pd->device->ops.create_wq(pd, wq_attr, NULL);
2282         if (!IS_ERR(wq)) {
2283                 wq->event_handler = wq_attr->event_handler;
2284                 wq->wq_context = wq_attr->wq_context;
2285                 wq->wq_type = wq_attr->wq_type;
2286                 wq->cq = wq_attr->cq;
2287                 wq->device = pd->device;
2288                 wq->pd = pd;
2289                 wq->uobject = NULL;
2290                 atomic_inc(&pd->usecnt);
2291                 atomic_inc(&wq_attr->cq->usecnt);
2292                 atomic_set(&wq->usecnt, 0);
2293         }
2294         return wq;
2295 }
2296 EXPORT_SYMBOL(ib_create_wq);
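
/*
 * Example (editorial sketch): a caller fills struct ib_wq_init_attr for a
 * receive work queue (IB_WQT_RQ) and checks the possibly adjusted max_wr and
 * max_sge on return; the sizes below are illustrative:
 *
 *	struct ib_wq_init_attr wq_attr = {
 *		.wq_type = IB_WQT_RQ,
 *		.max_wr	 = 256,
 *		.max_sge = 1,
 *		.cq	 = cq,
 *	};
 *	struct ib_wq *wq = ib_create_wq(pd, &wq_attr);
 *
 *	if (IS_ERR(wq))
 *		return PTR_ERR(wq);
 */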
2297 
2298 /**
2299  * ib_destroy_wq - Destroys the specified user WQ.
2300  * @wq: The WQ to destroy.
2301  * @udata: Valid user data
2302  */
2303 int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
2304 {
2305         struct ib_cq *cq = wq->cq;
2306         struct ib_pd *pd = wq->pd;
2307 
2308         if (atomic_read(&wq->usecnt))
2309                 return -EBUSY;
2310 
2311         wq->device->ops.destroy_wq(wq, udata);
2312         atomic_dec(&pd->usecnt);
2313         atomic_dec(&cq->usecnt);
2314 
2315         return 0;
2316 }
2317 EXPORT_SYMBOL(ib_destroy_wq);
2318 
2319 /**
2320  * ib_modify_wq - Modifies the specified WQ.
2321  * @wq: The WQ to modify.
2322  * @wq_attr: On input, specifies the WQ attributes to modify.
2323  * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
2324  *   are being modified.
2325  * On return, @wq_attr holds the current values of the selected WQ attributes.
2326  */
2327 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
2328                  u32 wq_attr_mask)
2329 {
2330         int err;
2331 
2332         if (!wq->device->ops.modify_wq)
2333                 return -EOPNOTSUPP;
2334 
2335         err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL);
2336         return err;
2337 }
2338 EXPORT_SYMBOL(ib_modify_wq);
2339 
2340 /*
2341  * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
2342  * @device: The device on which to create the rwq indirection table.
2343  * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
2344  * create the Indirection Table.
2345  *
2346  * Note: The lifetime of ib_rwq_ind_table_init_attr->ind_tbl must not be
2347  *      shorter than that of the created ib_rwq_ind_table object, and the
2348  *      caller is responsible for its memory allocation and freeing.
2349  */
2350 struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
2351                                                  struct ib_rwq_ind_table_init_attr *init_attr)
2352 {
2353         struct ib_rwq_ind_table *rwq_ind_table;
2354         int i;
2355         u32 table_size;
2356 
2357         if (!device->ops.create_rwq_ind_table)
2358                 return ERR_PTR(-EOPNOTSUPP);
2359 
2360         table_size = (1 << init_attr->log_ind_tbl_size);
2361         rwq_ind_table = device->ops.create_rwq_ind_table(device,
2362                                                          init_attr, NULL);
2363         if (IS_ERR(rwq_ind_table))
2364                 return rwq_ind_table;
2365 
2366         rwq_ind_table->ind_tbl = init_attr->ind_tbl;
2367         rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
2368         rwq_ind_table->device = device;
2369         rwq_ind_table->uobject = NULL;
2370         atomic_set(&rwq_ind_table->usecnt, 0);
2371 
2372         for (i = 0; i < table_size; i++)
2373                 atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
2374 
2375         return rwq_ind_table;
2376 }
2377 EXPORT_SYMBOL(ib_create_rwq_ind_table);
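
/*
 * Example (editorial sketch): an RSS-capable ULP builds the WQ array itself
 * and, per the lifetime note above, keeps it allocated for as long as the
 * indirection table exists.  The four-entry table below is illustrative:
 *
 *	struct ib_wq *wqs[4];
 *	struct ib_rwq_ind_table_init_attr init_attr = {
 *		.log_ind_tbl_size = 2,
 *		.ind_tbl	  = wqs,
 *	};
 *	struct ib_rwq_ind_table *ind_tbl;
 *
 *	ind_tbl = ib_create_rwq_ind_table(device, &init_attr);
 *	if (IS_ERR(ind_tbl))
 *		return PTR_ERR(ind_tbl);
 */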
2378 
2379 /*
2380  * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
2381  * @wq_ind_table: The Indirection Table to destroy.
2382  */
2383 int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
2384 {
2385         int err, i;
2386         u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
2387         struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
2388 
2389         if (atomic_read(&rwq_ind_table->usecnt))
2390                 return -EBUSY;
2391 
2392         err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
2393         if (!err) {
2394                 for (i = 0; i < table_size; i++)
2395                         atomic_dec(&ind_tbl[i]->usecnt);
2396         }
2397 
2398         return err;
2399 }
2400 EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
2401 
2402 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
2403                        struct ib_mr_status *mr_status)
2404 {
2405         if (!mr->device->ops.check_mr_status)
2406                 return -EOPNOTSUPP;
2407 
2408         return mr->device->ops.check_mr_status(mr, check_mask, mr_status);
2409 }
2410 EXPORT_SYMBOL(ib_check_mr_status);
2411 
2412 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
2413                          int state)
2414 {
2415         if (!device->ops.set_vf_link_state)
2416                 return -EOPNOTSUPP;
2417 
2418         return device->ops.set_vf_link_state(device, vf, port, state);
2419 }
2420 EXPORT_SYMBOL(ib_set_vf_link_state);
2421 
2422 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
2423                      struct ifla_vf_info *info)
2424 {
2425         if (!device->ops.get_vf_config)
2426                 return -EOPNOTSUPP;
2427 
2428         return device->ops.get_vf_config(device, vf, port, info);
2429 }
2430 EXPORT_SYMBOL(ib_get_vf_config);
2431 
2432 int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
2433                     struct ifla_vf_stats *stats)
2434 {
2435         if (!device->ops.get_vf_stats)
2436                 return -EOPNOTSUPP;
2437 
2438         return device->ops.get_vf_stats(device, vf, port, stats);
2439 }
2440 EXPORT_SYMBOL(ib_get_vf_stats);
2441 
2442 int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
2443                    int type)
2444 {
2445         if (!device->ops.set_vf_guid)
2446                 return -EOPNOTSUPP;
2447 
2448         return device->ops.set_vf_guid(device, vf, port, guid, type);
2449 }
2450 EXPORT_SYMBOL(ib_set_vf_guid);
2451 
2452 /**
2453  * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
2454  *     information) and set an appropriate memory region for registration.
2455  * @mr:             memory region
2456  * @data_sg:        dma mapped scatterlist for data
2457  * @data_sg_nents:  number of entries in data_sg
2458  * @data_sg_offset: offset in bytes into data_sg
2459  * @meta_sg:        dma mapped scatterlist for metadata
2460  * @meta_sg_nents:  number of entries in meta_sg
2461  * @meta_sg_offset: offset in bytes into meta_sg
2462  * @page_size:      page vector desired page size
2463  *
2464  * Constraints:
2465  * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY.
2466  *
2467  * Return: 0 on success.
2468  *
2469  * After this completes successfully, the  memory region
2470  * is ready for registration.
2471  */
2472 int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
2473                     int data_sg_nents, unsigned int *data_sg_offset,
2474                     struct scatterlist *meta_sg, int meta_sg_nents,
2475                     unsigned int *meta_sg_offset, unsigned int page_size)
2476 {
2477         if (unlikely(!mr->device->ops.map_mr_sg_pi ||
2478                      WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY)))
2479                 return -EOPNOTSUPP;
2480 
2481         mr->page_size = page_size;
2482 
2483         return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents,
2484                                             data_sg_offset, meta_sg,
2485                                             meta_sg_nents, meta_sg_offset);
2486 }
2487 EXPORT_SYMBOL(ib_map_mr_sg_pi);
2488 
2489 /**
2490  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
2491  *     and set it on the memory region.
2492  * @mr:            memory region
2493  * @sg:            dma mapped scatterlist
2494  * @sg_nents:      number of entries in sg
2495  * @sg_offset:     offset in bytes into sg
2496  * @page_size:     page vector desired page size
2497  *
2498  * Constraints:
2499  * - The first sg element is allowed to have an offset.
2500  * - Each sg element must either be aligned to page_size or virtually
2501  *   contiguous to the previous element. In case an sg element has a
2502  *   non-contiguous offset, the mapping prefix will not include it.
2503  * - The last sg element is allowed to have length less than page_size.
2504  * - If the total byte length of sg_nents exceeds max_num_sg * page_size,
2505  *   then only max_num_sg entries will be mapped.
2506  * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these
2507  *   constraints holds and the page_size argument is ignored.
2508  *
2509  * Returns the number of sg elements that were mapped to the memory region.
2510  *
2511  * After this completes successfully, the  memory region
2512  * is ready for registration.
2513  */
2514 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
2515                  unsigned int *sg_offset, unsigned int page_size)
2516 {
2517         if (unlikely(!mr->device->ops.map_mr_sg))
2518                 return -EOPNOTSUPP;
2519 
2520         mr->page_size = page_size;
2521 
2522         return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset);
2523 }
2524 EXPORT_SYMBOL(ib_map_mr_sg);
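
/*
 * Example (editorial sketch): the usual fast registration flow DMA-maps the
 * scatterlist, lets ib_map_mr_sg() build the page vector, and then posts an
 * IB_WR_REG_MR work request that references the MR:
 *
 *	n = ib_dma_map_sg(dev, sg, sg_nents, DMA_TO_DEVICE);
 *	if (n <= 0)
 *		return -ENOMEM;
 *
 *	n = ib_map_mr_sg(mr, sg, n, NULL, PAGE_SIZE);
 *	if (n < 0)
 *		return n;
 */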
2525 
2526 /**
2527  * ib_sg_to_pages() - Convert the largest prefix of a sg list
2528  *     to a page vector
2529  * @mr:            memory region
2530  * @sgl:           dma mapped scatterlist
2531  * @sg_nents:      number of entries in sg
2532  * @sg_offset_p:   IN:  start offset in bytes into sg
2533  *                 OUT: offset in bytes, within the sg element indexed by the
2534  *                      return value of this function, of the first byte that
2535  *                      has not been processed.
2536  * @set_page:      driver page assignment function pointer
2537  *
2538  * Core service helper for drivers to convert the largest
2539  * prefix of given sg list to a page vector. The sg list
2540  * prefix converted is the prefix that meets the requirements
2541  * of ib_map_mr_sg.
2542  *
2543  * Returns the number of sg elements that were assigned to
2544  * a page vector.
2545  */
2546 int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
2547                 unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
2548 {
2549         struct scatterlist *sg;
2550         u64 last_end_dma_addr = 0;
2551         unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2552         unsigned int last_page_off = 0;
2553         u64 page_mask = ~((u64)mr->page_size - 1);
2554         int i, ret;
2555 
2556         if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
2557                 return -EINVAL;
2558 
2559         mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
2560         mr->length = 0;
2561 
2562         for_each_sg(sgl, sg, sg_nents, i) {
2563                 u64 dma_addr = sg_dma_address(sg) + sg_offset;
2564                 u64 prev_addr = dma_addr;
2565                 unsigned int dma_len = sg_dma_len(sg) - sg_offset;
2566                 u64 end_dma_addr = dma_addr + dma_len;
2567                 u64 page_addr = dma_addr & page_mask;
2568 
2569                 /*
2570                  * For the second and later elements, check whether either the
2571                  * end of element i-1 or the start of element i is not aligned
2572                  * on a page boundary.
2573                  */
2574                 if (i && (last_page_off != 0 || page_addr != dma_addr)) {
2575                         /* Stop mapping if there is a gap. */
2576                         if (last_end_dma_addr != dma_addr)
2577                                 break;
2578 
2579                         /*
2580                          * Coalesce this element with the last. If it is small
2581                          * enough just update mr->length. Otherwise start
2582                          * mapping from the next page.
2583                          */
2584                         goto next_page;
2585                 }
2586 
2587                 do {
2588                         ret = set_page(mr, page_addr);
2589                         if (unlikely(ret < 0)) {
2590                                 sg_offset = prev_addr - sg_dma_address(sg);
2591                                 mr->length += prev_addr - dma_addr;
2592                                 if (sg_offset_p)
2593                                         *sg_offset_p = sg_offset;
2594                                 return i || sg_offset ? i : ret;
2595                         }
2596                         prev_addr = page_addr;
2597 next_page:
2598                         page_addr += mr->page_size;
2599                 } while (page_addr < end_dma_addr);
2600 
2601                 mr->length += dma_len;
2602                 last_end_dma_addr = end_dma_addr;
2603                 last_page_off = end_dma_addr & ~page_mask;
2604 
2605                 sg_offset = 0;
2606         }
2607 
2608         if (sg_offset_p)
2609                 *sg_offset_p = 0;
2610         return i;
2611 }
2612 EXPORT_SYMBOL(ib_sg_to_pages);
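
/*
 * Hypothetical usage sketch (illustrative only, not part of this file):
 * a provider driver's map_mr_sg handler typically wraps ib_sg_to_pages()
 * with a set_page callback that records each page address in a
 * driver-private page list.  The struct my_mr, my_set_page and
 * my_map_mr_sg names below are assumptions, not real driver symbols.
 */
struct my_mr {
        struct ib_mr    ibmr;
        u64             *pages;         /* driver-owned page array */
        int             npages;
        int             max_pages;
};

static int my_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct my_mr *mr = container_of(ibmr, struct my_mr, ibmr);

        /* Refuse pages beyond what the driver allocated room for. */
        if (mr->npages == mr->max_pages)
                return -ENOMEM;

        mr->pages[mr->npages++] = addr;
        return 0;
}

static int my_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                        int sg_nents, unsigned int *sg_offset)
{
        struct my_mr *mr = container_of(ibmr, struct my_mr, ibmr);

        mr->npages = 0;
        /* Let the core walk the sg list and call my_set_page() per page. */
        return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, my_set_page);
}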
2613 
2614 struct ib_drain_cqe {
2615         struct ib_cqe cqe;
2616         struct completion done;
2617 };
2618 
2619 static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
2620 {
2621         struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
2622                                                 cqe);
2623 
2624         complete(&cqe->done);
2625 }
2626 
2627 /*
2628  * Post a send WR on the SQ and block until its completion is reaped.
2629  */
2630 static void __ib_drain_sq(struct ib_qp *qp)
2631 {
2632         struct ib_cq *cq = qp->send_cq;
2633         struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2634         struct ib_drain_cqe sdrain;
2635         struct ib_rdma_wr swr = {
2636                 .wr = {
2637                         .next = NULL,
2638                         { .wr_cqe       = &sdrain.cqe, },
2639                         .opcode = IB_WR_RDMA_WRITE,
2640                 },
2641         };
2642         int ret;
2643 
2644         ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2645         if (ret) {
2646                 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2647                 return;
2648         }
2649 
2650         sdrain.cqe.done = ib_drain_qp_done;
2651         init_completion(&sdrain.done);
2652 
2653         ret = ib_post_send(qp, &swr.wr, NULL);
2654         if (ret) {
2655                 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2656                 return;
2657         }
2658 
2659         if (cq->poll_ctx == IB_POLL_DIRECT)
2660                 while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
2661                         ib_process_cq_direct(cq, -1);
2662         else
2663                 wait_for_completion(&sdrain.done);
2664 }
2665 
2666 /*
2667  * Post a receive WR on the RQ and block until its completion is reaped.
2668  */
2669 static void __ib_drain_rq(struct ib_qp *qp)
2670 {
2671         struct ib_cq *cq = qp->recv_cq;
2672         struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2673         struct ib_drain_cqe rdrain;
2674         struct ib_recv_wr rwr = {};
2675         int ret;
2676 
2677         ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2678         if (ret) {
2679                 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2680                 return;
2681         }
2682 
2683         rwr.wr_cqe = &rdrain.cqe;
2684         rdrain.cqe.done = ib_drain_qp_done;
2685         init_completion(&rdrain.done);
2686 
2687         ret = ib_post_recv(qp, &rwr, NULL);
2688         if (ret) {
2689                 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2690                 return;
2691         }
2692 
2693         if (cq->poll_ctx == IB_POLL_DIRECT)
2694                 while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
2695                         ib_process_cq_direct(cq, -1);
2696         else
2697                 wait_for_completion(&rdrain.done);
2698 }
2699 
2700 /**
2701  * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
2702  *                 application.
2703  * @qp:            queue pair to drain
2704  *
2705  * If the device has a provider-specific drain function, then
2706  * call that.  Otherwise call the generic drain function
2707  * __ib_drain_sq().
2708  *
2709  * The caller must:
2710  *
2711  * ensure there is room in the CQ and SQ for the drain work request and
2712  * completion.
2713  *
2714  * allocate the CQ using ib_alloc_cq().
2715  *
2716  * ensure that there are no other contexts that are posting WRs concurrently.
2717  * Otherwise the drain is not guaranteed.
2718  */
2719 void ib_drain_sq(struct ib_qp *qp)
2720 {
2721         if (qp->device->ops.drain_sq)
2722                 qp->device->ops.drain_sq(qp);
2723         else
2724                 __ib_drain_sq(qp);
2725 }
2726 EXPORT_SYMBOL(ib_drain_sq);
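
/*
 * Hypothetical usage sketch: a ULP error path that must not release
 * send-side resources while flushed completions are still pending can
 * drain just the SQ.  my_quiesce_sends() is illustrative only.
 */
static void my_quiesce_sends(struct ib_qp *qp)
{
        /* Returns only after every posted send WR has completed or flushed. */
        ib_drain_sq(qp);

        /* It is now safe to free per-send-WR context owned by the ULP. */
}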
2727 
2728 /**
2729  * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
2730  *                 application.
2731  * @qp:            queue pair to drain
2732  *
2733  * If the device has a provider-specific drain function, then
2734  * call that.  Otherwise call the generic drain function
2735  * __ib_drain_rq().
2736  *
2737  * The caller must:
2738  *
2739  * ensure there is room in the CQ and RQ for the drain work request and
2740  * completion.
2741  *
2742  * allocate the CQ using ib_alloc_cq().
2743  *
2744  * ensure that there are no other contexts that are posting WRs concurrently.
2745  * Otherwise the drain is not guaranteed.
2746  */
2747 void ib_drain_rq(struct ib_qp *qp)
2748 {
2749         if (qp->device->ops.drain_rq)
2750                 qp->device->ops.drain_rq(qp);
2751         else
2752                 __ib_drain_rq(qp);
2753 }
2754 EXPORT_SYMBOL(ib_drain_rq);
2755 
2756 /**
2757  * ib_drain_qp() - Block until all CQEs have been consumed by the
2758  *                 application on both the RQ and SQ.
2759  * @qp:            queue pair to drain
2760  *
2761  * The caller must:
2762  *
2763  * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
2764  * and completions.
2765  *
2766  * allocate the CQs using ib_alloc_cq().
2767  *
2768  * ensure that there are no other contexts that are posting WRs concurrently.
2769  * Otherwise the drain is not guaranteed.
2770  */
2771 void ib_drain_qp(struct ib_qp *qp)
2772 {
2773         ib_drain_sq(qp);
2774         if (!qp->srq)
2775                 ib_drain_rq(qp);
2776 }
2777 EXPORT_SYMBOL(ib_drain_qp);
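
/*
 * Hypothetical teardown sketch: a ULP that allocated its CQs with
 * ib_alloc_cq() and has stopped posting new WRs can quiesce the whole
 * QP before destroying it.  my_teardown_qp() is illustrative only.
 */
static void my_teardown_qp(struct ib_qp *qp)
{
        /* Flush both queues (the RQ is skipped internally if an SRQ is used). */
        ib_drain_qp(qp);

        /* All WR contexts have been reaped; the QP can go away now. */
        ib_destroy_qp(qp);
}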
2778 
2779 struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
2780                                      enum rdma_netdev_t type, const char *name,
2781                                      unsigned char name_assign_type,
2782                                      void (*setup)(struct net_device *))
2783 {
2784         struct rdma_netdev_alloc_params params;
2785         struct net_device *netdev;
2786         int rc;
2787 
2788         if (!device->ops.rdma_netdev_get_params)
2789                 return ERR_PTR(-EOPNOTSUPP);
2790 
2791         rc = device->ops.rdma_netdev_get_params(device, port_num, type,
2792                                                 &params);
2793         if (rc)
2794                 return ERR_PTR(rc);
2795 
2796         netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type,
2797                                   setup, params.txqs, params.rxqs);
2798         if (!netdev)
2799                 return ERR_PTR(-ENOMEM);
2800 
2801         return netdev;
2802 }
2803 EXPORT_SYMBOL(rdma_alloc_netdev);
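
/*
 * Hypothetical usage sketch: an IPoIB-style caller can ask the device
 * for an offload-capable rdma netdev.  my_netdev_setup() and
 * my_create_offload_netdev() are assumed names; RDMA_NETDEV_IPOIB and
 * NET_NAME_UNKNOWN are existing kernel constants.
 */
static void my_netdev_setup(struct net_device *ndev)
{
        /* ULP-specific net_device initialization would go here. */
}

static struct net_device *my_create_offload_netdev(struct ib_device *device,
                                                   u8 port_num)
{
        struct net_device *ndev;

        ndev = rdma_alloc_netdev(device, port_num, RDMA_NETDEV_IPOIB, "ib%d",
                                 NET_NAME_UNKNOWN, my_netdev_setup);
        if (IS_ERR(ndev))
                return NULL;    /* e.g. -EOPNOTSUPP: no hardware offload */

        return ndev;
}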
2804 
2805 int rdma_init_netdev(struct ib_device *device, u8 port_num,
2806                      enum rdma_netdev_t type, const char *name,
2807                      unsigned char name_assign_type,
2808                      void (*setup)(struct net_device *),
2809                      struct net_device *netdev)
2810 {
2811         struct rdma_netdev_alloc_params params;
2812         int rc;
2813 
2814         if (!device->ops.rdma_netdev_get_params)
2815                 return -EOPNOTSUPP;
2816 
2817         rc = device->ops.rdma_netdev_get_params(device, port_num, type,
2818                                                 &params);
2819         if (rc)
2820                 return rc;
2821 
2822         return params.initialize_rdma_netdev(device, port_num,
2823                                              netdev, params.param);
2824 }
2825 EXPORT_SYMBOL(rdma_init_netdev);
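
/*
 * Hypothetical usage sketch: when the ULP allocates the net_device
 * itself, rdma_init_netdev() lets the provider initialize the
 * rdma-specific parts of that existing netdev.  my_netdev_setup() is
 * the same assumed setup routine as in the sketch above.
 */
static int my_init_existing_netdev(struct ib_device *device, u8 port_num,
                                   struct net_device *ndev)
{
        int rc;

        rc = rdma_init_netdev(device, port_num, RDMA_NETDEV_IPOIB, "ib%d",
                              NET_NAME_UNKNOWN, my_netdev_setup, ndev);
        if (rc == -EOPNOTSUPP)
                pr_debug("device offers no rdma netdev offload\n");

        return rc;
}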
2826 
2827 void __rdma_block_iter_start(struct ib_block_iter *biter,
2828                              struct scatterlist *sglist, unsigned int nents,
2829                              unsigned long pgsz)
2830 {
2831         memset(biter, 0, sizeof(struct ib_block_iter));
2832         biter->__sg = sglist;
2833         biter->__sg_nents = nents;
2834 
2835         /* Driver provides best block size to use */
2836         biter->__pg_bit = __fls(pgsz);
2837 }
2838 EXPORT_SYMBOL(__rdma_block_iter_start);
2839 
2840 bool __rdma_block_iter_next(struct ib_block_iter *biter)
2841 {
2842         unsigned int block_offset;
2843 
2844         if (!biter->__sg_nents || !biter->__sg)
2845                 return false;
2846 
2847         biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
2848         block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
2849         biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset;
2850 
2851         if (biter->__sg_advance >= sg_dma_len(biter->__sg)) {
2852                 biter->__sg_advance = 0;
2853                 biter->__sg = sg_next(biter->__sg);
2854                 biter->__sg_nents--;
2855         }
2856 
2857         return true;
2858 }
2859 EXPORT_SYMBOL(__rdma_block_iter_next);
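
/*
 * Hypothetical usage sketch: drivers normally consume these helpers
 * through the rdma_for_each_block() iterator (include/rdma/ib_verbs.h)
 * to walk a DMA-mapped scatterlist in aligned, fixed-size blocks.
 * my_fill_page_list() and its page_list argument are illustrative.
 */
static void my_fill_page_list(u64 *page_list, struct scatterlist *sgl,
                              unsigned int nents, unsigned long pgsz)
{
        struct ib_block_iter biter;
        unsigned int n = 0;

        rdma_for_each_block(sgl, &biter, nents, pgsz)
                page_list[n++] = rdma_block_iter_dma_address(&biter);
}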
