root/drivers/infiniband/hw/mlx4/main.c

DEFINITIONS

This source file includes the following definitions.
  1. init_query_mad
  2. check_flow_steering_support
  3. num_ib_ports
  4. mlx4_ib_get_netdev
  5. mlx4_ib_update_gids_v1
  6. mlx4_ib_update_gids_v1_v2
  7. mlx4_ib_update_gids
  8. free_gid_entry
  9. mlx4_ib_add_gid
  10. mlx4_ib_del_gid
  11. mlx4_ib_gid_index_to_real_index
  12. mlx4_ib_query_device
  13. mlx4_ib_port_link_layer
  14. ib_link_query_port
  15. state_to_phys_state
  16. eth_link_query_port
  17. __mlx4_ib_query_port
  18. mlx4_ib_query_port
  19. __mlx4_ib_query_gid
  20. mlx4_ib_query_gid
  21. mlx4_ib_query_sl2vl
  22. mlx4_init_sl2vl_tbl
  23. __mlx4_ib_query_pkey
  24. mlx4_ib_query_pkey
  25. mlx4_ib_modify_device
  26. mlx4_ib_SET_PORT
  27. mlx4_ib_modify_port
  28. mlx4_ib_alloc_ucontext
  29. mlx4_ib_dealloc_ucontext
  30. mlx4_ib_disassociate_ucontext
  31. mlx4_ib_mmap
  32. mlx4_ib_alloc_pd
  33. mlx4_ib_dealloc_pd
  34. mlx4_ib_alloc_xrcd
  35. mlx4_ib_dealloc_xrcd
  36. add_gid_entry
  37. mlx4_ib_delete_counters_table
  38. mlx4_ib_add_mc
  39. parse_flow_attr
  40. __mlx4_ib_default_rules_match
  41. __mlx4_ib_create_default_rules
  42. __mlx4_ib_create_flow
  43. __mlx4_ib_destroy_flow
  44. mlx4_ib_tunnel_steer_add
  45. mlx4_ib_add_dont_trap_rule
  46. mlx4_ib_create_flow
  47. mlx4_ib_destroy_flow
  48. mlx4_ib_mcg_attach
  49. find_gid_entry
  50. mlx4_ib_mcg_detach
  51. init_node_data
  52. hca_type_show
  53. hw_rev_show
  54. board_id_show
  55. mlx4_ib_alloc_hw_stats
  56. mlx4_ib_get_hw_stats
  57. __mlx4_ib_alloc_diag_counters
  58. mlx4_ib_fill_diag_counters
  59. mlx4_ib_alloc_diag_counters
  60. mlx4_ib_diag_cleanup
  61. mlx4_ib_update_qps
  62. mlx4_ib_scan_netdevs
  63. mlx4_ib_netdev_event
  64. init_pkeys
  65. mlx4_ib_alloc_eqs
  66. mlx4_ib_free_eqs
  67. mlx4_port_immutable
  68. get_fw_ver_str
  69. mlx4_ib_add
  70. mlx4_ib_steer_qp_alloc
  71. mlx4_ib_steer_qp_free
  72. mlx4_ib_steer_qp_reg
  73. mlx4_ib_remove
  74. do_slave_init
  75. mlx4_ib_handle_catas_error
  76. handle_bonded_port_state_event
  77. mlx4_ib_sl2vl_update
  78. ib_sl2vl_update_work
  79. mlx4_sched_ib_sl2vl_update_work
  80. mlx4_ib_event
  81. mlx4_ib_init
  82. mlx4_ib_cleanup

   1 /*
   2  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
   3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
   4  *
   5  * This software is available to you under a choice of one of two
   6  * licenses.  You may choose to be licensed under the terms of the GNU
   7  * General Public License (GPL) Version 2, available from the file
   8  * COPYING in the main directory of this source tree, or the
   9  * OpenIB.org BSD license below:
  10  *
  11  *     Redistribution and use in source and binary forms, with or
  12  *     without modification, are permitted provided that the following
  13  *     conditions are met:
  14  *
  15  *      - Redistributions of source code must retain the above
  16  *        copyright notice, this list of conditions and the following
  17  *        disclaimer.
  18  *
  19  *      - Redistributions in binary form must reproduce the above
  20  *        copyright notice, this list of conditions and the following
  21  *        disclaimer in the documentation and/or other materials
  22  *        provided with the distribution.
  23  *
  24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31  * SOFTWARE.
  32  */
  33 
  34 #include <linux/module.h>
  35 #include <linux/init.h>
  36 #include <linux/slab.h>
  37 #include <linux/errno.h>
  38 #include <linux/netdevice.h>
  39 #include <linux/inetdevice.h>
  40 #include <linux/rtnetlink.h>
  41 #include <linux/if_vlan.h>
  42 #include <linux/sched/mm.h>
  43 #include <linux/sched/task.h>
  44 
  45 #include <net/ipv6.h>
  46 #include <net/addrconf.h>
  47 #include <net/devlink.h>
  48 
  49 #include <rdma/ib_smi.h>
  50 #include <rdma/ib_user_verbs.h>
  51 #include <rdma/ib_addr.h>
  52 #include <rdma/ib_cache.h>
  53 
  54 #include <net/bonding.h>
  55 
  56 #include <linux/mlx4/driver.h>
  57 #include <linux/mlx4/cmd.h>
  58 #include <linux/mlx4/qp.h>
  59 
  60 #include "mlx4_ib.h"
  61 #include <rdma/mlx4-abi.h>
  62 
  63 #define DRV_NAME        MLX4_IB_DRV_NAME
  64 #define DRV_VERSION     "4.0-0"
  65 
  66 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
  67 #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
  68 #define MLX4_IB_CARD_REV_A0   0xA0
  69 
  70 MODULE_AUTHOR("Roland Dreier");
  71 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
  72 MODULE_LICENSE("Dual BSD/GPL");
  73 
  74 int mlx4_ib_sm_guid_assign = 0;
  75 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
  76 MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
  77 
  78 static const char mlx4_ib_version[] =
  79         DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
  80         DRV_VERSION "\n";
  81 
  82 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
  83 static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
  84                                                     u8 port_num);
  85 
  86 static struct workqueue_struct *wq;
  87 
  88 static void init_query_mad(struct ib_smp *mad)
  89 {
  90         mad->base_version  = 1;
  91         mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
  92         mad->class_version = 1;
  93         mad->method        = IB_MGMT_METHOD_GET;
  94 }
  95 
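      /*
       * Device-managed flow steering (DMFS) is reported only when the
       * device is in DMFS steering mode, IPoIB DMFS is supported if any
       * IB port is present, Ethernet flow steering is enabled if any
       * Ethernet port is present, and no IB port is used in a
       * multi-function (SR-IOV) environment.
       */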
  96 static int check_flow_steering_support(struct mlx4_dev *dev)
  97 {
  98         int eth_num_ports = 0;
  99         int ib_num_ports = 0;
 100 
 101         int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
 102 
 103         if (dmfs) {
 104                 int i;
 105                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
 106                         eth_num_ports++;
 107                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 108                         ib_num_ports++;
 109                 dmfs &= (!ib_num_ports ||
 110                          (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
 111                         (!eth_num_ports ||
 112                          (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
 113                 if (ib_num_ports && mlx4_is_mfunc(dev)) {
 114                         pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
 115                         dmfs = 0;
 116                 }
 117         }
 118         return dmfs;
 119 }
 120 
 121 static int num_ib_ports(struct mlx4_dev *dev)
 122 {
 123         int ib_ports = 0;
 124         int i;
 125 
 126         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 127                 ib_ports++;
 128 
 129         return ib_ports;
 130 }
 131 
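      /*
       * Return the Ethernet netdev backing @port_num with a reference
       * held; when the ports are bonded, resolve to the bond's currently
       * active slave.
       */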
 132 static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
 133 {
 134         struct mlx4_ib_dev *ibdev = to_mdev(device);
 135         struct net_device *dev;
 136 
 137         rcu_read_lock();
 138         dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
 139 
 140         if (dev) {
 141                 if (mlx4_is_bonded(ibdev->dev)) {
 142                         struct net_device *upper = NULL;
 143 
 144                         upper = netdev_master_upper_dev_get_rcu(dev);
 145                         if (upper) {
 146                                 struct net_device *active;
 147 
 148                                 active = bond_option_active_slave_get_rcu(netdev_priv(upper));
 149                                 if (active)
 150                                         dev = active;
 151                         }
 152                 }
 153         }
 154         if (dev)
 155                 dev_hold(dev);
 156 
 157         rcu_read_unlock();
 158         return dev;
 159 }
 160 
 161 static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
 162                                   struct mlx4_ib_dev *ibdev,
 163                                   u8 port_num)
 164 {
 165         struct mlx4_cmd_mailbox *mailbox;
 166         int err;
 167         struct mlx4_dev *dev = ibdev->dev;
 168         int i;
 169         union ib_gid *gid_tbl;
 170 
 171         mailbox = mlx4_alloc_cmd_mailbox(dev);
 172         if (IS_ERR(mailbox))
 173                 return -ENOMEM;
 174 
 175         gid_tbl = mailbox->buf;
 176 
 177         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
 178                 memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
 179 
 180         err = mlx4_cmd(dev, mailbox->dma,
 181                        MLX4_SET_PORT_GID_TABLE << 8 | port_num,
 182                        1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 183                        MLX4_CMD_WRAPPED);
 184         if (mlx4_is_bonded(dev))
 185                 err += mlx4_cmd(dev, mailbox->dma,
 186                                 MLX4_SET_PORT_GID_TABLE << 8 | 2,
 187                                 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 188                                 MLX4_CMD_WRAPPED);
 189 
 190         mlx4_free_cmd_mailbox(dev, mailbox);
 191         return err;
 192 }
 193 
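      /*
       * RoCE v1/v2 capable devices use an extended GID table entry: the
       * version field is set to 2 for RoCE v2 GIDs, and the type bit is
       * set for GIDs that are not IPv4-mapped (i.e. native IPv6).
       */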
 194 static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
 195                                      struct mlx4_ib_dev *ibdev,
 196                                      u8 port_num)
 197 {
 198         struct mlx4_cmd_mailbox *mailbox;
 199         int err;
 200         struct mlx4_dev *dev = ibdev->dev;
 201         int i;
 202         struct {
 203                 union ib_gid    gid;
 204                 __be32          rsrvd1[2];
 205                 __be16          rsrvd2;
 206                 u8              type;
 207                 u8              version;
 208                 __be32          rsrvd3;
 209         } *gid_tbl;
 210 
 211         mailbox = mlx4_alloc_cmd_mailbox(dev);
 212         if (IS_ERR(mailbox))
 213                 return -ENOMEM;
 214 
 215         gid_tbl = mailbox->buf;
 216         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 217                 memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
 218                 if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
 219                         gid_tbl[i].version = 2;
 220                         if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
 221                                 gid_tbl[i].type = 1;
 222                 }
 223         }
 224 
 225         err = mlx4_cmd(dev, mailbox->dma,
 226                        MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
 227                        1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 228                        MLX4_CMD_WRAPPED);
 229         if (mlx4_is_bonded(dev))
 230                 err += mlx4_cmd(dev, mailbox->dma,
 231                                 MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
 232                                 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 233                                 MLX4_CMD_WRAPPED);
 234 
 235         mlx4_free_cmd_mailbox(dev, mailbox);
 236         return err;
 237 }
 238 
 239 static int mlx4_ib_update_gids(struct gid_entry *gids,
 240                                struct mlx4_ib_dev *ibdev,
 241                                u8 port_num)
 242 {
 243         if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
 244                 return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
 245 
 246         return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
 247 }
 248 
 249 static void free_gid_entry(struct gid_entry *entry)
 250 {
 251         memset(&entry->gid, 0, sizeof(entry->gid));
 252         kfree(entry->ctx);
 253         entry->ctx = NULL;
 254 }
 255 
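      /*
       * Add a GID to the per-port software cache under iboe->lock.  If a
       * new slot is consumed, a snapshot of the table is pushed to HW
       * outside the lock; on failure the cache entry is rolled back.
       */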
 256 static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 257 {
 258         struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 259         struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 260         struct mlx4_port_gid_table   *port_gid_table;
 261         int free = -1, found = -1;
 262         int ret = 0;
 263         int hw_update = 0;
 264         int i;
 265         struct gid_entry *gids = NULL;
 266 
 267         if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 268                 return -EINVAL;
 269 
 270         if (attr->port_num > MLX4_MAX_PORTS)
 271                 return -EINVAL;
 272 
 273         if (!context)
 274                 return -EINVAL;
 275 
 276         port_gid_table = &iboe->gids[attr->port_num - 1];
 277         spin_lock_bh(&iboe->lock);
 278         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 279                 if (!memcmp(&port_gid_table->gids[i].gid,
 280                             &attr->gid, sizeof(attr->gid)) &&
 281                     port_gid_table->gids[i].gid_type == attr->gid_type)  {
 282                         found = i;
 283                         break;
 284                 }
 285                 if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
 286                         free = i; /* HW has space */
 287         }
 288 
 289         if (found < 0) {
 290                 if (free < 0) {
 291                         ret = -ENOSPC;
 292                 } else {
 293                         port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
 294                         if (!port_gid_table->gids[free].ctx) {
 295                                 ret = -ENOMEM;
 296                         } else {
 297                                 *context = port_gid_table->gids[free].ctx;
 298                                 memcpy(&port_gid_table->gids[free].gid,
 299                                        &attr->gid, sizeof(attr->gid));
 300                                 port_gid_table->gids[free].gid_type = attr->gid_type;
 301                                 port_gid_table->gids[free].ctx->real_index = free;
 302                                 port_gid_table->gids[free].ctx->refcount = 1;
 303                                 hw_update = 1;
 304                         }
 305                 }
 306         } else {
 307                 struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
 308                 *context = ctx;
 309                 ctx->refcount++;
 310         }
 311         if (!ret && hw_update) {
 312                 gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
 313                                      GFP_ATOMIC);
 314                 if (!gids) {
 315                         ret = -ENOMEM;
 316                         *context = NULL;
 317                         free_gid_entry(&port_gid_table->gids[free]);
 318                 } else {
 319                         for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 320                                 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
 321                                 gids[i].gid_type = port_gid_table->gids[i].gid_type;
 322                         }
 323                 }
 324         }
 325         spin_unlock_bh(&iboe->lock);
 326 
 327         if (!ret && hw_update) {
 328                 ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
 329                 if (ret) {
 330                         spin_lock_bh(&iboe->lock);
 331                         *context = NULL;
 332                         free_gid_entry(&port_gid_table->gids[free]);
 333                         spin_unlock_bh(&iboe->lock);
 334                 }
 335                 kfree(gids);
 336         }
 337 
 338         return ret;
 339 }
 340 
 341 static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
 342 {
 343         struct gid_cache_context *ctx = *context;
 344         struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 345         struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 346         struct mlx4_port_gid_table   *port_gid_table;
 347         int ret = 0;
 348         int hw_update = 0;
 349         struct gid_entry *gids = NULL;
 350 
 351         if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 352                 return -EINVAL;
 353 
 354         if (attr->port_num > MLX4_MAX_PORTS)
 355                 return -EINVAL;
 356 
 357         port_gid_table = &iboe->gids[attr->port_num - 1];
 358         spin_lock_bh(&iboe->lock);
 359         if (ctx) {
 360                 ctx->refcount--;
 361                 if (!ctx->refcount) {
 362                         unsigned int real_index = ctx->real_index;
 363 
 364                         free_gid_entry(&port_gid_table->gids[real_index]);
 365                         hw_update = 1;
 366                 }
 367         }
 368         if (!ret && hw_update) {
 369                 int i;
 370 
 371                 gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
 372                                      GFP_ATOMIC);
 373                 if (!gids) {
 374                         ret = -ENOMEM;
 375                 } else {
 376                         for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 377                                 memcpy(&gids[i].gid,
 378                                        &port_gid_table->gids[i].gid,
 379                                        sizeof(union ib_gid));
 380                                 gids[i].gid_type =
 381                                     port_gid_table->gids[i].gid_type;
 382                         }
 383                 }
 384         }
 385         spin_unlock_bh(&iboe->lock);
 386 
 387         if (!ret && hw_update) {
 388                 ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
 389                 kfree(gids);
 390         }
 391         return ret;
 392 }
 393 
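      /*
       * Map a GID cache index reported by the IB core to the index
       * actually programmed in the HW GID table.  Bonded devices always
       * use port 1, and non-RoCE ports use the core index unchanged.
       */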
 394 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
 395                                     const struct ib_gid_attr *attr)
 396 {
 397         struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 398         struct gid_cache_context *ctx = NULL;
 399         struct mlx4_port_gid_table   *port_gid_table;
 400         int real_index = -EINVAL;
 401         int i;
 402         unsigned long flags;
 403         u8 port_num = attr->port_num;
 404 
 405         if (port_num > MLX4_MAX_PORTS)
 406                 return -EINVAL;
 407 
 408         if (mlx4_is_bonded(ibdev->dev))
 409                 port_num = 1;
 410 
 411         if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
 412                 return attr->index;
 413 
 414         spin_lock_irqsave(&iboe->lock, flags);
 415         port_gid_table = &iboe->gids[port_num - 1];
 416 
 417         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
 418                 if (!memcmp(&port_gid_table->gids[i].gid,
 419                             &attr->gid, sizeof(attr->gid)) &&
 420                     attr->gid_type == port_gid_table->gids[i].gid_type) {
 421                         ctx = port_gid_table->gids[i].ctx;
 422                         break;
 423                 }
 424         if (ctx)
 425                 real_index = ctx->real_index;
 426         spin_unlock_irqrestore(&iboe->lock, flags);
 427         return real_index;
 428 }
 429 
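      /* True when a user buffer of size 'sz' is large enough to contain field 'fld'. */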
 430 #define field_avail(type, fld, sz) (offsetof(type, fld) + \
 431                                     sizeof(((type *)0)->fld) <= (sz))
 432 
 433 static int mlx4_ib_query_device(struct ib_device *ibdev,
 434                                 struct ib_device_attr *props,
 435                                 struct ib_udata *uhw)
 436 {
 437         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 438         struct ib_smp *in_mad  = NULL;
 439         struct ib_smp *out_mad = NULL;
 440         int err;
 441         int have_ib_ports;
 442         struct mlx4_uverbs_ex_query_device cmd;
 443         struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
 444         struct mlx4_clock_params clock_params;
 445 
 446         if (uhw->inlen) {
 447                 if (uhw->inlen < sizeof(cmd))
 448                         return -EINVAL;
 449 
 450                 err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
 451                 if (err)
 452                         return err;
 453 
 454                 if (cmd.comp_mask)
 455                         return -EINVAL;
 456 
 457                 if (cmd.reserved)
 458                         return -EINVAL;
 459         }
 460 
 461         resp.response_length = offsetof(typeof(resp), response_length) +
 462                 sizeof(resp.response_length);
 463         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 464         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 465         err = -ENOMEM;
 466         if (!in_mad || !out_mad)
 467                 goto out;
 468 
 469         init_query_mad(in_mad);
 470         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 471 
 472         err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
 473                            1, NULL, NULL, in_mad, out_mad);
 474         if (err)
 475                 goto out;
 476 
 477         memset(props, 0, sizeof *props);
 478 
 479         have_ib_ports = num_ib_ports(dev->dev);
 480 
 481         props->fw_ver = dev->dev->caps.fw_ver;
 482         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
 483                 IB_DEVICE_PORT_ACTIVE_EVENT             |
 484                 IB_DEVICE_SYS_IMAGE_GUID                |
 485                 IB_DEVICE_RC_RNR_NAK_GEN                |
 486                 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 487         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
 488                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
 489         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
 490                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
 491         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
 492                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 493         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 494                 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 495         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 496                 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 497         if (dev->dev->caps.max_gso_sz &&
 498             (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
 499             (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
 500                 props->device_cap_flags |= IB_DEVICE_UD_TSO;
 501         if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
 502                 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
 503         if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
 504             (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
 505             (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
 506                 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 507         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
 508                 props->device_cap_flags |= IB_DEVICE_XRC;
 509         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
 510                 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
 511         if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
 512                 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
 513                         props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 514                 else
 515                         props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
 516         }
 517         if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
 518                 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 519 
 520         props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 521 
 522         props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 523                 0xffffff;
 524         props->vendor_part_id      = dev->dev->persist->pdev->device;
 525         props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
 526         memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
 527 
 528         props->max_mr_size         = ~0ull;
 529         props->page_size_cap       = dev->dev->caps.page_size_cap;
 530         props->max_qp              = dev->dev->quotas.qp;
 531         props->max_qp_wr           = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 532         props->max_send_sge =
 533                 min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
 534         props->max_recv_sge =
 535                 min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
 536         props->max_sge_rd = MLX4_MAX_SGE_RD;
 537         props->max_cq              = dev->dev->quotas.cq;
 538         props->max_cqe             = dev->dev->caps.max_cqes;
 539         props->max_mr              = dev->dev->quotas.mpt;
 540         props->max_pd              = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
 541         props->max_qp_rd_atom      = dev->dev->caps.max_qp_dest_rdma;
 542         props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
 543         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
 544         props->max_srq             = dev->dev->quotas.srq;
 545         props->max_srq_wr          = dev->dev->caps.max_srq_wqes - 1;
 546         props->max_srq_sge         = dev->dev->caps.max_srq_sge;
 547         props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
 548         props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 549         props->atomic_cap          = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 550                 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
 551         props->masked_atomic_cap   = props->atomic_cap;
 552         props->max_pkeys           = dev->dev->caps.pkey_table_len[1];
 553         props->max_mcast_grp       = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
 554         props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
 555         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 556                                            props->max_mcast_grp;
 557         props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
 558         props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
 559         props->timestamp_mask = 0xFFFFFFFFFFFFULL;
 560         props->max_ah = INT_MAX;
 561 
 562         if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
 563             mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
 564                 if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
 565                         props->rss_caps.max_rwq_indirection_tables =
 566                                 props->max_qp;
 567                         props->rss_caps.max_rwq_indirection_table_size =
 568                                 dev->dev->caps.max_rss_tbl_sz;
 569                         props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
 570                         props->max_wq_type_rq = props->max_qp;
 571                 }
 572 
 573                 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
 574                         props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
 575         }
 576 
 577         props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
 578         props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
 579 
 580         if (!mlx4_is_slave(dev->dev))
 581                 err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
 582 
 583         if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
 584                 resp.response_length += sizeof(resp.hca_core_clock_offset);
 585                 if (!err && !mlx4_is_slave(dev->dev)) {
 586                         resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
 587                         resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
 588                 }
 589         }
 590 
 591         if (uhw->outlen >= resp.response_length +
 592             sizeof(resp.max_inl_recv_sz)) {
 593                 resp.response_length += sizeof(resp.max_inl_recv_sz);
 594                 resp.max_inl_recv_sz  = dev->dev->caps.max_rq_sg *
 595                         sizeof(struct mlx4_wqe_data_seg);
 596         }
 597 
 598         if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
 599                 if (props->rss_caps.supported_qpts) {
 600                         resp.rss_caps.rx_hash_function =
 601                                 MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
 602 
 603                         resp.rss_caps.rx_hash_fields_mask =
 604                                 MLX4_IB_RX_HASH_SRC_IPV4 |
 605                                 MLX4_IB_RX_HASH_DST_IPV4 |
 606                                 MLX4_IB_RX_HASH_SRC_IPV6 |
 607                                 MLX4_IB_RX_HASH_DST_IPV6 |
 608                                 MLX4_IB_RX_HASH_SRC_PORT_TCP |
 609                                 MLX4_IB_RX_HASH_DST_PORT_TCP |
 610                                 MLX4_IB_RX_HASH_SRC_PORT_UDP |
 611                                 MLX4_IB_RX_HASH_DST_PORT_UDP;
 612 
 613                         if (dev->dev->caps.tunnel_offload_mode ==
 614                             MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
 615                                 resp.rss_caps.rx_hash_fields_mask |=
 616                                         MLX4_IB_RX_HASH_INNER;
 617                 }
 618                 resp.response_length = offsetof(typeof(resp), rss_caps) +
 619                                        sizeof(resp.rss_caps);
 620         }
 621 
 622         if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
 623                 if (dev->dev->caps.max_gso_sz &&
 624                     ((mlx4_ib_port_link_layer(ibdev, 1) ==
 625                     IB_LINK_LAYER_ETHERNET) ||
 626                     (mlx4_ib_port_link_layer(ibdev, 2) ==
 627                     IB_LINK_LAYER_ETHERNET))) {
 628                         resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
 629                         resp.tso_caps.supported_qpts |=
 630                                 1 << IB_QPT_RAW_PACKET;
 631                 }
 632                 resp.response_length = offsetof(typeof(resp), tso_caps) +
 633                                        sizeof(resp.tso_caps);
 634         }
 635 
 636         if (uhw->outlen) {
 637                 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
 638                 if (err)
 639                         goto out;
 640         }
 641 out:
 642         kfree(in_mad);
 643         kfree(out_mad);
 644 
 645         return err;
 646 }
 647 
 648 static enum rdma_link_layer
 649 mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
 650 {
 651         struct mlx4_dev *dev = to_mdev(device)->dev;
 652 
 653         return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
 654                 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 655 }
 656 
 657 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
 658                               struct ib_port_attr *props, int netw_view)
 659 {
 660         struct ib_smp *in_mad  = NULL;
 661         struct ib_smp *out_mad = NULL;
 662         int ext_active_speed;
 663         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 664         int err = -ENOMEM;
 665 
 666         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 667         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 668         if (!in_mad || !out_mad)
 669                 goto out;
 670 
 671         init_query_mad(in_mad);
 672         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 673         in_mad->attr_mod = cpu_to_be32(port);
 674 
 675         if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
 676                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 677 
 678         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 679                                 in_mad, out_mad);
 680         if (err)
 681                 goto out;
 682 
 683 
 684         props->lid              = be16_to_cpup((__be16 *) (out_mad->data + 16));
 685         props->lmc              = out_mad->data[34] & 0x7;
 686         props->sm_lid           = be16_to_cpup((__be16 *) (out_mad->data + 18));
 687         props->sm_sl            = out_mad->data[36] & 0xf;
 688         props->state            = out_mad->data[32] & 0xf;
 689         props->phys_state       = out_mad->data[33] >> 4;
 690         props->port_cap_flags   = be32_to_cpup((__be32 *) (out_mad->data + 20));
 691         if (netw_view)
 692                 props->gid_tbl_len = out_mad->data[50];
 693         else
 694                 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
 695         props->max_msg_sz       = to_mdev(ibdev)->dev->caps.max_msg_sz;
 696         props->pkey_tbl_len     = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
 697         props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
 698         props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
 699         props->active_width     = out_mad->data[31] & 0xf;
 700         props->active_speed     = out_mad->data[35] >> 4;
 701         props->max_mtu          = out_mad->data[41] & 0xf;
 702         props->active_mtu       = out_mad->data[36] >> 4;
 703         props->subnet_timeout   = out_mad->data[51] & 0x1f;
 704         props->max_vl_num       = out_mad->data[37] >> 4;
 705         props->init_type_reply  = out_mad->data[41] >> 4;
 706 
 707         /* Check if extended speeds (EDR/FDR/...) are supported */
 708         if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
 709                 ext_active_speed = out_mad->data[62] >> 4;
 710 
 711                 switch (ext_active_speed) {
 712                 case 1:
 713                         props->active_speed = IB_SPEED_FDR;
 714                         break;
 715                 case 2:
 716                         props->active_speed = IB_SPEED_EDR;
 717                         break;
 718                 }
 719         }
 720 
 721         /* If reported active speed is QDR, check if is FDR-10 */
 722         if (props->active_speed == IB_SPEED_QDR) {
 723                 init_query_mad(in_mad);
 724                 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
 725                 in_mad->attr_mod = cpu_to_be32(port);
 726 
 727                 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
 728                                    NULL, NULL, in_mad, out_mad);
 729                 if (err)
 730                         goto out;
 731 
 732                 /* Checking LinkSpeedActive for FDR-10 */
 733                 if (out_mad->data[15] & 0x1)
 734                         props->active_speed = IB_SPEED_FDR10;
 735         }
 736 
 737         /* Avoid wrong speed value returned by FW if the IB link is down. */
 738         if (props->state == IB_PORT_DOWN)
 739                  props->active_speed = IB_SPEED_SDR;
 740 
 741 out:
 742         kfree(in_mad);
 743         kfree(out_mad);
 744         return err;
 745 }
 746 
 747 static u8 state_to_phys_state(enum ib_port_state state)
 748 {
 749         return state == IB_PORT_ACTIVE ?
 750                 IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
 751 }
 752 
 753 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
 754                                struct ib_port_attr *props)
 755 {
 756 
 757         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
 758         struct mlx4_ib_iboe *iboe = &mdev->iboe;
 759         struct net_device *ndev;
 760         enum ib_mtu tmp;
 761         struct mlx4_cmd_mailbox *mailbox;
 762         int err = 0;
 763         int is_bonded = mlx4_is_bonded(mdev->dev);
 764 
 765         mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 766         if (IS_ERR(mailbox))
 767                 return PTR_ERR(mailbox);
 768 
 769         err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
 770                            MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
 771                            MLX4_CMD_WRAPPED);
 772         if (err)
 773                 goto out;
 774 
 775         props->active_width     =  (((u8 *)mailbox->buf)[5] == 0x40) ||
 776                                    (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 777                                            IB_WIDTH_4X : IB_WIDTH_1X;
 778         props->active_speed     =  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 779                                            IB_SPEED_FDR : IB_SPEED_QDR;
 780         props->port_cap_flags   = IB_PORT_CM_SUP;
 781         props->ip_gids = true;
 782         props->gid_tbl_len      = mdev->dev->caps.gid_table_len[port];
 783         props->max_msg_sz       = mdev->dev->caps.max_msg_sz;
 784         props->pkey_tbl_len     = 1;
 785         props->max_mtu          = IB_MTU_4096;
 786         props->max_vl_num       = 2;
 787         props->state            = IB_PORT_DOWN;
 788         props->phys_state       = state_to_phys_state(props->state);
 789         props->active_mtu       = IB_MTU_256;
 790         spin_lock_bh(&iboe->lock);
 791         ndev = iboe->netdevs[port - 1];
 792         if (ndev && is_bonded) {
 793                 rcu_read_lock(); /* required to get upper dev */
 794                 ndev = netdev_master_upper_dev_get_rcu(ndev);
 795                 rcu_read_unlock();
 796         }
 797         if (!ndev)
 798                 goto out_unlock;
 799 
 800         tmp = iboe_get_mtu(ndev->mtu);
 801         props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
 802 
 803         props->state            = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
 804                                         IB_PORT_ACTIVE : IB_PORT_DOWN;
 805         props->phys_state       = state_to_phys_state(props->state);
 806 out_unlock:
 807         spin_unlock_bh(&iboe->lock);
 808 out:
 809         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 810         return err;
 811 }
 812 
 813 int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 814                          struct ib_port_attr *props, int netw_view)
 815 {
 816         int err;
 817 
 818         /* props being zeroed by the caller, avoid zeroing it here */
 819 
 820         err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
 821                 ib_link_query_port(ibdev, port, props, netw_view) :
 822                                 eth_link_query_port(ibdev, port, props);
 823 
 824         return err;
 825 }
 826 
 827 static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 828                               struct ib_port_attr *props)
 829 {
 830         /* returns host view */
 831         return __mlx4_ib_query_port(ibdev, port, props, 0);
 832 }
 833 
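      /*
       * The subnet prefix comes from PortInfo and the GUID from GuidInfo.
       * In host view on a multi-function device only index 0 is valid;
       * higher indexes return the null GUID.
       */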
 834 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 835                         union ib_gid *gid, int netw_view)
 836 {
 837         struct ib_smp *in_mad  = NULL;
 838         struct ib_smp *out_mad = NULL;
 839         int err = -ENOMEM;
 840         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 841         int clear = 0;
 842         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 843 
 844         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 845         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 846         if (!in_mad || !out_mad)
 847                 goto out;
 848 
 849         init_query_mad(in_mad);
 850         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 851         in_mad->attr_mod = cpu_to_be32(port);
 852 
 853         if (mlx4_is_mfunc(dev->dev) && netw_view)
 854                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 855 
 856         err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
 857         if (err)
 858                 goto out;
 859 
 860         memcpy(gid->raw, out_mad->data + 8, 8);
 861 
 862         if (mlx4_is_mfunc(dev->dev) && !netw_view) {
 863                 if (index) {
 864                         /* For any index > 0, return the null guid */
 865                         err = 0;
 866                         clear = 1;
 867                         goto out;
 868                 }
 869         }
 870 
 871         init_query_mad(in_mad);
 872         in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
 873         in_mad->attr_mod = cpu_to_be32(index / 8);
 874 
 875         err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
 876                            NULL, NULL, in_mad, out_mad);
 877         if (err)
 878                 goto out;
 879 
 880         memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
 881 
 882 out:
 883         if (clear)
 884                 memset(gid->raw + 8, 0, 8);
 885         kfree(in_mad);
 886         kfree(out_mad);
 887         return err;
 888 }
 889 
 890 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 891                              union ib_gid *gid)
 892 {
 893         if (rdma_protocol_ib(ibdev, port))
 894                 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
 895         return 0;
 896 }
 897 
 898 static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
 899 {
 900         union sl2vl_tbl_to_u64 sl2vl64;
 901         struct ib_smp *in_mad  = NULL;
 902         struct ib_smp *out_mad = NULL;
 903         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 904         int err = -ENOMEM;
 905         int jj;
 906 
 907         if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
 908                 *sl2vl_tbl = 0;
 909                 return 0;
 910         }
 911 
 912         in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
 913         out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
 914         if (!in_mad || !out_mad)
 915                 goto out;
 916 
 917         init_query_mad(in_mad);
 918         in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
 919         in_mad->attr_mod = 0;
 920 
 921         if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
 922                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 923 
 924         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 925                            in_mad, out_mad);
 926         if (err)
 927                 goto out;
 928 
 929         for (jj = 0; jj < 8; jj++)
 930                 sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
 931         *sl2vl_tbl = sl2vl64.sl64;
 932 
 933 out:
 934         kfree(in_mad);
 935         kfree(out_mad);
 936         return err;
 937 }
 938 
 939 static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
 940 {
 941         u64 sl2vl;
 942         int i;
 943         int err;
 944 
 945         for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
 946                 if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
 947                         continue;
 948                 err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
 949                 if (err) {
 950                         pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
 951                                i, err);
 952                         sl2vl = 0;
 953                 }
 954                 atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
 955         }
 956 }
 957 
 958 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 959                          u16 *pkey, int netw_view)
 960 {
 961         struct ib_smp *in_mad  = NULL;
 962         struct ib_smp *out_mad = NULL;
 963         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 964         int err = -ENOMEM;
 965 
 966         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 967         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 968         if (!in_mad || !out_mad)
 969                 goto out;
 970 
 971         init_query_mad(in_mad);
 972         in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
 973         in_mad->attr_mod = cpu_to_be32(index / 32);
 974 
 975         if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
 976                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 977 
 978         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 979                            in_mad, out_mad);
 980         if (err)
 981                 goto out;
 982 
 983         *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
 984 
 985 out:
 986         kfree(in_mad);
 987         kfree(out_mad);
 988         return err;
 989 }
 990 
 991 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
 992 {
 993         return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
 994 }
 995 
 996 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
 997                                  struct ib_device_modify *props)
 998 {
 999         struct mlx4_cmd_mailbox *mailbox;
1000         unsigned long flags;
1001 
1002         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
1003                 return -EOPNOTSUPP;
1004 
1005         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
1006                 return 0;
1007 
1008         if (mlx4_is_slave(to_mdev(ibdev)->dev))
1009                 return -EOPNOTSUPP;
1010 
1011         spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
1012         memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1013         spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
1014 
 1015         /*
 1016          * If possible, pass the node desc to FW so that it can generate
 1017          * an IB trap 144 (local change).  If the cmd fails, just ignore it.
 1018          */
1019         mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
1020         if (IS_ERR(mailbox))
1021                 return 0;
1022 
1023         memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1024         mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
1025                  MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1026 
1027         mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
1028 
1029         return 0;
1030 }
1031 
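      /*
       * Update the IB port capability mask (and optionally reset the
       * QKey violation counter) via SET_PORT.  The mailbox layout depends
       * on whether the device uses the old port-command format.
       */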
1032 static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
1033                             u32 cap_mask)
1034 {
1035         struct mlx4_cmd_mailbox *mailbox;
1036         int err;
1037 
1038         mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
1039         if (IS_ERR(mailbox))
1040                 return PTR_ERR(mailbox);
1041 
1042         if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
1043                 *(u8 *) mailbox->buf         = !!reset_qkey_viols << 6;
1044                 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
1045         } else {
1046                 ((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
1047                 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
1048         }
1049 
1050         err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
1051                        MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1052                        MLX4_CMD_WRAPPED);
1053 
1054         mlx4_free_cmd_mailbox(dev->dev, mailbox);
1055         return err;
1056 }
1057 
1058 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
1059                                struct ib_port_modify *props)
1060 {
1061         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
1062         u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
1063         struct ib_port_attr attr;
1064         u32 cap_mask;
1065         int err;
1066 
1067         /* return OK if this is RoCE. CM calls ib_modify_port() regardless
1068          * of whether port link layer is ETH or IB. For ETH ports, qkey
1069          * violations and port capabilities are not meaningful.
1070          */
1071         if (is_eth)
1072                 return 0;
1073 
1074         mutex_lock(&mdev->cap_mask_mutex);
1075 
1076         err = ib_query_port(ibdev, port, &attr);
1077         if (err)
1078                 goto out;
1079 
1080         cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
1081                 ~props->clr_port_cap_mask;
1082 
1083         err = mlx4_ib_SET_PORT(mdev, port,
1084                                !!(mask & IB_PORT_RESET_QKEY_CNTR),
1085                                cap_mask);
1086 
1087 out:
1088         mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
1089         return err;
1090 }
1091 
1092 static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
1093                                   struct ib_udata *udata)
1094 {
1095         struct ib_device *ibdev = uctx->device;
1096         struct mlx4_ib_dev *dev = to_mdev(ibdev);
1097         struct mlx4_ib_ucontext *context = to_mucontext(uctx);
1098         struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
1099         struct mlx4_ib_alloc_ucontext_resp resp;
1100         int err;
1101 
1102         if (!dev->ib_active)
1103                 return -EAGAIN;
1104 
1105         if (ibdev->ops.uverbs_abi_ver ==
1106             MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
1107                 resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
1108                 resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
1109                 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1110         } else {
1111                 resp.dev_caps         = dev->dev->caps.userspace_caps;
1112                 resp.qp_tab_size      = dev->dev->caps.num_qps;
1113                 resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
1114                 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1115                 resp.cqe_size         = dev->dev->caps.cqe_size;
1116         }
1117 
1118         err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
1119         if (err)
1120                 return err;
1121 
1122         INIT_LIST_HEAD(&context->db_page_list);
1123         mutex_init(&context->db_page_mutex);
1124 
1125         INIT_LIST_HEAD(&context->wqn_ranges_list);
1126         mutex_init(&context->wqn_ranges_mutex);
1127 
1128         if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
1129                 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
1130         else
1131                 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
1132 
1133         if (err) {
1134                 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
1135                 return -EFAULT;
1136         }
1137 
1138         return err;
1139 }
1140 
1141 static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1142 {
1143         struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1144 
1145         mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
1146 }
1147 
1148 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1149 {
1150 }
1151 
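      /*
       * mmap offsets: page 0 maps the UAR, page 1 the BlueFlame register
       * (when supported), and page 3 the HCA internal clock.
       */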
1152 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1153 {
1154         struct mlx4_ib_dev *dev = to_mdev(context->device);
1155 
1156         switch (vma->vm_pgoff) {
1157         case 0:
1158                 return rdma_user_mmap_io(context, vma,
1159                                          to_mucontext(context)->uar.pfn,
1160                                          PAGE_SIZE,
1161                                          pgprot_noncached(vma->vm_page_prot));
1162 
1163         case 1:
1164                 if (dev->dev->caps.bf_reg_size == 0)
1165                         return -EINVAL;
1166                 return rdma_user_mmap_io(
1167                         context, vma,
1168                         to_mucontext(context)->uar.pfn +
1169                                 dev->dev->caps.num_uars,
1170                         PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
1171 
1172         case 3: {
1173                 struct mlx4_clock_params params;
1174                 int ret;
1175 
1176                 ret = mlx4_get_internal_clock_params(dev->dev, &params);
1177                 if (ret)
1178                         return ret;
1179 
1180                 return rdma_user_mmap_io(
1181                         context, vma,
1182                         (pci_resource_start(dev->dev->persist->pdev,
1183                                             params.bar) +
1184                          params.offset) >>
1185                                 PAGE_SHIFT,
1186                         PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
1187         }
1188 
1189         default:
1190                 return -EINVAL;
1191         }
1192 }
1193 
1194 static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
1195 {
1196         struct mlx4_ib_pd *pd = to_mpd(ibpd);
1197         struct ib_device *ibdev = ibpd->device;
1198         int err;
1199 
1200         err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
1201         if (err)
1202                 return err;
1203 
1204         if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
1205                 mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
1206                 return -EFAULT;
1207         }
1208         return 0;
1209 }
1210 
1211 static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
1212 {
1213         mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
1214 }
1215 
1216 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
1217                                           struct ib_udata *udata)
1218 {
1219         struct mlx4_ib_xrcd *xrcd;
1220         struct ib_cq_init_attr cq_attr = {};
1221         int err;
1222 
1223         if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1224                 return ERR_PTR(-ENOSYS);
1225 
1226         xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
1227         if (!xrcd)
1228                 return ERR_PTR(-ENOMEM);
1229 
1230         err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
1231         if (err)
1232                 goto err1;
1233 
1234         xrcd->pd = ib_alloc_pd(ibdev, 0);
1235         if (IS_ERR(xrcd->pd)) {
1236                 err = PTR_ERR(xrcd->pd);
1237                 goto err2;
1238         }
1239 
1240         cq_attr.cqe = 1;
1241         xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
1242         if (IS_ERR(xrcd->cq)) {
1243                 err = PTR_ERR(xrcd->cq);
1244                 goto err3;
1245         }
1246 
1247         return &xrcd->ibxrcd;
1248 
1249 err3:
1250         ib_dealloc_pd(xrcd->pd);
1251 err2:
1252         mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
1253 err1:
1254         kfree(xrcd);
1255         return ERR_PTR(err);
1256 }
1257 
1258 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
1259 {
1260         ib_destroy_cq(to_mxrcd(xrcd)->cq);
1261         ib_dealloc_pd(to_mxrcd(xrcd)->pd);
1262         mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
1263         kfree(xrcd);
1264 
1265         return 0;
1266 }
1267 
1268 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1269 {
1270         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1271         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1272         struct mlx4_ib_gid_entry *ge;
1273 
1274         ge = kzalloc(sizeof *ge, GFP_KERNEL);
1275         if (!ge)
1276                 return -ENOMEM;
1277 
1278         ge->gid = *gid;
1279         if (mlx4_ib_add_mc(mdev, mqp, gid)) {
1280                 ge->port = mqp->port;
1281                 ge->added = 1;
1282         }
1283 
1284         mutex_lock(&mqp->mutex);
1285         list_add_tail(&ge->list, &mqp->gid_list);
1286         mutex_unlock(&mqp->mutex);
1287 
1288         return 0;
1289 }
1290 
1291 static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1292                                           struct mlx4_ib_counters *ctr_table)
1293 {
1294         struct counter_index *counter, *tmp_count;
1295 
1296         mutex_lock(&ctr_table->mutex);
1297         list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1298                                  list) {
1299                 if (counter->allocated)
1300                         mlx4_counter_free(ibdev->dev, counter->index);
1301                 list_del(&counter->list);
1302                 kfree(counter);
1303         }
1304         mutex_unlock(&ctr_table->mutex);
1305 }
1306 
1307 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1308                    union ib_gid *gid)
1309 {
1310         struct net_device *ndev;
1311         int ret = 0;
1312 
1313         if (!mqp->port)
1314                 return 0;
1315 
1316         spin_lock_bh(&mdev->iboe.lock);
1317         ndev = mdev->iboe.netdevs[mqp->port - 1];
1318         if (ndev)
1319                 dev_hold(ndev);
1320         spin_unlock_bh(&mdev->iboe.lock);
1321 
1322         if (ndev) {
1323                 ret = 1;
1324                 dev_put(ndev);
1325         }
1326 
1327         return ret;
1328 }
1329 
1330 struct mlx4_ib_steering {
1331         struct list_head list;
1332         struct mlx4_flow_reg_id reg_id;
1333         union ib_gid gid;
1334 };
1335 
1336 #define LAST_ETH_FIELD vlan_tag
1337 #define LAST_IB_FIELD sl
1338 #define LAST_IPV4_FIELD dst_ip
1339 #define LAST_TCP_UDP_FIELD src_port
1340 
1341 /* 'field' is the last filter member the hardware supports; the mask must be zero beyond it */
1342 #define FIELDS_NOT_SUPPORTED(filter, field)\
1343         memchr_inv((void *)&filter.field  +\
1344                    sizeof(filter.field), 0,\
1345                    sizeof(filter) -\
1346                    offsetof(typeof(filter), field) -\
1347                    sizeof(filter.field))
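
/*
 * Editor's note: memchr_inv() returns non-NULL (i.e. "fields not supported")
 * when any mask byte located after 'field' in the filter structure is
 * non-zero.  E.g. FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)
 * accepts a mask that only sets dst_mac/src_mac/ether_type/vlan_tag.
 */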
1348 
1349 static int parse_flow_attr(struct mlx4_dev *dev,
1350                            u32 qp_num,
1351                            union ib_flow_spec *ib_spec,
1352                            struct _rule_hw *mlx4_spec)
1353 {
1354         enum mlx4_net_trans_rule_id type;
1355 
1356         switch (ib_spec->type) {
1357         case IB_FLOW_SPEC_ETH:
1358                 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1359                         return -ENOTSUPP;
1360 
1361                 type = MLX4_NET_TRANS_RULE_ID_ETH;
1362                 memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
1363                        ETH_ALEN);
1364                 memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
1365                        ETH_ALEN);
1366                 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
1367                 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
1368                 break;
1369         case IB_FLOW_SPEC_IB:
1370                 if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
1371                         return -ENOTSUPP;
1372 
1373                 type = MLX4_NET_TRANS_RULE_ID_IB;
1374                 mlx4_spec->ib.l3_qpn =
1375                         cpu_to_be32(qp_num);
1376                 mlx4_spec->ib.qpn_mask =
1377                         cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
1378                 break;
1379 
1380 
1381         case IB_FLOW_SPEC_IPV4:
1382                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1383                         return -ENOTSUPP;
1384 
1385                 type = MLX4_NET_TRANS_RULE_ID_IPV4;
1386                 mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
1387                 mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
1388                 mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
1389                 mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
1390                 break;
1391 
1392         case IB_FLOW_SPEC_TCP:
1393         case IB_FLOW_SPEC_UDP:
1394                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
1395                         return -ENOTSUPP;
1396 
1397                 type = ib_spec->type == IB_FLOW_SPEC_TCP ?
1398                                         MLX4_NET_TRANS_RULE_ID_TCP :
1399                                         MLX4_NET_TRANS_RULE_ID_UDP;
1400                 mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
1401                 mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
1402                 mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
1403                 mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
1404                 break;
1405 
1406         default:
1407                 return -EINVAL;
1408         }
1409         if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
1410             mlx4_hw_rule_sz(dev, type) < 0)
1411                 return -EINVAL;
1412         mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
1413         mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
1414         return mlx4_hw_rule_sz(dev, type);
1415 }
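
/*
 * Editor's note: on success parse_flow_attr() returns the size in bytes of
 * the hardware rule written to *mlx4_spec (mlx4_spec->size itself is stored
 * in dwords); callers add the return value to a running byte count and
 * advance the output pointer by it, as __mlx4_ib_create_flow() does below.
 */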
1416 
1417 struct default_rules {
1418         __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1419         __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1420         __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
1421         __u8  link_layer;
1422 };
1423 static const struct default_rules default_table[] = {
1424         {
1425                 .mandatory_fields = {IB_FLOW_SPEC_IPV4},
1426                 .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
1427                 .rules_create_list = {IB_FLOW_SPEC_IB},
1428                 .link_layer = IB_LINK_LAYER_INFINIBAND
1429         }
1430 };
1431 
1432 static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
1433                                          struct ib_flow_attr *flow_attr)
1434 {
1435         int i, j, k;
1436         void *ib_flow;
1437         const struct default_rules *pdefault_rules = default_table;
1438         u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
1439 
1440         for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
1441                 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
1442                 memset(&field_types, 0, sizeof(field_types));
1443 
1444                 if (link_layer != pdefault_rules->link_layer)
1445                         continue;
1446 
1447                 ib_flow = flow_attr + 1;
1448                 /* we assume the specs are sorted */
1449                 for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
1450                      j < flow_attr->num_of_specs; k++) {
1451                         union ib_flow_spec *current_flow =
1452                                 (union ib_flow_spec *)ib_flow;
1453 
1454                         /* same layer but different type */
1455                         if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
1456                              (pdefault_rules->mandatory_fields[k] &
1457                               IB_FLOW_SPEC_LAYER_MASK)) &&
1458                             (current_flow->type !=
1459                              pdefault_rules->mandatory_fields[k]))
1460                                 goto out;
1461 
1462                         /* same layer, try match next one */
1463                         if (current_flow->type ==
1464                             pdefault_rules->mandatory_fields[k]) {
1465                                 j++;
1466                                 ib_flow +=
1467                                         ((union ib_flow_spec *)ib_flow)->size;
1468                         }
1469                 }
1470 
1471                 ib_flow = flow_attr + 1;
1472                 for (j = 0; j < flow_attr->num_of_specs;
1473                      j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
1474                         for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
1475                                 /* same layer and same type */
1476                                 if (((union ib_flow_spec *)ib_flow)->type ==
1477                                     pdefault_rules->mandatory_not_fields[k])
1478                                         goto out;
1479 
1480                 return i;
1481         }
1482 out:
1483         return -1;
1484 }
1485 
1486 static int __mlx4_ib_create_default_rules(
1487                 struct mlx4_ib_dev *mdev,
1488                 struct ib_qp *qp,
1489                 const struct default_rules *pdefault_rules,
1490                 struct _rule_hw *mlx4_spec) {
1491         int size = 0;
1492         int i;
1493 
1494         for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
1495                 union ib_flow_spec ib_spec = {};
1496                 int ret;
1497 
1498                 switch (pdefault_rules->rules_create_list[i]) {
1499                 case 0:
1500                         /* no rule */
1501                         continue;
1502                 case IB_FLOW_SPEC_IB:
1503                         ib_spec.type = IB_FLOW_SPEC_IB;
1504                         ib_spec.size = sizeof(struct ib_flow_spec_ib);
1505 
1506                         break;
1507                 default:
1508                         /* invalid rule */
1509                         return -EINVAL;
1510                 }
1511                 /* We must insert an empty rule here; the qpn is ignored */
1512                 ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
1513                                       mlx4_spec);
1514                 if (ret < 0) {
1515                         pr_info("invalid parsing\n");
1516                         return -EINVAL;
1517                 }
1518 
1519                 mlx4_spec = (void *)mlx4_spec + ret;
1520                 size += ret;
1521         }
1522         return size;
1523 }
1524 
1525 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1526                           int domain,
1527                           enum mlx4_net_trans_promisc_mode flow_type,
1528                           u64 *reg_id)
1529 {
1530         int ret, i;
1531         int size = 0;
1532         void *ib_flow;
1533         struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1534         struct mlx4_cmd_mailbox *mailbox;
1535         struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1536         int default_flow;
1537 
1538         static const u16 __mlx4_domain[] = {
1539                 [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
1540                 [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
1541                 [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
1542                 [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
1543         };
1544 
1545         if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1546                 pr_err("Invalid priority value %d\n", flow_attr->priority);
1547                 return -EINVAL;
1548         }
1549 
1550         if (domain >= IB_FLOW_DOMAIN_NUM) {
1551                 pr_err("Invalid domain value %d\n", domain);
1552                 return -EINVAL;
1553         }
1554 
1555         if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1556                 return -EINVAL;
1557 
1558         mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1559         if (IS_ERR(mailbox))
1560                 return PTR_ERR(mailbox);
1561         ctrl = mailbox->buf;
1562 
1563         ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
1564                                  flow_attr->priority);
1565         ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1566         ctrl->port = flow_attr->port;
1567         ctrl->qpn = cpu_to_be32(qp->qp_num);
1568 
1569         ib_flow = flow_attr + 1;
1570         size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1571         /* Add default flows */
1572         default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1573         if (default_flow >= 0) {
1574                 ret = __mlx4_ib_create_default_rules(
1575                                 mdev, qp, default_table + default_flow,
1576                                 mailbox->buf + size);
1577                 if (ret < 0) {
1578                         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1579                         return -EINVAL;
1580                 }
1581                 size += ret;
1582         }
1583         for (i = 0; i < flow_attr->num_of_specs; i++) {
1584                 ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1585                                       mailbox->buf + size);
1586                 if (ret < 0) {
1587                         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1588                         return -EINVAL;
1589                 }
1590                 ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1591                 size += ret;
1592         }
1593 
1594         if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
1595             flow_attr->num_of_specs == 1) {
1596                 struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
1597                 enum ib_flow_spec_type header_spec =
1598                         ((union ib_flow_spec *)(flow_attr + 1))->type;
1599 
1600                 if (header_spec == IB_FLOW_SPEC_ETH)
1601                         mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
1602         }
1603 
1604         ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1605                            MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1606                            MLX4_CMD_NATIVE);
1607         if (ret == -ENOMEM)
1608                 pr_err("mcg table is full. Failed to register network rule.\n");
1609         else if (ret == -ENXIO)
1610                 pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
1611         else if (ret)
1612                 pr_err("Invalid argument. Failed to register network rule.\n");
1613 
1614         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1615         return ret;
1616 }
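
/*
 * Editor's note: the mailbox passed to MLX4_QP_FLOW_STEERING_ATTACH holds a
 * mlx4_net_trans_rule_hw_ctrl header followed by the packed _rule_hw specs
 * built above; 'size' is accumulated in bytes and handed to the firmware in
 * dwords (size >> 2), which returns the rule's reg_id on success.
 */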
1617 
1618 static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1619 {
1620         int err;
1621         err = mlx4_cmd(dev, reg_id, 0, 0,
1622                        MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1623                        MLX4_CMD_NATIVE);
1624         if (err)
1625                 pr_err("Failed to detach network rule, registration id = 0x%llx\n",
1626                        reg_id);
1627         return err;
1628 }
1629 
1630 static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1631                                     u64 *reg_id)
1632 {
1633         void *ib_flow;
1634         union ib_flow_spec *ib_spec;
1635         struct mlx4_dev *dev = to_mdev(qp->device)->dev;
1636         int err = 0;
1637 
1638         if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
1639             dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
1640                 return 0; /* do nothing */
1641 
1642         ib_flow = flow_attr + 1;
1643         ib_spec = (union ib_flow_spec *)ib_flow;
1644 
1645         if (ib_spec->type !=  IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
1646                 return 0; /* do nothing */
1647 
1648         err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
1649                                     flow_attr->port, qp->qp_num,
1650                                     MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
1651                                     reg_id);
1652         return err;
1653 }
1654 
1655 static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1656                                       struct ib_flow_attr *flow_attr,
1657                                       enum mlx4_net_trans_promisc_mode *type)
1658 {
1659         int err = 0;
1660 
1661         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1662             (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1663             (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1664                 return -EOPNOTSUPP;
1665         }
1666 
1667         if (flow_attr->num_of_specs == 0) {
1668                 type[0] = MLX4_FS_MC_SNIFFER;
1669                 type[1] = MLX4_FS_UC_SNIFFER;
1670         } else {
1671                 union ib_flow_spec *ib_spec;
1672 
1673                 ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1674                 if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
1675                         return -EINVAL;
1676 
1677                 /* if the mask is all zeros, sniff both MC and UC */
1678                 if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1679                         type[0] = MLX4_FS_MC_SNIFFER;
1680                         type[1] = MLX4_FS_UC_SNIFFER;
1681                 } else {
1682                         u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1683                                             ib_spec->eth.mask.dst_mac[1],
1684                                             ib_spec->eth.mask.dst_mac[2],
1685                                             ib_spec->eth.mask.dst_mac[3],
1686                                             ib_spec->eth.mask.dst_mac[4],
1687                                             ib_spec->eth.mask.dst_mac[5]};
1688 
1689                         /* The xor above flipped only the MC bit; a non-empty mask
1690                          * is valid only if that bit is set and all others are zero.
1691                          */
1692                         if (!is_zero_ether_addr(&mac[0]))
1693                                 return -EINVAL;
1694 
1695                         if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1696                                 type[0] = MLX4_FS_MC_SNIFFER;
1697                         else
1698                                 type[0] = MLX4_FS_UC_SNIFFER;
1699                 }
1700         }
1701 
1702         return err;
1703 }
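
/*
 * Editor's note: worked example of the mask check above -- a dst_mac mask of
 * 01:00:00:00:00:00 XORs to all zeros and is accepted, selecting the MC or
 * UC sniffer from the multicast bit of the dst_mac value; a mask such as
 * ff:ff:ff:ff:ff:ff XORs to fe:ff:ff:ff:ff:ff and is rejected with -EINVAL.
 */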
1704 
1705 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1706                                     struct ib_flow_attr *flow_attr,
1707                                     int domain, struct ib_udata *udata)
1708 {
1709         int err = 0, i = 0, j = 0;
1710         struct mlx4_ib_flow *mflow;
1711         enum mlx4_net_trans_promisc_mode type[2];
1712         struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1713         int is_bonded = mlx4_is_bonded(dev);
1714 
1715         if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
1716                 return ERR_PTR(-EINVAL);
1717 
1718         if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1719                 return ERR_PTR(-EOPNOTSUPP);
1720 
1721         if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1722             (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1723                 return ERR_PTR(-EOPNOTSUPP);
1724 
1725         if (udata &&
1726             udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
1727                 return ERR_PTR(-EOPNOTSUPP);
1728 
1729         memset(type, 0, sizeof(type));
1730 
1731         mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1732         if (!mflow) {
1733                 err = -ENOMEM;
1734                 goto err_free;
1735         }
1736 
1737         switch (flow_attr->type) {
1738         case IB_FLOW_ATTR_NORMAL:
1739                 /* If the don't-trap flag (continue match) is set, then under
1740                  * specific conditions traffic is replicated to the given qp
1741                  * without being stolen from it.
1742                  */
1743                 if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1744                         err = mlx4_ib_add_dont_trap_rule(dev,
1745                                                          flow_attr,
1746                                                          type);
1747                         if (err)
1748                                 goto err_free;
1749                 } else {
1750                         type[0] = MLX4_FS_REGULAR;
1751                 }
1752                 break;
1753 
1754         case IB_FLOW_ATTR_ALL_DEFAULT:
1755                 type[0] = MLX4_FS_ALL_DEFAULT;
1756                 break;
1757 
1758         case IB_FLOW_ATTR_MC_DEFAULT:
1759                 type[0] = MLX4_FS_MC_DEFAULT;
1760                 break;
1761 
1762         case IB_FLOW_ATTR_SNIFFER:
1763                 type[0] = MLX4_FS_MIRROR_RX_PORT;
1764                 type[1] = MLX4_FS_MIRROR_SX_PORT;
1765                 break;
1766 
1767         default:
1768                 err = -EINVAL;
1769                 goto err_free;
1770         }
1771 
1772         while (i < ARRAY_SIZE(type) && type[i]) {
1773                 err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
1774                                             &mflow->reg_id[i].id);
1775                 if (err)
1776                         goto err_create_flow;
1777                 if (is_bonded) {
1778                         /* Application always sees one port so the mirror rule
1779                          * must be on port #2
1780                          */
1781                         flow_attr->port = 2;
1782                         err = __mlx4_ib_create_flow(qp, flow_attr,
1783                                                     domain, type[j],
1784                                                     &mflow->reg_id[j].mirror);
1785                         flow_attr->port = 1;
1786                         if (err)
1787                                 goto err_create_flow;
1788                         j++;
1789                 }
1790 
1791                 i++;
1792         }
1793 
1794         if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1795                 err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1796                                                &mflow->reg_id[i].id);
1797                 if (err)
1798                         goto err_create_flow;
1799 
1800                 if (is_bonded) {
1801                         flow_attr->port = 2;
1802                         err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1803                                                        &mflow->reg_id[j].mirror);
1804                         flow_attr->port = 1;
1805                         if (err)
1806                                 goto err_create_flow;
1807                         j++;
1808                 }
1809                 /* account for the tunnel-steering rule just created */
1810                 i++;
1811         }
1812 
1813         return &mflow->ibflow;
1814 
1815 err_create_flow:
1816         while (i) {
1817                 i--;
1818                 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1819                                              mflow->reg_id[i].id);
1820         }
1821 
1822         while (j) {
1823                 j--;
1824                 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1825                                              mflow->reg_id[j].mirror);
1826         }
1827 err_free:
1828         kfree(mflow);
1829         return ERR_PTR(err);
1830 }
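
/*
 * Editor's note: a minimal, hypothetical user-space sketch (assuming the
 * libibverbs ibv_create_flow() API; names and values are illustrative) of
 * how mlx4_ib_create_flow() above is typically exercised.  It attaches an
 * IBV_FLOW_ATTR_NORMAL rule with a single L2 spec steering frames for one
 * destination MAC to an existing QP.
 */
#include <string.h>
#include <infiniband/verbs.h>

static struct ibv_flow *steer_dmac_to_qp(struct ibv_qp *qp, uint8_t port,
                                         const uint8_t dmac[6])
{
        struct {
                struct ibv_flow_attr     attr;
                struct ibv_flow_spec_eth eth;
        } rule = {
                .attr = {
                        .type         = IBV_FLOW_ATTR_NORMAL,
                        .size         = sizeof(rule),
                        .priority     = 0,
                        .num_of_specs = 1,
                        .port         = port,
                },
                .eth = {
                        .type = IBV_FLOW_SPEC_ETH,
                        .size = sizeof(struct ibv_flow_spec_eth),
                },
        };

        memcpy(rule.eth.val.dst_mac, dmac, 6);
        memset(rule.eth.mask.dst_mac, 0xff, 6);        /* exact match on dst MAC */

        /* lands in mlx4_ib_create_flow(); a later ibv_destroy_flow() lands in
         * mlx4_ib_destroy_flow() */
        return ibv_create_flow(qp, &rule.attr);
}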
1831 
1832 static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1833 {
1834         int err, ret = 0;
1835         int i = 0;
1836         struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1837         struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1838 
1839         while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1840                 err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
1841                 if (err)
1842                         ret = err;
1843                 if (mflow->reg_id[i].mirror) {
1844                         err = __mlx4_ib_destroy_flow(mdev->dev,
1845                                                      mflow->reg_id[i].mirror);
1846                         if (err)
1847                                 ret = err;
1848                 }
1849                 i++;
1850         }
1851 
1852         kfree(mflow);
1853         return ret;
1854 }
1855 
1856 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1857 {
1858         int err;
1859         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1860         struct mlx4_dev *dev = mdev->dev;
1861         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1862         struct mlx4_ib_steering *ib_steering = NULL;
1863         enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
1864         struct mlx4_flow_reg_id reg_id;
1865 
1866         if (mdev->dev->caps.steering_mode ==
1867             MLX4_STEERING_MODE_DEVICE_MANAGED) {
1868                 ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1869                 if (!ib_steering)
1870                         return -ENOMEM;
1871         }
1872 
1873         err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
1874                                     !!(mqp->flags &
1875                                        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1876                                     prot, &reg_id.id);
1877         if (err) {
1878                 pr_err("multicast attach op failed, err %d\n", err);
1879                 goto err_malloc;
1880         }
1881 
1882         reg_id.mirror = 0;
1883         if (mlx4_is_bonded(dev)) {
1884                 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
1885                                             (mqp->port == 1) ? 2 : 1,
1886                                             !!(mqp->flags &
1887                                             MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1888                                             prot, &reg_id.mirror);
1889                 if (err)
1890                         goto err_add;
1891         }
1892 
1893         err = add_gid_entry(ibqp, gid);
1894         if (err)
1895                 goto err_add;
1896 
1897         if (ib_steering) {
1898                 memcpy(ib_steering->gid.raw, gid->raw, 16);
1899                 ib_steering->reg_id = reg_id;
1900                 mutex_lock(&mqp->mutex);
1901                 list_add(&ib_steering->list, &mqp->steering_rules);
1902                 mutex_unlock(&mqp->mutex);
1903         }
1904         return 0;
1905 
1906 err_add:
1907         mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1908                               prot, reg_id.id);
1909         if (reg_id.mirror)
1910                 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1911                                       prot, reg_id.mirror);
1912 err_malloc:
1913         kfree(ib_steering);
1914 
1915         return err;
1916 }
1917 
1918 static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
1919 {
1920         struct mlx4_ib_gid_entry *ge;
1921         struct mlx4_ib_gid_entry *tmp;
1922         struct mlx4_ib_gid_entry *ret = NULL;
1923 
1924         list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1925                 if (!memcmp(raw, ge->gid.raw, 16)) {
1926                         ret = ge;
1927                         break;
1928                 }
1929         }
1930 
1931         return ret;
1932 }
1933 
1934 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1935 {
1936         int err;
1937         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1938         struct mlx4_dev *dev = mdev->dev;
1939         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1940         struct net_device *ndev;
1941         struct mlx4_ib_gid_entry *ge;
1942         struct mlx4_flow_reg_id reg_id = {0, 0};
1943         enum mlx4_protocol prot =  MLX4_PROT_IB_IPV6;
1944 
1945         if (mdev->dev->caps.steering_mode ==
1946             MLX4_STEERING_MODE_DEVICE_MANAGED) {
1947                 struct mlx4_ib_steering *ib_steering;
1948 
1949                 mutex_lock(&mqp->mutex);
1950                 list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
1951                         if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
1952                                 list_del(&ib_steering->list);
1953                                 break;
1954                         }
1955                 }
1956                 mutex_unlock(&mqp->mutex);
1957                 if (&ib_steering->list == &mqp->steering_rules) {
1958                         pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
1959                         return -EINVAL;
1960                 }
1961                 reg_id = ib_steering->reg_id;
1962                 kfree(ib_steering);
1963         }
1964 
1965         err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1966                                     prot, reg_id.id);
1967         if (err)
1968                 return err;
1969 
1970         if (mlx4_is_bonded(dev)) {
1971                 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1972                                             prot, reg_id.mirror);
1973                 if (err)
1974                         return err;
1975         }
1976 
1977         mutex_lock(&mqp->mutex);
1978         ge = find_gid_entry(mqp, gid->raw);
1979         if (ge) {
1980                 spin_lock_bh(&mdev->iboe.lock);
1981                 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
1982                 if (ndev)
1983                         dev_hold(ndev);
1984                 spin_unlock_bh(&mdev->iboe.lock);
1985                 if (ndev)
1986                         dev_put(ndev);
1987                 list_del(&ge->list);
1988                 kfree(ge);
1989         } else
1990                 pr_warn("could not find mgid entry\n");
1991 
1992         mutex_unlock(&mqp->mutex);
1993 
1994         return 0;
1995 }
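
/*
 * Editor's note: a short, hypothetical user-space sketch (assuming the
 * libibverbs API) of the attach/detach pair served by mlx4_ib_mcg_attach()
 * and mlx4_ib_mcg_detach() above.  'mgid' and 'mlid' would normally come
 * from an SA MCMemberRecord join.
 */
#include <infiniband/verbs.h>

static int join_leave_example(struct ibv_qp *ud_qp,
                              const union ibv_gid *mgid, uint16_t mlid)
{
        int err;

        err = ibv_attach_mcast(ud_qp, mgid, mlid);      /* -> mlx4_ib_mcg_attach() */
        if (err)
                return err;

        /* ... receive multicast traffic on ud_qp ... */

        return ibv_detach_mcast(ud_qp, mgid, mlid);     /* -> mlx4_ib_mcg_detach() */
}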
1996 
1997 static int init_node_data(struct mlx4_ib_dev *dev)
1998 {
1999         struct ib_smp *in_mad  = NULL;
2000         struct ib_smp *out_mad = NULL;
2001         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
2002         int err = -ENOMEM;
2003 
2004         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
2005         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
2006         if (!in_mad || !out_mad)
2007                 goto out;
2008 
2009         init_query_mad(in_mad);
2010         in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
2011         if (mlx4_is_master(dev->dev))
2012                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
2013 
2014         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2015         if (err)
2016                 goto out;
2017 
2018         memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
2019 
2020         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
2021 
2022         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2023         if (err)
2024                 goto out;
2025 
2026         dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
2027         memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
2028 
2029 out:
2030         kfree(in_mad);
2031         kfree(out_mad);
2032         return err;
2033 }
2034 
2035 static ssize_t hca_type_show(struct device *device,
2036                              struct device_attribute *attr, char *buf)
2037 {
2038         struct mlx4_ib_dev *dev =
2039                 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2040         return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
2041 }
2042 static DEVICE_ATTR_RO(hca_type);
2043 
2044 static ssize_t hw_rev_show(struct device *device,
2045                            struct device_attribute *attr, char *buf)
2046 {
2047         struct mlx4_ib_dev *dev =
2048                 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2049         return sprintf(buf, "%x\n", dev->dev->rev_id);
2050 }
2051 static DEVICE_ATTR_RO(hw_rev);
2052 
2053 static ssize_t board_id_show(struct device *device,
2054                              struct device_attribute *attr, char *buf)
2055 {
2056         struct mlx4_ib_dev *dev =
2057                 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2058 
2059         return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
2060                        dev->dev->board_id);
2061 }
2062 static DEVICE_ATTR_RO(board_id);
2063 
2064 static struct attribute *mlx4_class_attributes[] = {
2065         &dev_attr_hw_rev.attr,
2066         &dev_attr_hca_type.attr,
2067         &dev_attr_board_id.attr,
2068         NULL
2069 };
2070 
2071 static const struct attribute_group mlx4_attr_group = {
2072         .attrs = mlx4_class_attributes,
2073 };
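
/*
 * Editor's note: these attributes are typically exposed under
 * /sys/class/infiniband/mlx4_<n>/, e.g. (values are illustrative):
 *
 *   $ cat /sys/class/infiniband/mlx4_0/hca_type    ->  MT4099
 *   $ cat /sys/class/infiniband/mlx4_0/hw_rev      ->  0
 *   $ cat /sys/class/infiniband/mlx4_0/board_id    ->  MT_1090120019
 */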
2074 
2075 struct diag_counter {
2076         const char *name;
2077         u32 offset;
2078 };
2079 
2080 #define DIAG_COUNTER(_name, _offset)                    \
2081         { .name = #_name, .offset = _offset }
2082 
2083 static const struct diag_counter diag_basic[] = {
2084         DIAG_COUNTER(rq_num_lle, 0x00),
2085         DIAG_COUNTER(sq_num_lle, 0x04),
2086         DIAG_COUNTER(rq_num_lqpoe, 0x08),
2087         DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2088         DIAG_COUNTER(rq_num_lpe, 0x18),
2089         DIAG_COUNTER(sq_num_lpe, 0x1C),
2090         DIAG_COUNTER(rq_num_wrfe, 0x20),
2091         DIAG_COUNTER(sq_num_wrfe, 0x24),
2092         DIAG_COUNTER(sq_num_mwbe, 0x2C),
2093         DIAG_COUNTER(sq_num_bre, 0x34),
2094         DIAG_COUNTER(sq_num_rire, 0x44),
2095         DIAG_COUNTER(rq_num_rire, 0x48),
2096         DIAG_COUNTER(sq_num_rae, 0x4C),
2097         DIAG_COUNTER(rq_num_rae, 0x50),
2098         DIAG_COUNTER(sq_num_roe, 0x54),
2099         DIAG_COUNTER(sq_num_tree, 0x5C),
2100         DIAG_COUNTER(sq_num_rree, 0x64),
2101         DIAG_COUNTER(rq_num_rnr, 0x68),
2102         DIAG_COUNTER(sq_num_rnr, 0x6C),
2103         DIAG_COUNTER(rq_num_oos, 0x100),
2104         DIAG_COUNTER(sq_num_oos, 0x104),
2105 };
2106 
2107 static const struct diag_counter diag_ext[] = {
2108         DIAG_COUNTER(rq_num_dup, 0x130),
2109         DIAG_COUNTER(sq_num_to, 0x134),
2110 };
2111 
2112 static const struct diag_counter diag_device_only[] = {
2113         DIAG_COUNTER(num_cqovf, 0x1A0),
2114         DIAG_COUNTER(rq_num_udsdprd, 0x118),
2115 };
2116 
2117 static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
2118                                                     u8 port_num)
2119 {
2120         struct mlx4_ib_dev *dev = to_mdev(ibdev);
2121         struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2122 
2123         if (!diag[!!port_num].name)
2124                 return NULL;
2125 
2126         return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
2127                                           diag[!!port_num].num_counters,
2128                                           RDMA_HW_STATS_DEFAULT_LIFESPAN);
2129 }
2130 
2131 static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2132                                 struct rdma_hw_stats *stats,
2133                                 u8 port, int index)
2134 {
2135         struct mlx4_ib_dev *dev = to_mdev(ibdev);
2136         struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2137         u32 hw_value[ARRAY_SIZE(diag_device_only) +
2138                 ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2139         int ret;
2140         int i;
2141 
2142         ret = mlx4_query_diag_counters(dev->dev,
2143                                        MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2144                                        diag[!!port].offset, hw_value,
2145                                        diag[!!port].num_counters, port);
2146 
2147         if (ret)
2148                 return ret;
2149 
2150         for (i = 0; i < diag[!!port].num_counters; i++)
2151                 stats->value[i] = hw_value[i];
2152 
2153         return diag[!!port].num_counters;
2154 }
2155 
2156 static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2157                                          const char ***name,
2158                                          u32 **offset,
2159                                          u32 *num,
2160                                          bool port)
2161 {
2162         u32 num_counters;
2163 
2164         num_counters = ARRAY_SIZE(diag_basic);
2165 
2166         if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2167                 num_counters += ARRAY_SIZE(diag_ext);
2168 
2169         if (!port)
2170                 num_counters += ARRAY_SIZE(diag_device_only);
2171 
2172         *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
2173         if (!*name)
2174                 return -ENOMEM;
2175 
2176         *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2177         if (!*offset)
2178                 goto err_name;
2179 
2180         *num = num_counters;
2181 
2182         return 0;
2183 
2184 err_name:
2185         kfree(*name);
2186         return -ENOMEM;
2187 }
2188 
2189 static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2190                                        const char **name,
2191                                        u32 *offset,
2192                                        bool port)
2193 {
2194         int i;
2195         int j;
2196 
2197         for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2198                 name[i] = diag_basic[i].name;
2199                 offset[i] = diag_basic[i].offset;
2200         }
2201 
2202         if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2203                 for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2204                         name[j] = diag_ext[i].name;
2205                         offset[j] = diag_ext[i].offset;
2206                 }
2207         }
2208 
2209         if (!port) {
2210                 for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2211                         name[j] = diag_device_only[i].name;
2212                         offset[j] = diag_device_only[i].offset;
2213                 }
2214         }
2215 }
2216 
2217 static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
2218         .alloc_hw_stats = mlx4_ib_alloc_hw_stats,
2219         .get_hw_stats = mlx4_ib_get_hw_stats,
2220 };
2221 
2222 static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2223 {
2224         struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2225         int i;
2226         int ret;
2227         bool per_port = !!(ibdev->dev->caps.flags2 &
2228                 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2229 
2230         if (mlx4_is_slave(ibdev->dev))
2231                 return 0;
2232 
2233         for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2234                 /* i == 1 means we are building port counters */
2235                 if (i && !per_port)
2236                         continue;
2237 
2238                 ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
2239                                                     &diag[i].offset,
2240                                                     &diag[i].num_counters, i);
2241                 if (ret)
2242                         goto err_alloc;
2243 
2244                 mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
2245                                            diag[i].offset, i);
2246         }
2247 
2248         ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
2249 
2250         return 0;
2251 
2252 err_alloc:
2253         if (i) {
2254                 kfree(diag[i - 1].name);
2255                 kfree(diag[i - 1].offset);
2256         }
2257 
2258         return ret;
2259 }
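
/*
 * Editor's note: once mlx4_ib_hw_stats_ops is registered, these diagnostic
 * counters are typically visible through sysfs (paths are illustrative):
 *
 *   /sys/class/infiniband/mlx4_0/hw_counters/rq_num_lle           (device-wide)
 *   /sys/class/infiniband/mlx4_0/ports/1/hw_counters/sq_num_rnr   (per port, only
 *                                  with MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
 */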
2260 
2261 static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2262 {
2263         int i;
2264 
2265         for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2266                 kfree(ibdev->diag_counters[i].offset);
2267                 kfree(ibdev->diag_counters[i].name);
2268         }
2269 }
2270 
2271 #define MLX4_IB_INVALID_MAC     ((u64)-1)
2272 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2273                                struct net_device *dev,
2274                                int port)
2275 {
2276         u64 new_smac = 0;
2277         u64 release_mac = MLX4_IB_INVALID_MAC;
2278         struct mlx4_ib_qp *qp;
2279 
2280         read_lock(&dev_base_lock);
2281         new_smac = mlx4_mac_to_u64(dev->dev_addr);
2282         read_unlock(&dev_base_lock);
2283 
2284         atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
2285 
2286         /* no need to update QP1 or register the MAC in non-SRIOV mode */
2287         if (!mlx4_is_mfunc(ibdev->dev))
2288                 return;
2289 
2290         mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
2291         qp = ibdev->qp1_proxy[port - 1];
2292         if (qp) {
2293                 int new_smac_index;
2294                 u64 old_smac;
2295                 struct mlx4_update_qp_params update_params;
2296 
2297                 mutex_lock(&qp->mutex);
2298                 old_smac = qp->pri.smac;
2299                 if (new_smac == old_smac)
2300                         goto unlock;
2301 
2302                 new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
2303 
2304                 if (new_smac_index < 0)
2305                         goto unlock;
2306 
2307                 update_params.smac_index = new_smac_index;
2308                 if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
2309                                    &update_params)) {
2310                         release_mac = new_smac;
2311                         goto unlock;
2312                 }
2313                 /* if the old smac_port was zero, no MAC was registered yet for this QP */
2314                 if (qp->pri.smac_port)
2315                         release_mac = old_smac;
2316                 qp->pri.smac = new_smac;
2317                 qp->pri.smac_port = port;
2318                 qp->pri.smac_index = new_smac_index;
2319         }
2320 
2321 unlock:
2322         if (release_mac != MLX4_IB_INVALID_MAC)
2323                 mlx4_unregister_mac(ibdev->dev, port, release_mac);
2324         if (qp)
2325                 mutex_unlock(&qp->mutex);
2326         mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
2327 }
2328 
2329 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
2330                                  struct net_device *dev,
2331                                  unsigned long event)
2332 
2333 {
2334         struct mlx4_ib_iboe *iboe;
2335         int update_qps_port = -1;
2336         int port;
2337 
2338         ASSERT_RTNL();
2339 
2340         iboe = &ibdev->iboe;
2341 
2342         spin_lock_bh(&iboe->lock);
2343         mlx4_foreach_ib_transport_port(port, ibdev->dev) {
2344 
2345                 iboe->netdevs[port - 1] =
2346                         mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
2347 
2348                 if (dev == iboe->netdevs[port - 1] &&
2349                     (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
2350                      event == NETDEV_UP || event == NETDEV_CHANGE))
2351                         update_qps_port = port;
2352 
2353                 if (dev == iboe->netdevs[port - 1] &&
2354                     (event == NETDEV_UP || event == NETDEV_DOWN)) {
2355                         enum ib_port_state port_state;
2356                         struct ib_event ibev = { };
2357 
2358                         if (ib_get_cached_port_state(&ibdev->ib_dev, port,
2359                                                      &port_state))
2360                                 continue;
2361 
2362                         if (event == NETDEV_UP &&
2363                             (port_state != IB_PORT_ACTIVE ||
2364                              iboe->last_port_state[port - 1] != IB_PORT_DOWN))
2365                                 continue;
2366                         if (event == NETDEV_DOWN &&
2367                             (port_state != IB_PORT_DOWN ||
2368                              iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
2369                                 continue;
2370                         iboe->last_port_state[port - 1] = port_state;
2371 
2372                         ibev.device = &ibdev->ib_dev;
2373                         ibev.element.port_num = port;
2374                         ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
2375                                                           IB_EVENT_PORT_ERR;
2376                         ib_dispatch_event(&ibev);
2377                 }
2378 
2379         }
2380         spin_unlock_bh(&iboe->lock);
2381 
2382         if (update_qps_port > 0)
2383                 mlx4_ib_update_qps(ibdev, dev, update_qps_port);
2384 }
2385 
2386 static int mlx4_ib_netdev_event(struct notifier_block *this,
2387                                 unsigned long event, void *ptr)
2388 {
2389         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2390         struct mlx4_ib_dev *ibdev;
2391 
2392         if (!net_eq(dev_net(dev), &init_net))
2393                 return NOTIFY_DONE;
2394 
2395         ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
2396         mlx4_ib_scan_netdevs(ibdev, dev, event);
2397 
2398         return NOTIFY_DONE;
2399 }
2400 
2401 static void init_pkeys(struct mlx4_ib_dev *ibdev)
2402 {
2403         int port;
2404         int slave;
2405         int i;
2406 
2407         if (mlx4_is_master(ibdev->dev)) {
2408                 for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
2409                      ++slave) {
2410                         for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2411                                 for (i = 0;
2412                                      i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2413                                      ++i) {
2414                                         ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
2415                                         /* master has the identity virt2phys pkey mapping */
2416                                                 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
2417                                                         ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
2418                                         mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
2419                                                              ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
2420                                 }
2421                         }
2422                 }
2423                 /* initialize pkey cache */
2424                 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2425                         for (i = 0;
2426                              i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2427                              ++i)
2428                                 ibdev->pkeys.phys_pkey_cache[port-1][i] =
2429                                         (i) ? 0 : 0xFFFF;
2430                 }
2431         }
2432 }
2433 
2434 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2435 {
2436         int i, j, eq = 0, total_eqs = 0;
2437 
2438         ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
2439                                   sizeof(ibdev->eq_table[0]), GFP_KERNEL);
2440         if (!ibdev->eq_table)
2441                 return;
2442 
2443         for (i = 1; i <= dev->caps.num_ports; i++) {
2444                 for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
2445                      j++, total_eqs++) {
2446                         if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
2447                                 continue;
2448                         ibdev->eq_table[eq] = total_eqs;
2449                         if (!mlx4_assign_eq(dev, i,
2450                                             &ibdev->eq_table[eq]))
2451                                 eq++;
2452                         else
2453                                 ibdev->eq_table[eq] = -1;
2454                 }
2455         }
2456 
2457         for (i = eq; i < dev->caps.num_comp_vectors;
2458              ibdev->eq_table[i++] = -1)
2459                 ;
2460 
2461         /* Advertise the new number of EQs to clients */
2462         ibdev->ib_dev.num_comp_vectors = eq;
2463 }
2464 
2465 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2466 {
2467         int i;
2468         int total_eqs = ibdev->ib_dev.num_comp_vectors;
2469 
2470         /* no eqs were allocated */
2471         if (!ibdev->eq_table)
2472                 return;
2473 
2474         /* Reset the advertised EQ number */
2475         ibdev->ib_dev.num_comp_vectors = 0;
2476 
2477         for (i = 0; i < total_eqs; i++)
2478                 mlx4_release_eq(dev, ibdev->eq_table[i]);
2479 
2480         kfree(ibdev->eq_table);
2481         ibdev->eq_table = NULL;
2482 }
2483 
2484 static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2485                                struct ib_port_immutable *immutable)
2486 {
2487         struct ib_port_attr attr;
2488         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2489         int err;
2490 
2491         if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2492                 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2493                 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2494         } else {
2495                 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2496                         immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2497                 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2498                         immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2499                                 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2500                 immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
2501                 if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
2502                     RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
2503                         immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2504         }
2505 
2506         err = ib_query_port(ibdev, port_num, &attr);
2507         if (err)
2508                 return err;
2509 
2510         immutable->pkey_tbl_len = attr.pkey_tbl_len;
2511         immutable->gid_tbl_len = attr.gid_tbl_len;
2512 
2513         return 0;
2514 }
2515 
2516 static void get_fw_ver_str(struct ib_device *device, char *str)
2517 {
2518         struct mlx4_ib_dev *dev =
2519                 container_of(device, struct mlx4_ib_dev, ib_dev);
2520         snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
2521                  (int) (dev->dev->caps.fw_ver >> 32),
2522                  (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2523                  (int) dev->dev->caps.fw_ver & 0xffff);
2524 }
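
/* Editor's note: yields strings like "2.42.5000" (major.minor.sub_minor);
 * the value shown is illustrative. */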
2525 
2526 static const struct ib_device_ops mlx4_ib_dev_ops = {
2527         .owner = THIS_MODULE,
2528         .driver_id = RDMA_DRIVER_MLX4,
2529         .uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
2530 
2531         .add_gid = mlx4_ib_add_gid,
2532         .alloc_mr = mlx4_ib_alloc_mr,
2533         .alloc_pd = mlx4_ib_alloc_pd,
2534         .alloc_ucontext = mlx4_ib_alloc_ucontext,
2535         .attach_mcast = mlx4_ib_mcg_attach,
2536         .create_ah = mlx4_ib_create_ah,
2537         .create_cq = mlx4_ib_create_cq,
2538         .create_qp = mlx4_ib_create_qp,
2539         .create_srq = mlx4_ib_create_srq,
2540         .dealloc_pd = mlx4_ib_dealloc_pd,
2541         .dealloc_ucontext = mlx4_ib_dealloc_ucontext,
2542         .del_gid = mlx4_ib_del_gid,
2543         .dereg_mr = mlx4_ib_dereg_mr,
2544         .destroy_ah = mlx4_ib_destroy_ah,
2545         .destroy_cq = mlx4_ib_destroy_cq,
2546         .destroy_qp = mlx4_ib_destroy_qp,
2547         .destroy_srq = mlx4_ib_destroy_srq,
2548         .detach_mcast = mlx4_ib_mcg_detach,
2549         .disassociate_ucontext = mlx4_ib_disassociate_ucontext,
2550         .drain_rq = mlx4_ib_drain_rq,
2551         .drain_sq = mlx4_ib_drain_sq,
2552         .get_dev_fw_str = get_fw_ver_str,
2553         .get_dma_mr = mlx4_ib_get_dma_mr,
2554         .get_link_layer = mlx4_ib_port_link_layer,
2555         .get_netdev = mlx4_ib_get_netdev,
2556         .get_port_immutable = mlx4_port_immutable,
2557         .map_mr_sg = mlx4_ib_map_mr_sg,
2558         .mmap = mlx4_ib_mmap,
2559         .modify_cq = mlx4_ib_modify_cq,
2560         .modify_device = mlx4_ib_modify_device,
2561         .modify_port = mlx4_ib_modify_port,
2562         .modify_qp = mlx4_ib_modify_qp,
2563         .modify_srq = mlx4_ib_modify_srq,
2564         .poll_cq = mlx4_ib_poll_cq,
2565         .post_recv = mlx4_ib_post_recv,
2566         .post_send = mlx4_ib_post_send,
2567         .post_srq_recv = mlx4_ib_post_srq_recv,
2568         .process_mad = mlx4_ib_process_mad,
2569         .query_ah = mlx4_ib_query_ah,
2570         .query_device = mlx4_ib_query_device,
2571         .query_gid = mlx4_ib_query_gid,
2572         .query_pkey = mlx4_ib_query_pkey,
2573         .query_port = mlx4_ib_query_port,
2574         .query_qp = mlx4_ib_query_qp,
2575         .query_srq = mlx4_ib_query_srq,
2576         .reg_user_mr = mlx4_ib_reg_user_mr,
2577         .req_notify_cq = mlx4_ib_arm_cq,
2578         .rereg_user_mr = mlx4_ib_rereg_user_mr,
2579         .resize_cq = mlx4_ib_resize_cq,
2580 
2581         INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
2582         INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
2583         INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
2584         INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
2585         INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
2586 };
2587 
2588 static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
2589         .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
2590         .create_wq = mlx4_ib_create_wq,
2591         .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
2592         .destroy_wq = mlx4_ib_destroy_wq,
2593         .modify_wq = mlx4_ib_modify_wq,
2594 };
2595 
2596 static const struct ib_device_ops mlx4_ib_dev_fmr_ops = {
2597         .alloc_fmr = mlx4_ib_fmr_alloc,
2598         .dealloc_fmr = mlx4_ib_fmr_dealloc,
2599         .map_phys_fmr = mlx4_ib_map_phys_fmr,
2600         .unmap_fmr = mlx4_ib_unmap_fmr,
2601 };
2602 
2603 static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
2604         .alloc_mw = mlx4_ib_alloc_mw,
2605         .dealloc_mw = mlx4_ib_dealloc_mw,
2606 };
2607 
2608 static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
2609         .alloc_xrcd = mlx4_ib_alloc_xrcd,
2610         .dealloc_xrcd = mlx4_ib_dealloc_xrcd,
2611 };
2612 
2613 static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
2614         .create_flow = mlx4_ib_create_flow,
2615         .destroy_flow = mlx4_ib_destroy_flow,
2616 };
2617 
2618 static void *mlx4_ib_add(struct mlx4_dev *dev)
2619 {
2620         struct mlx4_ib_dev *ibdev;
2621         int num_ports = 0;
2622         int i, j;
2623         int err;
2624         struct mlx4_ib_iboe *iboe;
2625         int ib_num_ports = 0;
2626         int num_req_counters;
2627         int allocated;
2628         u32 counter_index;
2629         struct counter_index *new_counter_index = NULL;
2630 
2631         pr_info_once("%s", mlx4_ib_version);
2632 
2633         num_ports = 0;
2634         mlx4_foreach_ib_transport_port(i, dev)
2635                 num_ports++;
2636 
2637         /* No point in registering a device with no ports... */
2638         if (num_ports == 0)
2639                 return NULL;
2640 
2641         ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
2642         if (!ibdev) {
2643                 dev_err(&dev->persist->pdev->dev,
2644                         "Device struct alloc failed\n");
2645                 return NULL;
2646         }
2647 
2648         iboe = &ibdev->iboe;
2649 
2650         if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
2651                 goto err_dealloc;
2652 
2653         if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
2654                 goto err_pd;
2655 
2656         ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
2657                                  PAGE_SIZE);
2658         if (!ibdev->uar_map)
2659                 goto err_uar;
2660         MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2661 
2662         ibdev->dev = dev;
2663         ibdev->bond_next_port   = 0;
2664 
2665         ibdev->ib_dev.node_type         = RDMA_NODE_IB_CA;
2666         ibdev->ib_dev.local_dma_lkey    = dev->caps.reserved_lkey;
2667         ibdev->num_ports                = num_ports;
2668         ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
2669                                                 1 : ibdev->num_ports;
2670         ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
2671         ibdev->ib_dev.dev.parent        = &dev->persist->pdev->dev;
2672 
2673         ibdev->ib_dev.uverbs_cmd_mask   =
2674                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2675                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2676                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2677                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2678                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2679                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2680                 (1ull << IB_USER_VERBS_CMD_REREG_MR)            |
2681                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2682                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2683                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2684                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2685                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2686                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2687                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2688                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2689                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2690                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2691                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2692                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2693                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2694                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2695                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2696                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
2697                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2698 
2699         ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
2700         ibdev->ib_dev.uverbs_ex_cmd_mask |=
2701                 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
2702                 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
2703                 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
2704                 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
2705 
2706         if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
2707             ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
2708             IB_LINK_LAYER_ETHERNET) ||
2709             (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
2710             IB_LINK_LAYER_ETHERNET))) {
2711                 ibdev->ib_dev.uverbs_ex_cmd_mask |=
2712                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ)          |
2713                         (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ)          |
2714                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ)         |
2715                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
2716                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
2717                 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
2718         }
2719 
2720         if (!mlx4_is_slave(ibdev->dev))
2721                 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops);
2722 
2723         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2724             dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
2725                 ibdev->ib_dev.uverbs_cmd_mask |=
2726                         (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2727                         (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2728                 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
2729         }
2730 
2731         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2732                 ibdev->ib_dev.uverbs_cmd_mask |=
2733                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2734                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2735                 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
2736         }
2737 
2738         if (check_flow_steering_support(dev)) {
2739                 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
2740                 ibdev->ib_dev.uverbs_ex_cmd_mask        |=
2741                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
2742                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
2743                 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
2744         }
2745 
2746         if (!dev->caps.userspace_caps)
2747                 ibdev->ib_dev.ops.uverbs_abi_ver =
2748                         MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2749 
2750         mlx4_ib_alloc_eqs(dev, ibdev);
2751 
2752         spin_lock_init(&iboe->lock);
2753 
2754         if (init_node_data(ibdev))
2755                 goto err_map;
2756         mlx4_init_sl2vl_tbl(ibdev);
2757 
2758         for (i = 0; i < ibdev->num_ports; ++i) {
2759                 mutex_init(&ibdev->counters_table[i].mutex);
2760                 INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2761                 iboe->last_port_state[i] = IB_PORT_DOWN;
2762         }
2763 
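             /*
              * Set up one counter per port (a single shared counter when the
              * ports are bonded).  A RoCE port gets a dedicated flow counter
              * if one can be allocated; an InfiniBand port, or a failed
              * allocation, falls back to the firmware default counter for
              * that port.  The chosen index becomes the port's default and is
              * linked into its counters_table.
              */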
2764         num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2765         for (i = 0; i < num_req_counters; ++i) {
2766                 mutex_init(&ibdev->qp1_proxy_lock[i]);
2767                 allocated = 0;
2768                 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2769                                                 IB_LINK_LAYER_ETHERNET) {
2770                         err = mlx4_counter_alloc(ibdev->dev, &counter_index,
2771                                                  MLX4_RES_USAGE_DRIVER);
2772                         /* if allocating a new counter failed, use the default */
2773                         if (err)
2774                                 counter_index =
2775                                         mlx4_get_default_counter_index(dev,
2776                                                                        i + 1);
2777                         else
2778                                 allocated = 1;
2779                 } else { /* IB_LINK_LAYER_INFINIBAND: use the default counter */
2780                         counter_index = mlx4_get_default_counter_index(dev,
2781                                                                        i + 1);
2782                 }
2783                 new_counter_index = kmalloc(sizeof(*new_counter_index),
2784                                             GFP_KERNEL);
2785                 if (!new_counter_index) {
2786                         if (allocated)
2787                                 mlx4_counter_free(ibdev->dev, counter_index);
2788                         goto err_counter;
2789                 }
2790                 new_counter_index->index = counter_index;
2791                 new_counter_index->allocated = allocated;
2792                 list_add_tail(&new_counter_index->list,
2793                               &ibdev->counters_table[i].counters_list);
2794                 ibdev->counters_table[i].default_counter = counter_index;
2795                 pr_info("counter index %d for port %d allocated %d\n",
2796                         counter_index, i + 1, allocated);
2797         }
2798         if (mlx4_is_bonded(dev))
2799                 for (i = 1; i < ibdev->num_ports ; ++i) {
2800                         new_counter_index =
2801                                         kmalloc(sizeof(*new_counter_index),
2802                                                 GFP_KERNEL);
2803                         if (!new_counter_index)
2804                                 goto err_counter;
2805                         new_counter_index->index = counter_index;
2806                         new_counter_index->allocated = 0;
2807                         list_add_tail(&new_counter_index->list,
2808                                       &ibdev->counters_table[i].counters_list);
2809                         ibdev->counters_table[i].default_counter =
2810                                                                 counter_index;
2811                 }
2812 
2813         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2814                 ib_num_ports++;
2815 
2816         spin_lock_init(&ibdev->sm_lock);
2817         mutex_init(&ibdev->cap_mask_mutex);
2818         INIT_LIST_HEAD(&ibdev->qp_list);
2819         spin_lock_init(&ibdev->reset_flow_resource_lock);
2820 
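             /*
              * With device-managed flow steering, reserve a QPN range for the
              * steerable UC QPs handed out by mlx4_ib_steer_qp_alloc().  The
              * range is only opened up (bitmap cleared and registered with
              * firmware) when the DMFS_IPOIB capability is present; otherwise
              * the bitmap is left fully set and no QPN is ever allocated from
              * it.
              */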
2821         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2822             ib_num_ports) {
2823                 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2824                 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2825                                             MLX4_IB_UC_STEER_QPN_ALIGN,
2826                                             &ibdev->steer_qpn_base, 0,
2827                                             MLX4_RES_USAGE_DRIVER);
2828                 if (err)
2829                         goto err_counter;
2830 
2831                 ibdev->ib_uc_qpns_bitmap =
2832                         kmalloc_array(BITS_TO_LONGS(ibdev->steer_qpn_count),
2833                                       sizeof(long),
2834                                       GFP_KERNEL);
2835                 if (!ibdev->ib_uc_qpns_bitmap)
2836                         goto err_steer_qp_release;
2837 
2838                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
2839                         bitmap_zero(ibdev->ib_uc_qpns_bitmap,
2840                                     ibdev->steer_qpn_count);
2841                         err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
2842                                         dev, ibdev->steer_qpn_base,
2843                                         ibdev->steer_qpn_base +
2844                                         ibdev->steer_qpn_count - 1);
2845                         if (err)
2846                                 goto err_steer_free_bitmap;
2847                 } else {
2848                         bitmap_fill(ibdev->ib_uc_qpns_bitmap,
2849                                     ibdev->steer_qpn_count);
2850                 }
2851         }
2852 
2853         for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2854                 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2855 
2856         if (mlx4_ib_alloc_diag_counters(ibdev))
2857                 goto err_steer_free_bitmap;
2858 
2859         rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
2860         if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
2861                 goto err_diag_counters;
2862 
2863         if (mlx4_ib_mad_init(ibdev))
2864                 goto err_reg;
2865 
2866         if (mlx4_ib_init_sriov(ibdev))
2867                 goto err_mad;
2868 
2869         if (!iboe->nb.notifier_call) {
2870                 iboe->nb.notifier_call = mlx4_ib_netdev_event;
2871                 err = register_netdevice_notifier(&iboe->nb);
2872                 if (err) {
2873                         iboe->nb.notifier_call = NULL;
2874                         goto err_notif;
2875                 }
2876         }
2877         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2878                 err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2879                 if (err)
2880                         goto err_notif;
2881         }
2882 
2883         ibdev->ib_active = true;
2884         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2885                 devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
2886                                          &ibdev->ib_dev);
2887 
2888         if (mlx4_is_mfunc(ibdev->dev))
2889                 init_pkeys(ibdev);
2890 
2891         /* create paravirt contexts for any VFs which are active */
2892         if (mlx4_is_master(ibdev->dev)) {
2893                 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2894                         if (j == mlx4_master_func_num(ibdev->dev))
2895                                 continue;
2896                         if (mlx4_is_slave_active(ibdev->dev, j))
2897                                 do_slave_init(ibdev, j, 1);
2898                 }
2899         }
2900         return ibdev;
2901 
2902 err_notif:
2903         if (ibdev->iboe.nb.notifier_call) {
2904                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2905                         pr_warn("failure unregistering notifier\n");
2906                 ibdev->iboe.nb.notifier_call = NULL;
2907         }
2908         flush_workqueue(wq);
2909 
2910         mlx4_ib_close_sriov(ibdev);
2911 
2912 err_mad:
2913         mlx4_ib_mad_cleanup(ibdev);
2914 
2915 err_reg:
2916         ib_unregister_device(&ibdev->ib_dev);
2917 
2918 err_diag_counters:
2919         mlx4_ib_diag_cleanup(ibdev);
2920 
2921 err_steer_free_bitmap:
2922         kfree(ibdev->ib_uc_qpns_bitmap);
2923 
2924 err_steer_qp_release:
2925         mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2926                               ibdev->steer_qpn_count);
2927 err_counter:
2928         for (i = 0; i < ibdev->num_ports; ++i)
2929                 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
2930 
2931 err_map:
2932         mlx4_ib_free_eqs(dev, ibdev);
2933         iounmap(ibdev->uar_map);
2934 
2935 err_uar:
2936         mlx4_uar_free(dev, &ibdev->priv_uar);
2937 
2938 err_pd:
2939         mlx4_pd_free(dev, ibdev->priv_pdn);
2940 
2941 err_dealloc:
2942         ib_dealloc_device(&ibdev->ib_dev);
2943 
2944         return NULL;
2945 }
2946 
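     /*
      * mlx4_ib_steer_qp_alloc() hands out a power-of-two sized, naturally
      * aligned block of QPNs from the steerable range reserved in
      * mlx4_ib_add(); mlx4_ib_steer_qp_free() gives such a block back.  A
      * caller is expected to pair the two with the same count, roughly along
      * these lines (illustrative sketch only, a count of 1 assumed):
      *
      *      int qpn;
      *
      *      if (!mlx4_ib_steer_qp_alloc(mdev, 1, &qpn)) {
      *              ... create the steerable QP using qpn ...
      *              mlx4_ib_steer_qp_free(mdev, qpn, 1);
      *      }
      */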
2947 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
2948 {
2949         int offset;
2950 
2951         WARN_ON(!dev->ib_uc_qpns_bitmap);
2952 
2953         offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
2954                                          dev->steer_qpn_count,
2955                                          get_count_order(count));
2956         if (offset < 0)
2957                 return offset;
2958 
2959         *qpn = dev->steer_qpn_base + offset;
2960         return 0;
2961 }
2962 
2963 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
2964 {
2965         if (!qpn ||
2966             dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
2967                 return;
2968 
2969         if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
2970                  qpn, dev->steer_qpn_base))
2971                 /* not supposed to be here */
2972                 return;
2973 
2974         bitmap_release_region(dev->ib_uc_qpns_bitmap,
2975                               qpn - dev->steer_qpn_base,
2976                               get_count_order(count));
2977 }
2978 
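     /*
      * mlx4_ib_steer_qp_reg() attaches (is_attach != 0) or detaches a
      * catch-all steering rule for the given QP: a single IB_FLOW_SPEC_IB
      * spec with an all-zero mask, so any IB packet arriving on the QP's
      * port that reaches this rule is steered to the QP.  The registration
      * id is kept in mqp->reg_id for the later detach.
      */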
2979 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
2980                          int is_attach)
2981 {
2982         int err;
2983         size_t flow_size;
2984         struct ib_flow_attr *flow = NULL;
2985         struct ib_flow_spec_ib *ib_spec;
2986 
2987         if (is_attach) {
2988                 flow_size = sizeof(struct ib_flow_attr) +
2989                             sizeof(struct ib_flow_spec_ib);
2990                 flow = kzalloc(flow_size, GFP_KERNEL);
2991                 if (!flow)
2992                         return -ENOMEM;
2993                 flow->port = mqp->port;
2994                 flow->num_of_specs = 1;
2995                 flow->size = flow_size;
2996                 ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
2997                 ib_spec->type = IB_FLOW_SPEC_IB;
2998                 ib_spec->size = sizeof(struct ib_flow_spec_ib);
2999                 /* Add an empty rule for IB L2 */
3000                 memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
3001 
3002                 err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
3003                                             IB_FLOW_DOMAIN_NIC,
3004                                             MLX4_FS_REGULAR,
3005                                             &mqp->reg_id);
3006         } else {
3007                 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
3008         }
3009         kfree(flow);
3010         return err;
3011 }
3012 
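     /*
      * mlx4_ib_remove() is the mlx4_interface .remove callback.  Teardown
      * mirrors mlx4_ib_add() in reverse: mark the device inactive, flush
      * pending work, drop the netdevice notifier, shut down SR-IOV and MAD
      * handling, unregister from the RDMA core, then release the steerable
      * QPN range, per-port counters, EQs, the UAR mapping and the PD before
      * freeing the ib_device itself.
      */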
3013 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
3014 {
3015         struct mlx4_ib_dev *ibdev = ibdev_ptr;
3016         int p;
3017         int i;
3018 
3019         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
3020                 devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
3021         ibdev->ib_active = false;
3022         flush_workqueue(wq);
3023 
3024         if (ibdev->iboe.nb.notifier_call) {
3025                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
3026                         pr_warn("failure unregistering notifier\n");
3027                 ibdev->iboe.nb.notifier_call = NULL;
3028         }
3029 
3030         mlx4_ib_close_sriov(ibdev);
3031         mlx4_ib_mad_cleanup(ibdev);
3032         ib_unregister_device(&ibdev->ib_dev);
3033         mlx4_ib_diag_cleanup(ibdev);
3034 
3035         mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
3036                               ibdev->steer_qpn_count);
3037         kfree(ibdev->ib_uc_qpns_bitmap);
3038 
3039         iounmap(ibdev->uar_map);
3040         for (p = 0; p < ibdev->num_ports; ++p)
3041                 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
3042 
3043         mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
3044                 mlx4_CLOSE_PORT(dev, p);
3045 
3046         mlx4_ib_free_eqs(dev, ibdev);
3047 
3048         mlx4_uar_free(dev, &ibdev->priv_uar);
3049         mlx4_pd_free(dev, ibdev->priv_pdn);
3050         ib_dealloc_device(&ibdev->ib_dev);
3051 }
3052 
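     /*
      * do_slave_init() is a no-op unless we are the master (PF).  For each
      * port that is active for the given slave it queues an
      * mlx4_ib_demux_work item on that port's demux workqueue, letting
      * mlx4_ib_tunnels_update_work() set up (do_init != 0) or tear down the
      * paravirtualized tunnel QPs for that slave.  Nothing is queued once
      * SR-IOV teardown has started.
      */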
3053 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
3054 {
3055         struct mlx4_ib_demux_work **dm = NULL;
3056         struct mlx4_dev *dev = ibdev->dev;
3057         int i;
3058         unsigned long flags;
3059         struct mlx4_active_ports actv_ports;
3060         unsigned int ports;
3061         unsigned int first_port;
3062 
3063         if (!mlx4_is_master(dev))
3064                 return;
3065 
3066         actv_ports = mlx4_get_active_ports(dev, slave);
3067         ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
3068         first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
3069 
3070         dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
3071         if (!dm)
3072                 return;
3073 
3074         for (i = 0; i < ports; i++) {
3075                 dm[i] = kmalloc(sizeof(*dm[i]), GFP_ATOMIC);
3076                 if (!dm[i]) {
3077                         while (--i >= 0)
3078                                 kfree(dm[i]);
3079                         goto out;
3080                 }
3081                 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
3082                 dm[i]->port = first_port + i + 1;
3083                 dm[i]->slave = slave;
3084                 dm[i]->do_init = do_init;
3085                 dm[i]->dev = ibdev;
3086         }
3087         /* initialize or tear down tunnel QPs for the slave */
3088         spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
3089         if (!ibdev->sriov.is_going_down) {
3090                 for (i = 0; i < ports; i++)
3091                         queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
3092                 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3093         } else {
3094                 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3095                 for (i = 0; i < ports; i++)
3096                         kfree(dm[i]);
3097         }
3098 out:
3099         kfree(dm);
3100         return;
3101 }
3102 
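     /*
      * After a catastrophic error the hardware will not generate further
      * completion events.  Walk every QP registered on this ibdev and, for
      * each send or receive queue that still has outstanding work (and no
      * SRQ on the receive side), invoke the completion handler of its CQ so
      * consumers can poll the flushed-in-error completions.  The
      * reset_notify list guarantees each CQ is notified at most once.
      */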
3103 static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
3104 {
3105         struct mlx4_ib_qp *mqp;
3106         unsigned long flags_qp;
3107         unsigned long flags_cq;
3108         struct mlx4_ib_cq *send_mcq, *recv_mcq;
3109         struct list_head    cq_notify_list;
3110         struct mlx4_cq *mcq;
3111         unsigned long flags;
3112 
3113         pr_warn("mlx4_ib_handle_catas_error started\n");
3114         INIT_LIST_HEAD(&cq_notify_list);
3115 
3116         /* Go over the QP list residing on this ibdev, synchronizing with QP create/destroy. */
3117         spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
3118 
3119         list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
3120                 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
3121                 if (mqp->sq.tail != mqp->sq.head) {
3122                         send_mcq = to_mcq(mqp->ibqp.send_cq);
3123                         spin_lock_irqsave(&send_mcq->lock, flags_cq);
3124                         if (send_mcq->mcq.comp &&
3125                             mqp->ibqp.send_cq->comp_handler) {
3126                                 if (!send_mcq->mcq.reset_notify_added) {
3127                                         send_mcq->mcq.reset_notify_added = 1;
3128                                         list_add_tail(&send_mcq->mcq.reset_notify,
3129                                                       &cq_notify_list);
3130                                 }
3131                         }
3132                         spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
3133                 }
3134                 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
3135                 /* Now, handle the QP's receive queue */
3136                 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
3137                 /* no handling is needed for SRQ */
3138                 if (!mqp->ibqp.srq) {
3139                         if (mqp->rq.tail != mqp->rq.head) {
3140                                 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
3141                                 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
3142                                 if (recv_mcq->mcq.comp &&
3143                                     mqp->ibqp.recv_cq->comp_handler) {
3144                                         if (!recv_mcq->mcq.reset_notify_added) {
3145                                                 recv_mcq->mcq.reset_notify_added = 1;
3146                                                 list_add_tail(&recv_mcq->mcq.reset_notify,
3147                                                               &cq_notify_list);
3148                                         }
3149                                 }
3150                                 spin_unlock_irqrestore(&recv_mcq->lock,
3151                                                        flags_cq);
3152                         }
3153                 }
3154                 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
3155         }
3156 
3157         list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
3158                 mcq->comp(mcq);
3159         }
3160         spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
3161         pr_warn("mlx4_ib_handle_catas_error ended\n");
3162 }
3163 
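     /*
      * A bonded device exposes a single IB port.  Derive its state from the
      * member netdevs: the bond is reported ACTIVE if any member is running
      * with carrier; otherwise a PORT_ERR event is dispatched.  Either way
      * the event is raised for port number 1.
      */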
3164 static void handle_bonded_port_state_event(struct work_struct *work)
3165 {
3166         struct ib_event_work *ew =
3167                 container_of(work, struct ib_event_work, work);
3168         struct mlx4_ib_dev *ibdev = ew->ib_dev;
3169         enum ib_port_state bonded_port_state = IB_PORT_NOP;
3170         int i;
3171         struct ib_event ibev;
3172 
3173         kfree(ew);
3174         spin_lock_bh(&ibdev->iboe.lock);
3175         for (i = 0; i < MLX4_MAX_PORTS; ++i) {
3176                 struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
3177                 enum ib_port_state curr_port_state;
3178 
3179                 if (!curr_netdev)
3180                         continue;
3181 
3182                 curr_port_state =
3183                         (netif_running(curr_netdev) &&
3184                          netif_carrier_ok(curr_netdev)) ?
3185                         IB_PORT_ACTIVE : IB_PORT_DOWN;
3186 
3187                 bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
3188                         curr_port_state : IB_PORT_ACTIVE;
3189         }
3190         spin_unlock_bh(&ibdev->iboe.lock);
3191 
3192         ibev.device = &ibdev->ib_dev;
3193         ibev.element.port_num = 1;
3194         ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
3195                 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3196 
3197         ib_dispatch_event(&ibev);
3198 }
3199 
3200 void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3201 {
3202         u64 sl2vl;
3203         int err;
3204 
3205         err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3206         if (err) {
3207                 pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
3208                        port, err);
3209                 sl2vl = 0;
3210         }
3211         atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3212 }
3213 
3214 static void ib_sl2vl_update_work(struct work_struct *work)
3215 {
3216         struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3217         struct mlx4_ib_dev *mdev = ew->ib_dev;
3218         int port = ew->port;
3219 
3220         mlx4_ib_sl2vl_update(mdev, port);
3221 
3222         kfree(ew);
3223 }
3224 
3225 void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3226                                      int port)
3227 {
3228         struct ib_event_work *ew;
3229 
3230         ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3231         if (ew) {
3232                 INIT_WORK(&ew->work, ib_sl2vl_update_work);
3233                 ew->port = port;
3234                 ew->ib_dev = ibdev;
3235                 queue_work(wq, &ew->work);
3236         }
3237 }
3238 
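     /*
      * mlx4_ib_event() is the mlx4_interface .event callback.  It translates
      * mlx4 core events into RDMA core events: port up/down become
      * PORT_ACTIVE/PORT_ERR (deferred to the workqueue for bonded devices),
      * a catastrophic error becomes DEVICE_FATAL after running the reset
      * flow above, port management change EQEs are handed to
      * handle_port_mgmt_change_event(), and slave init/shutdown events drive
      * do_slave_init() and the alias GUID machinery on the master.
      */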
3239 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
3240                           enum mlx4_dev_event event, unsigned long param)
3241 {
3242         struct ib_event ibev;
3243         struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
3244         struct mlx4_eqe *eqe = NULL;
3245         struct ib_event_work *ew;
3246         int p = 0;
3247 
3248         if (mlx4_is_bonded(dev) &&
3249             ((event == MLX4_DEV_EVENT_PORT_UP) ||
3250             (event == MLX4_DEV_EVENT_PORT_DOWN))) {
3251                 ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3252                 if (!ew)
3253                         return;
3254                 INIT_WORK(&ew->work, handle_bonded_port_state_event);
3255                 ew->ib_dev = ibdev;
3256                 queue_work(wq, &ew->work);
3257                 return;
3258         }
3259 
3260         if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
3261                 eqe = (struct mlx4_eqe *)param;
3262         else
3263                 p = (int) param;
3264 
3265         switch (event) {
3266         case MLX4_DEV_EVENT_PORT_UP:
3267                 if (p > ibdev->num_ports)
3268                         return;
3269                 if (!mlx4_is_slave(dev) &&
3270                     rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
3271                         IB_LINK_LAYER_INFINIBAND) {
3272                         if (mlx4_is_master(dev))
3273                                 mlx4_ib_invalidate_all_guid_record(ibdev, p);
3274                         if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3275                             !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3276                                 mlx4_sched_ib_sl2vl_update_work(ibdev, p);
3277                 }
3278                 ibev.event = IB_EVENT_PORT_ACTIVE;
3279                 break;
3280 
3281         case MLX4_DEV_EVENT_PORT_DOWN:
3282                 if (p > ibdev->num_ports)
3283                         return;
3284                 ibev.event = IB_EVENT_PORT_ERR;
3285                 break;
3286 
3287         case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3288                 ibdev->ib_active = false;
3289                 ibev.event = IB_EVENT_DEVICE_FATAL;
3290                 mlx4_ib_handle_catas_error(ibdev);
3291                 break;
3292 
3293         case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3294                 ew = kmalloc(sizeof *ew, GFP_ATOMIC);
3295                 if (!ew)
3296                         break;
3297 
3298                 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
3299                 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
3300                 ew->ib_dev = ibdev;
3301                 /* need to queue only for port owner, which uses GEN_EQE */
3302                 if (mlx4_is_master(dev))
3303                         queue_work(wq, &ew->work);
3304                 else
3305                         handle_port_mgmt_change_event(&ew->work);
3306                 return;
3307 
3308         case MLX4_DEV_EVENT_SLAVE_INIT:
3309                 /* here, p is the slave id */
3310                 do_slave_init(ibdev, p, 1);
3311                 if (mlx4_is_master(dev)) {
3312                         int i;
3313 
3314                         for (i = 1; i <= ibdev->num_ports; i++) {
3315                                 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3316                                         == IB_LINK_LAYER_INFINIBAND)
3317                                         mlx4_ib_slave_alias_guid_event(ibdev,
3318                                                                        p, i,
3319                                                                        1);
3320                         }
3321                 }
3322                 return;
3323 
3324         case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
3325                 if (mlx4_is_master(dev)) {
3326                         int i;
3327 
3328                         for (i = 1; i <= ibdev->num_ports; i++) {
3329                                 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3330                                         == IB_LINK_LAYER_INFINIBAND)
3331                                         mlx4_ib_slave_alias_guid_event(ibdev,
3332                                                                        p, i,
3333                                                                        0);
3334                         }
3335                 }
3336                 /* here, p is the slave id */
3337                 do_slave_init(ibdev, p, 0);
3338                 return;
3339 
3340         default:
3341                 return;
3342         }
3343 
3344         ibev.device           = ibdev_ptr;
3345         ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
3346 
3347         ib_dispatch_event(&ibev);
3348 }
3349 
3350 static struct mlx4_interface mlx4_ib_interface = {
3351         .add            = mlx4_ib_add,
3352         .remove         = mlx4_ib_remove,
3353         .event          = mlx4_ib_event,
3354         .protocol       = MLX4_PROT_IB_IPV6,
3355         .flags          = MLX4_INTFF_BONDING
3356 };
3357 
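     /*
      * Module init creates the ordered workqueue used by the event and
      * netdev handlers above, initializes the multicast group (MCG)
      * paravirtualization support and registers mlx4_ib_interface with the
      * mlx4 core, at which point mlx4_ib_add() is invoked for every mlx4
      * device already present.
      */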
3358 static int __init mlx4_ib_init(void)
3359 {
3360         int err;
3361 
3362         wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
3363         if (!wq)
3364                 return -ENOMEM;
3365 
3366         err = mlx4_ib_mcg_init();
3367         if (err)
3368                 goto clean_wq;
3369 
3370         err = mlx4_register_interface(&mlx4_ib_interface);
3371         if (err)
3372                 goto clean_mcg;
3373 
3374         return 0;
3375 
3376 clean_mcg:
3377         mlx4_ib_mcg_destroy();
3378 
3379 clean_wq:
3380         destroy_workqueue(wq);
3381         return err;
3382 }
3383 
3384 static void __exit mlx4_ib_cleanup(void)
3385 {
3386         mlx4_unregister_interface(&mlx4_ib_interface);
3387         mlx4_ib_mcg_destroy();
3388         destroy_workqueue(wq);
3389 }
3390 
3391 module_init(mlx4_ib_init);
3392 module_exit(mlx4_ib_cleanup);
