root/drivers/net/ethernet/mellanox/mlx4/main.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. mlx4_devlink_ierr_reset_get
  2. mlx4_devlink_ierr_reset_set
  3. mlx4_devlink_crdump_snapshot_get
  4. mlx4_devlink_crdump_snapshot_set
  5. mlx4_devlink_max_macs_validate
  6. mlx4_devlink_set_params_init_values
  7. mlx4_set_num_reserved_uars
  8. mlx4_check_port_params
  9. mlx4_set_port_mask
  10. mlx4_query_func
  11. mlx4_enable_cqe_eqe_stride
  12. _mlx4_dev_port
  13. mlx4_dev_port
  14. mlx4_enable_ignore_fcs
  15. mlx4_dev_cap
  16. mlx4_how_many_lives_vf
  17. mlx4_get_parav_qkey
  18. mlx4_sync_pkey_table
  19. mlx4_put_slave_node_guid
  20. mlx4_get_slave_node_guid
  21. mlx4_is_slave_active
  22. mlx4_handle_eth_header_mcast_prio
  23. slave_adjust_steering_mode
  24. mlx4_slave_destroy_special_qp_cap
  25. mlx4_slave_special_qp_cap
  26. mlx4_slave_cap
  27. mlx4_request_modules
  28. mlx4_change_port_types
  29. show_port_type
  30. __set_port_type
  31. set_port_type
  32. int_to_ibta_mtu
  33. ibta_mtu_to_int
  34. show_port_ib_mtu
  35. set_port_ib_mtu
  36. mlx4_mf_bond
  37. mlx4_mf_unbond
  38. mlx4_bond
  39. mlx4_unbond
  40. mlx4_port_map_set
  41. mlx4_load_fw
  42. mlx4_init_cmpt_table
  43. mlx4_init_icm
  44. mlx4_free_icms
  45. mlx4_slave_exit
  46. map_bf_area
  47. unmap_bf_area
  48. mlx4_read_clock
  49. map_internal_clock
  50. mlx4_get_internal_clock_params
  51. unmap_internal_clock
  52. mlx4_close_hca
  53. mlx4_close_fw
  54. mlx4_comm_check_offline
  55. mlx4_reset_vf_support
  56. mlx4_init_slave
  57. mlx4_parav_master_pf_caps
  58. choose_log_fs_mgm_entry_size
  59. dmfs_high_rate_steering_mode_str
  60. choose_steering_mode
  61. choose_tunnel_offload_mode
  62. mlx4_validate_optimized_steering
  63. mlx4_init_fw
  64. mlx4_init_hca
  65. mlx4_init_counters_table
  66. mlx4_cleanup_counters_table
  67. mlx4_cleanup_default_counters
  68. mlx4_allocate_default_counters
  69. __mlx4_counter_alloc
  70. mlx4_counter_alloc
  71. __mlx4_clear_if_stat
  72. __mlx4_counter_free
  73. mlx4_counter_free
  74. mlx4_get_default_counter_index
  75. mlx4_set_admin_guid
  76. mlx4_get_admin_guid
  77. mlx4_set_random_admin_guid
  78. mlx4_setup_hca
  79. mlx4_init_affinity_hint
  80. mlx4_enable_msi_x
  81. mlx4_init_port_info
  82. mlx4_cleanup_port_info
  83. mlx4_init_steering
  84. mlx4_clear_steering
  85. extended_func_num
  86. mlx4_get_ownership
  87. mlx4_free_ownership
  88. mlx4_enable_sriov
  89. mlx4_check_dev_cap
  90. mlx4_pci_enable_device
  91. mlx4_pci_disable_device
  92. mlx4_load_one
  93. __mlx4_init_one
  94. mlx4_devlink_port_type_set
  95. mlx4_devlink_param_load_driverinit_values
  96. mlx4_devlink_reload_down
  97. mlx4_devlink_reload_up
  98. mlx4_init_one
  99. mlx4_clean_dev
  100. mlx4_unload_one
  101. mlx4_remove_one
  102. restore_current_port_types
  103. mlx4_restart_one_down
  104. mlx4_restart_one_up
  105. mlx4_restart_one
  106. mlx4_pci_err_detected
  107. mlx4_pci_slot_reset
  108. mlx4_pci_resume
  109. mlx4_shutdown
  110. mlx4_suspend
  111. mlx4_resume
  112. mlx4_verify_params
  113. mlx4_init
  114. mlx4_cleanup

   1 /*
   2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   4  * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
   5  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35 
  36 #include <linux/module.h>
  37 #include <linux/kernel.h>
  38 #include <linux/init.h>
  39 #include <linux/errno.h>
  40 #include <linux/pci.h>
  41 #include <linux/dma-mapping.h>
  42 #include <linux/slab.h>
  43 #include <linux/io-mapping.h>
  44 #include <linux/delay.h>
  45 #include <linux/kmod.h>
  46 #include <linux/etherdevice.h>
  47 #include <net/devlink.h>
  48 
  49 #include <uapi/rdma/mlx4-abi.h>
  50 #include <linux/mlx4/device.h>
  51 #include <linux/mlx4/doorbell.h>
  52 
  53 #include "mlx4.h"
  54 #include "fw.h"
  55 #include "icm.h"
  56 
  57 MODULE_AUTHOR("Roland Dreier");
  58 MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
  59 MODULE_LICENSE("Dual BSD/GPL");
  60 MODULE_VERSION(DRV_VERSION);
  61 
  62 struct workqueue_struct *mlx4_wq;
  63 
  64 #ifdef CONFIG_MLX4_DEBUG
  65 
  66 int mlx4_debug_level; /* 0 by default */
  67 module_param_named(debug_level, mlx4_debug_level, int, 0644);
  68 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
  69 
  70 #endif /* CONFIG_MLX4_DEBUG */
  71 
  72 #ifdef CONFIG_PCI_MSI
  73 
  74 static int msi_x = 1;
  75 module_param(msi_x, int, 0444);
  76 MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x");
  77 
  78 #else /* CONFIG_PCI_MSI */
  79 
  80 #define msi_x (0)
  81 
  82 #endif /* CONFIG_PCI_MSI */
  83 
  84 static uint8_t num_vfs[3] = {0, 0, 0};
  85 static int num_vfs_argc;
  86 module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
  87 MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
  88                           "num_vfs=port1,port2,port1+2");
  89 
  90 static uint8_t probe_vf[3] = {0, 0, 0};
  91 static int probe_vfs_argc;
  92 module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
  93 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
  94                            "probe_vf=port1,port2,port1+2");
  95 
  96 static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
  97 module_param_named(log_num_mgm_entry_size,
  98                         mlx4_log_num_mgm_entry_size, int, 0444);
  99 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 100                                          " of qp per mcg, for example:"
 101                                          " 10 gives 248.range: 7 <="
 102                                          " log_num_mgm_entry_size <= 12."
 103                                          " To activate device managed"
 104                                          " flow steering when available, set to -1");
 105 
 106 static bool enable_64b_cqe_eqe = true;
 107 module_param(enable_64b_cqe_eqe, bool, 0444);
 108 MODULE_PARM_DESC(enable_64b_cqe_eqe,
 109                  "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
 110 
 111 static bool enable_4k_uar;
 112 module_param(enable_4k_uar, bool, 0444);
 113 MODULE_PARM_DESC(enable_4k_uar,
 114                  "Enable using 4K UAR. Should not be enabled if have VFs which do not support 4K UARs (default: false)");
 115 
 116 #define PF_CONTEXT_BEHAVIOUR_MASK       (MLX4_FUNC_CAP_64B_EQE_CQE | \
 117                                          MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
 118                                          MLX4_FUNC_CAP_DMFS_A0_STATIC)
 119 
 120 #define RESET_PERSIST_MASK_FLAGS        (MLX4_FLAG_SRIOV)
 121 
 122 static char mlx4_version[] =
 123         DRV_NAME ": Mellanox ConnectX core driver v"
 124         DRV_VERSION "\n";
 125 
 126 static const struct mlx4_profile default_profile = {
 127         .num_qp         = 1 << 18,
 128         .num_srq        = 1 << 16,
 129         .rdmarc_per_qp  = 1 << 4,
 130         .num_cq         = 1 << 16,
 131         .num_mcg        = 1 << 13,
 132         .num_mpt        = 1 << 19,
 133         .num_mtt        = 1 << 20, /* It is really num mtt segements */
 134 };
 135 
 136 static const struct mlx4_profile low_mem_profile = {
 137         .num_qp         = 1 << 17,
 138         .num_srq        = 1 << 6,
 139         .rdmarc_per_qp  = 1 << 4,
 140         .num_cq         = 1 << 8,
 141         .num_mcg        = 1 << 8,
 142         .num_mpt        = 1 << 9,
 143         .num_mtt        = 1 << 7,
 144 };
 145 
 146 static int log_num_mac = 7;
 147 module_param_named(log_num_mac, log_num_mac, int, 0444);
 148 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
 149 
 150 static int log_num_vlan;
 151 module_param_named(log_num_vlan, log_num_vlan, int, 0444);
 152 MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
 153 /* Log2 max number of VLANs per ETH port (0-7) */
 154 #define MLX4_LOG_NUM_VLANS 7
 155 #define MLX4_MIN_LOG_NUM_VLANS 0
 156 #define MLX4_MIN_LOG_NUM_MAC 1
 157 
 158 static bool use_prio;
 159 module_param_named(use_prio, use_prio, bool, 0444);
 160 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
 161 
 162 int log_mtts_per_seg = ilog2(1);
 163 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 164 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
 165                  "(0-7) (default: 0)");
 166 
 167 static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
 168 static int arr_argc = 2;
 169 module_param_array(port_type_array, int, &arr_argc, 0444);
 170 MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
 171                                 "1 for IB, 2 for Ethernet");
 172 
 173 struct mlx4_port_config {
 174         struct list_head list;
 175         enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
 176         struct pci_dev *pdev;
 177 };
 178 
 179 static atomic_t pf_loading = ATOMIC_INIT(0);
 180 
 181 static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id,
 182                                        struct devlink_param_gset_ctx *ctx)
 183 {
 184         ctx->val.vbool = !!mlx4_internal_err_reset;
 185         return 0;
 186 }
 187 
 188 static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
 189                                        struct devlink_param_gset_ctx *ctx)
 190 {
 191         mlx4_internal_err_reset = ctx->val.vbool;
 192         return 0;
 193 }
 194 
 195 static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
 196                                             struct devlink_param_gset_ctx *ctx)
 197 {
 198         struct mlx4_priv *priv = devlink_priv(devlink);
 199         struct mlx4_dev *dev = &priv->dev;
 200 
 201         ctx->val.vbool = dev->persist->crdump.snapshot_enable;
 202         return 0;
 203 }
 204 
 205 static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
 206                                             struct devlink_param_gset_ctx *ctx)
 207 {
 208         struct mlx4_priv *priv = devlink_priv(devlink);
 209         struct mlx4_dev *dev = &priv->dev;
 210 
 211         dev->persist->crdump.snapshot_enable = ctx->val.vbool;
 212         return 0;
 213 }
 214 
 215 static int
 216 mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
 217                                union devlink_param_value val,
 218                                struct netlink_ext_ack *extack)
 219 {
 220         u32 value = val.vu32;
 221 
 222         if (value < 1 || value > 128)
 223                 return -ERANGE;
 224 
 225         if (!is_power_of_2(value)) {
 226                 NL_SET_ERR_MSG_MOD(extack, "max_macs supported must be power of 2");
 227                 return -EINVAL;
 228         }
 229 
 230         return 0;
 231 }
 232 
 233 enum mlx4_devlink_param_id {
 234         MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 235         MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
 236         MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
 237 };
 238 
 239 static const struct devlink_param mlx4_devlink_params[] = {
 240         DEVLINK_PARAM_GENERIC(INT_ERR_RESET,
 241                               BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
 242                               BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 243                               mlx4_devlink_ierr_reset_get,
 244                               mlx4_devlink_ierr_reset_set, NULL),
 245         DEVLINK_PARAM_GENERIC(MAX_MACS,
 246                               BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 247                               NULL, NULL, mlx4_devlink_max_macs_validate),
 248         DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
 249                               BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
 250                               BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 251                               mlx4_devlink_crdump_snapshot_get,
 252                               mlx4_devlink_crdump_snapshot_set, NULL),
 253         DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
 254                              "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
 255                              BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 256                              NULL, NULL, NULL),
 257         DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
 258                              "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL,
 259                              BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 260                              NULL, NULL, NULL),
 261 };
 262 
 263 static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
 264 {
 265         union devlink_param_value value;
 266 
 267         value.vbool = !!mlx4_internal_err_reset;
 268         devlink_param_driverinit_value_set(devlink,
 269                                            DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
 270                                            value);
 271 
 272         value.vu32 = 1UL << log_num_mac;
 273         devlink_param_driverinit_value_set(devlink,
 274                                            DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 275                                            value);
 276 
 277         value.vbool = enable_64b_cqe_eqe;
 278         devlink_param_driverinit_value_set(devlink,
 279                                            MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
 280                                            value);
 281 
 282         value.vbool = enable_4k_uar;
 283         devlink_param_driverinit_value_set(devlink,
 284                                            MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
 285                                            value);
 286 
 287         value.vbool = false;
 288         devlink_param_driverinit_value_set(devlink,
 289                                            DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
 290                                            value);
 291 }
 292 
 293 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
 294                                               struct mlx4_dev_cap *dev_cap)
 295 {
 296         /* The reserved_uars is calculated by system page size unit.
 297          * Therefore, adjustment is added when the uar page size is less
 298          * than the system page size
 299          */
 300         dev->caps.reserved_uars =
 301                 max_t(int,
 302                       mlx4_get_num_reserved_uar(dev),
 303                       dev_cap->reserved_uars /
 304                         (1 << (PAGE_SHIFT - dev->uar_page_shift)));
 305 }
 306 
 307 int mlx4_check_port_params(struct mlx4_dev *dev,
 308                            enum mlx4_port_type *port_type)
 309 {
 310         int i;
 311 
 312         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
 313                 for (i = 0; i < dev->caps.num_ports - 1; i++) {
 314                         if (port_type[i] != port_type[i + 1]) {
 315                                 mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
 316                                 return -EOPNOTSUPP;
 317                         }
 318                 }
 319         }
 320 
 321         for (i = 0; i < dev->caps.num_ports; i++) {
 322                 if (!(port_type[i] & dev->caps.supported_type[i+1])) {
 323                         mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
 324                                  i + 1);
 325                         return -EOPNOTSUPP;
 326                 }
 327         }
 328         return 0;
 329 }
 330 
 331 static void mlx4_set_port_mask(struct mlx4_dev *dev)
 332 {
 333         int i;
 334 
 335         for (i = 1; i <= dev->caps.num_ports; ++i)
 336                 dev->caps.port_mask[i] = dev->caps.port_type[i];
 337 }
 338 
 339 enum {
 340         MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
 341 };
 342 
 343 static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 344 {
 345         int err = 0;
 346         struct mlx4_func func;
 347 
 348         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
 349                 err = mlx4_QUERY_FUNC(dev, &func, 0);
 350                 if (err) {
 351                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
 352                         return err;
 353                 }
 354                 dev_cap->max_eqs = func.max_eq;
 355                 dev_cap->reserved_eqs = func.rsvd_eqs;
 356                 dev_cap->reserved_uars = func.rsvd_uars;
 357                 err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
 358         }
 359         return err;
 360 }
 361 
 362 static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
 363 {
 364         struct mlx4_caps *dev_cap = &dev->caps;
 365 
 366         /* FW not supporting or cancelled by user */
 367         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
 368             !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
 369                 return;
 370 
 371         /* Must have 64B CQE_EQE enabled by FW to use bigger stride
 372          * When FW has NCSI it may decide not to report 64B CQE/EQEs
 373          */
 374         if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
 375             !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
 376                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
 377                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
 378                 return;
 379         }
 380 
 381         if (cache_line_size() == 128 || cache_line_size() == 256) {
 382                 mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
 383                 /* Changing the real data inside CQE size to 32B */
 384                 dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
 385                 dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
 386 
 387                 if (mlx4_is_master(dev))
 388                         dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
 389         } else {
 390                 if (cache_line_size() != 32  && cache_line_size() != 64)
 391                         mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
 392                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
 393                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
 394         }
 395 }
 396 
 397 static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
 398                           struct mlx4_port_cap *port_cap)
 399 {
 400         dev->caps.vl_cap[port]      = port_cap->max_vl;
 401         dev->caps.ib_mtu_cap[port]          = port_cap->ib_mtu;
 402         dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
 403         dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
 404         /* set gid and pkey table operating lengths by default
 405          * to non-sriov values
 406          */
 407         dev->caps.gid_table_len[port]  = port_cap->max_gids;
 408         dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
 409         dev->caps.port_width_cap[port] = port_cap->max_port_width;
 410         dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
 411         dev->caps.max_tc_eth           = port_cap->max_tc_eth;
 412         dev->caps.def_mac[port]        = port_cap->def_mac;
 413         dev->caps.supported_type[port] = port_cap->supported_port_types;
 414         dev->caps.suggested_type[port] = port_cap->suggested_type;
 415         dev->caps.default_sense[port] = port_cap->default_sense;
 416         dev->caps.trans_type[port]          = port_cap->trans_type;
 417         dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
 418         dev->caps.wavelength[port]     = port_cap->wavelength;
 419         dev->caps.trans_code[port]     = port_cap->trans_code;
 420 
 421         return 0;
 422 }
 423 
 424 static int mlx4_dev_port(struct mlx4_dev *dev, int port,
 425                          struct mlx4_port_cap *port_cap)
 426 {
 427         int err = 0;
 428 
 429         err = mlx4_QUERY_PORT(dev, port, port_cap);
 430 
 431         if (err)
 432                 mlx4_err(dev, "QUERY_PORT command failed.\n");
 433 
 434         return err;
 435 }
 436 
 437 static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
 438 {
 439         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
 440                 return;
 441 
 442         if (mlx4_is_mfunc(dev)) {
 443                 mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
 444                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
 445                 return;
 446         }
 447 
 448         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
 449                 mlx4_dbg(dev,
 450                          "Keep FCS is not supported - Disabling Ignore FCS");
 451                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
 452                 return;
 453         }
 454 }
 455 
 456 #define MLX4_A0_STEERING_TABLE_SIZE     256
 457 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 458 {
 459         int err;
 460         int i;
 461 
 462         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
 463         if (err) {
 464                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 465                 return err;
 466         }
 467         mlx4_dev_cap_dump(dev, dev_cap);
 468 
 469         if (dev_cap->min_page_sz > PAGE_SIZE) {
 470                 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
 471                          dev_cap->min_page_sz, PAGE_SIZE);
 472                 return -ENODEV;
 473         }
 474         if (dev_cap->num_ports > MLX4_MAX_PORTS) {
 475                 mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
 476                          dev_cap->num_ports, MLX4_MAX_PORTS);
 477                 return -ENODEV;
 478         }
 479 
 480         if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
 481                 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
 482                          dev_cap->uar_size,
 483                          (unsigned long long)
 484                          pci_resource_len(dev->persist->pdev, 2));
 485                 return -ENODEV;
 486         }
 487 
 488         dev->caps.num_ports          = dev_cap->num_ports;
 489         dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
 490         dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
 491                                       dev->caps.num_sys_eqs :
 492                                       MLX4_MAX_EQ_NUM;
 493         for (i = 1; i <= dev->caps.num_ports; ++i) {
 494                 err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
 495                 if (err) {
 496                         mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
 497                         return err;
 498                 }
 499         }
 500 
 501         dev->caps.uar_page_size      = PAGE_SIZE;
 502         dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
 503         dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
 504         dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
 505         dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
 506         dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
 507         dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
 508         dev->caps.max_wqes           = dev_cap->max_qp_sz;
 509         dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
 510         dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
 511         dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
 512         dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
 513         dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
 514         dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
 515         /*
 516          * Subtract 1 from the limit because we need to allocate a
 517          * spare CQE to enable resizing the CQ.
 518          */
 519         dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
 520         dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
 521         dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
 522         dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 523         dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
 524 
 525         dev->caps.reserved_pds       = dev_cap->reserved_pds;
 526         dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 527                                         dev_cap->reserved_xrcds : 0;
 528         dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 529                                         dev_cap->max_xrcds : 0;
 530         dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;
 531 
 532         dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 533         dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
 534         dev->caps.flags              = dev_cap->flags;
 535         dev->caps.flags2             = dev_cap->flags2;
 536         dev->caps.bmme_flags         = dev_cap->bmme_flags;
 537         dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
 538         dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 539         dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
 540         dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 541         dev->caps.wol_port[1]          = dev_cap->wol_port[1];
 542         dev->caps.wol_port[2]          = dev_cap->wol_port[2];
 543         dev->caps.health_buffer_addrs  = dev_cap->health_buffer_addrs;
 544 
 545         /* Save uar page shift */
 546         if (!mlx4_is_slave(dev)) {
 547                 /* Virtual PCI function needs to determine UAR page size from
 548                  * firmware. Only master PCI function can set the uar page size
 549                  */
 550                 if (enable_4k_uar || !dev->persist->num_vfs)
 551                         dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
 552                 else
 553                         dev->uar_page_shift = PAGE_SHIFT;
 554 
 555                 mlx4_set_num_reserved_uars(dev, dev_cap);
 556         }
 557 
 558         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
 559                 struct mlx4_init_hca_param hca_param;
 560 
 561                 memset(&hca_param, 0, sizeof(hca_param));
 562                 err = mlx4_QUERY_HCA(dev, &hca_param);
 563                 /* Turn off PHV_EN flag in case phv_check_en is set.
 564                  * phv_check_en is a HW check that parse the packet and verify
 565                  * phv bit was reported correctly in the wqe. To allow QinQ
 566                  * PHV_EN flag should be set and phv_check_en must be cleared
 567                  * otherwise QinQ packets will be drop by the HW.
 568                  */
 569                 if (err || hca_param.phv_check_en)
 570                         dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
 571         }
 572 
 573         /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
 574         if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
 575                 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
 576         /* Don't do sense port on multifunction devices (for now at least) */
 577         if (mlx4_is_mfunc(dev))
 578                 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
 579 
 580         if (mlx4_low_memory_profile()) {
 581                 dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
 582                 dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
 583         } else {
 584                 dev->caps.log_num_macs  = log_num_mac;
 585                 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
 586         }
 587 
 588         for (i = 1; i <= dev->caps.num_ports; ++i) {
 589                 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
 590                 if (dev->caps.supported_type[i]) {
 591                         /* if only ETH is supported - assign ETH */
 592                         if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
 593                                 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
 594                         /* if only IB is supported, assign IB */
 595                         else if (dev->caps.supported_type[i] ==
 596                                  MLX4_PORT_TYPE_IB)
 597                                 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
 598                         else {
 599                                 /* if IB and ETH are supported, we set the port
 600                                  * type according to user selection of port type;
 601                                  * if user selected none, take the FW hint */
 602                                 if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
 603                                         dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
 604                                                 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
 605                                 else
 606                                         dev->caps.port_type[i] = port_type_array[i - 1];
 607                         }
 608                 }
 609                 /*
 610                  * Link sensing is allowed on the port if 3 conditions are true:
 611                  * 1. Both protocols are supported on the port.
 612                  * 2. Different types are supported on the port
 613                  * 3. FW declared that it supports link sensing
 614                  */
 615                 mlx4_priv(dev)->sense.sense_allowed[i] =
 616                         ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
 617                          (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
 618                          (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
 619 
 620                 /*
 621                  * If "default_sense" bit is set, we move the port to "AUTO" mode
 622                  * and perform sense_port FW command to try and set the correct
 623                  * port type from beginning
 624                  */
 625                 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
 626                         enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
 627                         dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
 628                         mlx4_SENSE_PORT(dev, i, &sensed_port);
 629                         if (sensed_port != MLX4_PORT_TYPE_NONE)
 630                                 dev->caps.port_type[i] = sensed_port;
 631                 } else {
 632                         dev->caps.possible_type[i] = dev->caps.port_type[i];
 633                 }
 634 
 635                 if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
 636                         dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
 637                         mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
 638                                   i, 1 << dev->caps.log_num_macs);
 639                 }
 640                 if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
 641                         dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
 642                         mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
 643                                   i, 1 << dev->caps.log_num_vlans);
 644                 }
 645         }
 646 
 647         if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
 648             (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
 649             (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
 650                 mlx4_warn(dev,
 651                           "Granular QoS per VF not supported with IB/Eth configuration\n");
 652                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
 653         }
 654 
 655         dev->caps.max_counters = dev_cap->max_counters;
 656 
 657         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
 658         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
 659                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
 660                 (1 << dev->caps.log_num_macs) *
 661                 (1 << dev->caps.log_num_vlans) *
 662                 dev->caps.num_ports;
 663         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
 664 
 665         if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
 666             dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
 667                 dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
 668         else
 669                 dev->caps.dmfs_high_rate_qpn_base =
 670                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
 671 
 672         if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
 673             dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
 674                 dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
 675                 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
 676                 dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
 677         } else {
 678                 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
 679                 dev->caps.dmfs_high_rate_qpn_base =
 680                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
 681                 dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
 682         }
 683 
 684         dev->caps.rl_caps = dev_cap->rl_caps;
 685 
 686         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
 687                 dev->caps.dmfs_high_rate_qpn_range;
 688 
 689         dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
 690                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
 691                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
 692                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
 693 
 694         dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
 695 
 696         if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
 697                 if (dev_cap->flags &
 698                     (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
 699                         mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
 700                         dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
 701                         dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
 702                 }
 703 
 704                 if (dev_cap->flags2 &
 705                     (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
 706                      MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
 707                         mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
 708                         dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
 709                         dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
 710                 }
 711         }
 712 
 713         if ((dev->caps.flags &
 714             (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
 715             mlx4_is_master(dev))
 716                 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
 717 
 718         if (!mlx4_is_slave(dev)) {
 719                 mlx4_enable_cqe_eqe_stride(dev);
 720                 dev->caps.alloc_res_qp_mask =
 721                         (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
 722                         MLX4_RESERVE_A0_QP;
 723 
 724                 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
 725                     dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
 726                         mlx4_warn(dev, "Old device ETS support detected\n");
 727                         mlx4_warn(dev, "Consider upgrading device FW.\n");
 728                         dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
 729                 }
 730 
 731         } else {
 732                 dev->caps.alloc_res_qp_mask = 0;
 733         }
 734 
 735         mlx4_enable_ignore_fcs(dev);
 736 
 737         return 0;
 738 }
 739 
 740 /*The function checks if there are live vf, return the num of them*/
 741 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
 742 {
 743         struct mlx4_priv *priv = mlx4_priv(dev);
 744         struct mlx4_slave_state *s_state;
 745         int i;
 746         int ret = 0;
 747 
 748         for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
 749                 s_state = &priv->mfunc.master.slave_state[i];
 750                 if (s_state->active && s_state->last_cmd !=
 751                     MLX4_COMM_CMD_RESET) {
 752                         mlx4_warn(dev, "%s: slave: %d is still active\n",
 753                                   __func__, i);
 754                         ret++;
 755                 }
 756         }
 757         return ret;
 758 }
 759 
 760 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
 761 {
 762         u32 qk = MLX4_RESERVED_QKEY_BASE;
 763 
 764         if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
 765             qpn < dev->phys_caps.base_proxy_sqpn)
 766                 return -EINVAL;
 767 
 768         if (qpn >= dev->phys_caps.base_tunnel_sqpn)
 769                 /* tunnel qp */
 770                 qk += qpn - dev->phys_caps.base_tunnel_sqpn;
 771         else
 772                 qk += qpn - dev->phys_caps.base_proxy_sqpn;
 773         *qkey = qk;
 774         return 0;
 775 }
 776 EXPORT_SYMBOL(mlx4_get_parav_qkey);
 777 
 778 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
 779 {
 780         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
 781 
 782         if (!mlx4_is_master(dev))
 783                 return;
 784 
 785         priv->virt2phys_pkey[slave][port - 1][i] = val;
 786 }
 787 EXPORT_SYMBOL(mlx4_sync_pkey_table);
 788 
 789 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
 790 {
 791         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
 792 
 793         if (!mlx4_is_master(dev))
 794                 return;
 795 
 796         priv->slave_node_guids[slave] = guid;
 797 }
 798 EXPORT_SYMBOL(mlx4_put_slave_node_guid);
 799 
 800 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
 801 {
 802         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
 803 
 804         if (!mlx4_is_master(dev))
 805                 return 0;
 806 
 807         return priv->slave_node_guids[slave];
 808 }
 809 EXPORT_SYMBOL(mlx4_get_slave_node_guid);
 810 
 811 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
 812 {
 813         struct mlx4_priv *priv = mlx4_priv(dev);
 814         struct mlx4_slave_state *s_slave;
 815 
 816         if (!mlx4_is_master(dev))
 817                 return 0;
 818 
 819         s_slave = &priv->mfunc.master.slave_state[slave];
 820         return !!s_slave->active;
 821 }
 822 EXPORT_SYMBOL(mlx4_is_slave_active);
 823 
 824 void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
 825                                        struct _rule_hw *eth_header)
 826 {
 827         if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
 828             is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
 829                 struct mlx4_net_trans_rule_hw_eth *eth =
 830                         (struct mlx4_net_trans_rule_hw_eth *)eth_header;
 831                 struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
 832                 bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
 833                         next_rule->rsvd == 0;
 834 
 835                 if (last_rule)
 836                         ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
 837         }
 838 }
 839 EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);
 840 
 841 static void slave_adjust_steering_mode(struct mlx4_dev *dev,
 842                                        struct mlx4_dev_cap *dev_cap,
 843                                        struct mlx4_init_hca_param *hca_param)
 844 {
 845         dev->caps.steering_mode = hca_param->steering_mode;
 846         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
 847                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
 848                 dev->caps.fs_log_max_ucast_qp_range_size =
 849                         dev_cap->fs_log_max_ucast_qp_range_size;
 850         } else
 851                 dev->caps.num_qp_per_mgm =
 852                         4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
 853 
 854         mlx4_dbg(dev, "Steering mode is: %s\n",
 855                  mlx4_steering_mode_str(dev->caps.steering_mode));
 856 }
 857 
 858 static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
 859 {
 860         kfree(dev->caps.spec_qps);
 861         dev->caps.spec_qps = NULL;
 862 }
 863 
 864 static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
 865 {
 866         struct mlx4_func_cap *func_cap = NULL;
 867         struct mlx4_caps *caps = &dev->caps;
 868         int i, err = 0;
 869 
 870         func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
 871         caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);
 872 
 873         if (!func_cap || !caps->spec_qps) {
 874                 mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
 875                 err = -ENOMEM;
 876                 goto err_mem;
 877         }
 878 
 879         for (i = 1; i <= caps->num_ports; ++i) {
 880                 err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
 881                 if (err) {
 882                         mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
 883                                  i, err);
 884                         goto err_mem;
 885                 }
 886                 caps->spec_qps[i - 1] = func_cap->spec_qps;
 887                 caps->port_mask[i] = caps->port_type[i];
 888                 caps->phys_port_id[i] = func_cap->phys_port_id;
 889                 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
 890                                                       &caps->gid_table_len[i],
 891                                                       &caps->pkey_table_len[i]);
 892                 if (err) {
 893                         mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
 894                                  i, err);
 895                         goto err_mem;
 896                 }
 897         }
 898 
 899 err_mem:
 900         if (err)
 901                 mlx4_slave_destroy_special_qp_cap(dev);
 902         kfree(func_cap);
 903         return err;
 904 }
 905 
 906 static int mlx4_slave_cap(struct mlx4_dev *dev)
 907 {
 908         int                        err;
 909         u32                        page_size;
 910         struct mlx4_dev_cap        *dev_cap = NULL;
 911         struct mlx4_func_cap       *func_cap = NULL;
 912         struct mlx4_init_hca_param *hca_param = NULL;
 913 
 914         hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
 915         func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
 916         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
 917         if (!hca_param || !func_cap || !dev_cap) {
 918                 mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
 919                 err = -ENOMEM;
 920                 goto free_mem;
 921         }
 922 
 923         err = mlx4_QUERY_HCA(dev, hca_param);
 924         if (err) {
 925                 mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
 926                 goto free_mem;
 927         }
 928 
 929         /* fail if the hca has an unknown global capability
 930          * at this time global_caps should be always zeroed
 931          */
 932         if (hca_param->global_caps) {
 933                 mlx4_err(dev, "Unknown hca global capabilities\n");
 934                 err = -EINVAL;
 935                 goto free_mem;
 936         }
 937 
 938         dev->caps.hca_core_clock = hca_param->hca_core_clock;
 939 
 940         dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
 941         err = mlx4_dev_cap(dev, dev_cap);
 942         if (err) {
 943                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 944                 goto free_mem;
 945         }
 946 
 947         err = mlx4_QUERY_FW(dev);
 948         if (err)
 949                 mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");
 950 
 951         page_size = ~dev->caps.page_size_cap + 1;
 952         mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
 953         if (page_size > PAGE_SIZE) {
 954                 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
 955                          page_size, PAGE_SIZE);
 956                 err = -ENODEV;
 957                 goto free_mem;
 958         }
 959 
 960         /* Set uar_page_shift for VF */
 961         dev->uar_page_shift = hca_param->uar_page_sz + 12;
 962 
 963         /* Make sure the master uar page size is valid */
 964         if (dev->uar_page_shift > PAGE_SHIFT) {
 965                 mlx4_err(dev,
 966                          "Invalid configuration: uar page size is larger than system page size\n");
 967                 err = -ENODEV;
 968                 goto free_mem;
 969         }
 970 
 971         /* Set reserved_uars based on the uar_page_shift */
 972         mlx4_set_num_reserved_uars(dev, dev_cap);
 973 
 974         /* Although uar page size in FW differs from system page size,
 975          * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
 976          * still works with assumption that uar page size == system page size
 977          */
 978         dev->caps.uar_page_size = PAGE_SIZE;
 979 
 980         err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
 981         if (err) {
 982                 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
 983                          err);
 984                 goto free_mem;
 985         }
 986 
 987         if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
 988             PF_CONTEXT_BEHAVIOUR_MASK) {
 989                 mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
 990                          func_cap->pf_context_behaviour,
 991                          PF_CONTEXT_BEHAVIOUR_MASK);
 992                 err = -EINVAL;
 993                 goto free_mem;
 994         }
 995 
 996         dev->caps.num_ports             = func_cap->num_ports;
 997         dev->quotas.qp                  = func_cap->qp_quota;
 998         dev->quotas.srq                 = func_cap->srq_quota;
 999         dev->quotas.cq                  = func_cap->cq_quota;
1000         dev->quotas.mpt                 = func_cap->mpt_quota;
1001         dev->quotas.mtt                 = func_cap->mtt_quota;
1002         dev->caps.num_qps               = 1 << hca_param->log_num_qps;
1003         dev->caps.num_srqs              = 1 << hca_param->log_num_srqs;
1004         dev->caps.num_cqs               = 1 << hca_param->log_num_cqs;
1005         dev->caps.num_mpts              = 1 << hca_param->log_mpt_sz;
1006         dev->caps.num_eqs               = func_cap->max_eq;
1007         dev->caps.reserved_eqs          = func_cap->reserved_eq;
1008         dev->caps.reserved_lkey         = func_cap->reserved_lkey;
1009         dev->caps.num_pds               = MLX4_NUM_PDS;
1010         dev->caps.num_mgms              = 0;
1011         dev->caps.num_amgms             = 0;
1012 
1013         if (dev->caps.num_ports > MLX4_MAX_PORTS) {
1014                 mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
1015                          dev->caps.num_ports, MLX4_MAX_PORTS);
1016                 err = -ENODEV;
1017                 goto free_mem;
1018         }
1019 
1020         mlx4_replace_zero_macs(dev);
1021 
1022         err = mlx4_slave_special_qp_cap(dev);
1023         if (err) {
1024                 mlx4_err(dev, "Set special QP caps failed. aborting\n");
1025                 goto free_mem;
1026         }
1027 
1028         if (dev->caps.uar_page_size * (dev->caps.num_uars -
1029                                        dev->caps.reserved_uars) >
1030                                        pci_resource_len(dev->persist->pdev,
1031                                                         2)) {
1032                 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
1033                          dev->caps.uar_page_size * dev->caps.num_uars,
1034                          (unsigned long long)
1035                          pci_resource_len(dev->persist->pdev, 2));
1036                 err = -ENOMEM;
1037                 goto err_mem;
1038         }
1039 
1040         if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
1041                 dev->caps.eqe_size   = 64;
1042                 dev->caps.eqe_factor = 1;
1043         } else {
1044                 dev->caps.eqe_size   = 32;
1045                 dev->caps.eqe_factor = 0;
1046         }
1047 
1048         if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
1049                 dev->caps.cqe_size   = 64;
1050                 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
1051         } else {
1052                 dev->caps.cqe_size   = 32;
1053         }
1054 
1055         if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
1056                 dev->caps.eqe_size = hca_param->eqe_size;
1057                 dev->caps.eqe_factor = 0;
1058         }
1059 
1060         if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
1061                 dev->caps.cqe_size = hca_param->cqe_size;
1062                 /* User still need to know when CQE > 32B */
1063                 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
1064         }
1065 
1066         dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1067         mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
1068 
1069         dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
1070         mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");
1071 
1072         slave_adjust_steering_mode(dev, dev_cap, hca_param);
1073         mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
1074                  hca_param->rss_ip_frags ? "on" : "off");
1075 
1076         if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
1077             dev->caps.bf_reg_size)
1078                 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;
1079 
1080         if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
1081                 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;
1082 
1083 err_mem:
1084         if (err)
1085                 mlx4_slave_destroy_special_qp_cap(dev);
1086 free_mem:
1087         kfree(hca_param);
1088         kfree(func_cap);
1089         kfree(dev_cap);
1090         return err;
1091 }
1092 
1093 static void mlx4_request_modules(struct mlx4_dev *dev)
1094 {
1095         int port;
1096         int has_ib_port = false;
1097         int has_eth_port = false;
1098 #define EN_DRV_NAME     "mlx4_en"
1099 #define IB_DRV_NAME     "mlx4_ib"
1100 
1101         for (port = 1; port <= dev->caps.num_ports; port++) {
1102                 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
1103                         has_ib_port = true;
1104                 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
1105                         has_eth_port = true;
1106         }
1107 
1108         if (has_eth_port)
1109                 request_module_nowait(EN_DRV_NAME);
1110         if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
1111                 request_module_nowait(IB_DRV_NAME);
1112 }
1113 
1114 /*
1115  * Change the port configuration of the device.
1116  * Every user of this function must hold the port mutex.
1117  */
1118 int mlx4_change_port_types(struct mlx4_dev *dev,
1119                            enum mlx4_port_type *port_types)
1120 {
1121         int err = 0;
1122         int change = 0;
1123         int port;
1124 
1125         for (port = 0; port <  dev->caps.num_ports; port++) {
1126                 /* Change the port type only if the new type is different
1127                  * from the current, and not set to Auto */
1128                 if (port_types[port] != dev->caps.port_type[port + 1])
1129                         change = 1;
1130         }
1131         if (change) {
1132                 mlx4_unregister_device(dev);
1133                 for (port = 1; port <= dev->caps.num_ports; port++) {
1134                         mlx4_CLOSE_PORT(dev, port);
1135                         dev->caps.port_type[port] = port_types[port - 1];
1136                         err = mlx4_SET_PORT(dev, port, -1);
1137                         if (err) {
1138                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
1139                                          port);
1140                                 goto out;
1141                         }
1142                 }
1143                 mlx4_set_port_mask(dev);
1144                 err = mlx4_register_device(dev);
1145                 if (err) {
1146                         mlx4_err(dev, "Failed to register device\n");
1147                         goto out;
1148                 }
1149                 mlx4_request_modules(dev);
1150         }
1151 
1152 out:
1153         return err;
1154 }
1155 
1156 static ssize_t show_port_type(struct device *dev,
1157                               struct device_attribute *attr,
1158                               char *buf)
1159 {
1160         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1161                                                    port_attr);
1162         struct mlx4_dev *mdev = info->dev;
1163         char type[8];
1164 
1165         sprintf(type, "%s",
1166                 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
1167                 "ib" : "eth");
1168         if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
1169                 sprintf(buf, "auto (%s)\n", type);
1170         else
1171                 sprintf(buf, "%s\n", type);
1172 
1173         return strlen(buf);
1174 }
1175 
1176 static int __set_port_type(struct mlx4_port_info *info,
1177                            enum mlx4_port_type port_type)
1178 {
1179         struct mlx4_dev *mdev = info->dev;
1180         struct mlx4_priv *priv = mlx4_priv(mdev);
1181         enum mlx4_port_type types[MLX4_MAX_PORTS];
1182         enum mlx4_port_type new_types[MLX4_MAX_PORTS];
1183         int i;
1184         int err = 0;
1185 
1186         if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
1187                 mlx4_err(mdev,
1188                          "Requested port type for port %d is not supported on this HCA\n",
1189                          info->port);
1190                 return -EOPNOTSUPP;
1191         }
1192 
1193         mlx4_stop_sense(mdev);
1194         mutex_lock(&priv->port_mutex);
1195         info->tmp_type = port_type;
1196 
1197         /* Possible type is always the one that was delivered */
1198         mdev->caps.possible_type[info->port] = info->tmp_type;
1199 
1200         for (i = 0; i < mdev->caps.num_ports; i++) {
1201                 types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
1202                                         mdev->caps.possible_type[i+1];
1203                 if (types[i] == MLX4_PORT_TYPE_AUTO)
1204                         types[i] = mdev->caps.port_type[i+1];
1205         }
1206 
1207         if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
1208             !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
1209                 for (i = 1; i <= mdev->caps.num_ports; i++) {
1210                         if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
1211                                 mdev->caps.possible_type[i] = mdev->caps.port_type[i];
1212                                 err = -EOPNOTSUPP;
1213                         }
1214                 }
1215         }
1216         if (err) {
1217                 mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
1218                 goto out;
1219         }
1220 
1221         mlx4_do_sense_ports(mdev, new_types, types);
1222 
1223         err = mlx4_check_port_params(mdev, new_types);
1224         if (err)
1225                 goto out;
1226 
1227         /* We are about to apply the changes after the configuration
1228          * was verified, no need to remember the temporary types
1229          * any more */
1230         for (i = 0; i < mdev->caps.num_ports; i++)
1231                 priv->port[i + 1].tmp_type = 0;
1232 
1233         err = mlx4_change_port_types(mdev, new_types);
1234 
1235 out:
1236         mlx4_start_sense(mdev);
1237         mutex_unlock(&priv->port_mutex);
1238 
1239         return err;
1240 }
1241 
1242 static ssize_t set_port_type(struct device *dev,
1243                              struct device_attribute *attr,
1244                              const char *buf, size_t count)
1245 {
1246         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1247                                                    port_attr);
1248         struct mlx4_dev *mdev = info->dev;
1249         enum mlx4_port_type port_type;
1250         static DEFINE_MUTEX(set_port_type_mutex);
1251         int err;
1252 
1253         mutex_lock(&set_port_type_mutex);
1254 
1255         if (!strcmp(buf, "ib\n")) {
1256                 port_type = MLX4_PORT_TYPE_IB;
1257         } else if (!strcmp(buf, "eth\n")) {
1258                 port_type = MLX4_PORT_TYPE_ETH;
1259         } else if (!strcmp(buf, "auto\n")) {
1260                 port_type = MLX4_PORT_TYPE_AUTO;
1261         } else {
1262                 mlx4_err(mdev, "%s is not supported port type\n", buf);
1263                 err = -EINVAL;
1264                 goto err_out;
1265         }
1266 
1267         err = __set_port_type(info, port_type);
1268 
1269 err_out:
1270         mutex_unlock(&set_port_type_mutex);
1271 
1272         return err ? err : count;
1273 }
1274 
1275 enum ibta_mtu {
1276         IB_MTU_256  = 1,
1277         IB_MTU_512  = 2,
1278         IB_MTU_1024 = 3,
1279         IB_MTU_2048 = 4,
1280         IB_MTU_4096 = 5
1281 };
1282 
1283 static inline int int_to_ibta_mtu(int mtu)
1284 {
1285         switch (mtu) {
1286         case 256:  return IB_MTU_256;
1287         case 512:  return IB_MTU_512;
1288         case 1024: return IB_MTU_1024;
1289         case 2048: return IB_MTU_2048;
1290         case 4096: return IB_MTU_4096;
1291         default: return -1;
1292         }
1293 }
1294 
1295 static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
1296 {
1297         switch (mtu) {
1298         case IB_MTU_256:  return  256;
1299         case IB_MTU_512:  return  512;
1300         case IB_MTU_1024: return 1024;
1301         case IB_MTU_2048: return 2048;
1302         case IB_MTU_4096: return 4096;
1303         default: return -1;
1304         }
1305 }
1306 
1307 static ssize_t show_port_ib_mtu(struct device *dev,
1308                              struct device_attribute *attr,
1309                              char *buf)
1310 {
1311         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1312                                                    port_mtu_attr);
1313         struct mlx4_dev *mdev = info->dev;
1314 
1315         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
1316                 mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1317 
1318         sprintf(buf, "%d\n",
1319                         ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
1320         return strlen(buf);
1321 }
1322 
1323 static ssize_t set_port_ib_mtu(struct device *dev,
1324                              struct device_attribute *attr,
1325                              const char *buf, size_t count)
1326 {
1327         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1328                                                    port_mtu_attr);
1329         struct mlx4_dev *mdev = info->dev;
1330         struct mlx4_priv *priv = mlx4_priv(mdev);
1331         int err, port, mtu, ibta_mtu = -1;
1332 
1333         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
1334                 mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1335                 return -EINVAL;
1336         }
1337 
1338         err = kstrtoint(buf, 0, &mtu);
1339         if (!err)
1340                 ibta_mtu = int_to_ibta_mtu(mtu);
1341 
1342         if (err || ibta_mtu < 0) {
1343                 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
1344                 return -EINVAL;
1345         }
1346 
1347         mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
1348 
1349         mlx4_stop_sense(mdev);
1350         mutex_lock(&priv->port_mutex);
1351         mlx4_unregister_device(mdev);
1352         for (port = 1; port <= mdev->caps.num_ports; port++) {
1353                 mlx4_CLOSE_PORT(mdev, port);
1354                 err = mlx4_SET_PORT(mdev, port, -1);
1355                 if (err) {
1356                         mlx4_err(mdev, "Failed to set port %d, aborting\n",
1357                                  port);
1358                         goto err_set_port;
1359                 }
1360         }
1361         err = mlx4_register_device(mdev);
1362 err_set_port:
1363         mutex_unlock(&priv->port_mutex);
1364         mlx4_start_sense(mdev);
1365         return err ? err : count;
1366 }
1367 
1368 /* bond for multi-function device */
1369 #define MAX_MF_BOND_ALLOWED_SLAVES 63
1370 static int mlx4_mf_bond(struct mlx4_dev *dev)
1371 {
1372         int err = 0;
1373         int nvfs;
1374         struct mlx4_slaves_pport slaves_port1;
1375         struct mlx4_slaves_pport slaves_port2;
1376         DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
1377 
1378         slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
1379         slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
1380         bitmap_and(slaves_port_1_2,
1381                    slaves_port1.slaves, slaves_port2.slaves,
1382                    dev->persist->num_vfs + 1);
1383 
1384         /* only single port vfs are allowed */
1385         if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
1386                 mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
1387                 return -EINVAL;
1388         }
1389 
1390         /* number of virtual functions is number of total functions minus one
1391          * physical function for each port.
1392          */
1393         nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
1394                 bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
1395 
1396         /* limit on maximum allowed VFs */
1397         if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
1398                 mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
1399                           nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
1400                 return -EINVAL;
1401         }
1402 
1403         if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
1404                 mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
1405                 return -EINVAL;
1406         }
1407 
1408         err = mlx4_bond_mac_table(dev);
1409         if (err)
1410                 return err;
1411         err = mlx4_bond_vlan_table(dev);
1412         if (err)
1413                 goto err1;
1414         err = mlx4_bond_fs_rules(dev);
1415         if (err)
1416                 goto err2;
1417 
1418         return 0;
1419 err2:
1420         (void)mlx4_unbond_vlan_table(dev);
1421 err1:
1422         (void)mlx4_unbond_mac_table(dev);
1423         return err;
1424 }
1425 
1426 static int mlx4_mf_unbond(struct mlx4_dev *dev)
1427 {
1428         int ret, ret1;
1429 
1430         ret = mlx4_unbond_fs_rules(dev);
1431         if (ret)
1432                 mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
1433         ret1 = mlx4_unbond_mac_table(dev);
1434         if (ret1) {
1435                 mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
1436                 ret = ret1;
1437         }
1438         ret1 = mlx4_unbond_vlan_table(dev);
1439         if (ret1) {
1440                 mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
1441                 ret = ret1;
1442         }
1443         return ret;
1444 }
1445 
1446 int mlx4_bond(struct mlx4_dev *dev)
1447 {
1448         int ret = 0;
1449         struct mlx4_priv *priv = mlx4_priv(dev);
1450 
1451         mutex_lock(&priv->bond_mutex);
1452 
1453         if (!mlx4_is_bonded(dev)) {
1454                 ret = mlx4_do_bond(dev, true);
1455                 if (ret)
1456                         mlx4_err(dev, "Failed to bond device: %d\n", ret);
1457                 if (!ret && mlx4_is_master(dev)) {
1458                         ret = mlx4_mf_bond(dev);
1459                         if (ret) {
1460                                 mlx4_err(dev, "bond for multifunction failed\n");
1461                                 mlx4_do_bond(dev, false);
1462                         }
1463                 }
1464         }
1465 
1466         mutex_unlock(&priv->bond_mutex);
1467         if (!ret)
1468                 mlx4_dbg(dev, "Device is bonded\n");
1469 
1470         return ret;
1471 }
1472 EXPORT_SYMBOL_GPL(mlx4_bond);
1473 
1474 int mlx4_unbond(struct mlx4_dev *dev)
1475 {
1476         int ret = 0;
1477         struct mlx4_priv *priv = mlx4_priv(dev);
1478 
1479         mutex_lock(&priv->bond_mutex);
1480 
1481         if (mlx4_is_bonded(dev)) {
1482                 int ret2 = 0;
1483 
1484                 ret = mlx4_do_bond(dev, false);
1485                 if (ret)
1486                         mlx4_err(dev, "Failed to unbond device: %d\n", ret);
1487                 if (mlx4_is_master(dev))
1488                         ret2 = mlx4_mf_unbond(dev);
1489                 if (ret2) {
1490                         mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
1491                         ret = ret2;
1492                 }
1493         }
1494 
1495         mutex_unlock(&priv->bond_mutex);
1496         if (!ret)
1497                 mlx4_dbg(dev, "Device is unbonded\n");
1498 
1499         return ret;
1500 }
1501 EXPORT_SYMBOL_GPL(mlx4_unbond);
1502 
1503 
1504 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1505 {
1506         u8 port1 = v2p->port1;
1507         u8 port2 = v2p->port2;
1508         struct mlx4_priv *priv = mlx4_priv(dev);
1509         int err;
1510 
1511         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1512                 return -EOPNOTSUPP;
1513 
1514         mutex_lock(&priv->bond_mutex);
1515 
1516         /* zero means keep current mapping for this port */
1517         if (port1 == 0)
1518                 port1 = priv->v2p.port1;
1519         if (port2 == 0)
1520                 port2 = priv->v2p.port2;
1521 
1522         if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1523             (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1524             (port1 == 2 && port2 == 1)) {
1525                 /* besides boundary checks cross mapping makes
1526                  * no sense and therefore not allowed */
1527                 err = -EINVAL;
1528         } else if ((port1 == priv->v2p.port1) &&
1529                  (port2 == priv->v2p.port2)) {
1530                 err = 0;
1531         } else {
1532                 err = mlx4_virt2phy_port_map(dev, port1, port2);
1533                 if (!err) {
1534                         mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1535                                  port1, port2);
1536                         priv->v2p.port1 = port1;
1537                         priv->v2p.port2 = port2;
1538                 } else {
1539                         mlx4_err(dev, "Failed to change port mape: %d\n", err);
1540                 }
1541         }
1542 
1543         mutex_unlock(&priv->bond_mutex);
1544         return err;
1545 }
1546 EXPORT_SYMBOL_GPL(mlx4_port_map_set);
1547 
1548 static int mlx4_load_fw(struct mlx4_dev *dev)
1549 {
1550         struct mlx4_priv *priv = mlx4_priv(dev);
1551         int err;
1552 
1553         priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1554                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
1555         if (!priv->fw.fw_icm) {
1556                 mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1557                 return -ENOMEM;
1558         }
1559 
1560         err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1561         if (err) {
1562                 mlx4_err(dev, "MAP_FA command failed, aborting\n");
1563                 goto err_free;
1564         }
1565 
1566         err = mlx4_RUN_FW(dev);
1567         if (err) {
1568                 mlx4_err(dev, "RUN_FW command failed, aborting\n");
1569                 goto err_unmap_fa;
1570         }
1571 
1572         return 0;
1573 
1574 err_unmap_fa:
1575         mlx4_UNMAP_FA(dev);
1576 
1577 err_free:
1578         mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1579         return err;
1580 }
1581 
1582 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1583                                 int cmpt_entry_sz)
1584 {
1585         struct mlx4_priv *priv = mlx4_priv(dev);
1586         int err;
1587         int num_eqs;
1588 
1589         err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1590                                   cmpt_base +
1591                                   ((u64) (MLX4_CMPT_TYPE_QP *
1592                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1593                                   cmpt_entry_sz, dev->caps.num_qps,
1594                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1595                                   0, 0);
1596         if (err)
1597                 goto err;
1598 
1599         err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1600                                   cmpt_base +
1601                                   ((u64) (MLX4_CMPT_TYPE_SRQ *
1602                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1603                                   cmpt_entry_sz, dev->caps.num_srqs,
1604                                   dev->caps.reserved_srqs, 0, 0);
1605         if (err)
1606                 goto err_qp;
1607 
1608         err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1609                                   cmpt_base +
1610                                   ((u64) (MLX4_CMPT_TYPE_CQ *
1611                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1612                                   cmpt_entry_sz, dev->caps.num_cqs,
1613                                   dev->caps.reserved_cqs, 0, 0);
1614         if (err)
1615                 goto err_srq;
1616 
1617         num_eqs = dev->phys_caps.num_phys_eqs;
1618         err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1619                                   cmpt_base +
1620                                   ((u64) (MLX4_CMPT_TYPE_EQ *
1621                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1622                                   cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1623         if (err)
1624                 goto err_cq;
1625 
1626         return 0;
1627 
1628 err_cq:
1629         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1630 
1631 err_srq:
1632         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1633 
1634 err_qp:
1635         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1636 
1637 err:
1638         return err;
1639 }
1640 
1641 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1642                          struct mlx4_init_hca_param *init_hca, u64 icm_size)
1643 {
1644         struct mlx4_priv *priv = mlx4_priv(dev);
1645         u64 aux_pages;
1646         int num_eqs;
1647         int err;
1648 
1649         err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1650         if (err) {
1651                 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1652                 return err;
1653         }
1654 
1655         mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1656                  (unsigned long long) icm_size >> 10,
1657                  (unsigned long long) aux_pages << 2);
1658 
1659         priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1660                                           GFP_HIGHUSER | __GFP_NOWARN, 0);
1661         if (!priv->fw.aux_icm) {
1662                 mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1663                 return -ENOMEM;
1664         }
1665 
1666         err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1667         if (err) {
1668                 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1669                 goto err_free_aux;
1670         }
1671 
1672         err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1673         if (err) {
1674                 mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1675                 goto err_unmap_aux;
1676         }
1677 
1678 
1679         num_eqs = dev->phys_caps.num_phys_eqs;
1680         err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1681                                   init_hca->eqc_base, dev_cap->eqc_entry_sz,
1682                                   num_eqs, num_eqs, 0, 0);
1683         if (err) {
1684                 mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1685                 goto err_unmap_cmpt;
1686         }
1687 
1688         /*
1689          * Reserved MTT entries must be aligned up to a cacheline
1690          * boundary, since the FW will write to them, while the driver
1691          * writes to all other MTT entries. (The variable
1692          * dev->caps.mtt_entry_sz below is really the MTT segment
1693          * size, not the raw entry size)
1694          */
1695         dev->caps.reserved_mtts =
1696                 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1697                       dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
1698 
1699         err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1700                                   init_hca->mtt_base,
1701                                   dev->caps.mtt_entry_sz,
1702                                   dev->caps.num_mtts,
1703                                   dev->caps.reserved_mtts, 1, 0);
1704         if (err) {
1705                 mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1706                 goto err_unmap_eq;
1707         }
1708 
1709         err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1710                                   init_hca->dmpt_base,
1711                                   dev_cap->dmpt_entry_sz,
1712                                   dev->caps.num_mpts,
1713                                   dev->caps.reserved_mrws, 1, 1);
1714         if (err) {
1715                 mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1716                 goto err_unmap_mtt;
1717         }
1718 
1719         err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1720                                   init_hca->qpc_base,
1721                                   dev_cap->qpc_entry_sz,
1722                                   dev->caps.num_qps,
1723                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1724                                   0, 0);
1725         if (err) {
1726                 mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1727                 goto err_unmap_dmpt;
1728         }
1729 
1730         err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1731                                   init_hca->auxc_base,
1732                                   dev_cap->aux_entry_sz,
1733                                   dev->caps.num_qps,
1734                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1735                                   0, 0);
1736         if (err) {
1737                 mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1738                 goto err_unmap_qp;
1739         }
1740 
1741         err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1742                                   init_hca->altc_base,
1743                                   dev_cap->altc_entry_sz,
1744                                   dev->caps.num_qps,
1745                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1746                                   0, 0);
1747         if (err) {
1748                 mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1749                 goto err_unmap_auxc;
1750         }
1751 
1752         err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1753                                   init_hca->rdmarc_base,
1754                                   dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1755                                   dev->caps.num_qps,
1756                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1757                                   0, 0);
1758         if (err) {
1759                 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1760                 goto err_unmap_altc;
1761         }
1762 
1763         err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1764                                   init_hca->cqc_base,
1765                                   dev_cap->cqc_entry_sz,
1766                                   dev->caps.num_cqs,
1767                                   dev->caps.reserved_cqs, 0, 0);
1768         if (err) {
1769                 mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1770                 goto err_unmap_rdmarc;
1771         }
1772 
1773         err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1774                                   init_hca->srqc_base,
1775                                   dev_cap->srq_entry_sz,
1776                                   dev->caps.num_srqs,
1777                                   dev->caps.reserved_srqs, 0, 0);
1778         if (err) {
1779                 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1780                 goto err_unmap_cq;
1781         }
1782 
1783         /*
1784          * For flow steering device managed mode it is required to use
1785          * mlx4_init_icm_table. For B0 steering mode it's not strictly
1786          * required, but for simplicity just map the whole multicast
1787          * group table now.  The table isn't very big and it's a lot
1788          * easier than trying to track ref counts.
1789          */
1790         err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1791                                   init_hca->mc_base,
1792                                   mlx4_get_mgm_entry_size(dev),
1793                                   dev->caps.num_mgms + dev->caps.num_amgms,
1794                                   dev->caps.num_mgms + dev->caps.num_amgms,
1795                                   0, 0);
1796         if (err) {
1797                 mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1798                 goto err_unmap_srq;
1799         }
1800 
1801         return 0;
1802 
1803 err_unmap_srq:
1804         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1805 
1806 err_unmap_cq:
1807         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1808 
1809 err_unmap_rdmarc:
1810         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1811 
1812 err_unmap_altc:
1813         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1814 
1815 err_unmap_auxc:
1816         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1817 
1818 err_unmap_qp:
1819         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1820 
1821 err_unmap_dmpt:
1822         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1823 
1824 err_unmap_mtt:
1825         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1826 
1827 err_unmap_eq:
1828         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1829 
1830 err_unmap_cmpt:
1831         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1832         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1833         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1834         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1835 
1836 err_unmap_aux:
1837         mlx4_UNMAP_ICM_AUX(dev);
1838 
1839 err_free_aux:
1840         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1841 
1842         return err;
1843 }
1844 
1845 static void mlx4_free_icms(struct mlx4_dev *dev)
1846 {
1847         struct mlx4_priv *priv = mlx4_priv(dev);
1848 
1849         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1850         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1851         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1852         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1853         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1854         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1855         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1856         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1857         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1858         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1859         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1860         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1861         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1862         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1863 
1864         mlx4_UNMAP_ICM_AUX(dev);
1865         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1866 }
1867 
1868 static void mlx4_slave_exit(struct mlx4_dev *dev)
1869 {
1870         struct mlx4_priv *priv = mlx4_priv(dev);
1871 
1872         mutex_lock(&priv->cmd.slave_cmd_mutex);
1873         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1874                           MLX4_COMM_TIME))
1875                 mlx4_warn(dev, "Failed to close slave function\n");
1876         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1877 }
1878 
1879 static int map_bf_area(struct mlx4_dev *dev)
1880 {
1881         struct mlx4_priv *priv = mlx4_priv(dev);
1882         resource_size_t bf_start;
1883         resource_size_t bf_len;
1884         int err = 0;
1885 
1886         if (!dev->caps.bf_reg_size)
1887                 return -ENXIO;
1888 
1889         bf_start = pci_resource_start(dev->persist->pdev, 2) +
1890                         (dev->caps.num_uars << PAGE_SHIFT);
1891         bf_len = pci_resource_len(dev->persist->pdev, 2) -
1892                         (dev->caps.num_uars << PAGE_SHIFT);
1893         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1894         if (!priv->bf_mapping)
1895                 err = -ENOMEM;
1896 
1897         return err;
1898 }
1899 
1900 static void unmap_bf_area(struct mlx4_dev *dev)
1901 {
1902         if (mlx4_priv(dev)->bf_mapping)
1903                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1904 }
1905 
1906 u64 mlx4_read_clock(struct mlx4_dev *dev)
1907 {
1908         u32 clockhi, clocklo, clockhi1;
1909         u64 cycles;
1910         int i;
1911         struct mlx4_priv *priv = mlx4_priv(dev);
1912 
1913         for (i = 0; i < 10; i++) {
1914                 clockhi = swab32(readl(priv->clock_mapping));
1915                 clocklo = swab32(readl(priv->clock_mapping + 4));
1916                 clockhi1 = swab32(readl(priv->clock_mapping));
1917                 if (clockhi == clockhi1)
1918                         break;
1919         }
1920 
1921         cycles = (u64) clockhi << 32 | (u64) clocklo;
1922 
1923         return cycles;
1924 }
1925 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1926 
1927 
1928 static int map_internal_clock(struct mlx4_dev *dev)
1929 {
1930         struct mlx4_priv *priv = mlx4_priv(dev);
1931 
1932         priv->clock_mapping =
1933                 ioremap(pci_resource_start(dev->persist->pdev,
1934                                            priv->fw.clock_bar) +
1935                         priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1936 
1937         if (!priv->clock_mapping)
1938                 return -ENOMEM;
1939 
1940         return 0;
1941 }
1942 
1943 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1944                                    struct mlx4_clock_params *params)
1945 {
1946         struct mlx4_priv *priv = mlx4_priv(dev);
1947 
1948         if (mlx4_is_slave(dev))
1949                 return -EOPNOTSUPP;
1950 
1951         if (!params)
1952                 return -EINVAL;
1953 
1954         params->bar = priv->fw.clock_bar;
1955         params->offset = priv->fw.clock_offset;
1956         params->size = MLX4_CLOCK_SIZE;
1957 
1958         return 0;
1959 }
1960 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1961 
1962 static void unmap_internal_clock(struct mlx4_dev *dev)
1963 {
1964         struct mlx4_priv *priv = mlx4_priv(dev);
1965 
1966         if (priv->clock_mapping)
1967                 iounmap(priv->clock_mapping);
1968 }
1969 
1970 static void mlx4_close_hca(struct mlx4_dev *dev)
1971 {
1972         unmap_internal_clock(dev);
1973         unmap_bf_area(dev);
1974         if (mlx4_is_slave(dev))
1975                 mlx4_slave_exit(dev);
1976         else {
1977                 mlx4_CLOSE_HCA(dev, 0);
1978                 mlx4_free_icms(dev);
1979         }
1980 }
1981 
1982 static void mlx4_close_fw(struct mlx4_dev *dev)
1983 {
1984         if (!mlx4_is_slave(dev)) {
1985                 mlx4_UNMAP_FA(dev);
1986                 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1987         }
1988 }
1989 
1990 static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1991 {
1992 #define COMM_CHAN_OFFLINE_OFFSET 0x09
1993 
1994         u32 comm_flags;
1995         u32 offline_bit;
1996         unsigned long end;
1997         struct mlx4_priv *priv = mlx4_priv(dev);
1998 
1999         end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
2000         while (time_before(jiffies, end)) {
2001                 comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
2002                                           MLX4_COMM_CHAN_FLAGS));
2003                 offline_bit = (comm_flags &
2004                                (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
2005                 if (!offline_bit)
2006                         return 0;
2007 
2008                 /* If device removal has been requested,
2009                  * do not continue retrying.
2010                  */
2011                 if (dev->persist->interface_state &
2012                     MLX4_INTERFACE_STATE_NOWAIT)
2013                         break;
2014 
2015                 /* There are cases as part of AER/Reset flow that PF needs
2016                  * around 100 msec to load. We therefore sleep for 100 msec
2017                  * to allow other tasks to make use of that CPU during this
2018                  * time interval.
2019                  */
2020                 msleep(100);
2021         }
2022         mlx4_err(dev, "Communication channel is offline.\n");
2023         return -EIO;
2024 }
2025 
2026 static void mlx4_reset_vf_support(struct mlx4_dev *dev)
2027 {
2028 #define COMM_CHAN_RST_OFFSET 0x1e
2029 
2030         struct mlx4_priv *priv = mlx4_priv(dev);
2031         u32 comm_rst;
2032         u32 comm_caps;
2033 
2034         comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
2035                                  MLX4_COMM_CHAN_CAPS));
2036         comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
2037 
2038         if (comm_rst)
2039                 dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
2040 }
2041 
2042 static int mlx4_init_slave(struct mlx4_dev *dev)
2043 {
2044         struct mlx4_priv *priv = mlx4_priv(dev);
2045         u64 dma = (u64) priv->mfunc.vhcr_dma;
2046         int ret_from_reset = 0;
2047         u32 slave_read;
2048         u32 cmd_channel_ver;
2049 
2050         if (atomic_read(&pf_loading)) {
2051                 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
2052                 return -EPROBE_DEFER;
2053         }
2054 
2055         mutex_lock(&priv->cmd.slave_cmd_mutex);
2056         priv->cmd.max_cmds = 1;
2057         if (mlx4_comm_check_offline(dev)) {
2058                 mlx4_err(dev, "PF is not responsive, skipping initialization\n");
2059                 goto err_offline;
2060         }
2061 
2062         mlx4_reset_vf_support(dev);
2063         mlx4_warn(dev, "Sending reset\n");
2064         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
2065                                        MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
2066         /* if we are in the middle of flr the slave will try
2067          * NUM_OF_RESET_RETRIES times before leaving.*/
2068         if (ret_from_reset) {
2069                 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
2070                         mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
2071                         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2072                         return -EPROBE_DEFER;
2073                 } else
2074                         goto err;
2075         }
2076 
2077         /* check the driver version - the slave I/F revision
2078          * must match the master's */
2079         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
2080         cmd_channel_ver = mlx4_comm_get_version();
2081 
2082         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
2083                 MLX4_COMM_GET_IF_REV(slave_read)) {
2084                 mlx4_err(dev, "slave driver version is not supported by the master\n");
2085                 goto err;
2086         }
2087 
2088         mlx4_warn(dev, "Sending vhcr0\n");
2089         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
2090                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2091                 goto err;
2092         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
2093                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2094                 goto err;
2095         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
2096                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2097                 goto err;
2098         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
2099                           MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2100                 goto err;
2101 
2102         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2103         return 0;
2104 
2105 err:
2106         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
2107 err_offline:
2108         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2109         return -EIO;
2110 }
2111 
2112 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
2113 {
2114         int i;
2115 
2116         for (i = 1; i <= dev->caps.num_ports; i++) {
2117                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
2118                         dev->caps.gid_table_len[i] =
2119                                 mlx4_get_slave_num_gids(dev, 0, i);
2120                 else
2121                         dev->caps.gid_table_len[i] = 1;
2122                 dev->caps.pkey_table_len[i] =
2123                         dev->phys_caps.pkey_phys_table_len[i] - 1;
2124         }
2125 }
2126 
2127 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
2128 {
2129         int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
2130 
2131         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
2132               i++) {
2133                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
2134                         break;
2135         }
2136 
2137         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
2138 }
2139 
2140 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
2141 {
2142         switch (dmfs_high_steer_mode) {
2143         case MLX4_STEERING_DMFS_A0_DEFAULT:
2144                 return "default performance";
2145 
2146         case MLX4_STEERING_DMFS_A0_DYNAMIC:
2147                 return "dynamic hybrid mode";
2148 
2149         case MLX4_STEERING_DMFS_A0_STATIC:
2150                 return "performance optimized for limited rule configuration (static)";
2151 
2152         case MLX4_STEERING_DMFS_A0_DISABLE:
2153                 return "disabled performance optimized steering";
2154 
2155         case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
2156                 return "performance optimized steering not supported";
2157 
2158         default:
2159                 return "Unrecognized mode";
2160         }
2161 }
2162 
2163 #define MLX4_DMFS_A0_STEERING                   (1UL << 2)
2164 
2165 static void choose_steering_mode(struct mlx4_dev *dev,
2166                                  struct mlx4_dev_cap *dev_cap)
2167 {
2168         if (mlx4_log_num_mgm_entry_size <= 0) {
2169                 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
2170                         if (dev->caps.dmfs_high_steer_mode ==
2171                             MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2172                                 mlx4_err(dev, "DMFS high rate mode not supported\n");
2173                         else
2174                                 dev->caps.dmfs_high_steer_mode =
2175                                         MLX4_STEERING_DMFS_A0_STATIC;
2176                 }
2177         }
2178 
2179         if (mlx4_log_num_mgm_entry_size <= 0 &&
2180             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
2181             (!mlx4_is_mfunc(dev) ||
2182              (dev_cap->fs_max_num_qp_per_entry >=
2183              (dev->persist->num_vfs + 1))) &&
2184             choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
2185                 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
2186                 dev->oper_log_mgm_entry_size =
2187                         choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
2188                 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
2189                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
2190                 dev->caps.fs_log_max_ucast_qp_range_size =
2191                         dev_cap->fs_log_max_ucast_qp_range_size;
2192         } else {
2193                 if (dev->caps.dmfs_high_steer_mode !=
2194                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2195                         dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
2196                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
2197                     dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2198                         dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
2199                 else {
2200                         dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
2201 
2202                         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
2203                             dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2204                                 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
2205                 }
2206                 dev->oper_log_mgm_entry_size =
2207                         mlx4_log_num_mgm_entry_size > 0 ?
2208                         mlx4_log_num_mgm_entry_size :
2209                         MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
2210                 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
2211         }
2212         mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
2213                  mlx4_steering_mode_str(dev->caps.steering_mode),
2214                  dev->oper_log_mgm_entry_size,
2215                  mlx4_log_num_mgm_entry_size);
2216 }
2217 
2218 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
2219                                        struct mlx4_dev_cap *dev_cap)
2220 {
2221         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2222             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
2223                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
2224         else
2225                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
2226 
2227         mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
2228                  == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
2229 }
2230 
2231 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
2232 {
2233         int i;
2234         struct mlx4_port_cap port_cap;
2235 
2236         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2237                 return -EINVAL;
2238 
2239         for (i = 1; i <= dev->caps.num_ports; i++) {
2240                 if (mlx4_dev_port(dev, i, &port_cap)) {
2241                         mlx4_err(dev,
2242                                  "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n");
2243                 } else if ((dev->caps.dmfs_high_steer_mode !=
2244                             MLX4_STEERING_DMFS_A0_DEFAULT) &&
2245                            (port_cap.dmfs_optimized_state ==
2246                             !!(dev->caps.dmfs_high_steer_mode ==
2247                             MLX4_STEERING_DMFS_A0_DISABLE))) {
2248                         mlx4_err(dev,
2249                                  "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n",
2250                                  dmfs_high_rate_steering_mode_str(
2251                                         dev->caps.dmfs_high_steer_mode),
2252                                  (port_cap.dmfs_optimized_state ?
2253                                         "enabled" : "disabled"));
2254                 }
2255         }
2256 
2257         return 0;
2258 }
2259 
2260 static int mlx4_init_fw(struct mlx4_dev *dev)
2261 {
2262         struct mlx4_mod_stat_cfg   mlx4_cfg;
2263         int err = 0;
2264 
2265         if (!mlx4_is_slave(dev)) {
2266                 err = mlx4_QUERY_FW(dev);
2267                 if (err) {
2268                         if (err == -EACCES)
2269                                 mlx4_info(dev, "non-primary physical function, skipping\n");
2270                         else
2271                                 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2272                         return err;
2273                 }
2274 
2275                 err = mlx4_load_fw(dev);
2276                 if (err) {
2277                         mlx4_err(dev, "Failed to start FW, aborting\n");
2278                         return err;
2279                 }
2280 
2281                 mlx4_cfg.log_pg_sz_m = 1;
2282                 mlx4_cfg.log_pg_sz = 0;
2283                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2284                 if (err)
2285                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2286         }
2287 
2288         return err;
2289 }
2290 
2291 static int mlx4_init_hca(struct mlx4_dev *dev)
2292 {
2293         struct mlx4_priv          *priv = mlx4_priv(dev);
2294         struct mlx4_init_hca_param *init_hca = NULL;
2295         struct mlx4_dev_cap       *dev_cap = NULL;
2296         struct mlx4_adapter        adapter;
2297         struct mlx4_profile        profile;
2298         u64 icm_size;
2299         struct mlx4_config_dev_params params;
2300         int err;
2301 
2302         if (!mlx4_is_slave(dev)) {
2303                 dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
2304                 init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL);
2305 
2306                 if (!dev_cap || !init_hca) {
2307                         err = -ENOMEM;
2308                         goto out_free;
2309                 }
2310 
2311                 err = mlx4_dev_cap(dev, dev_cap);
2312                 if (err) {
2313                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2314                         goto out_free;
2315                 }
2316 
2317                 choose_steering_mode(dev, dev_cap);
2318                 choose_tunnel_offload_mode(dev, dev_cap);
2319 
2320                 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2321                     mlx4_is_master(dev))
2322                         dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2323 
2324                 err = mlx4_get_phys_port_id(dev);
2325                 if (err)
2326                         mlx4_err(dev, "Fail to get physical port id\n");
2327 
2328                 if (mlx4_is_master(dev))
2329                         mlx4_parav_master_pf_caps(dev);
2330 
2331                 if (mlx4_low_memory_profile()) {
2332                         mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2333                         profile = low_mem_profile;
2334                 } else {
2335                         profile = default_profile;
2336                 }
2337                 if (dev->caps.steering_mode ==
2338                     MLX4_STEERING_MODE_DEVICE_MANAGED)
2339                         profile.num_mcg = MLX4_FS_NUM_MCG;
2340 
2341                 icm_size = mlx4_make_profile(dev, &profile, dev_cap,
2342                                              init_hca);
2343                 if ((long long) icm_size < 0) {
2344                         err = icm_size;
2345                         goto out_free;
2346                 }
2347 
2348                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2349 
2350                 if (enable_4k_uar || !dev->persist->num_vfs) {
2351                         init_hca->log_uar_sz = ilog2(dev->caps.num_uars) +
2352                                                     PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
2353                         init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
2354                 } else {
2355                         init_hca->log_uar_sz = ilog2(dev->caps.num_uars);
2356                         init_hca->uar_page_sz = PAGE_SHIFT - 12;
2357                 }
2358 
2359                 init_hca->mw_enabled = 0;
2360                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2361                     dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2362                         init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2363 
2364                 err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size);
2365                 if (err)
2366                         goto out_free;
2367 
2368                 err = mlx4_INIT_HCA(dev, init_hca);
2369                 if (err) {
2370                         mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2371                         goto err_free_icm;
2372                 }
2373 
2374                 if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2375                         err = mlx4_query_func(dev, dev_cap);
2376                         if (err < 0) {
2377                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2378                                 goto err_close;
2379                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2380                                 dev->caps.num_eqs = dev_cap->max_eqs;
2381                                 dev->caps.reserved_eqs = dev_cap->reserved_eqs;
2382                                 dev->caps.reserved_uars = dev_cap->reserved_uars;
2383                         }
2384                 }
2385 
2386                 /*
2387                  * If TS is supported by FW
2388                  * read HCA frequency by QUERY_HCA command
2389                  */
2390                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2391                         err = mlx4_QUERY_HCA(dev, init_hca);
2392                         if (err) {
2393                                 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
2394                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2395                         } else {
2396                                 dev->caps.hca_core_clock =
2397                                         init_hca->hca_core_clock;
2398                         }
2399 
2400                         /* In case we got HCA frequency 0 - disable timestamping
2401                          * to avoid dividing by zero
2402                          */
2403                         if (!dev->caps.hca_core_clock) {
2404                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2405                                 mlx4_err(dev,
2406                                          "HCA frequency is 0 - timestamping is not supported\n");
2407                         } else if (map_internal_clock(dev)) {
2408                                 /*
2409                                  * Map internal clock,
2410                                  * in case of failure disable timestamping
2411                                  */
2412                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2413                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2414                         }
2415                 }
2416 
2417                 if (dev->caps.dmfs_high_steer_mode !=
2418                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2419                         if (mlx4_validate_optimized_steering(dev))
2420                                 mlx4_warn(dev, "Optimized steering validation failed\n");
2421 
2422                         if (dev->caps.dmfs_high_steer_mode ==
2423                             MLX4_STEERING_DMFS_A0_DISABLE) {
2424                                 dev->caps.dmfs_high_rate_qpn_base =
2425                                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2426                                 dev->caps.dmfs_high_rate_qpn_range =
2427                                         MLX4_A0_STEERING_TABLE_SIZE;
2428                         }
2429 
2430                         mlx4_info(dev, "DMFS high rate steer mode is: %s\n",
2431                                   dmfs_high_rate_steering_mode_str(
2432                                         dev->caps.dmfs_high_steer_mode));
2433                 }
2434         } else {
2435                 err = mlx4_init_slave(dev);
2436                 if (err) {
2437                         if (err != -EPROBE_DEFER)
2438                                 mlx4_err(dev, "Failed to initialize slave\n");
2439                         return err;
2440                 }
2441 
2442                 err = mlx4_slave_cap(dev);
2443                 if (err) {
2444                         mlx4_err(dev, "Failed to obtain slave caps\n");
2445                         goto err_close;
2446                 }
2447         }
2448 
2449         if (map_bf_area(dev))
2450                 mlx4_dbg(dev, "Failed to map blue flame area\n");
2451 
2452         /*Only the master set the ports, all the rest got it from it.*/
2453         if (!mlx4_is_slave(dev))
2454                 mlx4_set_port_mask(dev);
2455 
2456         err = mlx4_QUERY_ADAPTER(dev, &adapter);
2457         if (err) {
2458                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2459                 goto unmap_bf;
2460         }
2461 
2462         /* Query CONFIG_DEV parameters */
2463         err = mlx4_config_dev_retrieval(dev, &params);
2464         if (err && err != -EOPNOTSUPP) {
2465                 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2466         } else if (!err) {
2467                 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2468                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2469         }
2470         priv->eq_table.inta_pin = adapter.inta_pin;
2471         memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
2472 
2473         err = 0;
2474         goto out_free;
2475 
2476 unmap_bf:
2477         unmap_internal_clock(dev);
2478         unmap_bf_area(dev);
2479 
2480         if (mlx4_is_slave(dev))
2481                 mlx4_slave_destroy_special_qp_cap(dev);
2482 
2483 err_close:
2484         if (mlx4_is_slave(dev))
2485                 mlx4_slave_exit(dev);
2486         else
2487                 mlx4_CLOSE_HCA(dev, 0);
2488 
2489 err_free_icm:
2490         if (!mlx4_is_slave(dev))
2491                 mlx4_free_icms(dev);
2492 
2493 out_free:
2494         kfree(dev_cap);
2495         kfree(init_hca);
2496 
2497         return err;
2498 }
2499 
2500 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2501 {
2502         struct mlx4_priv *priv = mlx4_priv(dev);
2503         int nent_pow2;
2504 
2505         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2506                 return -ENOENT;
2507 
2508         if (!dev->caps.max_counters)
2509                 return -ENOSPC;
2510 
2511         nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2512         /* reserve last counter index for sink counter */
2513         return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2514                                 nent_pow2 - 1, 0,
2515                                 nent_pow2 - dev->caps.max_counters + 1);
2516 }
2517 
2518 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2519 {
2520         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2521                 return;
2522 
2523         if (!dev->caps.max_counters)
2524                 return;
2525 
2526         mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2527 }
2528 
2529 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2530 {
2531         struct mlx4_priv *priv = mlx4_priv(dev);
2532         int port;
2533 
2534         for (port = 0; port < dev->caps.num_ports; port++)
2535                 if (priv->def_counter[port] != -1)
2536                         mlx4_counter_free(dev,  priv->def_counter[port]);
2537 }
2538 
2539 static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2540 {
2541         struct mlx4_priv *priv = mlx4_priv(dev);
2542         int port, err = 0;
2543         u32 idx;
2544 
2545         for (port = 0; port < dev->caps.num_ports; port++)
2546                 priv->def_counter[port] = -1;
2547 
2548         for (port = 0; port < dev->caps.num_ports; port++) {
2549                 err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
2550 
2551                 if (!err || err == -ENOSPC) {
2552                         priv->def_counter[port] = idx;
2553                         err = 0;
2554                 } else if (err == -ENOENT) {
2555                         err = 0;
2556                         continue;
2557                 } else if (mlx4_is_slave(dev) && err == -EINVAL) {
2558                         priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2559                         mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2560                                   MLX4_SINK_COUNTER_INDEX(dev));
2561                         err = 0;
2562                 } else {
2563                         mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2564                                  __func__, port + 1, err);
2565                         mlx4_cleanup_default_counters(dev);
2566                         return err;
2567                 }
2568 
2569                 mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2570                          __func__, priv->def_counter[port], port + 1);
2571         }
2572 
2573         return err;
2574 }
2575 
2576 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2577 {
2578         struct mlx4_priv *priv = mlx4_priv(dev);
2579 
2580         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2581                 return -ENOENT;
2582 
2583         *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2584         if (*idx == -1) {
2585                 *idx = MLX4_SINK_COUNTER_INDEX(dev);
2586                 return -ENOSPC;
2587         }
2588 
2589         return 0;
2590 }
2591 
2592 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
2593 {
2594         u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30);
2595         u64 out_param;
2596         int err;
2597 
2598         if (mlx4_is_mfunc(dev)) {
2599                 err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
2600                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2601                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2602                 if (!err)
2603                         *idx = get_param_l(&out_param);
2604                 if (WARN_ON(err == -ENOSPC))
2605                         err = -EINVAL;
2606                 return err;
2607         }
2608         return __mlx4_counter_alloc(dev, idx);
2609 }
2610 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2611 
2612 static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2613                                 u8 counter_index)
2614 {
2615         struct mlx4_cmd_mailbox *if_stat_mailbox;
2616         int err;
2617         u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2618 
2619         if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2620         if (IS_ERR(if_stat_mailbox))
2621                 return PTR_ERR(if_stat_mailbox);
2622 
2623         err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2624                            MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2625                            MLX4_CMD_NATIVE);
2626 
2627         mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2628         return err;
2629 }
2630 
2631 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2632 {
2633         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2634                 return;
2635 
2636         if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2637                 return;
2638 
2639         __mlx4_clear_if_stat(dev, idx);
2640 
2641         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2642         return;
2643 }
2644 
2645 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2646 {
2647         u64 in_param = 0;
2648 
2649         if (mlx4_is_mfunc(dev)) {
2650                 set_param_l(&in_param, idx);
2651                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2652                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2653                          MLX4_CMD_WRAPPED);
2654                 return;
2655         }
2656         __mlx4_counter_free(dev, idx);
2657 }
2658 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2659 
2660 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2661 {
2662         struct mlx4_priv *priv = mlx4_priv(dev);
2663 
2664         return priv->def_counter[port - 1];
2665 }
2666 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2667 
2668 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2669 {
2670         struct mlx4_priv *priv = mlx4_priv(dev);
2671 
2672         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2673 }
2674 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2675 
2676 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2677 {
2678         struct mlx4_priv *priv = mlx4_priv(dev);
2679 
2680         return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2681 }
2682 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2683 
2684 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2685 {
2686         struct mlx4_priv *priv = mlx4_priv(dev);
2687         __be64 guid;
2688 
2689         /* hw GUID */
2690         if (entry == 0)
2691                 return;
2692 
2693         get_random_bytes((char *)&guid, sizeof(guid));
2694         guid &= ~(cpu_to_be64(1ULL << 56));
2695         guid |= cpu_to_be64(1ULL << 57);
2696         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2697 }
2698 
2699 static int mlx4_setup_hca(struct mlx4_dev *dev)
2700 {
2701         struct mlx4_priv *priv = mlx4_priv(dev);
2702         int err;
2703         int port;
2704         __be32 ib_port_default_caps;
2705 
2706         err = mlx4_init_uar_table(dev);
2707         if (err) {
2708                 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2709                 return err;
2710         }
2711 
2712         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2713         if (err) {
2714                 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2715                 goto err_uar_table_free;
2716         }
2717 
2718         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2719         if (!priv->kar) {
2720                 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2721                 err = -ENOMEM;
2722                 goto err_uar_free;
2723         }
2724 
2725         err = mlx4_init_pd_table(dev);
2726         if (err) {
2727                 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2728                 goto err_kar_unmap;
2729         }
2730 
2731         err = mlx4_init_xrcd_table(dev);
2732         if (err) {
2733                 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2734                 goto err_pd_table_free;
2735         }
2736 
2737         err = mlx4_init_mr_table(dev);
2738         if (err) {
2739                 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2740                 goto err_xrcd_table_free;
2741         }
2742 
2743         if (!mlx4_is_slave(dev)) {
2744                 err = mlx4_init_mcg_table(dev);
2745                 if (err) {
2746                         mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2747                         goto err_mr_table_free;
2748                 }
2749                 err = mlx4_config_mad_demux(dev);
2750                 if (err) {
2751                         mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2752                         goto err_mcg_table_free;
2753                 }
2754         }
2755 
2756         err = mlx4_init_eq_table(dev);
2757         if (err) {
2758                 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2759                 goto err_mcg_table_free;
2760         }
2761 
2762         err = mlx4_cmd_use_events(dev);
2763         if (err) {
2764                 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2765                 goto err_eq_table_free;
2766         }
2767 
2768         err = mlx4_NOP(dev);
2769         if (err) {
2770                 if (dev->flags & MLX4_FLAG_MSI_X) {
2771                         mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
2772                                   priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2773                         mlx4_warn(dev, "Trying again without MSI-X\n");
2774                 } else {
2775                         mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2776                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2777                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2778                 }
2779 
2780                 goto err_cmd_poll;
2781         }
2782 
2783         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2784 
2785         err = mlx4_init_cq_table(dev);
2786         if (err) {
2787                 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2788                 goto err_cmd_poll;
2789         }
2790 
2791         err = mlx4_init_srq_table(dev);
2792         if (err) {
2793                 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2794                 goto err_cq_table_free;
2795         }
2796 
2797         err = mlx4_init_qp_table(dev);
2798         if (err) {
2799                 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2800                 goto err_srq_table_free;
2801         }
2802 
2803         if (!mlx4_is_slave(dev)) {
2804                 err = mlx4_init_counters_table(dev);
2805                 if (err && err != -ENOENT) {
2806                         mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2807                         goto err_qp_table_free;
2808                 }
2809         }
2810 
2811         err = mlx4_allocate_default_counters(dev);
2812         if (err) {
2813                 mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2814                 goto err_counters_table_free;
2815         }
2816 
2817         if (!mlx4_is_slave(dev)) {
2818                 for (port = 1; port <= dev->caps.num_ports; port++) {
2819                         ib_port_default_caps = 0;
2820                         err = mlx4_get_port_ib_caps(dev, port,
2821                                                     &ib_port_default_caps);
2822                         if (err)
2823                                 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2824                                           port, err);
2825                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2826 
2827                         /* initialize per-slave default ib port capabilities */
2828                         if (mlx4_is_master(dev)) {
2829                                 int i;
2830                                 for (i = 0; i < dev->num_slaves; i++) {
2831                                         if (i == mlx4_master_func_num(dev))
2832                                                 continue;
2833                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2834                                                 ib_port_default_caps;
2835                                 }
2836                         }
2837 
2838                         if (mlx4_is_mfunc(dev))
2839                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2840                         else
2841                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2842 
2843                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2844                                             dev->caps.pkey_table_len[port] : -1);
2845                         if (err) {
2846                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
2847                                          port);
2848                                 goto err_default_countes_free;
2849                         }
2850                 }
2851         }
2852 
2853         return 0;
2854 
2855 err_default_countes_free:
2856         mlx4_cleanup_default_counters(dev);
2857 
2858 err_counters_table_free:
2859         if (!mlx4_is_slave(dev))
2860                 mlx4_cleanup_counters_table(dev);
2861 
2862 err_qp_table_free:
2863         mlx4_cleanup_qp_table(dev);
2864 
2865 err_srq_table_free:
2866         mlx4_cleanup_srq_table(dev);
2867 
2868 err_cq_table_free:
2869         mlx4_cleanup_cq_table(dev);
2870 
2871 err_cmd_poll:
2872         mlx4_cmd_use_polling(dev);
2873 
2874 err_eq_table_free:
2875         mlx4_cleanup_eq_table(dev);
2876 
2877 err_mcg_table_free:
2878         if (!mlx4_is_slave(dev))
2879                 mlx4_cleanup_mcg_table(dev);
2880 
2881 err_mr_table_free:
2882         mlx4_cleanup_mr_table(dev);
2883 
2884 err_xrcd_table_free:
2885         mlx4_cleanup_xrcd_table(dev);
2886 
2887 err_pd_table_free:
2888         mlx4_cleanup_pd_table(dev);
2889 
2890 err_kar_unmap:
2891         iounmap(priv->kar);
2892 
2893 err_uar_free:
2894         mlx4_uar_free(dev, &priv->driver_uar);
2895 
2896 err_uar_table_free:
2897         mlx4_cleanup_uar_table(dev);
2898         return err;
2899 }
2900 
2901 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2902 {
2903         int requested_cpu = 0;
2904         struct mlx4_priv *priv = mlx4_priv(dev);
2905         struct mlx4_eq *eq;
2906         int off = 0;
2907         int i;
2908 
2909         if (eqn > dev->caps.num_comp_vectors)
2910                 return -EINVAL;
2911 
2912         for (i = 1; i < port; i++)
2913                 off += mlx4_get_eqs_per_port(dev, i);
2914 
2915         requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2916 
2917         /* Meaning EQs are shared, and this call comes from the second port */
2918         if (requested_cpu < 0)
2919                 return 0;
2920 
2921         eq = &priv->eq_table.eq[eqn];
2922 
2923         if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2924                 return -ENOMEM;
2925 
2926         cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2927 
2928         return 0;
2929 }
2930 
2931 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2932 {
2933         struct mlx4_priv *priv = mlx4_priv(dev);
2934         struct msix_entry *entries;
2935         int i;
2936         int port = 0;
2937 
2938         if (msi_x) {
2939                 int nreq = min3(dev->caps.num_ports *
2940                                 (int)num_online_cpus() + 1,
2941                                 dev->caps.num_eqs - dev->caps.reserved_eqs,
2942                                 MAX_MSIX);
2943 
2944                 if (msi_x > 1)
2945                         nreq = min_t(int, nreq, msi_x);
2946 
2947                 entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2948                 if (!entries)
2949                         goto no_msi;
2950 
2951                 for (i = 0; i < nreq; ++i)
2952                         entries[i].entry = i;
2953 
2954                 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2955                                              nreq);
2956 
2957                 if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2958                         kfree(entries);
2959                         goto no_msi;
2960                 }
2961                 /* 1 is reserved for events (asyncrounous EQ) */
2962                 dev->caps.num_comp_vectors = nreq - 1;
2963 
2964                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2965                 bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2966                             dev->caps.num_ports);
2967 
2968                 for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2969                         if (i == MLX4_EQ_ASYNC)
2970                                 continue;
2971 
2972                         priv->eq_table.eq[i].irq =
2973                                 entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2974 
2975                         if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2976                                 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2977                                             dev->caps.num_ports);
2978                                 /* We don't set affinity hint when there
2979                                  * aren't enough EQs
2980                                  */
2981                         } else {
2982                                 set_bit(port,
2983                                         priv->eq_table.eq[i].actv_ports.ports);
2984                                 if (mlx4_init_affinity_hint(dev, port + 1, i))
2985                                         mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2986                                                   i);
2987                         }
2988                         /* We divide the Eqs evenly between the two ports.
2989                          * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2990                          * refers to the number of Eqs per port
2991                          * (i.e eqs_per_port). Theoretically, we would like to
2992                          * write something like (i + 1) % eqs_per_port == 0.
2993                          * However, since there's an asynchronous Eq, we have
2994                          * to skip over it by comparing this condition to
2995                          * !!((i + 1) > MLX4_EQ_ASYNC).
2996                          */
2997                         if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2998                             ((i + 1) %
2999                              (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
3000                             !!((i + 1) > MLX4_EQ_ASYNC))
3001                                 /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
3002                                  * everything is shared anyway.
3003                                  */
3004                                 port++;
3005                 }
3006 
3007                 dev->flags |= MLX4_FLAG_MSI_X;
3008 
3009                 kfree(entries);
3010                 return;
3011         }
3012 
3013 no_msi:
3014         dev->caps.num_comp_vectors = 1;
3015 
3016         BUG_ON(MLX4_EQ_ASYNC >= 2);
3017         for (i = 0; i < 2; ++i) {
3018                 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
3019                 if (i != MLX4_EQ_ASYNC) {
3020                         bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
3021                                     dev->caps.num_ports);
3022                 }
3023         }
3024 }
3025 
3026 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
3027 {
3028         struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
3029         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
3030         int err;
3031 
3032         err = devlink_port_register(devlink, &info->devlink_port, port);
3033         if (err)
3034                 return err;
3035 
3036         info->dev = dev;
3037         info->port = port;
3038         if (!mlx4_is_slave(dev)) {
3039                 mlx4_init_mac_table(dev, &info->mac_table);
3040                 mlx4_init_vlan_table(dev, &info->vlan_table);
3041                 mlx4_init_roce_gid_table(dev, &info->gid_table);
3042                 info->base_qpn = mlx4_get_base_qpn(dev, port);
3043         }
3044 
3045         sprintf(info->dev_name, "mlx4_port%d", port);
3046         info->port_attr.attr.name = info->dev_name;
3047         if (mlx4_is_mfunc(dev)) {
3048                 info->port_attr.attr.mode = 0444;
3049         } else {
3050                 info->port_attr.attr.mode = 0644;
3051                 info->port_attr.store     = set_port_type;
3052         }
3053         info->port_attr.show      = show_port_type;
3054         sysfs_attr_init(&info->port_attr.attr);
3055 
3056         err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
3057         if (err) {
3058                 mlx4_err(dev, "Failed to create file for port %d\n", port);
3059                 devlink_port_unregister(&info->devlink_port);
3060                 info->port = -1;
3061                 return err;
3062         }
3063 
3064         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
3065         info->port_mtu_attr.attr.name = info->dev_mtu_name;
3066         if (mlx4_is_mfunc(dev)) {
3067                 info->port_mtu_attr.attr.mode = 0444;
3068         } else {
3069                 info->port_mtu_attr.attr.mode = 0644;
3070                 info->port_mtu_attr.store     = set_port_ib_mtu;
3071         }
3072         info->port_mtu_attr.show      = show_port_ib_mtu;
3073         sysfs_attr_init(&info->port_mtu_attr.attr);
3074 
3075         err = device_create_file(&dev->persist->pdev->dev,
3076                                  &info->port_mtu_attr);
3077         if (err) {
3078                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
3079                 device_remove_file(&info->dev->persist->pdev->dev,
3080                                    &info->port_attr);
3081                 devlink_port_unregister(&info->devlink_port);
3082                 info->port = -1;
3083                 return err;
3084         }
3085 
3086         return 0;
3087 }
3088 
3089 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
3090 {
3091         if (info->port < 0)
3092                 return;
3093 
3094         device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
3095         device_remove_file(&info->dev->persist->pdev->dev,
3096                            &info->port_mtu_attr);
3097         devlink_port_unregister(&info->devlink_port);
3098 
3099 #ifdef CONFIG_RFS_ACCEL
3100         free_irq_cpu_rmap(info->rmap);
3101         info->rmap = NULL;
3102 #endif
3103 }
3104 
3105 static int mlx4_init_steering(struct mlx4_dev *dev)
3106 {
3107         struct mlx4_priv *priv = mlx4_priv(dev);
3108         int num_entries = dev->caps.num_ports;
3109         int i, j;
3110 
3111         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer),
3112                               GFP_KERNEL);
3113         if (!priv->steer)
3114                 return -ENOMEM;
3115 
3116         for (i = 0; i < num_entries; i++)
3117                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3118                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3119                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3120                 }
3121         return 0;
3122 }
3123 
3124 static void mlx4_clear_steering(struct mlx4_dev *dev)
3125 {
3126         struct mlx4_priv *priv = mlx4_priv(dev);
3127         struct mlx4_steer_index *entry, *tmp_entry;
3128         struct mlx4_promisc_qp *pqp, *tmp_pqp;
3129         int num_entries = dev->caps.num_ports;
3130         int i, j;
3131 
3132         for (i = 0; i < num_entries; i++) {
3133                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3134                         list_for_each_entry_safe(pqp, tmp_pqp,
3135                                                  &priv->steer[i].promisc_qps[j],
3136                                                  list) {
3137                                 list_del(&pqp->list);
3138                                 kfree(pqp);
3139                         }
3140                         list_for_each_entry_safe(entry, tmp_entry,
3141                                                  &priv->steer[i].steer_entries[j],
3142                                                  list) {
3143                                 list_del(&entry->list);
3144                                 list_for_each_entry_safe(pqp, tmp_pqp,
3145                                                          &entry->duplicates,
3146                                                          list) {
3147                                         list_del(&pqp->list);
3148                                         kfree(pqp);
3149                                 }
3150                                 kfree(entry);
3151                         }
3152                 }
3153         }
3154         kfree(priv->steer);
3155 }
3156 
3157 static int extended_func_num(struct pci_dev *pdev)
3158 {
3159         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3160 }
3161 
3162 #define MLX4_OWNER_BASE 0x8069c
3163 #define MLX4_OWNER_SIZE 4
3164 
3165 static int mlx4_get_ownership(struct mlx4_dev *dev)
3166 {
3167         void __iomem *owner;
3168         u32 ret;
3169 
3170         if (pci_channel_offline(dev->persist->pdev))
3171                 return -EIO;
3172 
3173         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3174                         MLX4_OWNER_BASE,
3175                         MLX4_OWNER_SIZE);
3176         if (!owner) {
3177                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3178                 return -ENOMEM;
3179         }
3180 
3181         ret = readl(owner);
3182         iounmap(owner);
3183         return (int) !!ret;
3184 }
3185 
3186 static void mlx4_free_ownership(struct mlx4_dev *dev)
3187 {
3188         void __iomem *owner;
3189 
3190         if (pci_channel_offline(dev->persist->pdev))
3191                 return;
3192 
3193         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3194                         MLX4_OWNER_BASE,
3195                         MLX4_OWNER_SIZE);
3196         if (!owner) {
3197                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3198                 return;
3199         }
3200         writel(0, owner);
3201         msleep(1000);
3202         iounmap(owner);
3203 }
3204 
3205 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
3206                                   !!((flags) & MLX4_FLAG_MASTER))
3207 
3208 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
3209                              u8 total_vfs, int existing_vfs, int reset_flow)
3210 {
3211         u64 dev_flags = dev->flags;
3212         int err = 0;
3213         int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
3214                                         MLX4_MAX_NUM_VF);
3215 
3216         if (reset_flow) {
3217                 dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
3218                                        GFP_KERNEL);
3219                 if (!dev->dev_vfs)
3220                         goto free_mem;
3221                 return dev_flags;
3222         }
3223 
3224         atomic_inc(&pf_loading);
3225         if (dev->flags &  MLX4_FLAG_SRIOV) {
3226                 if (existing_vfs != total_vfs) {
3227                         mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
3228                                  existing_vfs, total_vfs);
3229                         total_vfs = existing_vfs;
3230                 }
3231         }
3232 
3233         dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
3234         if (NULL == dev->dev_vfs) {
3235                 mlx4_err(dev, "Failed to allocate memory for VFs\n");
3236                 goto disable_sriov;
3237         }
3238 
3239         if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
3240                 if (total_vfs > fw_enabled_sriov_vfs) {
3241                         mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n",
3242                                  total_vfs, fw_enabled_sriov_vfs);
3243                         err = -ENOMEM;
3244                         goto disable_sriov;
3245                 }
3246                 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
3247                 err = pci_enable_sriov(pdev, total_vfs);
3248         }
3249         if (err) {
3250                 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
3251                          err);
3252                 goto disable_sriov;
3253         } else {
3254                 mlx4_warn(dev, "Running in master mode\n");
3255                 dev_flags |= MLX4_FLAG_SRIOV |
3256                         MLX4_FLAG_MASTER;
3257                 dev_flags &= ~MLX4_FLAG_SLAVE;
3258                 dev->persist->num_vfs = total_vfs;
3259         }
3260         return dev_flags;
3261 
3262 disable_sriov:
3263         atomic_dec(&pf_loading);
3264 free_mem:
3265         dev->persist->num_vfs = 0;
3266         kfree(dev->dev_vfs);
3267         dev->dev_vfs = NULL;
3268         return dev_flags & ~MLX4_FLAG_MASTER;
3269 }
3270 
3271 enum {
3272         MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
3273 };
3274 
3275 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
3276                               int *nvfs)
3277 {
3278         int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
3279         /* Checking for 64 VFs as a limitation of CX2 */
3280         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
3281             requested_vfs >= 64) {
3282                 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
3283                          requested_vfs);
3284                 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
3285         }
3286         return 0;
3287 }
3288 
3289 static int mlx4_pci_enable_device(struct mlx4_dev *dev)
3290 {
3291         struct pci_dev *pdev = dev->persist->pdev;
3292         int err = 0;
3293 
3294         mutex_lock(&dev->persist->pci_status_mutex);
3295         if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) {
3296                 err = pci_enable_device(pdev);
3297                 if (!err)
3298                         dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED;
3299         }
3300         mutex_unlock(&dev->persist->pci_status_mutex);
3301 
3302         return err;
3303 }
3304 
3305 static void mlx4_pci_disable_device(struct mlx4_dev *dev)
3306 {
3307         struct pci_dev *pdev = dev->persist->pdev;
3308 
3309         mutex_lock(&dev->persist->pci_status_mutex);
3310         if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) {
3311                 pci_disable_device(pdev);
3312                 dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED;
3313         }
3314         mutex_unlock(&dev->persist->pci_status_mutex);
3315 }
3316 
3317 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
3318                          int total_vfs, int *nvfs, struct mlx4_priv *priv,
3319                          int reset_flow)
3320 {
3321         struct mlx4_dev *dev;
3322         unsigned sum = 0;
3323         int err;
3324         int port;
3325         int i;
3326         struct mlx4_dev_cap *dev_cap = NULL;
3327         int existing_vfs = 0;
3328 
3329         dev = &priv->dev;
3330 
3331         INIT_LIST_HEAD(&priv->ctx_list);
3332         spin_lock_init(&priv->ctx_lock);
3333 
3334         mutex_init(&priv->port_mutex);
3335         mutex_init(&priv->bond_mutex);
3336 
3337         INIT_LIST_HEAD(&priv->pgdir_list);
3338         mutex_init(&priv->pgdir_mutex);
3339         spin_lock_init(&priv->cmd.context_lock);
3340 
3341         INIT_LIST_HEAD(&priv->bf_list);
3342         mutex_init(&priv->bf_mutex);
3343 
3344         dev->rev_id = pdev->revision;
3345         dev->numa_node = dev_to_node(&pdev->dev);
3346 
3347         /* Detect if this device is a virtual function */
3348         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3349                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3350                 dev->flags |= MLX4_FLAG_SLAVE;
3351         } else {
3352                 /* We reset the device and enable SRIOV only for physical
3353                  * devices.  Try to claim ownership on the device;
3354                  * if already taken, skip -- do not allow multiple PFs */
3355                 err = mlx4_get_ownership(dev);
3356                 if (err) {
3357                         if (err < 0)
3358                                 return err;
3359                         else {
3360                                 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
3361                                 return -EINVAL;
3362                         }
3363                 }
3364 
3365                 atomic_set(&priv->opreq_count, 0);
3366                 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3367 
3368                 /*
3369                  * Now reset the HCA before we touch the PCI capabilities or
3370                  * attempt a firmware command, since a boot ROM may have left
3371                  * the HCA in an undefined state.
3372                  */
3373                 err = mlx4_reset(dev);
3374                 if (err) {
3375                         mlx4_err(dev, "Failed to reset HCA, aborting\n");
3376                         goto err_sriov;
3377                 }
3378 
3379                 if (total_vfs) {
3380                         dev->flags = MLX4_FLAG_MASTER;
3381                         existing_vfs = pci_num_vf(pdev);
3382                         if (existing_vfs)
3383                                 dev->flags |= MLX4_FLAG_SRIOV;
3384                         dev->persist->num_vfs = total_vfs;
3385                 }
3386         }
3387 
3388         /* on load remove any previous indication of internal error,
3389          * device is up.
3390          */
3391         dev->persist->state = MLX4_DEVICE_STATE_UP;
3392 
3393 slave_start:
3394         err = mlx4_cmd_init(dev);
3395         if (err) {
3396                 mlx4_err(dev, "Failed to init command interface, aborting\n");
3397                 goto err_sriov;
3398         }
3399 
3400         /* In slave functions, the communication channel must be initialized
3401          * before posting commands. Also, init num_slaves before calling
3402          * mlx4_init_hca */
3403         if (mlx4_is_mfunc(dev)) {
3404                 if (mlx4_is_master(dev)) {
3405                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3406 
3407                 } else {
3408                         dev->num_slaves = 0;
3409                         err = mlx4_multi_func_init(dev);
3410                         if (err) {
3411                                 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
3412                                 goto err_cmd;
3413                         }
3414                 }
3415         }
3416 
3417         err = mlx4_init_fw(dev);
3418         if (err) {
3419                 mlx4_err(dev, "Failed to init fw, aborting.\n");
3420                 goto err_mfunc;
3421         }
3422 
3423         if (mlx4_is_master(dev)) {
3424                 /* when we hit the goto slave_start below, dev_cap already initialized */
3425                 if (!dev_cap) {
3426                         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
3427 
3428                         if (!dev_cap) {
3429                                 err = -ENOMEM;
3430                                 goto err_fw;
3431                         }
3432 
3433                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3434                         if (err) {
3435                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3436                                 goto err_fw;
3437                         }
3438 
3439                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3440                                 goto err_fw;
3441 
3442                         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3443                                 u64 dev_flags = mlx4_enable_sriov(dev, pdev,
3444                                                                   total_vfs,
3445                                                                   existing_vfs,
3446                                                                   reset_flow);
3447 
3448                                 mlx4_close_fw(dev);
3449                                 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3450                                 dev->flags = dev_flags;
3451                                 if (!SRIOV_VALID_STATE(dev->flags)) {
3452                                         mlx4_err(dev, "Invalid SRIOV state\n");
3453                                         goto err_sriov;
3454                                 }
3455                                 err = mlx4_reset(dev);
3456                                 if (err) {
3457                                         mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3458                                         goto err_sriov;
3459                                 }
3460                                 goto slave_start;
3461                         }
3462                 } else {
3463                         /* Legacy mode FW requires SRIOV to be enabled before
3464                          * doing QUERY_DEV_CAP, since max_eq's value is different if
3465                          * SRIOV is enabled.
3466                          */
3467                         memset(dev_cap, 0, sizeof(*dev_cap));
3468                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3469                         if (err) {
3470                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3471                                 goto err_fw;
3472                         }
3473 
3474                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3475                                 goto err_fw;
3476                 }
3477         }
3478 
3479         err = mlx4_init_hca(dev);
3480         if (err) {
3481                 if (err == -EACCES) {
3482                         /* Not primary Physical function
3483                          * Running in slave mode */
3484                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3485                         /* We're not a PF */
3486                         if (dev->flags & MLX4_FLAG_SRIOV) {
3487                                 if (!existing_vfs)
3488                                         pci_disable_sriov(pdev);
3489                                 if (mlx4_is_master(dev) && !reset_flow)
3490                                         atomic_dec(&pf_loading);
3491                                 dev->flags &= ~MLX4_FLAG_SRIOV;
3492                         }
3493                         if (!mlx4_is_slave(dev))
3494                                 mlx4_free_ownership(dev);
3495                         dev->flags |= MLX4_FLAG_SLAVE;
3496                         dev->flags &= ~MLX4_FLAG_MASTER;
3497                         goto slave_start;
3498                 } else
3499                         goto err_fw;
3500         }
3501 
3502         if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3503                 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
3504                                                   existing_vfs, reset_flow);
3505 
3506                 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
3507                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
3508                         dev->flags = dev_flags;
3509                         err = mlx4_cmd_init(dev);
3510                         if (err) {
3511                                 /* Only VHCR is cleaned up, so could still
3512                                  * send FW commands
3513                                  */
3514                                 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
3515                                 goto err_close;
3516                         }
3517                 } else {
3518                         dev->flags = dev_flags;
3519                 }
3520 
3521                 if (!SRIOV_VALID_STATE(dev->flags)) {
3522                         mlx4_err(dev, "Invalid SRIOV state\n");
3523                         goto err_close;
3524                 }
3525         }
3526 
3527         /* check if the device is functioning at its maximum possible speed.
3528          * No return code for this call, just warn the user in case of PCI
3529          * express device capabilities are under-satisfied by the bus.
3530          */
3531         if (!mlx4_is_slave(dev))
3532                 pcie_print_link_status(dev->persist->pdev);
3533 
3534         /* In master functions, the communication channel must be initialized
3535          * after obtaining its address from fw */
3536         if (mlx4_is_master(dev)) {
3537                 if (dev->caps.num_ports < 2 &&
3538                     num_vfs_argc > 1) {
3539                         err = -EINVAL;
3540                         mlx4_err(dev,
3541                                  "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3542                                  dev->caps.num_ports);
3543                         goto err_close;
3544                 }
3545                 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3546 
3547                 for (i = 0;
3548                      i < sizeof(dev->persist->nvfs)/
3549                      sizeof(dev->persist->nvfs[0]); i++) {
3550                         unsigned j;
3551 
3552                         for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3553                                 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3554                                 dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3555                                         dev->caps.num_ports;
3556                         }
3557                 }
3558 
3559                 /* In master functions, the communication channel
3560                  * must be initialized after obtaining its address from fw
3561                  */
3562                 err = mlx4_multi_func_init(dev);
3563                 if (err) {
3564                         mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
3565                         goto err_close;
3566                 }
3567         }
3568 
3569         err = mlx4_alloc_eq_table(dev);
3570         if (err)
3571                 goto err_master_mfunc;
3572 
3573         bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
3574         mutex_init(&priv->msix_ctl.pool_lock);
3575 
3576         mlx4_enable_msi_x(dev);
3577         if ((mlx4_is_mfunc(dev)) &&
3578             !(dev->flags & MLX4_FLAG_MSI_X)) {
3579                 err = -EOPNOTSUPP;
3580                 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3581                 goto err_free_eq;
3582         }
3583 
3584         if (!mlx4_is_slave(dev)) {
3585                 err = mlx4_init_steering(dev);
3586                 if (err)
3587                         goto err_disable_msix;
3588         }
3589 
3590         mlx4_init_quotas(dev);
3591 
3592         err = mlx4_setup_hca(dev);
3593         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3594             !mlx4_is_mfunc(dev)) {
3595                 dev->flags &= ~MLX4_FLAG_MSI_X;
3596                 dev->caps.num_comp_vectors = 1;
3597                 pci_disable_msix(pdev);
3598                 err = mlx4_setup_hca(dev);
3599         }
3600 
3601         if (err)
3602                 goto err_steer;
3603 
3604         /* When PF resources are ready arm its comm channel to enable
3605          * getting commands
3606          */
3607         if (mlx4_is_master(dev)) {
3608                 err = mlx4_ARM_COMM_CHANNEL(dev);
3609                 if (err) {
3610                         mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
3611                                  err);
3612                         goto err_steer;
3613                 }
3614         }
3615 
3616         for (port = 1; port <= dev->caps.num_ports; port++) {
3617                 err = mlx4_init_port_info(dev, port);
3618                 if (err)
3619                         goto err_port;
3620         }
3621 
3622         priv->v2p.port1 = 1;
3623         priv->v2p.port2 = 2;
3624 
3625         err = mlx4_register_device(dev);
3626         if (err)
3627                 goto err_port;
3628 
3629         mlx4_request_modules(dev);
3630 
3631         mlx4_sense_init(dev);
3632         mlx4_start_sense(dev);
3633 
3634         priv->removed = 0;
3635 
3636         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3637                 atomic_dec(&pf_loading);
3638 
3639         kfree(dev_cap);
3640         return 0;
3641 
3642 err_port:
3643         for (--port; port >= 1; --port)
3644                 mlx4_cleanup_port_info(&priv->port[port]);
3645 
3646         mlx4_cleanup_default_counters(dev);
3647         if (!mlx4_is_slave(dev))
3648                 mlx4_cleanup_counters_table(dev);
3649         mlx4_cleanup_qp_table(dev);
3650         mlx4_cleanup_srq_table(dev);
3651         mlx4_cleanup_cq_table(dev);
3652         mlx4_cmd_use_polling(dev);
3653         mlx4_cleanup_eq_table(dev);
3654         mlx4_cleanup_mcg_table(dev);
3655         mlx4_cleanup_mr_table(dev);
3656         mlx4_cleanup_xrcd_table(dev);
3657         mlx4_cleanup_pd_table(dev);
3658         mlx4_cleanup_uar_table(dev);
3659 
3660 err_steer:
3661         if (!mlx4_is_slave(dev))
3662                 mlx4_clear_steering(dev);
3663 
3664 err_disable_msix:
3665         if (dev->flags & MLX4_FLAG_MSI_X)
3666                 pci_disable_msix(pdev);
3667 
3668 err_free_eq:
3669         mlx4_free_eq_table(dev);
3670 
3671 err_master_mfunc:
3672         if (mlx4_is_master(dev)) {
3673                 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3674                 mlx4_multi_func_cleanup(dev);
3675         }
3676 
3677         if (mlx4_is_slave(dev))
3678                 mlx4_slave_destroy_special_qp_cap(dev);
3679 
3680 err_close:
3681         mlx4_close_hca(dev);
3682 
3683 err_fw:
3684         mlx4_close_fw(dev);
3685 
3686 err_mfunc:
3687         if (mlx4_is_slave(dev))
3688                 mlx4_multi_func_cleanup(dev);
3689 
3690 err_cmd:
3691         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3692 
3693 err_sriov:
3694         if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3695                 pci_disable_sriov(pdev);
3696                 dev->flags &= ~MLX4_FLAG_SRIOV;
3697         }
3698 
3699         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3700                 atomic_dec(&pf_loading);
3701 
3702         kfree(priv->dev.dev_vfs);
3703 
3704         if (!mlx4_is_slave(dev))
3705                 mlx4_free_ownership(dev);
3706 
3707         kfree(dev_cap);
3708         return err;
3709 }
3710 
3711 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3712                            struct mlx4_priv *priv)
3713 {
3714         int err;
3715         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3716         int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3717         const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3718                 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
3719         unsigned total_vfs = 0;
3720         unsigned int i;
3721 
3722         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3723 
3724         err = mlx4_pci_enable_device(&priv->dev);
3725         if (err) {
3726                 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3727                 return err;
3728         }
3729 
3730         /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
3731          * per port, we must limit the number of VFs to 63 (since their are
3732          * 128 MACs)
3733          */
3734         for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc;
3735              total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3736                 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3737                 if (nvfs[i] < 0) {
3738                         dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3739                         err = -EINVAL;
3740                         goto err_disable_pdev;
3741                 }
3742         }
3743         for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc;
3744              i++) {
3745                 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3746                 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3747                         dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3748                         err = -EINVAL;
3749                         goto err_disable_pdev;
3750                 }
3751         }
3752         if (total_vfs > MLX4_MAX_NUM_VF) {
3753                 dev_err(&pdev->dev,
3754                         "Requested more VF's (%d) than allowed by hw (%d)\n",
3755                         total_vfs, MLX4_MAX_NUM_VF);
3756                 err = -EINVAL;
3757                 goto err_disable_pdev;
3758         }
3759 
3760         for (i = 0; i < MLX4_MAX_PORTS; i++) {
3761                 if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
3762                         dev_err(&pdev->dev,
3763                                 "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n",
3764                                 nvfs[i] + nvfs[2], i + 1,
3765                                 MLX4_MAX_NUM_VF_P_PORT);
3766                         err = -EINVAL;
3767                         goto err_disable_pdev;
3768                 }
3769         }
3770 
3771         /* Check for BARs. */
3772         if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3773             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3774                 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3775                         pci_dev_data, pci_resource_flags(pdev, 0));
3776                 err = -ENODEV;
3777                 goto err_disable_pdev;
3778         }
3779         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3780                 dev_err(&pdev->dev, "Missing UAR, aborting\n");
3781                 err = -ENODEV;
3782                 goto err_disable_pdev;
3783         }
3784 
3785         err = pci_request_regions(pdev, DRV_NAME);
3786         if (err) {
3787                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3788                 goto err_disable_pdev;
3789         }
3790 
3791         pci_set_master(pdev);
3792 
3793         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3794         if (err) {
3795                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3796                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3797                 if (err) {
3798                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3799                         goto err_release_regions;
3800                 }
3801         }
3802         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3803         if (err) {
3804                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3805                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3806                 if (err) {
3807                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3808                         goto err_release_regions;
3809                 }
3810         }
3811 
3812         /* Allow large DMA segments, up to the firmware limit of 1 GB */
3813         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3814         /* Detect if this device is a virtual function */
3815         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3816                 /* When acting as pf, we normally skip vfs unless explicitly
3817                  * requested to probe them.
3818                  */
3819                 if (total_vfs) {
3820                         unsigned vfs_offset = 0;
3821 
3822                         for (i = 0; i < ARRAY_SIZE(nvfs) &&
3823                              vfs_offset + nvfs[i] < extended_func_num(pdev);
3824                              vfs_offset += nvfs[i], i++)
3825                                 ;
3826                         if (i == ARRAY_SIZE(nvfs)) {
3827                                 err = -ENODEV;
3828                                 goto err_release_regions;
3829                         }
3830                         if ((extended_func_num(pdev) - vfs_offset)
3831                             > prb_vf[i]) {
3832                                 dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3833                                          extended_func_num(pdev));
3834                                 err = -ENODEV;
3835                                 goto err_release_regions;
3836                         }
3837                 }
3838         }
3839 
3840         err = mlx4_crdump_init(&priv->dev);
3841         if (err)
3842                 goto err_release_regions;
3843 
3844         err = mlx4_catas_init(&priv->dev);
3845         if (err)
3846                 goto err_crdump;
3847 
3848         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3849         if (err)
3850                 goto err_catas;
3851 
3852         return 0;
3853 
3854 err_catas:
3855         mlx4_catas_end(&priv->dev);
3856 
3857 err_crdump:
3858         mlx4_crdump_end(&priv->dev);
3859 
3860 err_release_regions:
3861         pci_release_regions(pdev);
3862 
3863 err_disable_pdev:
3864         mlx4_pci_disable_device(&priv->dev);
3865         return err;
3866 }
3867 
3868 static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
3869                                       enum devlink_port_type port_type)
3870 {
3871         struct mlx4_port_info *info = container_of(devlink_port,
3872                                                    struct mlx4_port_info,
3873                                                    devlink_port);
3874         enum mlx4_port_type mlx4_port_type;
3875 
3876         switch (port_type) {
3877         case DEVLINK_PORT_TYPE_AUTO:
3878                 mlx4_port_type = MLX4_PORT_TYPE_AUTO;
3879                 break;
3880         case DEVLINK_PORT_TYPE_ETH:
3881                 mlx4_port_type = MLX4_PORT_TYPE_ETH;
3882                 break;
3883         case DEVLINK_PORT_TYPE_IB:
3884                 mlx4_port_type = MLX4_PORT_TYPE_IB;
3885                 break;
3886         default:
3887                 return -EOPNOTSUPP;
3888         }
3889 
3890         return __set_port_type(info, mlx4_port_type);
3891 }
3892 
3893 static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
3894 {
3895         struct mlx4_priv *priv = devlink_priv(devlink);
3896         struct mlx4_dev *dev = &priv->dev;
3897         struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
3898         union devlink_param_value saved_value;
3899         int err;
3900 
3901         err = devlink_param_driverinit_value_get(devlink,
3902                                                  DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
3903                                                  &saved_value);
3904         if (!err && mlx4_internal_err_reset != saved_value.vbool) {
3905                 mlx4_internal_err_reset = saved_value.vbool;
3906                 /* Notify on value changed on runtime configuration mode */
3907                 devlink_param_value_changed(devlink,
3908                                             DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET);
3909         }
3910         err = devlink_param_driverinit_value_get(devlink,
3911                                                  DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
3912                                                  &saved_value);
3913         if (!err)
3914                 log_num_mac = order_base_2(saved_value.vu32);
3915         err = devlink_param_driverinit_value_get(devlink,
3916                                                  MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
3917                                                  &saved_value);
3918         if (!err)
3919                 enable_64b_cqe_eqe = saved_value.vbool;
3920         err = devlink_param_driverinit_value_get(devlink,
3921                                                  MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
3922                                                  &saved_value);
3923         if (!err)
3924                 enable_4k_uar = saved_value.vbool;
3925         err = devlink_param_driverinit_value_get(devlink,
3926                                                  DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
3927                                                  &saved_value);
3928         if (!err && crdump->snapshot_enable != saved_value.vbool) {
3929                 crdump->snapshot_enable = saved_value.vbool;
3930                 devlink_param_value_changed(devlink,
3931                                             DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
3932         }
3933 }
3934 
3935 static void mlx4_restart_one_down(struct pci_dev *pdev);
3936 static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
3937                                struct devlink *devlink);
3938 
3939 static int mlx4_devlink_reload_down(struct devlink *devlink,
3940                                     struct netlink_ext_ack *extack)
3941 {
3942         struct mlx4_priv *priv = devlink_priv(devlink);
3943         struct mlx4_dev *dev = &priv->dev;
3944         struct mlx4_dev_persistent *persist = dev->persist;
3945 
3946         if (persist->num_vfs)
3947                 mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
3948         mlx4_restart_one_down(persist->pdev);
3949         return 0;
3950 }
3951 
3952 static int mlx4_devlink_reload_up(struct devlink *devlink,
3953                                   struct netlink_ext_ack *extack)
3954 {
3955         struct mlx4_priv *priv = devlink_priv(devlink);
3956         struct mlx4_dev *dev = &priv->dev;
3957         struct mlx4_dev_persistent *persist = dev->persist;
3958         int err;
3959 
3960         err = mlx4_restart_one_up(persist->pdev, true, devlink);
3961         if (err)
3962                 mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n",
3963                          err);
3964 
3965         return err;
3966 }
3967 
3968 static const struct devlink_ops mlx4_devlink_ops = {
3969         .port_type_set  = mlx4_devlink_port_type_set,
3970         .reload_down    = mlx4_devlink_reload_down,
3971         .reload_up      = mlx4_devlink_reload_up,
3972 };
3973 
3974 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3975 {
3976         struct devlink *devlink;
3977         struct mlx4_priv *priv;
3978         struct mlx4_dev *dev;
3979         int ret;
3980 
3981         printk_once(KERN_INFO "%s", mlx4_version);
3982 
3983         devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
3984         if (!devlink)
3985                 return -ENOMEM;
3986         priv = devlink_priv(devlink);
3987 
3988         dev       = &priv->dev;
3989         dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3990         if (!dev->persist) {
3991                 ret = -ENOMEM;
3992                 goto err_devlink_free;
3993         }
3994         dev->persist->pdev = pdev;
3995         dev->persist->dev = dev;
3996         pci_set_drvdata(pdev, dev->persist);
3997         priv->pci_dev_data = id->driver_data;
3998         mutex_init(&dev->persist->device_state_mutex);
3999         mutex_init(&dev->persist->interface_state_mutex);
4000         mutex_init(&dev->persist->pci_status_mutex);
4001 
4002         ret = devlink_register(devlink, &pdev->dev);
4003         if (ret)
4004                 goto err_persist_free;
4005         ret = devlink_params_register(devlink, mlx4_devlink_params,
4006                                       ARRAY_SIZE(mlx4_devlink_params));
4007         if (ret)
4008                 goto err_devlink_unregister;
4009         mlx4_devlink_set_params_init_values(devlink);
4010         ret =  __mlx4_init_one(pdev, id->driver_data, priv);
4011         if (ret)
4012                 goto err_params_unregister;
4013 
4014         devlink_params_publish(devlink);
4015         devlink_reload_enable(devlink);
4016         pci_save_state(pdev);
4017         return 0;
4018 
4019 err_params_unregister:
4020         devlink_params_unregister(devlink, mlx4_devlink_params,
4021                                   ARRAY_SIZE(mlx4_devlink_params));
4022 err_devlink_unregister:
4023         devlink_unregister(devlink);
4024 err_persist_free:
4025         kfree(dev->persist);
4026 err_devlink_free:
4027         devlink_free(devlink);
4028         return ret;
4029 }
4030 
4031 static void mlx4_clean_dev(struct mlx4_dev *dev)
4032 {
4033         struct mlx4_dev_persistent *persist = dev->persist;
4034         struct mlx4_priv *priv = mlx4_priv(dev);
4035         unsigned long   flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
4036 
4037         memset(priv, 0, sizeof(*priv));
4038         priv->dev.persist = persist;
4039         priv->dev.flags = flags;
4040 }
4041 
4042 static void mlx4_unload_one(struct pci_dev *pdev)
4043 {
4044         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4045         struct mlx4_dev  *dev  = persist->dev;
4046         struct mlx4_priv *priv = mlx4_priv(dev);
4047         int               pci_dev_data;
4048         int p, i;
4049 
4050         if (priv->removed)
4051                 return;
4052 
4053         /* saving current ports type for further use */
4054         for (i = 0; i < dev->caps.num_ports; i++) {
4055                 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
4056                 dev->persist->curr_port_poss_type[i] = dev->caps.
4057                                                        possible_type[i + 1];
4058         }
4059 
4060         pci_dev_data = priv->pci_dev_data;
4061 
4062         mlx4_stop_sense(dev);
4063         mlx4_unregister_device(dev);
4064 
4065         for (p = 1; p <= dev->caps.num_ports; p++) {
4066                 mlx4_cleanup_port_info(&priv->port[p]);
4067                 mlx4_CLOSE_PORT(dev, p);
4068         }
4069 
4070         if (mlx4_is_master(dev))
4071                 mlx4_free_resource_tracker(dev,
4072                                            RES_TR_FREE_SLAVES_ONLY);
4073 
4074         mlx4_cleanup_default_counters(dev);
4075         if (!mlx4_is_slave(dev))
4076                 mlx4_cleanup_counters_table(dev);
4077         mlx4_cleanup_qp_table(dev);
4078         mlx4_cleanup_srq_table(dev);
4079         mlx4_cleanup_cq_table(dev);
4080         mlx4_cmd_use_polling(dev);
4081         mlx4_cleanup_eq_table(dev);
4082         mlx4_cleanup_mcg_table(dev);
4083         mlx4_cleanup_mr_table(dev);
4084         mlx4_cleanup_xrcd_table(dev);
4085         mlx4_cleanup_pd_table(dev);
4086 
4087         if (mlx4_is_master(dev))
4088                 mlx4_free_resource_tracker(dev,
4089                                            RES_TR_FREE_STRUCTS_ONLY);
4090 
4091         iounmap(priv->kar);
4092         mlx4_uar_free(dev, &priv->driver_uar);
4093         mlx4_cleanup_uar_table(dev);
4094         if (!mlx4_is_slave(dev))
4095                 mlx4_clear_steering(dev);
4096         mlx4_free_eq_table(dev);
4097         if (mlx4_is_master(dev))
4098                 mlx4_multi_func_cleanup(dev);
4099         mlx4_close_hca(dev);
4100         mlx4_close_fw(dev);
4101         if (mlx4_is_slave(dev))
4102                 mlx4_multi_func_cleanup(dev);
4103         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
4104 
4105         if (dev->flags & MLX4_FLAG_MSI_X)
4106                 pci_disable_msix(pdev);
4107 
4108         if (!mlx4_is_slave(dev))
4109                 mlx4_free_ownership(dev);
4110 
4111         mlx4_slave_destroy_special_qp_cap(dev);
4112         kfree(dev->dev_vfs);
4113 
4114         mlx4_clean_dev(dev);
4115         priv->pci_dev_data = pci_dev_data;
4116         priv->removed = 1;
4117 }
4118 
4119 static void mlx4_remove_one(struct pci_dev *pdev)
4120 {
4121         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4122         struct mlx4_dev  *dev  = persist->dev;
4123         struct mlx4_priv *priv = mlx4_priv(dev);
4124         struct devlink *devlink = priv_to_devlink(priv);
4125         int active_vfs = 0;
4126 
4127         devlink_reload_disable(devlink);
4128 
4129         if (mlx4_is_slave(dev))
4130                 persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
4131 
4132         mutex_lock(&persist->interface_state_mutex);
4133         persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
4134         mutex_unlock(&persist->interface_state_mutex);
4135 
4136         /* Disabling SR-IOV is not allowed while there are active vf's */
4137         if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
4138                 active_vfs = mlx4_how_many_lives_vf(dev);
4139                 if (active_vfs) {
4140                         pr_warn("Removing PF when there are active VF's !!\n");
4141                         pr_warn("Will not disable SR-IOV.\n");
4142                 }
4143         }
4144 
4145         /* device marked to be under deletion running now without the lock
4146          * letting other tasks to be terminated
4147          */
4148         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4149                 mlx4_unload_one(pdev);
4150         else
4151                 mlx4_info(dev, "%s: interface is down\n", __func__);
4152         mlx4_catas_end(dev);
4153         mlx4_crdump_end(dev);
4154         if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
4155                 mlx4_warn(dev, "Disabling SR-IOV\n");
4156                 pci_disable_sriov(pdev);
4157         }
4158 
4159         pci_release_regions(pdev);
4160         mlx4_pci_disable_device(dev);
4161         devlink_params_unregister(devlink, mlx4_devlink_params,
4162                                   ARRAY_SIZE(mlx4_devlink_params));
4163         devlink_unregister(devlink);
4164         kfree(dev->persist);
4165         devlink_free(devlink);
4166 }
4167 
4168 static int restore_current_port_types(struct mlx4_dev *dev,
4169                                       enum mlx4_port_type *types,
4170                                       enum mlx4_port_type *poss_types)
4171 {
4172         struct mlx4_priv *priv = mlx4_priv(dev);
4173         int err, i;
4174 
4175         mlx4_stop_sense(dev);
4176 
4177         mutex_lock(&priv->port_mutex);
4178         for (i = 0; i < dev->caps.num_ports; i++)
4179                 dev->caps.possible_type[i + 1] = poss_types[i];
4180         err = mlx4_change_port_types(dev, types);
4181         mlx4_start_sense(dev);
4182         mutex_unlock(&priv->port_mutex);
4183 
4184         return err;
4185 }
4186 
4187 static void mlx4_restart_one_down(struct pci_dev *pdev)
4188 {
4189         mlx4_unload_one(pdev);
4190 }
4191 
4192 static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
4193                                struct devlink *devlink)
4194 {
4195         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4196         struct mlx4_dev  *dev  = persist->dev;
4197         struct mlx4_priv *priv = mlx4_priv(dev);
4198         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4199         int pci_dev_data, err, total_vfs;
4200 
4201         pci_dev_data = priv->pci_dev_data;
4202         total_vfs = dev->persist->num_vfs;
4203         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4204 
4205         if (reload)
4206                 mlx4_devlink_param_load_driverinit_values(devlink);
4207         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
4208         if (err) {
4209                 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
4210                          __func__, pci_name(pdev), err);
4211                 return err;
4212         }
4213 
4214         err = restore_current_port_types(dev, dev->persist->curr_port_type,
4215                                          dev->persist->curr_port_poss_type);
4216         if (err)
4217                 mlx4_err(dev, "could not restore original port types (%d)\n",
4218                          err);
4219 
4220         return err;
4221 }
4222 
4223 int mlx4_restart_one(struct pci_dev *pdev)
4224 {
4225         mlx4_restart_one_down(pdev);
4226         return mlx4_restart_one_up(pdev, false, NULL);
4227 }
4228 
4229 #define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT }
4230 #define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF }
4231 #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }
4232 
4233 static const struct pci_device_id mlx4_pci_table[] = {
4234 #ifdef CONFIG_MLX4_CORE_GEN2
4235         /* MT25408 "Hermon" */
4236         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR),      /* SDR */
4237         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR),      /* DDR */
4238         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR),      /* QDR */
4239         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2), /* DDR Gen2 */
4240         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2), /* QDR Gen2 */
4241         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN),       /* EN 10GigE */
4242         MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2),  /* EN 10GigE Gen2 */
4243         /* MT25458 ConnectX EN 10GBASE-T */
4244         MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN),
4245         MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2),      /* Gen2 */
4246         /* MT26468 ConnectX EN 10GigE PCIe Gen2*/
4247         MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2),
4248         /* MT26438 ConnectX EN 40GigE PCIe Gen2 5GT/s */
4249         MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2),
4250         /* MT26478 ConnectX2 40GigE PCIe Gen2 */
4251         MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2),
4252         /* MT25400 Family [ConnectX-2] */
4253         MLX_VF(0x1002),                                 /* Virtual Function */
4254 #endif /* CONFIG_MLX4_CORE_GEN2 */
4255         /* MT27500 Family [ConnectX-3] */
4256         MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3),
4257         MLX_VF(0x1004),                                 /* Virtual Function */
4258         MLX_GN(0x1005),                                 /* MT27510 Family */
4259         MLX_GN(0x1006),                                 /* MT27511 Family */
4260         MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO),   /* MT27520 Family */
4261         MLX_GN(0x1008),                                 /* MT27521 Family */
4262         MLX_GN(0x1009),                                 /* MT27530 Family */
4263         MLX_GN(0x100a),                                 /* MT27531 Family */
4264         MLX_GN(0x100b),                                 /* MT27540 Family */
4265         MLX_GN(0x100c),                                 /* MT27541 Family */
4266         MLX_GN(0x100d),                                 /* MT27550 Family */
4267         MLX_GN(0x100e),                                 /* MT27551 Family */
4268         MLX_GN(0x100f),                                 /* MT27560 Family */
4269         MLX_GN(0x1010),                                 /* MT27561 Family */
4270 
4271         /*
4272          * See the mellanox_check_broken_intx_masking() quirk when
4273          * adding devices
4274          */
4275 
4276         { 0, }
4277 };
4278 
4279 MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
4280 
4281 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
4282                                               pci_channel_state_t state)
4283 {
4284         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4285 
4286         mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
4287         mlx4_enter_error_state(persist);
4288 
4289         mutex_lock(&persist->interface_state_mutex);
4290         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4291                 mlx4_unload_one(pdev);
4292 
4293         mutex_unlock(&persist->interface_state_mutex);
4294         if (state == pci_channel_io_perm_failure)
4295                 return PCI_ERS_RESULT_DISCONNECT;
4296 
4297         mlx4_pci_disable_device(persist->dev);
4298         return PCI_ERS_RESULT_NEED_RESET;
4299 }
4300 
4301 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
4302 {
4303         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4304         struct mlx4_dev  *dev  = persist->dev;
4305         int err;
4306 
4307         mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
4308         err = mlx4_pci_enable_device(dev);
4309         if (err) {
4310                 mlx4_err(dev, "Can not re-enable device, err=%d\n", err);
4311                 return PCI_ERS_RESULT_DISCONNECT;
4312         }
4313 
4314         pci_set_master(pdev);
4315         pci_restore_state(pdev);
4316         pci_save_state(pdev);
4317         return PCI_ERS_RESULT_RECOVERED;
4318 }
4319 
4320 static void mlx4_pci_resume(struct pci_dev *pdev)
4321 {
4322         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4323         struct mlx4_dev  *dev  = persist->dev;
4324         struct mlx4_priv *priv = mlx4_priv(dev);
4325         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4326         int total_vfs;
4327         int err;
4328 
4329         mlx4_err(dev, "%s was called\n", __func__);
4330         total_vfs = dev->persist->num_vfs;
4331         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4332 
4333         mutex_lock(&persist->interface_state_mutex);
4334         if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
4335                 err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
4336                                     priv, 1);
4337                 if (err) {
4338                         mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n",
4339                                  __func__,  err);
4340                         goto end;
4341                 }
4342 
4343                 err = restore_current_port_types(dev, dev->persist->
4344                                                  curr_port_type, dev->persist->
4345                                                  curr_port_poss_type);
4346                 if (err)
4347                         mlx4_err(dev, "could not restore original port types (%d)\n", err);
4348         }
4349 end:
4350         mutex_unlock(&persist->interface_state_mutex);
4351 
4352 }
4353 
4354 static void mlx4_shutdown(struct pci_dev *pdev)
4355 {
4356         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4357 
4358         mlx4_info(persist->dev, "mlx4_shutdown was called\n");
4359         mutex_lock(&persist->interface_state_mutex);
4360         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4361                 mlx4_unload_one(pdev);
4362         mutex_unlock(&persist->interface_state_mutex);
4363 }
4364 
4365 static const struct pci_error_handlers mlx4_err_handler = {
4366         .error_detected = mlx4_pci_err_detected,
4367         .slot_reset     = mlx4_pci_slot_reset,
4368         .resume         = mlx4_pci_resume,
4369 };
4370 
4371 static int mlx4_suspend(struct pci_dev *pdev, pm_message_t state)
4372 {
4373         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4374         struct mlx4_dev *dev = persist->dev;
4375 
4376         mlx4_err(dev, "suspend was called\n");
4377         mutex_lock(&persist->interface_state_mutex);
4378         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4379                 mlx4_unload_one(pdev);
4380         mutex_unlock(&persist->interface_state_mutex);
4381 
4382         return 0;
4383 }
4384 
4385 static int mlx4_resume(struct pci_dev *pdev)
4386 {
4387         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4388         struct mlx4_dev *dev = persist->dev;
4389         struct mlx4_priv *priv = mlx4_priv(dev);
4390         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4391         int total_vfs;
4392         int ret = 0;
4393 
4394         mlx4_err(dev, "resume was called\n");
4395         total_vfs = dev->persist->num_vfs;
4396         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4397 
4398         mutex_lock(&persist->interface_state_mutex);
4399         if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
4400                 ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs,
4401                                     nvfs, priv, 1);
4402                 if (!ret) {
4403                         ret = restore_current_port_types(dev,
4404                                         dev->persist->curr_port_type,
4405                                         dev->persist->curr_port_poss_type);
4406                         if (ret)
4407                                 mlx4_err(dev, "resume: could not restore original port types (%d)\n", ret);
4408                 }
4409         }
4410         mutex_unlock(&persist->interface_state_mutex);
4411 
4412         return ret;
4413 }
4414 
4415 static struct pci_driver mlx4_driver = {
4416         .name           = DRV_NAME,
4417         .id_table       = mlx4_pci_table,
4418         .probe          = mlx4_init_one,
4419         .shutdown       = mlx4_shutdown,
4420         .remove         = mlx4_remove_one,
4421         .suspend        = mlx4_suspend,
4422         .resume         = mlx4_resume,
4423         .err_handler    = &mlx4_err_handler,
4424 };
4425 
4426 static int __init mlx4_verify_params(void)
4427 {
4428         if (msi_x < 0) {
4429                 pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
4430                 return -1;
4431         }
4432 
4433         if ((log_num_mac < 0) || (log_num_mac > 7)) {
4434                 pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
4435                 return -1;
4436         }
4437 
4438         if (log_num_vlan != 0)
4439                 pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
4440                         MLX4_LOG_NUM_VLANS);
4441 
4442         if (use_prio != 0)
4443                 pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
4444 
4445         if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
4446                 pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
4447                         log_mtts_per_seg);
4448                 return -1;
4449         }
4450 
4451         /* Check if module param for ports type has legal combination */
4452         if (port_type_array[0] == false && port_type_array[1] == true) {
4453                 pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
4454                 port_type_array[0] = true;
4455         }
4456 
4457         if (mlx4_log_num_mgm_entry_size < -7 ||
4458             (mlx4_log_num_mgm_entry_size > 0 &&
4459              (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
4460               mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
4461                 pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
4462                         mlx4_log_num_mgm_entry_size,
4463                         MLX4_MIN_MGM_LOG_ENTRY_SIZE,
4464                         MLX4_MAX_MGM_LOG_ENTRY_SIZE);
4465                 return -1;
4466         }
4467 
4468         return 0;
4469 }
4470 
4471 static int __init mlx4_init(void)
4472 {
4473         int ret;
4474 
4475         if (mlx4_verify_params())
4476                 return -EINVAL;
4477 
4478 
4479         mlx4_wq = create_singlethread_workqueue("mlx4");
4480         if (!mlx4_wq)
4481                 return -ENOMEM;
4482 
4483         ret = pci_register_driver(&mlx4_driver);
4484         if (ret < 0)
4485                 destroy_workqueue(mlx4_wq);
4486         return ret < 0 ? ret : 0;
4487 }
4488 
4489 static void __exit mlx4_cleanup(void)
4490 {
4491         pci_unregister_driver(&mlx4_driver);
4492         destroy_workqueue(mlx4_wq);
4493 }
4494 
4495 module_init(mlx4_init);
4496 module_exit(mlx4_cleanup);

/* [<][>][^][v][top][bottom][index][help] */