root/drivers/net/ethernet/mellanox/mlx5/core/events.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. eqe_type_str
  2. any_notifier
  3. temp_warn
  4. mlx5_pme_status_to_string
  5. mlx5_pme_error_to_string
  6. port_module
  7. mlx5_pcie_event
  8. pcie_core
  9. mlx5_get_pme_stats
  10. forward_event
  11. mlx5_events_init
  12. mlx5_events_cleanup
  13. mlx5_events_start
  14. mlx5_events_stop
  15. mlx5_notifier_register
  16. mlx5_notifier_unregister
  17. mlx5_notifier_call_chain

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 // Copyright (c) 2018 Mellanox Technologies
   3 
   4 #include <linux/mlx5/driver.h>
   5 
   6 #include "mlx5_core.h"
   7 #include "lib/eq.h"
   8 #include "lib/mlx5.h"
   9 
  10 struct mlx5_event_nb {
  11         struct mlx5_nb  nb;
  12         void           *ctx;
  13 };
  14 
  15 /* General events handlers for the low level mlx5_core driver
  16  *
  17  * Other Major feature specific events such as
  18  * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
  19  * separate notifiers callbacks, specifically by those mlx5 components.
  20  */
  21 static int any_notifier(struct notifier_block *, unsigned long, void *);
  22 static int temp_warn(struct notifier_block *, unsigned long, void *);
  23 static int port_module(struct notifier_block *, unsigned long, void *);
  24 static int pcie_core(struct notifier_block *, unsigned long, void *);
  25 
  26 /* handler which forwards the event to events->nh, driver notifiers */
  27 static int forward_event(struct notifier_block *, unsigned long, void *);
  28 
  29 static struct mlx5_nb events_nbs_ref[] = {
  30         /* Events to be proccessed by mlx5_core */
  31         {.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
  32         {.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
  33         {.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
  34         {.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
  35 
  36         /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
  37         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
  38         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
  39         /* QP/WQ resource events to forward */
  40         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
  41         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
  42         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
  43         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
  44         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
  45         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
  46         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
  47         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
  48         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
  49         /* SRQ events */
  50         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
  51         {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
  52 };
  53 
  54 struct mlx5_events {
  55         struct mlx5_core_dev *dev;
  56         struct workqueue_struct *wq;
  57         struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
  58         /* driver notifier chain */
  59         struct atomic_notifier_head nh;
  60         /* port module events stats */
  61         struct mlx5_pme_stats pme_stats;
  62         /*pcie_core*/
  63         struct work_struct pcie_core_work;
  64 };
  65 
  66 static const char *eqe_type_str(u8 type)
  67 {
  68         switch (type) {
  69         case MLX5_EVENT_TYPE_COMP:
  70                 return "MLX5_EVENT_TYPE_COMP";
  71         case MLX5_EVENT_TYPE_PATH_MIG:
  72                 return "MLX5_EVENT_TYPE_PATH_MIG";
  73         case MLX5_EVENT_TYPE_COMM_EST:
  74                 return "MLX5_EVENT_TYPE_COMM_EST";
  75         case MLX5_EVENT_TYPE_SQ_DRAINED:
  76                 return "MLX5_EVENT_TYPE_SQ_DRAINED";
  77         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
  78                 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
  79         case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
  80                 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
  81         case MLX5_EVENT_TYPE_CQ_ERROR:
  82                 return "MLX5_EVENT_TYPE_CQ_ERROR";
  83         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
  84                 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
  85         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
  86                 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
  87         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
  88                 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
  89         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
  90                 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
  91         case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
  92                 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
  93         case MLX5_EVENT_TYPE_INTERNAL_ERROR:
  94                 return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
  95         case MLX5_EVENT_TYPE_PORT_CHANGE:
  96                 return "MLX5_EVENT_TYPE_PORT_CHANGE";
  97         case MLX5_EVENT_TYPE_GPIO_EVENT:
  98                 return "MLX5_EVENT_TYPE_GPIO_EVENT";
  99         case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
 100                 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
 101         case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
 102                 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
 103         case MLX5_EVENT_TYPE_REMOTE_CONFIG:
 104                 return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
 105         case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
 106                 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
 107         case MLX5_EVENT_TYPE_STALL_EVENT:
 108                 return "MLX5_EVENT_TYPE_STALL_EVENT";
 109         case MLX5_EVENT_TYPE_CMD:
 110                 return "MLX5_EVENT_TYPE_CMD";
 111         case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
 112                 return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
 113         case MLX5_EVENT_TYPE_PAGE_REQUEST:
 114                 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
 115         case MLX5_EVENT_TYPE_PAGE_FAULT:
 116                 return "MLX5_EVENT_TYPE_PAGE_FAULT";
 117         case MLX5_EVENT_TYPE_PPS_EVENT:
 118                 return "MLX5_EVENT_TYPE_PPS_EVENT";
 119         case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
 120                 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
 121         case MLX5_EVENT_TYPE_FPGA_ERROR:
 122                 return "MLX5_EVENT_TYPE_FPGA_ERROR";
 123         case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
 124                 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
 125         case MLX5_EVENT_TYPE_GENERAL_EVENT:
 126                 return "MLX5_EVENT_TYPE_GENERAL_EVENT";
 127         case MLX5_EVENT_TYPE_MONITOR_COUNTER:
 128                 return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
 129         case MLX5_EVENT_TYPE_DEVICE_TRACER:
 130                 return "MLX5_EVENT_TYPE_DEVICE_TRACER";
 131         default:
 132                 return "Unrecognized event";
 133         }
 134 }
 135 
 136 /* handles all FW events, type == eqe->type */
 137 static int any_notifier(struct notifier_block *nb,
 138                         unsigned long type, void *data)
 139 {
 140         struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 141         struct mlx5_events   *events   = event_nb->ctx;
 142         struct mlx5_eqe      *eqe      = data;
 143 
 144         mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
 145                       eqe_type_str(eqe->type), eqe->sub_type);
 146         return NOTIFY_OK;
 147 }
 148 
 149 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
 150 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
 151 {
 152         struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 153         struct mlx5_events   *events   = event_nb->ctx;
 154         struct mlx5_eqe      *eqe      = data;
 155         u64 value_lsb;
 156         u64 value_msb;
 157 
 158         value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
 159         value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
 160 
 161         mlx5_core_warn(events->dev,
 162                        "High temperature on sensors with bit set %llx %llx",
 163                        value_msb, value_lsb);
 164 
 165         return NOTIFY_OK;
 166 }
 167 
 168 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
 169 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
 170 {
 171         switch (status) {
 172         case MLX5_MODULE_STATUS_PLUGGED:
 173                 return "Cable plugged";
 174         case MLX5_MODULE_STATUS_UNPLUGGED:
 175                 return "Cable unplugged";
 176         case MLX5_MODULE_STATUS_ERROR:
 177                 return "Cable error";
 178         case MLX5_MODULE_STATUS_DISABLED:
 179                 return "Cable disabled";
 180         default:
 181                 return "Unknown status";
 182         }
 183 }
 184 
 185 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
 186 {
 187         switch (error) {
 188         case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
 189                 return "Power budget exceeded";
 190         case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
 191                 return "Long Range for non MLNX cable";
 192         case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
 193                 return "Bus stuck (I2C or data shorted)";
 194         case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
 195                 return "No EEPROM/retry timeout";
 196         case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
 197                 return "Enforce part number list";
 198         case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
 199                 return "Unknown identifier";
 200         case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
 201                 return "High Temperature";
 202         case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
 203                 return "Bad or shorted cable/module";
 204         case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
 205                 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
 206         default:
 207                 return "Unknown error";
 208         }
 209 }
 210 
 211 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
 212 static int port_module(struct notifier_block *nb, unsigned long type, void *data)
 213 {
 214         struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 215         struct mlx5_events   *events   = event_nb->ctx;
 216         struct mlx5_eqe      *eqe      = data;
 217 
 218         enum port_module_event_status_type module_status;
 219         enum port_module_event_error_type error_type;
 220         struct mlx5_eqe_port_module *module_event_eqe;
 221         const char *status_str;
 222         u8 module_num;
 223 
 224         module_event_eqe = &eqe->data.port_module;
 225         module_status = module_event_eqe->module_status &
 226                         PORT_MODULE_EVENT_MODULE_STATUS_MASK;
 227         error_type = module_event_eqe->error_type &
 228                      PORT_MODULE_EVENT_ERROR_TYPE_MASK;
 229 
 230         if (module_status < MLX5_MODULE_STATUS_NUM)
 231                 events->pme_stats.status_counters[module_status]++;
 232 
 233         if (module_status == MLX5_MODULE_STATUS_ERROR)
 234                 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
 235                         events->pme_stats.error_counters[error_type]++;
 236 
 237         if (!printk_ratelimit())
 238                 return NOTIFY_OK;
 239 
 240         module_num = module_event_eqe->module;
 241         status_str = mlx5_pme_status_to_string(module_status);
 242         if (module_status == MLX5_MODULE_STATUS_ERROR) {
 243                 const char *error_str = mlx5_pme_error_to_string(error_type);
 244 
 245                 mlx5_core_err(events->dev,
 246                               "Port module event[error]: module %u, %s, %s\n",
 247                               module_num, status_str, error_str);
 248         } else {
 249                 mlx5_core_info(events->dev,
 250                                "Port module event: module %u, %s\n",
 251                                module_num, status_str);
 252         }
 253 
 254         return NOTIFY_OK;
 255 }
 256 
 257 enum {
 258         MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
 259         MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
 260         MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
 261 };
 262 
 263 static void mlx5_pcie_event(struct work_struct *work)
 264 {
 265         u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
 266         u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
 267         struct mlx5_events *events;
 268         struct mlx5_core_dev *dev;
 269         u8 power_status;
 270         u16 pci_power;
 271 
 272         events = container_of(work, struct mlx5_events, pcie_core_work);
 273         dev  = events->dev;
 274 
 275         if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
 276                 return;
 277 
 278         mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
 279                              MLX5_REG_MPEIN, 0, 0);
 280         power_status = MLX5_GET(mpein_reg, out, pwr_status);
 281         pci_power = MLX5_GET(mpein_reg, out, pci_power);
 282 
 283         switch (power_status) {
 284         case MLX5_PCI_POWER_COULD_NOT_BE_READ:
 285                 mlx5_core_info_rl(dev,
 286                                   "PCIe slot power capability was not advertised.\n");
 287                 break;
 288         case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
 289                 mlx5_core_warn_rl(dev,
 290                                   "Detected insufficient power on the PCIe slot (%uW).\n",
 291                                   pci_power);
 292                 break;
 293         case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
 294                 mlx5_core_info_rl(dev,
 295                                   "PCIe slot advertised sufficient power (%uW).\n",
 296                                   pci_power);
 297                 break;
 298         }
 299 }
 300 
 301 static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
 302 {
 303         struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
 304                                                         struct mlx5_event_nb,
 305                                                         nb);
 306         struct mlx5_events      *events   = event_nb->ctx;
 307         struct mlx5_eqe         *eqe      = data;
 308 
 309         switch (eqe->sub_type) {
 310         case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
 311                         queue_work(events->wq, &events->pcie_core_work);
 312                 break;
 313         default:
 314                 return NOTIFY_DONE;
 315         }
 316 
 317         return NOTIFY_OK;
 318 }
 319 
 320 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
 321 {
 322         *stats = dev->priv.events->pme_stats;
 323 }
 324 
 325 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
 326 static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
 327 {
 328         struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 329         struct mlx5_events   *events   = event_nb->ctx;
 330         struct mlx5_eqe      *eqe      = data;
 331 
 332         mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
 333                       eqe_type_str(eqe->type), eqe->sub_type);
 334         atomic_notifier_call_chain(&events->nh, event, data);
 335         return NOTIFY_OK;
 336 }
 337 
 338 int mlx5_events_init(struct mlx5_core_dev *dev)
 339 {
 340         struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
 341 
 342         if (!events)
 343                 return -ENOMEM;
 344 
 345         ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
 346         events->dev = dev;
 347         dev->priv.events = events;
 348         events->wq = create_singlethread_workqueue("mlx5_events");
 349         if (!events->wq) {
 350                 kfree(events);
 351                 return -ENOMEM;
 352         }
 353         INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
 354 
 355         return 0;
 356 }
 357 
 358 void mlx5_events_cleanup(struct mlx5_core_dev *dev)
 359 {
 360         destroy_workqueue(dev->priv.events->wq);
 361         kvfree(dev->priv.events);
 362 }
 363 
 364 void mlx5_events_start(struct mlx5_core_dev *dev)
 365 {
 366         struct mlx5_events *events = dev->priv.events;
 367         int i;
 368 
 369         for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
 370                 events->notifiers[i].nb  = events_nbs_ref[i];
 371                 events->notifiers[i].ctx = events;
 372                 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
 373         }
 374 }
 375 
 376 void mlx5_events_stop(struct mlx5_core_dev *dev)
 377 {
 378         struct mlx5_events *events = dev->priv.events;
 379         int i;
 380 
 381         for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
 382                 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
 383         flush_workqueue(events->wq);
 384 }
 385 
 386 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
 387 {
 388         struct mlx5_events *events = dev->priv.events;
 389 
 390         return atomic_notifier_chain_register(&events->nh, nb);
 391 }
 392 EXPORT_SYMBOL(mlx5_notifier_register);
 393 
 394 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
 395 {
 396         struct mlx5_events *events = dev->priv.events;
 397 
 398         return atomic_notifier_chain_unregister(&events->nh, nb);
 399 }
 400 EXPORT_SYMBOL(mlx5_notifier_unregister);
 401 
 402 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
 403 {
 404         return atomic_notifier_call_chain(&events->nh, event, data);
 405 }

/* [<][>][^][v][top][bottom][index][help] */