root/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mlx5e_wait_for_sq_flush
  2. mlx5e_reset_txqsq_cc_pc
  3. mlx5e_tx_reporter_err_cqe_recover
  4. mlx5e_reporter_tx_err_cqe
  5. mlx5e_tx_reporter_timeout_recover
  6. mlx5e_reporter_tx_timeout
  7. mlx5e_tx_reporter_recover_from_ctx
  8. mlx5e_tx_reporter_recover
  9. mlx5e_tx_reporter_build_diagnose_output
  10. mlx5e_tx_reporter_diagnose
  11. mlx5e_reporter_tx_create
  12. mlx5e_reporter_tx_destroy

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /* Copyright (c) 2019 Mellanox Technologies. */
   3 
   4 #include "health.h"
   5 
   6 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
   7 {
   8         unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
   9 
  10         while (time_before(jiffies, exp_time)) {
  11                 if (sq->cc == sq->pc)
  12                         return 0;
  13 
  14                 msleep(20);
  15         }
  16 
  17         netdev_err(sq->channel->netdev,
  18                    "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
  19                    sq->sqn, sq->cc, sq->pc);
  20 
  21         return -ETIMEDOUT;
  22 }
  23 
  24 static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
  25 {
  26         WARN_ONCE(sq->cc != sq->pc,
  27                   "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
  28                   sq->sqn, sq->cc, sq->pc);
  29         sq->cc = 0;
  30         sq->dma_fifo_cc = 0;
  31         sq->pc = 0;
  32 }
  33 
  34 static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
  35 {
  36         struct mlx5_core_dev *mdev;
  37         struct net_device *dev;
  38         struct mlx5e_txqsq *sq;
  39         u8 state;
  40         int err;
  41 
  42         sq = ctx;
  43         mdev = sq->channel->mdev;
  44         dev = sq->channel->netdev;
  45 
  46         if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
  47                 return 0;
  48 
  49         err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
  50         if (err) {
  51                 netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
  52                            sq->sqn, err);
  53                 goto out;
  54         }
  55 
  56         if (state != MLX5_SQC_STATE_ERR)
  57                 goto out;
  58 
  59         mlx5e_tx_disable_queue(sq->txq);
  60 
  61         err = mlx5e_wait_for_sq_flush(sq);
  62         if (err)
  63                 goto out;
  64 
  65         /* At this point, no new packets will arrive from the stack as TXQ is
  66          * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
  67          * pending WQEs. SQ can safely reset the SQ.
  68          */
  69 
  70         err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
  71         if (err)
  72                 goto out;
  73 
  74         mlx5e_reset_txqsq_cc_pc(sq);
  75         sq->stats->recover++;
  76         clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
  77         mlx5e_activate_txqsq(sq);
  78 
  79         return 0;
  80 out:
  81         clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
  82         return err;
  83 }
  84 
  85 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
  86 {
  87         struct mlx5e_priv *priv = sq->channel->priv;
  88         char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
  89         struct mlx5e_err_ctx err_ctx = {0};
  90 
  91         err_ctx.ctx = sq;
  92         err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
  93         sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
  94 
  95         mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
  96 }
  97 
  98 static int mlx5e_tx_reporter_timeout_recover(void *ctx)
  99 {
 100         struct mlx5_eq_comp *eq;
 101         struct mlx5e_txqsq *sq;
 102         int err;
 103 
 104         sq = ctx;
 105         eq = sq->cq.mcq.eq;
 106         err = mlx5e_health_channel_eq_recover(eq, sq->channel);
 107         if (err)
 108                 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 109 
 110         return err;
 111 }
 112 
 113 int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
 114 {
 115         struct mlx5e_priv *priv = sq->channel->priv;
 116         char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
 117         struct mlx5e_err_ctx err_ctx;
 118 
 119         err_ctx.ctx = sq;
 120         err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
 121         sprintf(err_str,
 122                 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
 123                 sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
 124                 jiffies_to_usecs(jiffies - sq->txq->trans_start));
 125 
 126         return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
 127 }
 128 
 129 /* state lock cannot be grabbed within this function.
 130  * It can cause a dead lock or a read-after-free.
 131  */
 132 static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 133 {
 134         return err_ctx->recover(err_ctx->ctx);
 135 }
 136 
 137 static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
 138                                      void *context)
 139 {
 140         struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 141         struct mlx5e_err_ctx *err_ctx = context;
 142 
 143         return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
 144                          mlx5e_health_recover_channels(priv);
 145 }
 146 
 147 static int
 148 mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 149                                         struct mlx5e_txqsq *sq, int tc)
 150 {
 151         struct mlx5e_priv *priv = sq->channel->priv;
 152         bool stopped = netif_xmit_stopped(sq->txq);
 153         u8 state;
 154         int err;
 155 
 156         err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
 157         if (err)
 158                 return err;
 159 
 160         err = devlink_fmsg_obj_nest_start(fmsg);
 161         if (err)
 162                 return err;
 163 
 164         err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
 165         if (err)
 166                 return err;
 167 
 168         err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
 169         if (err)
 170                 return err;
 171 
 172         err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
 173         if (err)
 174                 return err;
 175 
 176         err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
 177         if (err)
 178                 return err;
 179 
 180         err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
 181         if (err)
 182                 return err;
 183 
 184         err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
 185         if (err)
 186                 return err;
 187 
 188         err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
 189         if (err)
 190                 return err;
 191 
 192         err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
 193         if (err)
 194                 return err;
 195 
 196         err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg);
 197         if (err)
 198                 return err;
 199 
 200         err = devlink_fmsg_obj_nest_end(fmsg);
 201         if (err)
 202                 return err;
 203 
 204         return 0;
 205 }
 206 
 207 static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 208                                       struct devlink_fmsg *fmsg)
 209 {
 210         struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 211         struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
 212         u32 sq_stride, sq_sz;
 213 
 214         int i, tc, err = 0;
 215 
 216         mutex_lock(&priv->state_lock);
 217 
 218         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 219                 goto unlock;
 220 
 221         sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq);
 222         sq_stride = MLX5_SEND_WQE_BB;
 223 
 224         err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config");
 225         if (err)
 226                 goto unlock;
 227 
 228         err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
 229         if (err)
 230                 goto unlock;
 231 
 232         err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
 233         if (err)
 234                 goto unlock;
 235 
 236         err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
 237         if (err)
 238                 goto unlock;
 239 
 240         err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg);
 241         if (err)
 242                 goto unlock;
 243 
 244         err = mlx5e_reporter_named_obj_nest_end(fmsg);
 245         if (err)
 246                 goto unlock;
 247 
 248         err = mlx5e_reporter_named_obj_nest_end(fmsg);
 249         if (err)
 250                 goto unlock;
 251 
 252         err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
 253         if (err)
 254                 goto unlock;
 255 
 256         for (i = 0; i < priv->channels.num; i++) {
 257                 struct mlx5e_channel *c = priv->channels.c[i];
 258 
 259                 for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
 260                         struct mlx5e_txqsq *sq = &c->sq[tc];
 261 
 262                         err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
 263                         if (err)
 264                                 goto unlock;
 265                 }
 266         }
 267         err = devlink_fmsg_arr_pair_nest_end(fmsg);
 268         if (err)
 269                 goto unlock;
 270 
 271 unlock:
 272         mutex_unlock(&priv->state_lock);
 273         return err;
 274 }
 275 
 276 static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
 277                 .name = "tx",
 278                 .recover = mlx5e_tx_reporter_recover,
 279                 .diagnose = mlx5e_tx_reporter_diagnose,
 280 };
 281 
 282 #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
 283 
 284 int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
 285 {
 286         struct devlink_health_reporter *reporter;
 287         struct mlx5_core_dev *mdev = priv->mdev;
 288         struct devlink *devlink;
 289 
 290         devlink = priv_to_devlink(mdev);
 291         reporter =
 292                 devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
 293                                                MLX5_REPORTER_TX_GRACEFUL_PERIOD,
 294                                                true, priv);
 295         if (IS_ERR(reporter)) {
 296                 netdev_warn(priv->netdev,
 297                             "Failed to create tx reporter, err = %ld\n",
 298                             PTR_ERR(reporter));
 299                 return PTR_ERR(reporter);
 300         }
 301         priv->tx_reporter = reporter;
 302         return 0;
 303 }
 304 
 305 void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
 306 {
 307         if (!priv->tx_reporter)
 308                 return;
 309 
 310         devlink_health_reporter_destroy(priv->tx_reporter);
 311 }

/* [<][>][^][v][top][bottom][index][help] */