drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c


DEFINITIONS

This source file includes the following definitions.
  1. dr_parse_cqe
  2. dr_cq_poll_one
  3. dr_poll_cq
  4. dr_qp_event
  5. dr_create_rc_qp
  6. dr_destroy_qp
  7. dr_cmd_notify_hw
  8. dr_rdma_segments
  9. dr_post_send
  10. mlx5dr_send_fill_and_append_ste_send_info
  11. dr_handle_pending_wc
  12. dr_fill_data_segs
  13. dr_postsend_icm_data
  14. dr_get_tbl_copy_details
  15. mlx5dr_send_postsend_ste
  16. mlx5dr_send_postsend_htbl
  17. mlx5dr_send_postsend_formatted_htbl
  18. mlx5dr_send_postsend_action
  19. dr_modify_qp_rst2init
  20. dr_cmd_modify_qp_rtr2rts
  21. dr_cmd_modify_qp_init2rtr
  22. dr_prepare_qp_to_rts
  23. dr_cq_event
  24. dr_cq_complete
  25. dr_create_cq
  26. dr_destroy_cq
  27. dr_create_mkey
  28. dr_reg_mr
  29. dr_dereg_mr
  30. mlx5dr_send_ring_alloc
  31. mlx5dr_send_ring_free
  32. mlx5dr_send_ring_force_drain

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 /* Copyright (c) 2019 Mellanox Technologies. */
   3 
   4 #include <linux/smp.h>
   5 #include "dr_types.h"
   6 
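      /* Send queue sizing: a completion is requested once every signal_th
       * WQEs, where signal_th = max_send_wr / SIGNAL_PER_DIV_QUEUE, and the
       * CQ is drained once the pending WQEs reach TH_NUMS_TO_DRAIN signal
       * thresholds (see dr_handle_pending_wc()).
       */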
   7 #define QUEUE_SIZE 128
   8 #define SIGNAL_PER_DIV_QUEUE 16
   9 #define TH_NUMS_TO_DRAIN 2
  10 
  11 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
  12 
  13 struct dr_data_seg {
  14         u64 addr;
  15         u32 length;
  16         u32 lkey;
  17         unsigned int send_flags;
  18 };
  19 
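      /* Describes a single post: an RDMA WRITE segment carrying the data and
       * an RDMA READ segment that reads back from the same remote address
       * (see dr_post_send() and dr_fill_data_segs()).
       */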
  20 struct postsend_info {
  21         struct dr_data_seg write;
  22         struct dr_data_seg read;
  23         u64 remote_addr;
  24         u32 rkey;
  25 };
  26 
  27 struct dr_qp_rtr_attr {
  28         struct mlx5dr_cmd_gid_attr dgid_attr;
  29         enum ib_mtu mtu;
  30         u32 qp_num;
  31         u16 port_num;
  32         u8 min_rnr_timer;
  33         u8 sgid_index;
  34         u16 udp_src_port;
  35 };
  36 
  37 struct dr_qp_rts_attr {
  38         u8 timeout;
  39         u8 retry_cnt;
  40         u8 rnr_retry;
  41 };
  42 
  43 struct dr_qp_init_attr {
  44         u32 cqn;
  45         u32 pdn;
  46         u32 max_send_wr;
  47         struct mlx5_uars_page *uar;
  48 };
  49 
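      /* Parse one CQE: update the SQ consumer counter according to the
       * completed WQE and return CQ_OK on a successful completion, or
       * CQ_POLL_ERR on a requester/responder error.
       */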
  50 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
  51 {
  52         unsigned int idx;
  53         u8 opcode;
  54 
  55         opcode = get_cqe_opcode(cqe64);
  56         if (opcode == MLX5_CQE_REQ_ERR) {
  57                 idx = be16_to_cpu(cqe64->wqe_counter) &
  58                         (dr_cq->qp->sq.wqe_cnt - 1);
  59                 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
  60         } else if (opcode == MLX5_CQE_RESP_ERR) {
  61                 ++dr_cq->qp->sq.cc;
  62         } else {
  63                 idx = be16_to_cpu(cqe64->wqe_counter) &
  64                         (dr_cq->qp->sq.wqe_cnt - 1);
  65                 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
  66 
  67                 return CQ_OK;
  68         }
  69 
  70         return CQ_POLL_ERR;
  71 }
  72 
  73 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
  74 {
  75         struct mlx5_cqe64 *cqe64;
  76         int err;
  77 
  78         cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
  79         if (!cqe64)
  80                 return CQ_EMPTY;
  81 
  82         mlx5_cqwq_pop(&dr_cq->wq);
  83         err = dr_parse_cqe(dr_cq, cqe64);
  84         mlx5_cqwq_update_db_record(&dr_cq->wq);
  85 
  86         return err;
  87 }
  88 
  89 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
  90 {
  91         int npolled;
  92         int err = 0;
  93 
  94         for (npolled = 0; npolled < ne; ++npolled) {
  95                 err = dr_cq_poll_one(dr_cq);
  96                 if (err != CQ_OK)
  97                         break;
  98         }
  99 
 100         return err == CQ_POLL_ERR ? err : npolled;
 101 }
 102 
 103 static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
 104 {
 105         pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
 106 }
 107 
 108 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 109                                          struct dr_qp_init_attr *attr)
 110 {
 111         u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
 112         struct mlx5_wq_param wqp;
 113         struct mlx5dr_qp *dr_qp;
 114         int inlen;
 115         void *qpc;
 116         void *in;
 117         int err;
 118 
 119         dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
 120         if (!dr_qp)
 121                 return NULL;
 122 
 123         wqp.buf_numa_node = mdev->priv.numa_node;
 124         wqp.db_numa_node = mdev->priv.numa_node;
 125 
 126         dr_qp->rq.pc = 0;
 127         dr_qp->rq.cc = 0;
 128         dr_qp->rq.wqe_cnt = 4;
 129         dr_qp->sq.pc = 0;
 130         dr_qp->sq.cc = 0;
 131         dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
 132 
 133         MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
 134         MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
 135         MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
 136         err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
 137                                 &dr_qp->wq_ctrl);
 138         if (err) {
 139                 mlx5_core_info(mdev, "Can't create QP WQ\n");
 140                 goto err_wq;
 141         }
 142 
 143         dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
 144                                      sizeof(dr_qp->sq.wqe_head[0]),
 145                                      GFP_KERNEL);
 146 
 147         if (!dr_qp->sq.wqe_head) {
 148                 mlx5_core_warn(mdev, "Can't allocate wqe head\n");
 149                 goto err_wqe_head;
 150         }
 151 
 152         inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 153                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
 154                 dr_qp->wq_ctrl.buf.npages;
 155         in = kvzalloc(inlen, GFP_KERNEL);
 156         if (!in) {
 157                 err = -ENOMEM;
 158                 goto err_in;
 159         }
 160 
 161         qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 162         MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 163         MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 164         MLX5_SET(qpc, qpc, pd, attr->pdn);
 165         MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
 166         MLX5_SET(qpc, qpc, log_page_size,
 167                  dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 168         MLX5_SET(qpc, qpc, fre, 1);
 169         MLX5_SET(qpc, qpc, rlky, 1);
 170         MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
 171         MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
 172         MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
 173         MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
 174         MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
 175         MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
 176         MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
 177         if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
 178                 MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
 179         mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
 180                                   (__be64 *)MLX5_ADDR_OF(create_qp_in,
 181                                                          in, pas));
 182 
 183         err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
  184         kvfree(in);
 185 
 186         if (err) {
  187                 mlx5_core_warn(mdev, "Can't create QP\n");
 188                 goto err_in;
 189         }
 190         dr_qp->mqp.event = dr_qp_event;
 191         dr_qp->uar = attr->uar;
 192 
 193         return dr_qp;
 194 
 195 err_in:
 196         kfree(dr_qp->sq.wqe_head);
 197 err_wqe_head:
 198         mlx5_wq_destroy(&dr_qp->wq_ctrl);
 199 err_wq:
 200         kfree(dr_qp);
 201         return NULL;
 202 }
 203 
 204 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
 205                           struct mlx5dr_qp *dr_qp)
 206 {
 207         mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
 208         kfree(dr_qp->sq.wqe_head);
 209         mlx5_wq_destroy(&dr_qp->wq_ctrl);
 210         kfree(dr_qp);
 211 }
 212 
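      /* Publish the current SQ producer counter in the doorbell record, then
       * ring the HW doorbell by writing the WQE ctrl word to the BlueFlame
       * offset of the QP's UAR page.
       */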
 213 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
 214 {
 215         dma_wmb();
 216         *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
 217 
  218         /* After the wmb() the hw is aware of the new work */
 219         wmb();
 220 
 221         mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
 222 }
 223 
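      /* Build one RDMA WQE made of a ctrl segment, a remote address segment
       * and a single data segment (size is counted in 16-byte units), and
       * ring the doorbell only when @nreq is set.
       */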
 224 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 225                              u32 rkey, struct dr_data_seg *data_seg,
 226                              u32 opcode, int nreq)
 227 {
 228         struct mlx5_wqe_raddr_seg *wq_raddr;
 229         struct mlx5_wqe_ctrl_seg *wq_ctrl;
 230         struct mlx5_wqe_data_seg *wq_dseg;
 231         unsigned int size;
 232         unsigned int idx;
 233 
 234         size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
 235                 sizeof(*wq_raddr) / 16;
 236 
 237         idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
 238 
 239         wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
 240         wq_ctrl->imm = 0;
 241         wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
 242                 MLX5_WQE_CTRL_CQ_UPDATE : 0;
 243         wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
 244                                                 opcode);
 245         wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
 246         wq_raddr = (void *)(wq_ctrl + 1);
 247         wq_raddr->raddr = cpu_to_be64(remote_addr);
 248         wq_raddr->rkey = cpu_to_be32(rkey);
 249         wq_raddr->reserved = 0;
 250 
 251         wq_dseg = (void *)(wq_raddr + 1);
 252         wq_dseg->byte_count = cpu_to_be32(data_seg->length);
 253         wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
 254         wq_dseg->addr = cpu_to_be64(data_seg->addr);
 255 
 256         dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
 257 
 258         if (nreq)
 259                 dr_cmd_notify_hw(dr_qp, wq_ctrl);
 260 }
 261 
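      /* Each post issues an RDMA WRITE of the payload followed by an RDMA
       * READ from the same remote address; only the READ (the last WQE)
       * rings the doorbell.
       */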
 262 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
 263 {
 264         dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
 265                          &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
 266         dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
 267                          &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
 268 }
 269 
 270 /**
  271  * mlx5dr_send_fill_and_append_ste_send_info: Fill an ste_info entry and
  272  * append it to the send list.
  273  *
  274  *     @ste:       The ste that this data is attached to
  275  *     @size:      Size of the data to write
  276  *     @offset:    Offset of the data from the start of the hw_ste entry
  277  *     @data:      The data to write
  278  *     @ste_info:  The ste_info entry to be added to send_list
  279  *     @send_list: The list to append ste_info to
  280  *     @copy_data: If true, the data is copied into ste_info because it is
  281  *                 not backed up anywhere else (e.g. during re-hash);
  282  *                 if false, the data may still be updated after it has
  283  *                 been added to the list.
 284  */
 285 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
 286                                                u16 offset, u8 *data,
 287                                                struct mlx5dr_ste_send_info *ste_info,
 288                                                struct list_head *send_list,
 289                                                bool copy_data)
 290 {
 291         ste_info->size = size;
 292         ste_info->ste = ste;
 293         ste_info->offset = offset;
 294 
 295         if (copy_data) {
 296                 memcpy(ste_info->data_cont, data, size);
 297                 ste_info->data = ste_info->data_cont;
 298         } else {
 299                 ste_info->data = data;
 300         }
 301 
 302         list_add_tail(&ste_info->send_list, send_list);
 303 }
 304 
  305 /* The function tries to consume one wc each time, unless the queue is full.
  306  * In that case, which means the hw is behind the sw by a full queue length,
  307  * the function will drain the cq until it is empty.
  308  */
 309 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
 310                                 struct mlx5dr_send_ring *send_ring)
 311 {
 312         bool is_drain = false;
 313         int ne;
 314 
 315         if (send_ring->pending_wqe < send_ring->signal_th)
 316                 return 0;
 317 
  318         /* Queue is full, start draining it */
 319         if (send_ring->pending_wqe >=
 320             dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
 321                 is_drain = true;
 322 
 323         do {
 324                 ne = dr_poll_cq(send_ring->cq, 1);
 325                 if (ne < 0)
 326                         return ne;
 327                 else if (ne == 1)
 328                         send_ring->pending_wqe -= send_ring->signal_th;
 329         } while (is_drain && send_ring->pending_wqe);
 330 
 331         return 0;
 332 }
 333 
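      /* Each post consumes two WQEs (WRITE + READ). Request a completion
       * once every signal_th WQEs so that dr_handle_pending_wc() can reclaim
       * them in batches.
       */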
 334 static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
 335                               struct postsend_info *send_info)
 336 {
 337         send_ring->pending_wqe++;
 338 
 339         if (send_ring->pending_wqe % send_ring->signal_th == 0)
 340                 send_info->write.send_flags |= IB_SEND_SIGNALED;
 341 
 342         send_ring->pending_wqe++;
 343         send_info->read.length = send_info->write.length;
 344         /* Read into the same write area */
 345         send_info->read.addr = (uintptr_t)send_info->write.addr;
 346         send_info->read.lkey = send_ring->mr->mkey.key;
 347 
 348         if (send_ring->pending_wqe % send_ring->signal_th == 0)
 349                 send_info->read.send_flags = IB_SEND_SIGNALED;
 350         else
 351                 send_info->read.send_flags = 0;
 352 }
 353 
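      /* Post a single write request. Data larger than the max inline size is
       * first copied into the pre-registered send ring buffer and sent from
       * there.
       */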
 354 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
 355                                 struct postsend_info *send_info)
 356 {
 357         struct mlx5dr_send_ring *send_ring = dmn->send_ring;
 358         u32 buff_offset;
 359         int ret;
 360 
 361         ret = dr_handle_pending_wc(dmn, send_ring);
 362         if (ret)
 363                 return ret;
 364 
 365         if (send_info->write.length > dmn->info.max_inline_size) {
 366                 buff_offset = (send_ring->tx_head &
 367                                (dmn->send_ring->signal_th - 1)) *
 368                         send_ring->max_post_send_size;
 369                 /* Copy to ring mr */
 370                 memcpy(send_ring->buf + buff_offset,
 371                        (void *)(uintptr_t)send_info->write.addr,
 372                        send_info->write.length);
 373                 send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
 374                 send_info->write.lkey = send_ring->mr->mkey.key;
 375         }
 376 
 377         send_ring->tx_head++;
 378         dr_fill_data_segs(send_ring, send_info);
 379         dr_post_send(send_ring->qp, send_info);
 380 
 381         return 0;
 382 }
 383 
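      /* Hash tables larger than max_post_send_size are written in several
       * iterations; allocate a scratch buffer large enough for one iteration.
       */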
 384 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
 385                                    struct mlx5dr_ste_htbl *htbl,
 386                                    u8 **data,
 387                                    u32 *byte_size,
 388                                    int *iterations,
 389                                    int *num_stes)
 390 {
 391         int alloc_size;
 392 
 393         if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
 394                 *iterations = htbl->chunk->byte_size /
 395                         dmn->send_ring->max_post_send_size;
 396                 *byte_size = dmn->send_ring->max_post_send_size;
 397                 alloc_size = *byte_size;
 398                 *num_stes = *byte_size / DR_STE_SIZE;
 399         } else {
 400                 *iterations = 1;
 401                 *num_stes = htbl->chunk->num_of_entries;
 402                 alloc_size = *num_stes * DR_STE_SIZE;
 403         }
 404 
 405         *data = kzalloc(alloc_size, GFP_KERNEL);
 406         if (!*data)
 407                 return -ENOMEM;
 408 
 409         return 0;
 410 }
 411 
 412 /**
  413  * mlx5dr_send_postsend_ste: write size bytes at offset into the ste's hw icm.
 414  *
 415  *     @dmn:    Domain
 416  *     @ste:    The ste struct that contains the data (at
 417  *              least part of it)
  418  *     @data:   The real data to send
  419  *     @size:   Number of bytes to write
  420  *     @offset: Offset from the start of the icm mapped data;
  421  *              use this to write only part of the
  422  *              buffer.
 423  *
 424  * Return: 0 on success.
 425  */
 426 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
 427                              u8 *data, u16 size, u16 offset)
 428 {
 429         struct postsend_info send_info = {};
 430 
 431         send_info.write.addr = (uintptr_t)data;
 432         send_info.write.length = size;
 433         send_info.write.lkey = 0;
 434         send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
 435         send_info.rkey = ste->htbl->chunk->rkey;
 436 
 437         return dr_postsend_icm_data(dmn, &send_info);
 438 }
 439 
 440 int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
 441                               struct mlx5dr_ste_htbl *htbl,
 442                               u8 *formatted_ste, u8 *mask)
 443 {
 444         u32 byte_size = htbl->chunk->byte_size;
 445         int num_stes_per_iter;
 446         int iterations;
 447         u8 *data;
 448         int ret;
 449         int i;
 450         int j;
 451 
 452         ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
 453                                       &iterations, &num_stes_per_iter);
 454         if (ret)
 455                 return ret;
 456 
  457         /* Send the data 'iterations' times */
 458         for (i = 0; i < iterations; i++) {
 459                 u32 ste_index = i * (byte_size / DR_STE_SIZE);
 460                 struct postsend_info send_info = {};
 461 
  462                 /* Copy all the STEs into the data buffer;
  463                  * the bit_mask needs to be added as well
  464                  */
 465                 for (j = 0; j < num_stes_per_iter; j++) {
 466                         u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
 467                         u32 ste_off = j * DR_STE_SIZE;
 468 
 469                         if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
 470                                 memcpy(data + ste_off,
 471                                        formatted_ste, DR_STE_SIZE);
 472                         } else {
 473                                 /* Copy data */
 474                                 memcpy(data + ste_off,
 475                                        htbl->ste_arr[ste_index + j].hw_ste,
 476                                        DR_STE_SIZE_REDUCED);
 477                                 /* Copy bit_mask */
 478                                 memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
 479                                        mask, DR_STE_SIZE_MASK);
 480                         }
 481                 }
 482 
 483                 send_info.write.addr = (uintptr_t)data;
 484                 send_info.write.length = byte_size;
 485                 send_info.write.lkey = 0;
 486                 send_info.remote_addr =
 487                         mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
 488                 send_info.rkey = htbl->chunk->rkey;
 489 
 490                 ret = dr_postsend_icm_data(dmn, &send_info);
 491                 if (ret)
 492                         goto out_free;
 493         }
 494 
 495 out_free:
 496         kfree(data);
 497         return ret;
 498 }
 499 
  500 /* Initialize htbl with default STEs */
 501 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
 502                                         struct mlx5dr_ste_htbl *htbl,
 503                                         u8 *ste_init_data,
 504                                         bool update_hw_ste)
 505 {
 506         u32 byte_size = htbl->chunk->byte_size;
 507         int iterations;
 508         int num_stes;
 509         u8 *data;
 510         int ret;
 511         int i;
 512 
 513         ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
 514                                       &iterations, &num_stes);
 515         if (ret)
 516                 return ret;
 517 
 518         for (i = 0; i < num_stes; i++) {
 519                 u8 *copy_dst;
 520 
  521                 /* Copy the same ste into the data buffer */
 522                 copy_dst = data + i * DR_STE_SIZE;
 523                 memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
 524 
 525                 if (update_hw_ste) {
 526                         /* Copy the reduced ste to hash table ste_arr */
 527                         copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
 528                         memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
 529                 }
 530         }
 531 
  532         /* Send the data 'iterations' times */
 533         for (i = 0; i < iterations; i++) {
  534                 u32 ste_index = i * (byte_size / DR_STE_SIZE);
 535                 struct postsend_info send_info = {};
 536 
 537                 send_info.write.addr = (uintptr_t)data;
 538                 send_info.write.length = byte_size;
 539                 send_info.write.lkey = 0;
 540                 send_info.remote_addr =
 541                         mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
 542                 send_info.rkey = htbl->chunk->rkey;
 543 
 544                 ret = dr_postsend_icm_data(dmn, &send_info);
 545                 if (ret)
 546                         goto out_free;
 547         }
 548 
 549 out_free:
 550         kfree(data);
 551         return ret;
 552 }
 553 
 554 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
 555                                 struct mlx5dr_action *action)
 556 {
 557         struct postsend_info send_info = {};
 558         int ret;
 559 
 560         send_info.write.addr = (uintptr_t)action->rewrite.data;
 561         send_info.write.length = action->rewrite.num_of_actions *
 562                                  DR_MODIFY_ACTION_SIZE;
 563         send_info.write.lkey = 0;
 564         send_info.remote_addr = action->rewrite.chunk->mr_addr;
 565         send_info.rkey = action->rewrite.chunk->rkey;
 566 
 567         mutex_lock(&dmn->mutex);
 568         ret = dr_postsend_icm_data(dmn, &send_info);
 569         mutex_unlock(&dmn->mutex);
 570 
 571         return ret;
 572 }
 573 
 574 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
 575                                  struct mlx5dr_qp *dr_qp,
 576                                  int port)
 577 {
 578         u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
 579         void *qpc;
 580 
 581         qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
 582 
 583         MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
 584         MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
 585         MLX5_SET(qpc, qpc, rre, 1);
 586         MLX5_SET(qpc, qpc, rwe, 1);
 587 
 588         return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
 589                                    &dr_qp->mqp);
 590 }
 591 
 592 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
 593                                     struct mlx5dr_qp *dr_qp,
 594                                     struct dr_qp_rts_attr *attr)
 595 {
 596         u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
 597         void *qpc;
 598 
 599         qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
 600 
 601         MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
 602 
 603         MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
 604         MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
 605         MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
 606 
 607         return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
 608                                    &dr_qp->mqp);
 609 }
 610 
 611 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 612                                      struct mlx5dr_qp *dr_qp,
 613                                      struct dr_qp_rtr_attr *attr)
 614 {
 615         u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
 616         void *qpc;
 617 
 618         qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
 619 
 620         MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
 621 
 622         MLX5_SET(qpc, qpc, mtu, attr->mtu);
 623         MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
 624         MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
 625         memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
 626                attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
 627         memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
 628                attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
 629         MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
 630                  attr->sgid_index);
 631 
 632         if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
 633                 MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
 634                          attr->udp_src_port);
 635 
 636         MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
 637         MLX5_SET(qpc, qpc, min_rnr_nak, 1);
 638 
 639         return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
 640                                    &dr_qp->mqp);
 641 }
 642 
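      /* Move the RC QP through the RST->INIT->RTR->RTS transitions,
       * connecting it to its own QP number (loopback).
       */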
 643 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 644 {
 645         struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
 646         struct dr_qp_rts_attr rts_attr = {};
 647         struct dr_qp_rtr_attr rtr_attr = {};
 648         enum ib_mtu mtu = IB_MTU_1024;
 649         u16 gid_index = 0;
 650         int port = 1;
 651         int ret;
 652 
 653         /* Init */
 654         ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
 655         if (ret)
 656                 return ret;
 657 
 658         /* RTR */
 659         ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
 660         if (ret)
 661                 return ret;
 662 
 663         rtr_attr.mtu            = mtu;
 664         rtr_attr.qp_num         = dr_qp->mqp.qpn;
 665         rtr_attr.min_rnr_timer  = 12;
 666         rtr_attr.port_num       = port;
 667         rtr_attr.sgid_index     = gid_index;
 668         rtr_attr.udp_src_port   = dmn->info.caps.roce_min_src_udp;
 669 
 670         ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
 671         if (ret)
 672                 return ret;
 673 
 674         /* RTS */
 675         rts_attr.timeout        = 14;
 676         rts_attr.retry_cnt      = 7;
 677         rts_attr.rnr_retry      = 7;
 678 
 679         ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
 680         if (ret)
 681                 return ret;
 682 
 683         return 0;
 684 }
 685 
 686 static void dr_cq_event(struct mlx5_core_cq *mcq,
 687                         enum mlx5_event event)
 688 {
 689         pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
 690 }
 691 
 692 static void dr_cq_complete(struct mlx5_core_cq *mcq,
 693                            struct mlx5_eqe *eqe)
 694 {
 695         pr_err("CQ completion CQ: #%u\n", mcq->cqn);
 696 }
 697 
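      /* The CQ is used in polling mode only (dr_poll_cq()); it is never
       * armed, so the event/completion callbacks above are not expected to
       * run in normal operation.
       */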
 698 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 699                                       struct mlx5_uars_page *uar,
 700                                       size_t ncqe)
 701 {
 702         u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
 703         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 704         struct mlx5_wq_param wqp;
 705         struct mlx5_cqe64 *cqe;
 706         struct mlx5dr_cq *cq;
 707         int inlen, err, eqn;
 708         unsigned int irqn;
 709         void *cqc, *in;
 710         __be64 *pas;
 711         int vector;
 712         u32 i;
 713 
 714         cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 715         if (!cq)
 716                 return NULL;
 717 
 718         ncqe = roundup_pow_of_two(ncqe);
 719         MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
 720 
 721         wqp.buf_numa_node = mdev->priv.numa_node;
 722         wqp.db_numa_node = mdev->priv.numa_node;
 723 
 724         err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
 725                                &cq->wq_ctrl);
 726         if (err)
 727                 goto out;
 728 
 729         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 730                 cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
 731                 cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
 732         }
 733 
 734         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 735                 sizeof(u64) * cq->wq_ctrl.buf.npages;
 736         in = kvzalloc(inlen, GFP_KERNEL);
 737         if (!in)
 738                 goto err_cqwq;
 739 
 740         vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
 741         err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
 742         if (err) {
 743                 kvfree(in);
 744                 goto err_cqwq;
 745         }
 746 
 747         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 748         MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
 749         MLX5_SET(cqc, cqc, c_eqn, eqn);
 750         MLX5_SET(cqc, cqc, uar_page, uar->index);
 751         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 752                  MLX5_ADAPTER_PAGE_SHIFT);
 753         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
 754 
 755         pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
 756         mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
 757 
 758         cq->mcq.event = dr_cq_event;
 759         cq->mcq.comp  = dr_cq_complete;
 760 
 761         err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
 762         kvfree(in);
 763 
 764         if (err)
 765                 goto err_cqwq;
 766 
 767         cq->mcq.cqe_sz = 64;
 768         cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
 769         cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
 770         *cq->mcq.set_ci_db = 0;
 771 
  772         /* Set a non-zero value, in order to avoid the HW running db-recovery
  773          * on a CQ that is used in polling mode.
  774          */
 775         *cq->mcq.arm_db = cpu_to_be32(2 << 28);
 776 
 777         cq->mcq.vector = 0;
 778         cq->mcq.irqn = irqn;
 779         cq->mcq.uar = uar;
 780 
 781         return cq;
 782 
 783 err_cqwq:
 784         mlx5_wq_destroy(&cq->wq_ctrl);
 785 out:
 786         kfree(cq);
 787         return NULL;
 788 }
 789 
 790 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
 791 {
 792         mlx5_core_destroy_cq(mdev, &cq->mcq);
 793         mlx5_wq_destroy(&cq->wq_ctrl);
 794         kfree(cq);
 795 }
 796 
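      /* Create a physical-address mkey spanning the whole address space
       * (length64), so registered buffers can be accessed by DMA address.
       */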
 797 static int
 798 dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
 799 {
 800         u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
 801         void *mkc;
 802 
 803         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 804         MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
 805         MLX5_SET(mkc, mkc, a, 1);
 806         MLX5_SET(mkc, mkc, rw, 1);
 807         MLX5_SET(mkc, mkc, rr, 1);
 808         MLX5_SET(mkc, mkc, lw, 1);
 809         MLX5_SET(mkc, mkc, lr, 1);
 810 
 811         MLX5_SET(mkc, mkc, pd, pdn);
 812         MLX5_SET(mkc, mkc, length64, 1);
 813         MLX5_SET(mkc, mkc, qpn, 0xffffff);
 814 
 815         return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
 816 }
 817 
 818 static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
 819                                    u32 pdn, void *buf, size_t size)
 820 {
 821         struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 822         struct device *dma_device;
 823         dma_addr_t dma_addr;
 824         int err;
 825 
 826         if (!mr)
 827                 return NULL;
 828 
 829         dma_device = &mdev->pdev->dev;
 830         dma_addr = dma_map_single(dma_device, buf, size,
 831                                   DMA_BIDIRECTIONAL);
 832         err = dma_mapping_error(dma_device, dma_addr);
 833         if (err) {
 834                 mlx5_core_warn(mdev, "Can't dma buf\n");
 835                 kfree(mr);
 836                 return NULL;
 837         }
 838 
 839         err = dr_create_mkey(mdev, pdn, &mr->mkey);
 840         if (err) {
 841                 mlx5_core_warn(mdev, "Can't create mkey\n");
 842                 dma_unmap_single(dma_device, dma_addr, size,
 843                                  DMA_BIDIRECTIONAL);
 844                 kfree(mr);
 845                 return NULL;
 846         }
 847 
 848         mr->dma_addr = dma_addr;
 849         mr->size = size;
 850         mr->addr = buf;
 851 
 852         return mr;
 853 }
 854 
 855 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
 856 {
 857         mlx5_core_destroy_mkey(mdev, &mr->mkey);
 858         dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
 859                          DMA_BIDIRECTIONAL);
 860         kfree(mr);
 861 }
 862 
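      /* Allocate the per-domain send ring: a CQ, a loopback RC QP, a data
       * buffer and MR used for staging large writes, and a small sync MR
       * used by mlx5dr_send_ring_force_drain().
       */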
 863 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
 864 {
 865         struct dr_qp_init_attr init_attr = {};
 866         int cq_size;
 867         int size;
 868         int ret;
 869 
 870         dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
 871         if (!dmn->send_ring)
 872                 return -ENOMEM;
 873 
 874         cq_size = QUEUE_SIZE + 1;
 875         dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
 876         if (!dmn->send_ring->cq) {
 877                 ret = -ENOMEM;
 878                 goto free_send_ring;
 879         }
 880 
 881         init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
 882         init_attr.pdn = dmn->pdn;
 883         init_attr.uar = dmn->uar;
 884         init_attr.max_send_wr = QUEUE_SIZE;
 885 
 886         dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
 887         if (!dmn->send_ring->qp)  {
 888                 ret = -ENOMEM;
 889                 goto clean_cq;
 890         }
 891 
 892         dmn->send_ring->cq->qp = dmn->send_ring->qp;
 893 
 894         dmn->info.max_send_wr = QUEUE_SIZE;
 895         dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
 896                                         DR_STE_SIZE);
 897 
 898         dmn->send_ring->signal_th = dmn->info.max_send_wr /
 899                 SIGNAL_PER_DIV_QUEUE;
 900 
 901         /* Prepare qp to be used */
 902         ret = dr_prepare_qp_to_rts(dmn);
 903         if (ret)
 904                 goto clean_qp;
 905 
 906         dmn->send_ring->max_post_send_size =
 907                 mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
 908                                                    DR_ICM_TYPE_STE);
 909 
 910         /* Allocating the max size as a buffer for writing */
 911         size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
 912         dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
 913         if (!dmn->send_ring->buf) {
 914                 ret = -ENOMEM;
 915                 goto clean_qp;
 916         }
 917 
 918         dmn->send_ring->buf_size = size;
 919 
 920         dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
 921                                        dmn->pdn, dmn->send_ring->buf, size);
 922         if (!dmn->send_ring->mr) {
 923                 ret = -ENOMEM;
 924                 goto free_mem;
 925         }
 926 
 927         dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
 928                                             dmn->pdn, dmn->send_ring->sync_buff,
 929                                             MIN_READ_SYNC);
 930         if (!dmn->send_ring->sync_mr) {
 931                 ret = -ENOMEM;
 932                 goto clean_mr;
 933         }
 934 
 935         return 0;
 936 
 937 clean_mr:
 938         dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
 939 free_mem:
 940         kfree(dmn->send_ring->buf);
 941 clean_qp:
 942         dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
 943 clean_cq:
 944         dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
 945 free_send_ring:
 946         kfree(dmn->send_ring);
 947 
 948         return ret;
 949 }
 950 
 951 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
 952                            struct mlx5dr_send_ring *send_ring)
 953 {
 954         dr_destroy_qp(dmn->mdev, send_ring->qp);
 955         dr_destroy_cq(dmn->mdev, send_ring->cq);
 956         dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
 957         dr_dereg_mr(dmn->mdev, send_ring->mr);
 958         kfree(send_ring->buf);
 959         kfree(send_ring);
 960 }
 961 
 962 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
 963 {
 964         struct mlx5dr_send_ring *send_ring = dmn->send_ring;
 965         struct postsend_info send_info = {};
 966         u8 data[DR_STE_SIZE];
 967         int num_of_sends_req;
 968         int ret;
 969         int i;
 970 
  971         /* Sending this number of requests makes sure the queue gets drained */
 972         num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
 973 
 974         /* Send fake requests forcing the last to be signaled */
 975         send_info.write.addr = (uintptr_t)data;
 976         send_info.write.length = DR_STE_SIZE;
 977         send_info.write.lkey = 0;
 978         /* Using the sync_mr in order to write/read */
 979         send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
 980         send_info.rkey = send_ring->sync_mr->mkey.key;
 981 
 982         for (i = 0; i < num_of_sends_req; i++) {
 983                 ret = dr_postsend_icm_data(dmn, &send_info);
 984                 if (ret)
 985                         return ret;
 986         }
 987 
 988         ret = dr_handle_pending_wc(dmn, send_ring);
 989 
 990         return ret;
 991 }
