This source file includes following definitions.
- dr_parse_cqe
- dr_cq_poll_one
- dr_poll_cq
- dr_qp_event
- dr_create_rc_qp
- dr_destroy_qp
- dr_cmd_notify_hw
- dr_rdma_segments
- dr_post_send
- mlx5dr_send_fill_and_append_ste_send_info
- dr_handle_pending_wc
- dr_fill_data_segs
- dr_postsend_icm_data
- dr_get_tbl_copy_details
- mlx5dr_send_postsend_ste
- mlx5dr_send_postsend_htbl
- mlx5dr_send_postsend_formatted_htbl
- mlx5dr_send_postsend_action
- dr_modify_qp_rst2init
- dr_cmd_modify_qp_rtr2rts
- dr_cmd_modify_qp_init2rtr
- dr_prepare_qp_to_rts
- dr_cq_event
- dr_cq_complete
- dr_create_cq
- dr_destroy_cq
- dr_create_mkey
- dr_reg_mr
- dr_dereg_mr
- mlx5dr_send_ring_alloc
- mlx5dr_send_ring_free
- mlx5dr_send_ring_force_drain
1
2
3
4 #include <linux/smp.h>
5 #include "dr_types.h"
6
7 #define QUEUE_SIZE 128
8 #define SIGNAL_PER_DIV_QUEUE 16
9 #define TH_NUMS_TO_DRAIN 2
10
/* Result codes for the CQ polling helpers below */
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
12
/* One RDMA data segment: local buffer (addr/length/lkey) plus the
 * IB send flags (e.g. IB_SEND_SIGNALED) it should be posted with.
 */
struct dr_data_seg {
	u64 addr;
	u32 length;
	u32 lkey;
	unsigned int send_flags;
};
19
/* One ICM update: RDMA-write @write to remote_addr/rkey, followed by an
 * RDMA-read @read (NOTE(review): the read appears to act as a flush of
 * the preceding write — confirm against PRM semantics).
 */
struct postsend_info {
	struct dr_data_seg write;
	struct dr_data_seg read;
	u64 remote_addr;
	u32 rkey;
};
26
/* Parameters for the INIT->RTR QP state transition */
struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;
	enum ib_mtu mtu;
	u32 qp_num;		/* remote (here: own, loopback) QP number */
	u16 port_num;
	u8 min_rnr_timer;
	u8 sgid_index;
	u16 udp_src_port;	/* RoCE v2 UDP source port */
};
36
/* Parameters for the RTR->RTS QP state transition */
struct dr_qp_rts_attr {
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
};
42
/* Parameters for creating the DR RC QP */
struct dr_qp_init_attr {
	u32 cqn;
	u32 pdn;
	u32 max_send_wr;
	struct mlx5_uars_page *uar;
};
49
/* Process one CQE of the send ring's CQ.
 * On success or requester error, the SQ consumer counter is advanced past
 * the WQE identified by the CQE's wqe_counter (masked to the power-of-two
 * SQ size).  Returns CQ_OK for a successful completion, CQ_POLL_ERR
 * otherwise.
 */
static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
	unsigned int idx;
	u8 opcode;

	opcode = get_cqe_opcode(cqe64);
	if (opcode == MLX5_CQE_REQ_ERR) {
		/* Requester error: retire the failed WQE */
		idx = be16_to_cpu(cqe64->wqe_counter) &
		      (dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
	} else if (opcode == MLX5_CQE_RESP_ERR) {
		/* Responder error: only bump the consumer counter */
		++dr_cq->qp->sq.cc;
	} else {
		/* Successful completion */
		idx = be16_to_cpu(cqe64->wqe_counter) &
		      (dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

		return CQ_OK;
	}

	return CQ_POLL_ERR;
}
72
/* Pop and handle a single CQE.
 * Returns CQ_OK on a successful completion, CQ_EMPTY when no CQE is
 * available, or CQ_POLL_ERR on a completion error.
 */
static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
	if (!cqe64)
		return CQ_EMPTY;

	mlx5_cqwq_pop(&dr_cq->wq);
	err = dr_parse_cqe(dr_cq, cqe64);
	/* Publish the new consumer index to HW */
	mlx5_cqwq_update_db_record(&dr_cq->wq);

	return err;
}
88
89 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
90 {
91 int npolled;
92 int err = 0;
93
94 for (npolled = 0; npolled < ne; ++npolled) {
95 err = dr_cq_poll_one(dr_cq);
96 if (err != CQ_OK)
97 break;
98 }
99
100 return err == CQ_POLL_ERR ? err : npolled;
101 }
102
/* Async QP event callback — the QP is driven synchronously by polling,
 * so such events are only logged.
 */
static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
{
	pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
}
107
/* Create the RC QP used for loopback RDMA to ICM memory.
 *
 * A temporary QPC carrying only the queue sizes is used to create the WQ
 * buffer; the real CREATE_QP command is then built with the full context.
 * Returns the new QP, or NULL on any failure.
 */
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 4;	/* minimal RQ; only the SQ is used */
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_info(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	/* Per-slot producer counters, consumed by dr_parse_cqe() */
	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	/* Build the full QP context for the CREATE_QP command */
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);	/* allow the reserved lkey */
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
	kfree(in);	/* command mailbox no longer needed either way */

	if (err) {
		mlx5_core_warn(mdev, " Can't create QP\n");
		goto err_in;
	}
	dr_qp->mqp.event = dr_qp_event;
	dr_qp->uar = attr->uar;

	return dr_qp;

err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}
203
/* Destroy the QP in HW first, then release its SW resources */
static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}
212
/* Ring the doorbell for all WQEs posted so far.
 * @ctrl points at the control segment of the last WQE; it is written to
 * the UAR (BlueFlame offset) to kick HW.  The barrier sequence is
 * order-critical and must not be changed.
 */
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	/* Make all WQE writes visible to the device before the doorbell
	 * record update
	 */
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

	/* Ensure the doorbell record is globally visible before the MMIO
	 * write below
	 */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
223
/* Build one RDMA WQE (ctrl + raddr + data segments) on the SQ.
 * When @nreq is set, the doorbell is rung so every WQE posted so far is
 * submitted to HW in one go.
 */
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, int nreq)
{
	struct mlx5_wqe_raddr_seg *wq_raddr;
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	struct mlx5_wqe_data_seg *wq_dseg;
	unsigned int size;
	unsigned int idx;

	/* WQE size in 16-byte units, encoded into qpn_ds below */
	size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
	       sizeof(*wq_raddr) / 16;

	/* SQ ring slot; wqe_cnt is a power of two */
	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	wq_ctrl->imm = 0;
	/* Request a CQE only for signaled posts */
	wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
		MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
						opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
	wq_raddr = (void *)(wq_ctrl + 1);
	wq_raddr->raddr = cpu_to_be64(remote_addr);
	wq_raddr->rkey = cpu_to_be32(rkey);
	wq_raddr->reserved = 0;

	wq_dseg = (void *)(wq_raddr + 1);
	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
	wq_dseg->addr = cpu_to_be64(data_seg->addr);

	/* Remember the producer counter for this slot; used by
	 * dr_parse_cqe() to retire WQEs on completion
	 */
	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

	if (nreq)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}
261
262 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
263 {
264 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
265 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
266 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
267 &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
268 }
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
286 u16 offset, u8 *data,
287 struct mlx5dr_ste_send_info *ste_info,
288 struct list_head *send_list,
289 bool copy_data)
290 {
291 ste_info->size = size;
292 ste_info->ste = ste;
293 ste_info->offset = offset;
294
295 if (copy_data) {
296 memcpy(ste_info->data_cont, data, size);
297 ste_info->data = ste_info->data_cont;
298 } else {
299 ste_info->data = data;
300 }
301
302 list_add_tail(&ste_info->send_list, send_list);
303 }
304
305
306
307
308
309 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
310 struct mlx5dr_send_ring *send_ring)
311 {
312 bool is_drain = false;
313 int ne;
314
315 if (send_ring->pending_wqe < send_ring->signal_th)
316 return 0;
317
318
319 if (send_ring->pending_wqe >=
320 dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
321 is_drain = true;
322
323 do {
324 ne = dr_poll_cq(send_ring->cq, 1);
325 if (ne < 0)
326 return ne;
327 else if (ne == 1)
328 send_ring->pending_wqe -= send_ring->signal_th;
329 } while (is_drain && send_ring->pending_wqe);
330
331 return 0;
332 }
333
/* Finalize the write/read segment pair of one post.
 * Each post consumes two WQEs (write then read); every signal_th'th WQE
 * is marked IB_SEND_SIGNALED so completions arrive in batches.  Note the
 * asymmetry: write flags are OR'ed (caller-set flags preserved) while
 * read flags are overwritten.
 */
static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
			      struct postsend_info *send_info)
{
	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;

	send_ring->pending_wqe++;
	/* Read back the same region that was just written
	 * (NOTE(review): appears to act as a flush — confirm)
	 */
	send_info->read.length = send_info->write.length;

	send_info->read.addr = (uintptr_t)send_info->write.addr;
	send_info->read.lkey = send_ring->mr->mkey.key;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->read.send_flags = IB_SEND_SIGNALED;
	else
		send_info->read.send_flags = 0;
}
353
/* Post one ICM update (write + read-back) on the domain's send ring.
 * First reaps completions if too many WQEs are outstanding.
 * Returns 0 on success or a negative error.
 */
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
				struct postsend_info *send_info)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	u32 buff_offset;
	int ret;

	ret = dr_handle_pending_wc(dmn, send_ring);
	if (ret)
		return ret;

	if (send_info->write.length > dmn->info.max_inline_size) {
		/* Copy the payload into this post's slot of the ring
		 * buffer and retarget the write to the registered MR, so
		 * the caller's buffer may be reused immediately.  Slot is
		 * tx_head mod signal_th (assumes signal_th is a power of
		 * two).
		 */
		buff_offset = (send_ring->tx_head &
			       (dmn->send_ring->signal_th - 1)) *
			send_ring->max_post_send_size;

		memcpy(send_ring->buf + buff_offset,
		       (void *)(uintptr_t)send_info->write.addr,
		       send_info->write.length);
		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
		send_info->write.lkey = send_ring->mr->mkey.key;
	}

	send_ring->tx_head++;
	dr_fill_data_segs(send_ring, send_info);
	dr_post_send(send_ring->qp, send_info);

	return 0;
}
383
384 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
385 struct mlx5dr_ste_htbl *htbl,
386 u8 **data,
387 u32 *byte_size,
388 int *iterations,
389 int *num_stes)
390 {
391 int alloc_size;
392
393 if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
394 *iterations = htbl->chunk->byte_size /
395 dmn->send_ring->max_post_send_size;
396 *byte_size = dmn->send_ring->max_post_send_size;
397 alloc_size = *byte_size;
398 *num_stes = *byte_size / DR_STE_SIZE;
399 } else {
400 *iterations = 1;
401 *num_stes = htbl->chunk->num_of_entries;
402 alloc_size = *num_stes * DR_STE_SIZE;
403 }
404
405 *data = kzalloc(alloc_size, GFP_KERNEL);
406 if (!*data)
407 return -ENOMEM;
408
409 return 0;
410 }
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
427 u8 *data, u16 size, u16 offset)
428 {
429 struct postsend_info send_info = {};
430
431 send_info.write.addr = (uintptr_t)data;
432 send_info.write.length = size;
433 send_info.write.lkey = 0;
434 send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
435 send_info.rkey = ste->htbl->chunk->rkey;
436
437 return dr_postsend_icm_data(dmn, &send_info);
438 }
439
/* Write a whole STE hash table to ICM, chunked to the ring's max post
 * size.  Invalid entries are written from @formatted_ste; valid entries
 * keep their reduced STE content with @mask appended after it.
 * Returns 0 on success or a negative error.
 */
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = htbl->chunk->byte_size;
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Stage one chunk's worth of STEs in the copy buffer */
		for (j = 0; j < num_stes_per_iter; j++) {
			u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
			u32 ste_off = j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
				memcpy(data + ste_off,
				       formatted_ste, DR_STE_SIZE);
			} else {
				/* Copy the reduced STE data ... */
				memcpy(data + ste_off,
				       htbl->ste_arr[ste_index + j].hw_ste,
				       DR_STE_SIZE_REDUCED);
				/* ... followed by the bit mask */
				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
				       mask, DR_STE_SIZE_MASK);
			}
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}
499
500
501 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
502 struct mlx5dr_ste_htbl *htbl,
503 u8 *ste_init_data,
504 bool update_hw_ste)
505 {
506 u32 byte_size = htbl->chunk->byte_size;
507 int iterations;
508 int num_stes;
509 u8 *data;
510 int ret;
511 int i;
512
513 ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
514 &iterations, &num_stes);
515 if (ret)
516 return ret;
517
518 for (i = 0; i < num_stes; i++) {
519 u8 *copy_dst;
520
521
522 copy_dst = data + i * DR_STE_SIZE;
523 memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
524
525 if (update_hw_ste) {
526
527 copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
528 memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
529 }
530 }
531
532
533 for (i = 0; i < iterations; i++) {
534 u8 ste_index = i * (byte_size / DR_STE_SIZE);
535 struct postsend_info send_info = {};
536
537 send_info.write.addr = (uintptr_t)data;
538 send_info.write.length = byte_size;
539 send_info.write.lkey = 0;
540 send_info.remote_addr =
541 mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
542 send_info.rkey = htbl->chunk->rkey;
543
544 ret = dr_postsend_icm_data(dmn, &send_info);
545 if (ret)
546 goto out_free;
547 }
548
549 out_free:
550 kfree(data);
551 return ret;
552 }
553
554 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
555 struct mlx5dr_action *action)
556 {
557 struct postsend_info send_info = {};
558 int ret;
559
560 send_info.write.addr = (uintptr_t)action->rewrite.data;
561 send_info.write.length = action->rewrite.num_of_actions *
562 DR_MODIFY_ACTION_SIZE;
563 send_info.write.lkey = 0;
564 send_info.remote_addr = action->rewrite.chunk->mr_addr;
565 send_info.rkey = action->rewrite.chunk->rkey;
566
567 mutex_lock(&dmn->mutex);
568 ret = dr_postsend_icm_data(dmn, &send_info);
569 mutex_unlock(&dmn->mutex);
570
571 return ret;
572 }
573
/* Move the QP RESET->INIT on @port, enabling remote read/write.
 * NOTE(review): unlike the other modify helpers, qpn is not set into a
 * local mailbox here; presumably mlx5_core_qp_modify() takes it from
 * @dr_qp->mqp — confirm.
 */
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
				 struct mlx5dr_qp *dr_qp,
				 int port)
{
	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
				   &dr_qp->mqp);
}
591
/* Move the QP RTR->RTS with the given retry attributes.
 * NOTE(review): qpn is set into the local 'in' buffer, but only qpc is
 * passed to mlx5_core_qp_modify(), which fills qpn from mqp — the
 * MLX5_SET on 'in' appears redundant; confirm.
 */
static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
				    struct mlx5dr_qp *dr_qp,
				    struct dr_qp_rts_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);

	MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
				   &dr_qp->mqp);
}
610
/* Move the QP INIT->RTR: program MTU, the remote (loopback) QP number,
 * and the primary address path (MAC/GID/port, UDP sport for RoCE v2).
 */
static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
				     struct mlx5dr_qp *dr_qp,
				     struct dr_qp_rtr_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);

	MLX5_SET(qpc, qpc, mtu, attr->mtu);
	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
		 attr->sgid_index);

	/* RoCE v2 requires an explicit UDP source port */
	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
			 attr->udp_src_port);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
				   &dr_qp->mqp);
}
642
/* Walk the send-ring QP through RST->INIT->RTR->RTS.
 * The QP is connected to itself (rtr_attr.qp_num is its own qpn) so
 * RDMA operations loop back to the local device's ICM.
 * Returns 0 on success or a negative error.
 */
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
	struct dr_qp_rts_attr rts_attr = {};
	struct dr_qp_rtr_attr rtr_attr = {};
	enum ib_mtu mtu = IB_MTU_1024;
	u16 gid_index = 0;
	int port = 1;
	int ret;

	/* Init */
	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
	if (ret)
		return ret;

	/* RTR: resolve our own GID for the loopback path */
	ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
	if (ret)
		return ret;

	rtr_attr.mtu = mtu;
	rtr_attr.qp_num = dr_qp->mqp.qpn;	/* loopback: connect to self */
	rtr_attr.min_rnr_timer = 12;
	rtr_attr.port_num = port;
	rtr_attr.sgid_index = gid_index;
	rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
	if (ret)
		return ret;

	/* RTS */
	rts_attr.timeout = 14;
	rts_attr.retry_cnt = 7;
	rts_attr.rnr_retry = 7;

	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
	if (ret)
		return ret;

	return 0;
}
685
/* Async CQ event callback — the CQ is driven by polling, so events are
 * only logged.
 */
static void dr_cq_event(struct mlx5_core_cq *mcq,
			enum mlx5_event event)
{
	pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
}
691
/* Completion interrupt callback — completions are consumed by polling
 * (dr_poll_cq), so an interrupt-driven completion is unexpected here;
 * log it as an error.
 */
static void dr_cq_complete(struct mlx5_core_cq *mcq,
			   struct mlx5_eqe *eqe)
{
	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}
697
/* Create the CQ used by the send ring (polled, never armed for
 * interrupts).  Returns the CQ or NULL on failure.
 */
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_wq_param wqp;
	struct mlx5_cqe64 *cqe;
	struct mlx5dr_cq *cq;
	int inlen, err, eqn;
	unsigned int irqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;
	u32 i;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	/* Mark all CQEs invalid and HW-owned before handing them over */
	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	/* Spread CQs across the available completion vectors */
	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
	if (err) {
		kvfree(in);
		goto err_cqwq;
	}

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

	/* Callbacks must be in place before the CQ exists in HW */
	cq->mcq.event = dr_cq_event;
	cq->mcq.comp = dr_cq_complete;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	kvfree(in);

	if (err)
		goto err_cqwq;

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;

	/* Completions are consumed by polling; the arm doorbell is set
	 * once here (NOTE(review): 2 << 28 appears to encode a no-arm
	 * command sequence — confirm against the PRM).
	 */
	*cq->mcq.arm_db = cpu_to_be32(2 << 28);

	cq->mcq.vector = 0;
	cq->mcq.irqn = irqn;
	cq->mcq.uar = uar;

	return cq;

err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}
789
/* Destroy the CQ in HW first, then release its WQ buffer */
static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
	kfree(cq);
}
796
/* Create a physical-address (PA) mkey on @pdn covering the whole address
 * space (length64), with local/remote read+write and atomic access,
 * usable from any QP (qpn 0xffffff).
 */
static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}
817
/* DMA-map @buf (bidirectional) and create an mkey for it.
 * Returns the new MR or NULL on failure; on error the mapping is undone.
 */
static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	struct device *dma_device;
	dma_addr_t dma_addr;
	int err;

	if (!mr)
		return NULL;

	dma_device = &mdev->pdev->dev;
	dma_addr = dma_map_single(dma_device, buf, size,
				  DMA_BIDIRECTIONAL);
	err = dma_mapping_error(dma_device, dma_addr);
	if (err) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		kfree(mr);
		return NULL;
	}

	err = dr_create_mkey(mdev, pdn, &mr->mkey);
	if (err) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		dma_unmap_single(dma_device, dma_addr, size,
				 DMA_BIDIRECTIONAL);
		kfree(mr);
		return NULL;
	}

	mr->dma_addr = dma_addr;
	mr->size = size;
	mr->addr = buf;

	return mr;
}
854
/* Destroy the mkey before unmapping the buffer it covers */
static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
	mlx5_core_destroy_mkey(mdev, &mr->mkey);
	dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
			 DMA_BIDIRECTIONAL);
	kfree(mr);
}
862
/* Allocate the domain's send ring: CQ, RC QP (moved to RTS), the staging
 * buffer + MR used for non-inline writes, and the small sync MR used by
 * force-drain reads.
 * Returns 0 on success or a negative error; all partially created
 * resources are released on failure.
 */
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
	struct dr_qp_init_attr init_attr = {};
	int cq_size;
	int size;
	int ret;

	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
	if (!dmn->send_ring)
		return -ENOMEM;

	cq_size = QUEUE_SIZE + 1;
	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
	if (!dmn->send_ring->cq) {
		ret = -ENOMEM;
		goto free_send_ring;
	}

	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
	init_attr.pdn = dmn->pdn;
	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;

	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
	if (!dmn->send_ring->qp) {
		ret = -ENOMEM;
		goto clean_cq;
	}

	dmn->send_ring->cq->qp = dmn->send_ring->qp;

	dmn->info.max_send_wr = QUEUE_SIZE;
	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
					DR_STE_SIZE);

	/* Signal one WQE per SIGNAL_PER_DIV_QUEUE'th of the queue; the
	 * resulting value must be a power of two (used as a mask in
	 * dr_postsend_icm_data())
	 */
	dmn->send_ring->signal_th = dmn->info.max_send_wr /
		SIGNAL_PER_DIV_QUEUE;

	/* Prepare qp to be used */
	ret = dr_prepare_qp_to_rts(dmn);
	if (ret)
		goto clean_qp;

	dmn->send_ring->max_post_send_size =
		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
						   DR_ICM_TYPE_STE);

	/* One staging slot per unsignaled WQE batch */
	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
	if (!dmn->send_ring->buf) {
		ret = -ENOMEM;
		goto clean_qp;
	}

	dmn->send_ring->buf_size = size;

	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
				       dmn->pdn, dmn->send_ring->buf, size);
	if (!dmn->send_ring->mr) {
		ret = -ENOMEM;
		goto free_mem;
	}

	/* Target of the dummy reads posted by force-drain */
	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
					    dmn->pdn, dmn->send_ring->sync_buff,
					    MIN_READ_SYNC);
	if (!dmn->send_ring->sync_mr) {
		ret = -ENOMEM;
		goto clean_mr;
	}

	return 0;

clean_mr:
	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
	kfree(dmn->send_ring->buf);
clean_qp:
	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
	kfree(dmn->send_ring);

	return ret;
}
950
/* Tear down a send ring: QP and CQ first (stops HW access to the
 * buffers), then the MRs and the staging buffer.
 */
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
			   struct mlx5dr_send_ring *send_ring)
{
	dr_destroy_qp(dmn->mdev, send_ring->qp);
	dr_destroy_cq(dmn->mdev, send_ring->cq);
	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
	dr_dereg_mr(dmn->mdev, send_ring->mr);
	kfree(send_ring->buf);
	kfree(send_ring);
}
961
/* Flush the send ring: post enough dummy writes to the sync MR to cross
 * the drain threshold in dr_handle_pending_wc() (each post adds two
 * WQEs, hence the /2), then reap all outstanding completions.
 * Returns 0 on success or a negative error.
 */
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	struct postsend_info send_info = {};
	u8 data[DR_STE_SIZE];	/* dummy payload; contents are irrelevant */
	int num_of_sends_req;
	int ret;
	int i;

	/* Half of the signaled WQE threshold, in posts */
	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

	/* Send fake requests forcing the last to be signaled */
	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = DR_STE_SIZE;
	send_info.write.lkey = 0;
	/* Using the sync_mr in order to write/read */
	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
	send_info.rkey = send_ring->sync_mr->mkey.key;

	for (i = 0; i < num_of_sends_req; i++) {
		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			return ret;
	}

	ret = dr_handle_pending_wc(dmn, send_ring);

	return ret;
}