This source file includes the following definitions.
- mask_generation
- validate_r_tid_ack
- tid_rdma_schedule_ack
- tid_rdma_trigger_ack
- tid_rdma_opfn_encode
- tid_rdma_opfn_decode
- tid_rdma_opfn_init
- tid_rdma_conn_req
- tid_rdma_conn_reply
- tid_rdma_conn_resp
- tid_rdma_conn_error
- hfi1_kern_exp_rcv_init
- qp_to_rcd
- hfi1_qp_priv_init
- hfi1_qp_priv_tid_free
- first_qp
- kernel_tid_waiters
- dequeue_tid_waiter
- queue_qp_for_tid_wait
- __trigger_tid_waiter
- tid_rdma_schedule_tid_wakeup
- tid_rdma_trigger_resume
- _tid_rdma_flush_wait
- hfi1_tid_rdma_flush_wait
- kern_reserve_flow
- kern_set_hw_flow
- kern_setup_hw_flow
- kern_flow_generation_next
- kern_clear_hw_flow
- hfi1_kern_setup_hw_flow
- hfi1_kern_clear_hw_flow
- hfi1_kern_init_ctxt_generations
- trdma_pset_order
- tid_rdma_find_phys_blocks_4k
- tid_flush_pages
- tid_rdma_find_phys_blocks_8k
- kern_find_pages
- dma_unmap_flow
- dma_map_flow
- dma_mapped
- kern_get_phys_blocks
- kern_add_tid_node
- kern_alloc_tids
- kern_program_rcv_group
- kern_unprogram_rcv_group
- kern_program_rcvarray
- hfi1_kern_exp_rcv_setup
- hfi1_tid_rdma_reset_flow
- hfi1_kern_exp_rcv_clear
- hfi1_kern_exp_rcv_clear_all
- hfi1_kern_exp_rcv_free_flows
- __trdma_clean_swqe
- hfi1_kern_exp_rcv_alloc_flows
- hfi1_init_trdma_req
- hfi1_access_sw_tid_wait
- find_flow_ib
- hfi1_build_tid_rdma_read_packet
- hfi1_build_tid_rdma_read_req
- tid_rdma_rcv_read_request
- tid_rdma_rcv_error
- hfi1_rc_rcv_tid_rdma_read_req
- hfi1_build_tid_rdma_read_resp
- find_tid_request
- hfi1_rc_rcv_tid_rdma_read_resp
- hfi1_kern_read_tid_flow_free
- tid_rdma_tid_err
- restart_tid_rdma_read_req
- handle_read_kdeth_eflags
- hfi1_handle_kdeth_eflags
- hfi1_tid_rdma_restart_req
- hfi1_qp_kern_exp_rcv_clear_all
- hfi1_tid_rdma_wqe_interlock
- hfi1_check_sge_align
- setup_tid_rdma_wqe
- hfi1_build_tid_rdma_write_req
- hfi1_compute_tid_rdma_flow_wt
- position_in_queue
- hfi1_compute_tid_rnr_timeout
- hfi1_tid_write_alloc_resources
- hfi1_rc_rcv_tid_rdma_write_req
- hfi1_build_tid_rdma_write_resp
- hfi1_add_tid_reap_timer
- hfi1_mod_tid_reap_timer
- hfi1_stop_tid_reap_timer
- hfi1_del_tid_reap_timer
- hfi1_tid_timeout
- hfi1_rc_rcv_tid_rdma_write_resp
- hfi1_build_tid_rdma_packet
- hfi1_rc_rcv_tid_rdma_write_data
- hfi1_tid_rdma_is_resync_psn
- hfi1_build_tid_rdma_write_ack
- hfi1_rc_rcv_tid_rdma_ack
- hfi1_add_tid_retry_timer
- hfi1_mod_tid_retry_timer
- hfi1_stop_tid_retry_timer
- hfi1_del_tid_retry_timer
- hfi1_tid_retry_timeout
- hfi1_build_tid_rdma_resync
- hfi1_rc_rcv_tid_rdma_resync
- update_tid_tail
- hfi1_make_tid_rdma_pkt
- make_tid_rdma_ack
- hfi1_send_tid_ok
- _hfi1_do_tid_send
- hfi1_do_tid_send
- _hfi1_schedule_tid_send
- hfi1_schedule_tid_send
- hfi1_tid_rdma_ack_interlock
- read_r_next_psn
- tid_rdma_rcv_err
- update_r_next_psn_fecn
1
2
3
4
5
6
7 #include "hfi.h"
8 #include "qp.h"
9 #include "rc.h"
10 #include "verbs.h"
11 #include "tid_rdma.h"
12 #include "exp_rcv.h"
13 #include "trace.h"
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 #define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
31 #define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
32 #define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
33 #define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
34 #define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
35 #define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
36
37
38 #define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)
39
40 #define GENERATION_MASK 0xFFFFF
41
42 static u32 mask_generation(u32 a)
43 {
44 return a & GENERATION_MASK;
45 }
46
47
48 #define KERN_GENERATION_RESERVED mask_generation(U32_MAX)
49
50
51
52
53
54 #define TID_RDMA_JKEY 32
55 #define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
56 #define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
57
58
59 #define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
60 #define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
61 #define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
62 TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
63 #define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
64
65 #define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)
66
67 #define TID_RDMA_DESTQP_FLOW_SHIFT 11
68 #define TID_RDMA_DESTQP_FLOW_MASK 0x1f
69
70 #define TID_OPFN_QP_CTXT_MASK 0xff
71 #define TID_OPFN_QP_CTXT_SHIFT 56
72 #define TID_OPFN_QP_KDETH_MASK 0xff
73 #define TID_OPFN_QP_KDETH_SHIFT 48
74 #define TID_OPFN_MAX_LEN_MASK 0x7ff
75 #define TID_OPFN_MAX_LEN_SHIFT 37
76 #define TID_OPFN_TIMEOUT_MASK 0x1f
77 #define TID_OPFN_TIMEOUT_SHIFT 32
78 #define TID_OPFN_RESERVED_MASK 0x3f
79 #define TID_OPFN_RESERVED_SHIFT 26
80 #define TID_OPFN_URG_MASK 0x1
81 #define TID_OPFN_URG_SHIFT 25
82 #define TID_OPFN_VER_MASK 0x7
83 #define TID_OPFN_VER_SHIFT 22
84 #define TID_OPFN_JKEY_MASK 0x3f
85 #define TID_OPFN_JKEY_SHIFT 16
86 #define TID_OPFN_MAX_READ_MASK 0x3f
87 #define TID_OPFN_MAX_READ_SHIFT 10
88 #define TID_OPFN_MAX_WRITE_MASK 0x3f
89 #define TID_OPFN_MAX_WRITE_SHIFT 4
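/*
 * Layout of the 64-bit OPFN TID RDMA parameter word implied by the
 * masks and shifts above (bit ranges, most significant first):
 *   [63:56] receive context number (low byte of p->qp)  TID_OPFN_QP_CTXT
 *   [55:48] KDETH QP prefix (p->qp >> 16)               TID_OPFN_QP_KDETH
 *   [47:37] max segment length, in pages minus one      TID_OPFN_MAX_LEN
 *   [36:32] timeout exponent                            TID_OPFN_TIMEOUT
 *   [31:26] reserved                                    TID_OPFN_RESERVED
 *   [25]    urgent flag                                 TID_OPFN_URG
 *   [24:22] version                                     TID_OPFN_VER
 *   [21:16] job key                                     TID_OPFN_JKEY
 *   [15:10] max read segments per request               TID_OPFN_MAX_READ
 *   [9:4]   max write segments per request              TID_OPFN_MAX_WRITE
 *   [3:0]   not encoded here; masked off by tid_rdma_conn_reply()
 */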
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110 static void tid_rdma_trigger_resume(struct work_struct *work);
111 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
112 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
113 gfp_t gfp);
114 static void hfi1_init_trdma_req(struct rvt_qp *qp,
115 struct tid_rdma_request *req);
116 static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
117 static void hfi1_tid_timeout(struct timer_list *t);
118 static void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
119 static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
120 static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
121 static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
122 static void hfi1_tid_retry_timeout(struct timer_list *t);
123 static int make_tid_rdma_ack(struct rvt_qp *qp,
124 struct ib_other_headers *ohdr,
125 struct hfi1_pkt_state *ps);
126 static void hfi1_do_tid_send(struct rvt_qp *qp);
127 static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
128 static void tid_rdma_rcv_err(struct hfi1_packet *packet,
129 struct ib_other_headers *ohdr,
130 struct rvt_qp *qp, u32 psn, int diff, bool fecn);
131 static void update_r_next_psn_fecn(struct hfi1_packet *packet,
132 struct hfi1_qp_priv *priv,
133 struct hfi1_ctxtdata *rcd,
134 struct tid_rdma_flow *flow,
135 bool fecn);
136
137 static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
138 {
139 if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
140 priv->r_tid_ack = priv->r_tid_tail;
141 }
142
143 static void tid_rdma_schedule_ack(struct rvt_qp *qp)
144 {
145 struct hfi1_qp_priv *priv = qp->priv;
146
147 priv->s_flags |= RVT_S_ACK_PENDING;
148 hfi1_schedule_tid_send(qp);
149 }
150
151 static void tid_rdma_trigger_ack(struct rvt_qp *qp)
152 {
153 validate_r_tid_ack(qp->priv);
154 tid_rdma_schedule_ack(qp);
155 }
156
157 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
158 {
159 return
160 (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
161 TID_OPFN_QP_CTXT_SHIFT) |
162 ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
163 TID_OPFN_QP_KDETH_SHIFT) |
164 (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
165 TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
166 (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
167 TID_OPFN_TIMEOUT_SHIFT) |
168 (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
169 (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
170 (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
171 TID_OPFN_MAX_READ_SHIFT) |
172 (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
173 TID_OPFN_MAX_WRITE_SHIFT);
174 }
175
176 static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
177 {
178 p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
179 TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
180 p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
181 p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
182 TID_OPFN_MAX_WRITE_MASK;
183 p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
184 TID_OPFN_MAX_READ_MASK;
185 p->qp =
186 ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
187 << 16) |
188 ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
189 p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
190 p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
191 }
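/*
 * Round-trip sketch for the MAX_LEN field, assuming 4 KiB pages and a
 * 256 KiB segment (an illustrative value, not taken from this file):
 *
 *	struct tid_rdma_params p = { .max_len = 256 * 1024 };
 *	u64 w = tid_rdma_opfn_encode(&p);	// bits 47:37 hold 64 - 1 = 63
 *	tid_rdma_opfn_decode(&p, w);		// max_len back to 256 KiB
 *
 * A max_len that is not a whole number of pages would be truncated by
 * the page shift, so callers are expected to pass page-multiple sizes.
 */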
192
193 void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
194 {
195 struct hfi1_qp_priv *priv = qp->priv;
196
197 p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
198 p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
199 p->jkey = priv->rcd->jkey;
200 p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
201 p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
202 p->timeout = qp->timeout;
203 p->urg = is_urg_masked(priv->rcd);
204 }
205
206 bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
207 {
208 struct hfi1_qp_priv *priv = qp->priv;
209
210 *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
211 return true;
212 }
213
214 bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
215 {
216 struct hfi1_qp_priv *priv = qp->priv;
217 struct tid_rdma_params *remote, *old;
218 bool ret = true;
219
220 old = rcu_dereference_protected(priv->tid_rdma.remote,
221 lockdep_is_held(&priv->opfn.lock));
222 data &= ~0xfULL;
223
224
225
226
227 if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
228 goto null;
229
230
231
232
233
234
235
236 remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
237 if (!remote) {
238 ret = false;
239 goto null;
240 }
241
242 tid_rdma_opfn_decode(remote, data);
243 priv->tid_timer_timeout_jiffies =
244 usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
245 1000UL) << 3) * 7);
246 trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
247 trace_hfi1_opfn_param(qp, 1, remote);
248 rcu_assign_pointer(priv->tid_rdma.remote, remote);
249
250
251
252
253
254
255
256
257 priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
258 priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
259 goto free;
260 null:
261 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
262 priv->timeout_shift = 0;
263 free:
264 if (old)
265 kfree_rcu(old, rcu_head);
266 return ret;
267 }
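/*
 * The reap-timer interval computed above follows the usual IB local ACK
 * timeout convention of 4.096 us * 2^timeout: (4096 << timeout) / 1000
 * gives that interval in microseconds, and the << 3 followed by * 7
 * scales it by 56, presumably so the local timer fires only after the
 * remote side has exhausted its retries.  For example, timeout = 14
 * gives roughly 67 ms, so tid_timer_timeout_jiffies covers about 3.8 s.
 */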
268
269 bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
270 {
271 bool ret;
272
273 ret = tid_rdma_conn_reply(qp, *data);
274 *data = 0;
275
276
277
278
279
280 if (ret)
281 (void)tid_rdma_conn_req(qp, data);
282 return ret;
283 }
284
285 void tid_rdma_conn_error(struct rvt_qp *qp)
286 {
287 struct hfi1_qp_priv *priv = qp->priv;
288 struct tid_rdma_params *old;
289
290 old = rcu_dereference_protected(priv->tid_rdma.remote,
291 lockdep_is_held(&priv->opfn.lock));
292 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
293 if (old)
294 kfree_rcu(old, rcu_head);
295 }
296
297
298 int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
299 {
300 if (reinit)
301 return 0;
302
303 BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
304 BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
305 rcd->jkey = TID_RDMA_JKEY;
306 hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
307 return hfi1_alloc_ctxt_rcv_groups(rcd);
308 }
309
310
311
312
313
314
315
316
317
318
319 static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
320 struct rvt_qp *qp)
321 {
322 struct hfi1_ibdev *verbs_dev = container_of(rdi,
323 struct hfi1_ibdev,
324 rdi);
325 struct hfi1_devdata *dd = container_of(verbs_dev,
326 struct hfi1_devdata,
327 verbs_dev);
328 unsigned int ctxt;
329
330 if (qp->ibqp.qp_num == 0)
331 ctxt = 0;
332 else
333 ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
334 return dd->rcd[ctxt];
335 }
336
337 int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
338 struct ib_qp_init_attr *init_attr)
339 {
340 struct hfi1_qp_priv *qpriv = qp->priv;
341 int i, ret;
342
343 qpriv->rcd = qp_to_rcd(rdi, qp);
344
345 spin_lock_init(&qpriv->opfn.lock);
346 INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
347 INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
348 qpriv->flow_state.psn = 0;
349 qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
350 qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
351 qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
352 qpriv->s_state = TID_OP(WRITE_RESP);
353 qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
354 qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
355 qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
356 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
357 qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
358 qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
359 qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
360 qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
361 atomic_set(&qpriv->n_requests, 0);
362 atomic_set(&qpriv->n_tid_requests, 0);
363 timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
364 timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
365 INIT_LIST_HEAD(&qpriv->tid_wait);
366
367 if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
368 struct hfi1_devdata *dd = qpriv->rcd->dd;
369
370 qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
371 sizeof(*qpriv->pages),
372 GFP_KERNEL, dd->node);
373 if (!qpriv->pages)
374 return -ENOMEM;
375 for (i = 0; i < qp->s_size; i++) {
376 struct hfi1_swqe_priv *priv;
377 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
378
379 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
380 dd->node);
381 if (!priv)
382 return -ENOMEM;
383
384 hfi1_init_trdma_req(qp, &priv->tid_req);
385 priv->tid_req.e.swqe = wqe;
386 wqe->priv = priv;
387 }
388 for (i = 0; i < rvt_max_atomic(rdi); i++) {
389 struct hfi1_ack_priv *priv;
390
391 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
392 dd->node);
393 if (!priv)
394 return -ENOMEM;
395
396 hfi1_init_trdma_req(qp, &priv->tid_req);
397 priv->tid_req.e.ack = &qp->s_ack_queue[i];
398
399 ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
400 GFP_KERNEL);
401 if (ret) {
402 kfree(priv);
403 return ret;
404 }
405 qp->s_ack_queue[i].priv = priv;
406 }
407 }
408
409 return 0;
410 }
411
412 void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
413 {
414 struct hfi1_qp_priv *qpriv = qp->priv;
415 struct rvt_swqe *wqe;
416 u32 i;
417
418 if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
419 for (i = 0; i < qp->s_size; i++) {
420 wqe = rvt_get_swqe_ptr(qp, i);
421 kfree(wqe->priv);
422 wqe->priv = NULL;
423 }
424 for (i = 0; i < rvt_max_atomic(rdi); i++) {
425 struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
426
427 if (priv)
428 hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
429 kfree(priv);
430 qp->s_ack_queue[i].priv = NULL;
431 }
432 cancel_work_sync(&qpriv->opfn.opfn_work);
433 kfree(qpriv->pages);
434 qpriv->pages = NULL;
435 }
436 }
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468 static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
469 struct tid_queue *queue)
470 __must_hold(&rcd->exp_lock)
471 {
472 struct hfi1_qp_priv *priv;
473
474 lockdep_assert_held(&rcd->exp_lock);
475 priv = list_first_entry_or_null(&queue->queue_head,
476 struct hfi1_qp_priv,
477 tid_wait);
478 if (!priv)
479 return NULL;
480 rvt_get_qp(priv->owner);
481 return priv->owner;
482 }
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502 static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
503 struct tid_queue *queue, struct rvt_qp *qp)
504 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
505 {
506 struct rvt_qp *fqp;
507 bool ret = true;
508
509 lockdep_assert_held(&qp->s_lock);
510 lockdep_assert_held(&rcd->exp_lock);
511 fqp = first_qp(rcd, queue);
512 if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
513 ret = false;
514 rvt_put_qp(fqp);
515 return ret;
516 }
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533 static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
534 struct tid_queue *queue, struct rvt_qp *qp)
535 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
536 {
537 struct hfi1_qp_priv *priv = qp->priv;
538
539 lockdep_assert_held(&qp->s_lock);
540 lockdep_assert_held(&rcd->exp_lock);
541 if (list_empty(&priv->tid_wait))
542 return;
543 list_del_init(&priv->tid_wait);
544 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
545 queue->dequeue++;
546 rvt_put_qp(qp);
547 }
548
549
550
551
552
553
554
555
556
557
558
559 static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
560 struct tid_queue *queue, struct rvt_qp *qp)
561 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
562 {
563 struct hfi1_qp_priv *priv = qp->priv;
564
565 lockdep_assert_held(&qp->s_lock);
566 lockdep_assert_held(&rcd->exp_lock);
567 if (list_empty(&priv->tid_wait)) {
568 qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
569 list_add_tail(&priv->tid_wait, &queue->queue_head);
570 priv->tid_enqueue = ++queue->enqueue;
571 rcd->dd->verbs_dev.n_tidwait++;
572 trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
573 rvt_get_qp(qp);
574 }
575 }
576
577
578
579
580
581
582
583
584 static void __trigger_tid_waiter(struct rvt_qp *qp)
585 __must_hold(&qp->s_lock)
586 {
587 lockdep_assert_held(&qp->s_lock);
588 if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
589 return;
590 trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
591 hfi1_schedule_send(qp);
592 }
593
594
595
596
597
598
599
600
601
602
603
604
605
606 static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
607 {
608 struct hfi1_qp_priv *priv;
609 struct hfi1_ibport *ibp;
610 struct hfi1_pportdata *ppd;
611 struct hfi1_devdata *dd;
612 bool rval;
613
614 if (!qp)
615 return;
616
617 priv = qp->priv;
618 ibp = to_iport(qp->ibqp.device, qp->port_num);
619 ppd = ppd_from_ibp(ibp);
620 dd = dd_from_ibdev(qp->ibqp.device);
621
622 rval = queue_work_on(priv->s_sde ?
623 priv->s_sde->cpu :
624 cpumask_first(cpumask_of_node(dd->node)),
625 ppd->hfi1_wq,
626 &priv->tid_rdma.trigger_work);
627 if (!rval)
628 rvt_put_qp(qp);
629 }
630
631
632
633
634
635
636
637
638 static void tid_rdma_trigger_resume(struct work_struct *work)
639 {
640 struct tid_rdma_qp_params *tr;
641 struct hfi1_qp_priv *priv;
642 struct rvt_qp *qp;
643
644 tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
645 priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
646 qp = priv->owner;
647 spin_lock_irq(&qp->s_lock);
648 if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
649 spin_unlock_irq(&qp->s_lock);
650 hfi1_do_send(priv->owner, true);
651 } else {
652 spin_unlock_irq(&qp->s_lock);
653 }
654 rvt_put_qp(qp);
655 }
656
657
658
659
660
661
662
663
664 static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
665 __must_hold(&qp->s_lock)
666 {
667 struct hfi1_qp_priv *priv;
668
669 if (!qp)
670 return;
671 lockdep_assert_held(&qp->s_lock);
672 priv = qp->priv;
673 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
674 spin_lock(&priv->rcd->exp_lock);
675 if (!list_empty(&priv->tid_wait)) {
676 list_del_init(&priv->tid_wait);
677 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
678 queue->dequeue++;
679 rvt_put_qp(qp);
680 }
681 spin_unlock(&priv->rcd->exp_lock);
682 }
683
684 void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
685 __must_hold(&qp->s_lock)
686 {
687 struct hfi1_qp_priv *priv = qp->priv;
688
689 _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
690 _tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
691 }
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711 static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
712 __must_hold(&rcd->exp_lock)
713 {
714 int nr;
715
716
717 if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
718 !test_and_set_bit(last, &rcd->flow_mask))
719 return last;
720
721 nr = ffz(rcd->flow_mask);
722 BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
723 (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
724 if (nr > (RXE_NUM_TID_FLOWS - 1))
725 return -EAGAIN;
726 set_bit(nr, &rcd->flow_mask);
727 return nr;
728 }
729
730 static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
731 u32 flow_idx)
732 {
733 u64 reg;
734
735 reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
736 RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
737 RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
738 RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
739 RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
740 RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
741
742 if (generation != KERN_GENERATION_RESERVED)
743 reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
744
745 write_uctxt_csr(rcd->dd, rcd->ctxt,
746 RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
747 }
748
749 static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
750 __must_hold(&rcd->exp_lock)
751 {
752 u32 generation = rcd->flows[flow_idx].generation;
753
754 kern_set_hw_flow(rcd, generation, flow_idx);
755 return generation;
756 }
757
758 static u32 kern_flow_generation_next(u32 gen)
759 {
760 u32 generation = mask_generation(gen + 1);
761
762 if (generation == KERN_GENERATION_RESERVED)
763 generation = mask_generation(generation + 1);
764 return generation;
765 }
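/*
 * Generations are 20 bits wide (GENERATION_MASK) and 0xFFFFF is reserved
 * to mean "no valid generation", so the sequence wraps around it: for
 * instance, kern_flow_generation_next(0xFFFFE) skips 0xFFFFF and
 * returns 0.
 */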
766
767 static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
768 __must_hold(&rcd->exp_lock)
769 {
770 rcd->flows[flow_idx].generation =
771 kern_flow_generation_next(rcd->flows[flow_idx].generation);
772 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
773 }
774
775 int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
776 {
777 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
778 struct tid_flow_state *fs = &qpriv->flow_state;
779 struct rvt_qp *fqp;
780 unsigned long flags;
781 int ret = 0;
782
783
784 if (fs->index != RXE_NUM_TID_FLOWS)
785 return ret;
786
787 spin_lock_irqsave(&rcd->exp_lock, flags);
788 if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
789 goto queue;
790
791 ret = kern_reserve_flow(rcd, fs->last_index);
792 if (ret < 0)
793 goto queue;
794 fs->index = ret;
795 fs->last_index = fs->index;
796
797
798 if (fs->generation != KERN_GENERATION_RESERVED)
799 rcd->flows[fs->index].generation = fs->generation;
800 fs->generation = kern_setup_hw_flow(rcd, fs->index);
801 fs->psn = 0;
802 dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
803
804 fqp = first_qp(rcd, &rcd->flow_queue);
805 spin_unlock_irqrestore(&rcd->exp_lock, flags);
806
807 tid_rdma_schedule_tid_wakeup(fqp);
808 return 0;
809 queue:
810 queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
811 spin_unlock_irqrestore(&rcd->exp_lock, flags);
812 return -EAGAIN;
813 }
814
815 void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
816 {
817 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
818 struct tid_flow_state *fs = &qpriv->flow_state;
819 struct rvt_qp *fqp;
820 unsigned long flags;
821
822 if (fs->index >= RXE_NUM_TID_FLOWS)
823 return;
824 spin_lock_irqsave(&rcd->exp_lock, flags);
825 kern_clear_hw_flow(rcd, fs->index);
826 clear_bit(fs->index, &rcd->flow_mask);
827 fs->index = RXE_NUM_TID_FLOWS;
828 fs->psn = 0;
829 fs->generation = KERN_GENERATION_RESERVED;
830
831
832 fqp = first_qp(rcd, &rcd->flow_queue);
833 spin_unlock_irqrestore(&rcd->exp_lock, flags);
834
835 if (fqp == qp) {
836 __trigger_tid_waiter(fqp);
837 rvt_put_qp(fqp);
838 } else {
839 tid_rdma_schedule_tid_wakeup(fqp);
840 }
841 }
842
843 void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
844 {
845 int i;
846
847 for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
848 rcd->flows[i].generation = mask_generation(prandom_u32());
849 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
850 }
851 }
852
853
854 static u8 trdma_pset_order(struct tid_rdma_pageset *s)
855 {
856 u8 count = s->count;
857
858 return ilog2(count) + 1;
859 }
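/*
 * The RcvArray "order" encodes the pageset size as ilog2(count) + 1,
 * e.g. 1 page -> 1, 2 pages -> 2, 4 pages -> 3, 8 pages -> 4.  The
 * pageset builders below only ever produce power-of-two counts, so the
 * ilog2() here is exact.
 */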
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875 static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
876 struct page **pages,
877 u32 npages,
878 struct tid_rdma_pageset *list)
879 {
880 u32 pagecount, pageidx, setcount = 0, i;
881 void *vaddr, *this_vaddr;
882
883 if (!npages)
884 return 0;
885
886
887
888
889
890
891 vaddr = page_address(pages[0]);
892 trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
893 for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
894 this_vaddr = i < npages ? page_address(pages[i]) : NULL;
895 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
896 this_vaddr);
897
898
899
900
901 if (this_vaddr != (vaddr + PAGE_SIZE)) {
902
903
904
905
906
907
908
909
910
911
912
913
914 while (pagecount) {
915 int maxpages = pagecount;
916 u32 bufsize = pagecount * PAGE_SIZE;
917
918 if (bufsize > MAX_EXPECTED_BUFFER)
919 maxpages =
920 MAX_EXPECTED_BUFFER >>
921 PAGE_SHIFT;
922 else if (!is_power_of_2(bufsize))
923 maxpages =
924 rounddown_pow_of_two(bufsize) >>
925 PAGE_SHIFT;
926
927 list[setcount].idx = pageidx;
928 list[setcount].count = maxpages;
929 trace_hfi1_tid_pageset(flow->req->qp, setcount,
930 list[setcount].idx,
931 list[setcount].count);
932 pagecount -= maxpages;
933 pageidx += maxpages;
934 setcount++;
935 }
936 pageidx = i;
937 pagecount = 1;
938 vaddr = this_vaddr;
939 } else {
940 vaddr += PAGE_SIZE;
941 pagecount++;
942 }
943 }
944
945 if (setcount & 1)
946 list[setcount++].count = 0;
947 return setcount;
948 }
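/*
 * The trailing zero-count pageset keeps the total even: RcvArray
 * entries are consumed in even/odd pairs by kern_program_rcv_group(),
 * which takes one pageset per entry and emits a combined TID entry
 * (tidctrl 0x3) when two consecutive pagesets land on the same pair.
 * A count == 0 pageset simply programs PT_INVALID for its slot.
 */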
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970 static u32 tid_flush_pages(struct tid_rdma_pageset *list,
971 u32 *idx, u32 pages, u32 sets)
972 {
973 while (pages) {
974 u32 maxpages = pages;
975
976 if (maxpages > MAX_EXPECTED_PAGES)
977 maxpages = MAX_EXPECTED_PAGES;
978 else if (!is_power_of_2(maxpages))
979 maxpages = rounddown_pow_of_two(maxpages);
980 list[sets].idx = *idx;
981 list[sets++].count = maxpages;
982 *idx += maxpages;
983 pages -= maxpages;
984 }
985
986 if (sets & 1)
987 list[sets++].count = 0;
988 return sets;
989 }
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013 static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
1014 struct page **pages,
1015 u32 npages,
1016 struct tid_rdma_pageset *list)
1017 {
1018 u32 idx, sets = 0, i;
1019 u32 pagecnt = 0;
1020 void *v0, *v1, *vm1;
1021
1022 if (!npages)
1023 return 0;
1024 for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
1025
1026 v0 = page_address(pages[i]);
1027 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
1028 v1 = i + 1 < npages ?
1029 page_address(pages[i + 1]) : NULL;
1030 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
1031
1032 if (v1 != (v0 + PAGE_SIZE)) {
1033
1034 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1035
1036 list[sets].idx = idx++;
1037 list[sets++].count = 1;
1038 if (v1) {
1039 list[sets].count = 1;
1040 list[sets++].idx = idx++;
1041 } else {
1042 list[sets++].count = 0;
1043 }
1044 vm1 = NULL;
1045 pagecnt = 0;
1046 continue;
1047 }
1048
1049 if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
1050
1051 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1052 pagecnt = 0;
1053 }
1054
1055 pagecnt += 2;
1056
1057 vm1 = v1;
1058
1059 }
1060
1061 sets = tid_flush_pages(list, &idx, npages - idx, sets);
1062
1063 WARN_ON(sets & 1);
1064 return sets;
1065 }
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 static u32 kern_find_pages(struct tid_rdma_flow *flow,
1081 struct page **pages,
1082 struct rvt_sge_state *ss, bool *last)
1083 {
1084 struct tid_rdma_request *req = flow->req;
1085 struct rvt_sge *sge = &ss->sge;
1086 u32 length = flow->req->seg_len;
1087 u32 len = PAGE_SIZE;
1088 u32 i = 0;
1089
1090 while (length && req->isge < ss->num_sge) {
1091 pages[i++] = virt_to_page(sge->vaddr);
1092
1093 sge->vaddr += len;
1094 sge->length -= len;
1095 sge->sge_length -= len;
1096 if (!sge->sge_length) {
1097 if (++req->isge < ss->num_sge)
1098 *sge = ss->sg_list[req->isge - 1];
1099 } else if (sge->length == 0 && sge->mr->lkey) {
1100 if (++sge->n >= RVT_SEGSZ) {
1101 ++sge->m;
1102 sge->n = 0;
1103 }
1104 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
1105 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
1106 }
1107 length -= len;
1108 }
1109
1110 flow->length = flow->req->seg_len - length;
1111 *last = req->isge == ss->num_sge ? false : true;
1112 return i;
1113 }
1114
1115 static void dma_unmap_flow(struct tid_rdma_flow *flow)
1116 {
1117 struct hfi1_devdata *dd;
1118 int i;
1119 struct tid_rdma_pageset *pset;
1120
1121 dd = flow->req->rcd->dd;
1122 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1123 i++, pset++) {
1124 if (pset->count && pset->addr) {
1125 dma_unmap_page(&dd->pcidev->dev,
1126 pset->addr,
1127 PAGE_SIZE * pset->count,
1128 DMA_FROM_DEVICE);
1129 pset->mapped = 0;
1130 }
1131 }
1132 }
1133
1134 static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
1135 {
1136 int i;
1137 struct hfi1_devdata *dd = flow->req->rcd->dd;
1138 struct tid_rdma_pageset *pset;
1139
1140 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1141 i++, pset++) {
1142 if (pset->count) {
1143 pset->addr = dma_map_page(&dd->pcidev->dev,
1144 pages[pset->idx],
1145 0,
1146 PAGE_SIZE * pset->count,
1147 DMA_FROM_DEVICE);
1148
1149 if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
1150 dma_unmap_flow(flow);
1151 return -ENOMEM;
1152 }
1153 pset->mapped = 1;
1154 }
1155 }
1156 return 0;
1157 }
1158
1159 static inline bool dma_mapped(struct tid_rdma_flow *flow)
1160 {
1161 return !!flow->pagesets[0].mapped;
1162 }
1163
1164
1165
1166
1167
1168 static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
1169 struct page **pages,
1170 struct rvt_sge_state *ss, bool *last)
1171 {
1172 u8 npages;
1173
1174
1175 if (flow->npagesets) {
1176 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
1177 flow);
1178 if (!dma_mapped(flow))
1179 return dma_map_flow(flow, pages);
1180 return 0;
1181 }
1182
1183 npages = kern_find_pages(flow, pages, ss, last);
1184
1185 if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
1186 flow->npagesets =
1187 tid_rdma_find_phys_blocks_4k(flow, pages, npages,
1188 flow->pagesets);
1189 else
1190 flow->npagesets =
1191 tid_rdma_find_phys_blocks_8k(flow, pages, npages,
1192 flow->pagesets);
1193
1194 return dma_map_flow(flow, pages);
1195 }
1196
1197 static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
1198 struct hfi1_ctxtdata *rcd, char *s,
1199 struct tid_group *grp, u8 cnt)
1200 {
1201 struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
1202
1203 WARN_ON_ONCE(flow->tnode_cnt >=
1204 (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
1205 if (WARN_ON_ONCE(cnt & 1))
1206 dd_dev_err(rcd->dd,
1207 "unexpected odd allocation cnt %u map 0x%x used %u",
1208 cnt, grp->map, grp->used);
1209
1210 node->grp = grp;
1211 node->map = grp->map;
1212 node->cnt = cnt;
1213 trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
1214 grp->base, grp->map, grp->used, cnt);
1215 }
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230 static int kern_alloc_tids(struct tid_rdma_flow *flow)
1231 {
1232 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1233 struct hfi1_devdata *dd = rcd->dd;
1234 u32 ngroups, pageidx = 0;
1235 struct tid_group *group = NULL, *used;
1236 u8 use;
1237
1238 flow->tnode_cnt = 0;
1239 ngroups = flow->npagesets / dd->rcv_entries.group_size;
1240 if (!ngroups)
1241 goto used_list;
1242
1243
1244 list_for_each_entry(group, &rcd->tid_group_list.list, list) {
1245 kern_add_tid_node(flow, rcd, "complete groups", group,
1246 group->size);
1247
1248 pageidx += group->size;
1249 if (!--ngroups)
1250 break;
1251 }
1252
1253 if (pageidx >= flow->npagesets)
1254 goto ok;
1255
1256 used_list:
1257
1258 list_for_each_entry(used, &rcd->tid_used_list.list, list) {
1259 use = min_t(u32, flow->npagesets - pageidx,
1260 used->size - used->used);
1261 kern_add_tid_node(flow, rcd, "used groups", used, use);
1262
1263 pageidx += use;
1264 if (pageidx >= flow->npagesets)
1265 goto ok;
1266 }
1267
1268
1269
1270
1271
1272
1273 if (group && &group->list == &rcd->tid_group_list.list)
1274 goto bail_eagain;
1275 group = list_prepare_entry(group, &rcd->tid_group_list.list,
1276 list);
1277 if (list_is_last(&group->list, &rcd->tid_group_list.list))
1278 goto bail_eagain;
1279 group = list_next_entry(group, list);
1280 use = min_t(u32, flow->npagesets - pageidx, group->size);
1281 kern_add_tid_node(flow, rcd, "complete continue", group, use);
1282 pageidx += use;
1283 if (pageidx >= flow->npagesets)
1284 goto ok;
1285 bail_eagain:
1286 trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
1287 (u64)flow->npagesets);
1288 return -EAGAIN;
1289 ok:
1290 return 0;
1291 }
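/*
 * TID allocation above walks three tiers: whole groups from
 * tid_group_list when at least one full group's worth is needed, then
 * leftover entries in partially used groups from tid_used_list, and
 * finally at most one more free group to cover any remainder.  If the
 * pagesets still do not fit, -EAGAIN is returned and the caller queues
 * the QP to wait until TIDs are released.
 */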
1292
1293 static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
1294 u32 *pset_idx)
1295 {
1296 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1297 struct hfi1_devdata *dd = rcd->dd;
1298 struct kern_tid_node *node = &flow->tnode[grp_num];
1299 struct tid_group *grp = node->grp;
1300 struct tid_rdma_pageset *pset;
1301 u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
1302 u32 rcventry, npages = 0, pair = 0, tidctrl;
1303 u8 i, cnt = 0;
1304
1305 for (i = 0; i < grp->size; i++) {
1306 rcventry = grp->base + i;
1307
1308 if (node->map & BIT(i) || cnt >= node->cnt) {
1309 rcv_array_wc_fill(dd, rcventry);
1310 continue;
1311 }
1312 pset = &flow->pagesets[(*pset_idx)++];
1313 if (pset->count) {
1314 hfi1_put_tid(dd, rcventry, PT_EXPECTED,
1315 pset->addr, trdma_pset_order(pset));
1316 } else {
1317 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1318 }
1319 npages += pset->count;
1320
1321 rcventry -= rcd->expected_base;
1322 tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
1323
1324
1325
1326
1327
1328
1329
1330 pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
1331 node->cnt >= cnt + 2;
1332 if (!pair) {
1333 if (!pset->count)
1334 tidctrl = 0x1;
1335 flow->tid_entry[flow->tidcnt++] =
1336 EXP_TID_SET(IDX, rcventry >> 1) |
1337 EXP_TID_SET(CTRL, tidctrl) |
1338 EXP_TID_SET(LEN, npages);
1339 trace_hfi1_tid_entry_alloc(
1340 flow->req->qp, flow->tidcnt - 1,
1341 flow->tid_entry[flow->tidcnt - 1]);
1342
1343
1344 flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
1345 npages = 0;
1346 }
1347
1348 if (grp->used == grp->size - 1)
1349 tid_group_move(grp, &rcd->tid_used_list,
1350 &rcd->tid_full_list);
1351 else if (!grp->used)
1352 tid_group_move(grp, &rcd->tid_group_list,
1353 &rcd->tid_used_list);
1354
1355 grp->used++;
1356 grp->map |= BIT(i);
1357 cnt++;
1358 }
1359 }
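/*
 * Each emitted TID entry packs the RcvArray pair index (rcventry >> 1),
 * the tidctrl selector (0x1 even entry of the pair, 0x2 odd entry,
 * 0x3 both) and the accumulated page count via EXP_TID_SET().  The
 * per-flow packet count grows by the pageset size in pMTU-sized pages:
 * with an 8 KiB pMTU (pmtu_pg == 2) a 4-page pageset contributes
 * 2 packets, while with a 4 KiB pMTU it contributes 4.
 */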
1360
1361 static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
1362 {
1363 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1364 struct hfi1_devdata *dd = rcd->dd;
1365 struct kern_tid_node *node = &flow->tnode[grp_num];
1366 struct tid_group *grp = node->grp;
1367 u32 rcventry;
1368 u8 i, cnt = 0;
1369
1370 for (i = 0; i < grp->size; i++) {
1371 rcventry = grp->base + i;
1372
1373 if (node->map & BIT(i) || cnt >= node->cnt) {
1374 rcv_array_wc_fill(dd, rcventry);
1375 continue;
1376 }
1377
1378 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1379
1380 grp->used--;
1381 grp->map &= ~BIT(i);
1382 cnt++;
1383
1384 if (grp->used == grp->size - 1)
1385 tid_group_move(grp, &rcd->tid_full_list,
1386 &rcd->tid_used_list);
1387 else if (!grp->used)
1388 tid_group_move(grp, &rcd->tid_used_list,
1389 &rcd->tid_group_list);
1390 }
1391 if (WARN_ON_ONCE(cnt & 1)) {
1392 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1393 struct hfi1_devdata *dd = rcd->dd;
1394
1395 dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
1396 cnt, grp->map, grp->used);
1397 }
1398 }
1399
1400 static void kern_program_rcvarray(struct tid_rdma_flow *flow)
1401 {
1402 u32 pset_idx = 0;
1403 int i;
1404
1405 flow->npkts = 0;
1406 flow->tidcnt = 0;
1407 for (i = 0; i < flow->tnode_cnt; i++)
1408 kern_program_rcv_group(flow, i, &pset_idx);
1409 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
1410 }
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454 int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
1455 struct rvt_sge_state *ss, bool *last)
1456 __must_hold(&req->qp->s_lock)
1457 {
1458 struct tid_rdma_flow *flow = &req->flows[req->setup_head];
1459 struct hfi1_ctxtdata *rcd = req->rcd;
1460 struct hfi1_qp_priv *qpriv = req->qp->priv;
1461 unsigned long flags;
1462 struct rvt_qp *fqp;
1463 u16 clear_tail = req->clear_tail;
1464
1465 lockdep_assert_held(&req->qp->s_lock);
1466
1467
1468
1469
1470
1471
1472 if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
1473 CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
1474 req->n_flows)
1475 return -EINVAL;
1476
1477
1478
1479
1480
1481
1482 if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
1483 hfi1_wait_kmem(flow->req->qp);
1484 return -ENOMEM;
1485 }
1486
1487 spin_lock_irqsave(&rcd->exp_lock, flags);
1488 if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
1489 goto queue;
1490
1491
1492
1493
1494
1495
1496 if (kern_alloc_tids(flow))
1497 goto queue;
1498
1499
1500
1501
1502 kern_program_rcvarray(flow);
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512 memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
1513 flow->idx = qpriv->flow_state.index;
1514 flow->flow_state.generation = qpriv->flow_state.generation;
1515 flow->flow_state.spsn = qpriv->flow_state.psn;
1516 flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
1517 flow->flow_state.r_next_psn =
1518 full_flow_psn(flow, flow->flow_state.spsn);
1519 qpriv->flow_state.psn += flow->npkts;
1520
1521 dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
1522
1523 fqp = first_qp(rcd, &rcd->rarr_queue);
1524 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1525 tid_rdma_schedule_tid_wakeup(fqp);
1526
1527 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1528 return 0;
1529 queue:
1530 queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
1531 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1532 return -EAGAIN;
1533 }
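/*
 * The per-request flow ring is indexed with setup_head (next flow to
 * program), flow_idx (next flow to put on the wire) and clear_tail
 * (oldest flow still holding TIDs).  MAX_FLOWS is a power of two
 * (roundup_pow_of_two(6 + 1) == 8), so an index advances with a simple
 * "& (MAX_FLOWS - 1)" and the ring holds at most MAX_FLOWS - 1
 * programmed-but-unreleased flows; e.g. setup_head == 5 with
 * clear_tail == 2 means CIRC_CNT() == 3 flows are currently programmed.
 */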
1534
1535 static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
1536 {
1537 flow->npagesets = 0;
1538 }
1539
1540
1541
1542
1543
1544
1545
1546 int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
1547 __must_hold(&req->qp->s_lock)
1548 {
1549 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
1550 struct hfi1_ctxtdata *rcd = req->rcd;
1551 unsigned long flags;
1552 int i;
1553 struct rvt_qp *fqp;
1554
1555 lockdep_assert_held(&req->qp->s_lock);
1556
1557 if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
1558 return -EINVAL;
1559
1560 spin_lock_irqsave(&rcd->exp_lock, flags);
1561
1562 for (i = 0; i < flow->tnode_cnt; i++)
1563 kern_unprogram_rcv_group(flow, i);
1564
1565 flow->tnode_cnt = 0;
1566
1567 fqp = first_qp(rcd, &rcd->rarr_queue);
1568 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1569
1570 dma_unmap_flow(flow);
1571
1572 hfi1_tid_rdma_reset_flow(flow);
1573 req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
1574
1575 if (fqp == req->qp) {
1576 __trigger_tid_waiter(fqp);
1577 rvt_put_qp(fqp);
1578 } else {
1579 tid_rdma_schedule_tid_wakeup(fqp);
1580 }
1581
1582 return 0;
1583 }
1584
1585
1586
1587
1588
1589 void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
1590 __must_hold(&req->qp->s_lock)
1591 {
1592
1593 while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
1594 if (hfi1_kern_exp_rcv_clear(req))
1595 break;
1596 }
1597 }
1598
1599
1600
1601
1602
1603 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
1604 {
1605 kfree(req->flows);
1606 req->flows = NULL;
1607 }
1608
1609
1610
1611
1612
1613
1614 void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
1615 {
1616 struct hfi1_swqe_priv *p = wqe->priv;
1617
1618 hfi1_kern_exp_rcv_free_flows(&p->tid_req);
1619 }
1620
1621
1622
1623
1624 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
1625 gfp_t gfp)
1626 {
1627 struct tid_rdma_flow *flows;
1628 int i;
1629
1630 if (likely(req->flows))
1631 return 0;
1632 flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
1633 req->rcd->numa_id);
1634 if (!flows)
1635 return -ENOMEM;
1636
1637 for (i = 0; i < MAX_FLOWS; i++) {
1638 flows[i].req = req;
1639 flows[i].npagesets = 0;
1640 flows[i].pagesets[0].mapped = 0;
1641 flows[i].resync_npkts = 0;
1642 }
1643 req->flows = flows;
1644 return 0;
1645 }
1646
1647 static void hfi1_init_trdma_req(struct rvt_qp *qp,
1648 struct tid_rdma_request *req)
1649 {
1650 struct hfi1_qp_priv *qpriv = qp->priv;
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662 req->qp = qp;
1663 req->rcd = qpriv->rcd;
1664 }
1665
1666 u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
1667 void *context, int vl, int mode, u64 data)
1668 {
1669 struct hfi1_devdata *dd = context;
1670
1671 return dd->verbs_dev.n_tidwait;
1672 }
1673
1674 static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
1675 u32 psn, u16 *fidx)
1676 {
1677 u16 head, tail;
1678 struct tid_rdma_flow *flow;
1679
1680 head = req->setup_head;
1681 tail = req->clear_tail;
1682 for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
1683 tail = CIRC_NEXT(tail, MAX_FLOWS)) {
1684 flow = &req->flows[tail];
1685 if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
1686 cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
1687 if (fidx)
1688 *fidx = tail;
1689 return flow;
1690 }
1691 }
1692 return NULL;
1693 }
1694
1695
1696 u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
1697 struct ib_other_headers *ohdr, u32 *bth1,
1698 u32 *bth2, u32 *len)
1699 {
1700 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1701 struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
1702 struct rvt_qp *qp = req->qp;
1703 struct hfi1_qp_priv *qpriv = qp->priv;
1704 struct hfi1_swqe_priv *wpriv = wqe->priv;
1705 struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
1706 struct tid_rdma_params *remote;
1707 u32 req_len = 0;
1708 void *req_addr = NULL;
1709
1710
1711 *bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
1712 trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);
1713
1714
1715 req_addr = &flow->tid_entry[flow->tid_idx];
1716 req_len = sizeof(*flow->tid_entry) *
1717 (flow->tidcnt - flow->tid_idx);
1718
1719 memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
1720 wpriv->ss.sge.vaddr = req_addr;
1721 wpriv->ss.sge.sge_length = req_len;
1722 wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
1723
1724
1725
1726
1727 wpriv->ss.sge.mr = NULL;
1728 wpriv->ss.sge.m = 0;
1729 wpriv->ss.sge.n = 0;
1730
1731 wpriv->ss.sg_list = NULL;
1732 wpriv->ss.total_len = wpriv->ss.sge.sge_length;
1733 wpriv->ss.num_sge = 1;
1734
1735
1736 rcu_read_lock();
1737 remote = rcu_dereference(qpriv->tid_rdma.remote);
1738
1739 KDETH_RESET(rreq->kdeth0, KVER, 0x1);
1740 KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
1741 rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
1742 req->cur_seg * req->seg_len + flow->sent);
1743 rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
1744 rreq->reth.length = cpu_to_be32(*len);
1745 rreq->tid_flow_psn =
1746 cpu_to_be32((flow->flow_state.generation <<
1747 HFI1_KDETH_BTH_SEQ_SHIFT) |
1748 ((flow->flow_state.spsn + flow->pkt) &
1749 HFI1_KDETH_BTH_SEQ_MASK));
1750 rreq->tid_flow_qp =
1751 cpu_to_be32(qpriv->tid_rdma.local.qp |
1752 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
1753 TID_RDMA_DESTQP_FLOW_SHIFT) |
1754 qpriv->rcd->ctxt);
1755 rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
1756 *bth1 &= ~RVT_QPN_MASK;
1757 *bth1 |= remote->qp;
1758 *bth2 |= IB_BTH_REQ_ACK;
1759 rcu_read_unlock();
1760
1761
1762 flow->sent += *len;
1763 req->cur_seg++;
1764 qp->s_state = TID_OP(READ_REQ);
1765 req->ack_pending++;
1766 req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
1767 qpriv->pending_tid_r_segs++;
1768 qp->s_num_rd_atomic++;
1769
1770
1771 *len = req_len;
1772
1773 return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
1774 }
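/*
 * The TID RDMA READ REQ built above carries the flow's remaining TID
 * entries as its payload (wpriv->ss points straight at
 * flow->tid_entry), so the responder's READ RESP packets can target
 * the requester's expected receive buffers directly.  BTH2 advances in
 * the verbs PSN space (ib_spsn + pkt), while tid_flow_psn carries the
 * KDETH generation and sequence the responder echoes back in its
 * responses.
 */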
1775
1776
1777
1778
1779
1780 u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
1781 struct ib_other_headers *ohdr, u32 *bth1,
1782 u32 *bth2, u32 *len)
1783 __must_hold(&qp->s_lock)
1784 {
1785 struct hfi1_qp_priv *qpriv = qp->priv;
1786 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1787 struct tid_rdma_flow *flow = NULL;
1788 u32 hdwords = 0;
1789 bool last;
1790 bool retry = true;
1791 u32 npkts = rvt_div_round_up_mtu(qp, *len);
1792
1793 trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
1794 wqe->lpsn, req);
1795
1796
1797
1798
1799 sync_check:
1800 if (req->state == TID_REQUEST_SYNC) {
1801 if (qpriv->pending_tid_r_segs)
1802 goto done;
1803
1804 hfi1_kern_clear_hw_flow(req->rcd, qp);
1805 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
1806 req->state = TID_REQUEST_ACTIVE;
1807 }
1808
1809
1810
1811
1812
1813
1814 if (req->flow_idx == req->setup_head) {
1815 retry = false;
1816 if (req->state == TID_REQUEST_RESEND) {
1817
1818
1819
1820
1821
1822 restart_sge(&qp->s_sge, wqe, req->s_next_psn,
1823 qp->pmtu);
1824 req->isge = 0;
1825 req->state = TID_REQUEST_ACTIVE;
1826 }
1827
1828
1829
1830
1831
1832 if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
1833 req->state = TID_REQUEST_SYNC;
1834 goto sync_check;
1835 }
1836
1837
1838 if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
1839 goto done;
1840
1841
1842
1843
1844
1845 if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
1846 req->state = TID_REQUEST_QUEUED;
1847
1848
1849
1850
1851
1852 goto done;
1853 }
1854 }
1855
1856
1857 flow = &req->flows[req->flow_idx];
1858 flow->pkt = 0;
1859 flow->tid_idx = 0;
1860 flow->sent = 0;
1861 if (!retry) {
1862
1863 flow->flow_state.ib_spsn = req->s_next_psn;
1864 flow->flow_state.ib_lpsn =
1865 flow->flow_state.ib_spsn + flow->npkts - 1;
1866 }
1867
1868
1869 req->s_next_psn += flow->npkts;
1870
1871
1872 hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
1873 done:
1874 return hdwords;
1875 }
1876
1877
1878
1879
1880
1881
1882 static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
1883 struct rvt_ack_entry *e,
1884 struct hfi1_packet *packet,
1885 struct ib_other_headers *ohdr,
1886 u32 bth0, u32 psn, u64 vaddr, u32 len)
1887 {
1888 struct hfi1_qp_priv *qpriv = qp->priv;
1889 struct tid_rdma_request *req;
1890 struct tid_rdma_flow *flow;
1891 u32 flow_psn, i, tidlen = 0, pktlen, tlen;
1892
1893 req = ack_to_tid_req(e);
1894
1895
1896 flow = &req->flows[req->setup_head];
1897
1898
1899 pktlen = packet->tlen - (packet->hlen + 4);
1900 if (pktlen > sizeof(flow->tid_entry))
1901 return 1;
1902 memcpy(flow->tid_entry, packet->ebuf, pktlen);
1903 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
1904
1905
1906
1907
1908
1909 flow->npkts = rvt_div_round_up_mtu(qp, len);
1910 for (i = 0; i < flow->tidcnt; i++) {
1911 trace_hfi1_tid_entry_rcv_read_req(qp, i,
1912 flow->tid_entry[i]);
1913 tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
1914 if (!tlen)
1915 return 1;
1916
1917
1918
1919
1920
1921
1922
1923 tidlen += tlen;
1924 }
1925 if (tidlen * PAGE_SIZE < len)
1926 return 1;
1927
1928
1929 req->clear_tail = req->setup_head;
1930 flow->pkt = 0;
1931 flow->tid_idx = 0;
1932 flow->tid_offset = 0;
1933 flow->sent = 0;
1934 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
1935 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
1936 TID_RDMA_DESTQP_FLOW_MASK;
1937 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
1938 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
1939 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
1940 flow->length = len;
1941
1942 flow->flow_state.lpsn = flow->flow_state.spsn +
1943 flow->npkts - 1;
1944 flow->flow_state.ib_spsn = psn;
1945 flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;
1946
1947 trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
1948
1949 req->flow_idx = req->setup_head;
1950
1951
1952 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1953
1954
1955
1956
1957 e->opcode = (bth0 >> 24) & 0xff;
1958 e->psn = psn;
1959 e->lpsn = psn + flow->npkts - 1;
1960 e->sent = 0;
1961
1962 req->n_flows = qpriv->tid_rdma.local.max_read;
1963 req->state = TID_REQUEST_ACTIVE;
1964 req->cur_seg = 0;
1965 req->comp_seg = 0;
1966 req->ack_seg = 0;
1967 req->isge = 0;
1968 req->seg_len = qpriv->tid_rdma.local.max_len;
1969 req->total_len = len;
1970 req->total_segs = 1;
1971 req->r_flow_psn = e->psn;
1972
1973 trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
1974 req);
1975 return 0;
1976 }
1977
1978 static int tid_rdma_rcv_error(struct hfi1_packet *packet,
1979 struct ib_other_headers *ohdr,
1980 struct rvt_qp *qp, u32 psn, int diff)
1981 {
1982 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1983 struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
1984 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
1985 struct hfi1_qp_priv *qpriv = qp->priv;
1986 struct rvt_ack_entry *e;
1987 struct tid_rdma_request *req;
1988 unsigned long flags;
1989 u8 prev;
1990 bool old_req;
1991
1992 trace_hfi1_rsp_tid_rcv_error(qp, psn);
1993 trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
1994 if (diff > 0) {
1995
1996 if (!qp->r_nak_state) {
1997 ibp->rvp.n_rc_seqnak++;
1998 qp->r_nak_state = IB_NAK_PSN_ERROR;
1999 qp->r_ack_psn = qp->r_psn;
2000 rc_defered_ack(rcd, qp);
2001 }
2002 goto done;
2003 }
2004
2005 ibp->rvp.n_rc_dupreq++;
2006
2007 spin_lock_irqsave(&qp->s_lock, flags);
2008 e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
2009 if (!e || (e->opcode != TID_OP(READ_REQ) &&
2010 e->opcode != TID_OP(WRITE_REQ)))
2011 goto unlock;
2012
2013 req = ack_to_tid_req(e);
2014 req->r_flow_psn = psn;
2015 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
2016 if (e->opcode == TID_OP(READ_REQ)) {
2017 struct ib_reth *reth;
2018 u32 len;
2019 u32 rkey;
2020 u64 vaddr;
2021 int ok;
2022 u32 bth0;
2023
2024 reth = &ohdr->u.tid_rdma.r_req.reth;
2025
2026
2027
2028
2029 len = be32_to_cpu(reth->length);
2030 if (psn != e->psn || len != req->total_len)
2031 goto unlock;
2032
2033 release_rdma_sge_mr(e);
2034
2035 rkey = be32_to_cpu(reth->rkey);
2036 vaddr = get_ib_reth_vaddr(reth);
2037
2038 qp->r_len = len;
2039 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
2040 IB_ACCESS_REMOTE_READ);
2041 if (unlikely(!ok))
2042 goto unlock;
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054 bth0 = be32_to_cpu(ohdr->bth[0]);
2055 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
2056 vaddr, len))
2057 goto unlock;
2058
2059
2060
2061
2062
2063 if (old_req)
2064 goto unlock;
2065 } else {
2066 struct flow_state *fstate;
2067 bool schedule = false;
2068 u8 i;
2069
2070 if (req->state == TID_REQUEST_RESEND) {
2071 req->state = TID_REQUEST_RESEND_ACTIVE;
2072 } else if (req->state == TID_REQUEST_INIT_RESEND) {
2073 req->state = TID_REQUEST_INIT;
2074 schedule = true;
2075 }
2076
2077
2078
2079
2080
2081
2082
2083
2084 if (old_req || req->state == TID_REQUEST_INIT ||
2085 (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
2086 for (i = prev + 1; ; i++) {
2087 if (i > rvt_size_atomic(&dev->rdi))
2088 i = 0;
2089 if (i == qp->r_head_ack_queue)
2090 break;
2091 e = &qp->s_ack_queue[i];
2092 req = ack_to_tid_req(e);
2093 if (e->opcode == TID_OP(WRITE_REQ) &&
2094 req->state == TID_REQUEST_INIT)
2095 req->state = TID_REQUEST_INIT_RESEND;
2096 }
2097
2098
2099
2100
2101
2102
2103 if (!schedule)
2104 goto unlock;
2105 }
2106
2107
2108
2109
2110
2111 if (req->clear_tail == req->setup_head)
2112 goto schedule;
2113
2114
2115
2116
2117
2118
2119 if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
2120 fstate = &req->flows[req->clear_tail].flow_state;
2121 qpriv->pending_tid_w_segs -=
2122 CIRC_CNT(req->flow_idx, req->clear_tail,
2123 MAX_FLOWS);
2124 req->flow_idx =
2125 CIRC_ADD(req->clear_tail,
2126 delta_psn(psn, fstate->resp_ib_psn),
2127 MAX_FLOWS);
2128 qpriv->pending_tid_w_segs +=
2129 delta_psn(psn, fstate->resp_ib_psn);
2130
2131
2132
2133
2134
2135
2136
2137 if (CIRC_CNT(req->setup_head, req->flow_idx,
2138 MAX_FLOWS)) {
2139 req->cur_seg = delta_psn(psn, e->psn);
2140 req->state = TID_REQUEST_RESEND_ACTIVE;
2141 }
2142 }
2143
2144 for (i = prev + 1; ; i++) {
2145
2146
2147
2148
2149 if (i > rvt_size_atomic(&dev->rdi))
2150 i = 0;
2151 if (i == qp->r_head_ack_queue)
2152 break;
2153 e = &qp->s_ack_queue[i];
2154 req = ack_to_tid_req(e);
2155 trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
2156 e->lpsn, req);
2157 if (e->opcode != TID_OP(WRITE_REQ) ||
2158 req->cur_seg == req->comp_seg ||
2159 req->state == TID_REQUEST_INIT ||
2160 req->state == TID_REQUEST_INIT_RESEND) {
2161 if (req->state == TID_REQUEST_INIT)
2162 req->state = TID_REQUEST_INIT_RESEND;
2163 continue;
2164 }
2165 qpriv->pending_tid_w_segs -=
2166 CIRC_CNT(req->flow_idx,
2167 req->clear_tail,
2168 MAX_FLOWS);
2169 req->flow_idx = req->clear_tail;
2170 req->state = TID_REQUEST_RESEND;
2171 req->cur_seg = req->comp_seg;
2172 }
2173 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
2174 }
2175
2176 if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
2177 qp->s_acked_ack_queue = prev;
2178 qp->s_tail_ack_queue = prev;
2179
2180
2181
2182
2183
2184
2185 qp->s_ack_state = OP(ACKNOWLEDGE);
2186 schedule:
2187
2188
2189
2190
2191 if (qpriv->rnr_nak_state) {
2192 qp->s_nak_state = 0;
2193 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
2194 qp->r_psn = e->lpsn + 1;
2195 hfi1_tid_write_alloc_resources(qp, true);
2196 }
2197
2198 qp->r_state = e->opcode;
2199 qp->r_nak_state = 0;
2200 qp->s_flags |= RVT_S_RESP_PENDING;
2201 hfi1_schedule_send(qp);
2202 unlock:
2203 spin_unlock_irqrestore(&qp->s_lock, flags);
2204 done:
2205 return 1;
2206 }
2207
2208 void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
2209 {
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223 struct hfi1_ctxtdata *rcd = packet->rcd;
2224 struct rvt_qp *qp = packet->qp;
2225 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
2226 struct ib_other_headers *ohdr = packet->ohdr;
2227 struct rvt_ack_entry *e;
2228 unsigned long flags;
2229 struct ib_reth *reth;
2230 struct hfi1_qp_priv *qpriv = qp->priv;
2231 u32 bth0, psn, len, rkey;
2232 bool fecn;
2233 u8 next;
2234 u64 vaddr;
2235 int diff;
2236 u8 nack_state = IB_NAK_INVALID_REQUEST;
2237
2238 bth0 = be32_to_cpu(ohdr->bth[0]);
2239 if (hfi1_ruc_check_hdr(ibp, packet))
2240 return;
2241
2242 fecn = process_ecn(qp, packet);
2243 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2244 trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
2245
2246 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
2247 rvt_comm_est(qp);
2248
2249 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2250 goto nack_inv;
2251
2252 reth = &ohdr->u.tid_rdma.r_req.reth;
2253 vaddr = be64_to_cpu(reth->vaddr);
2254 len = be32_to_cpu(reth->length);
2255
2256 if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
2257 goto nack_inv;
2258
2259 diff = delta_psn(psn, qp->r_psn);
2260 if (unlikely(diff)) {
2261 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
2262 return;
2263 }
2264
2265
2266 next = qp->r_head_ack_queue + 1;
2267 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
2268 next = 0;
2269 spin_lock_irqsave(&qp->s_lock, flags);
2270 if (unlikely(next == qp->s_tail_ack_queue)) {
2271 if (!qp->s_ack_queue[next].sent) {
2272 nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2273 goto nack_inv_unlock;
2274 }
2275 update_ack_queue(qp, next);
2276 }
2277 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2278 release_rdma_sge_mr(e);
2279
2280 rkey = be32_to_cpu(reth->rkey);
2281 qp->r_len = len;
2282
2283 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
2284 rkey, IB_ACCESS_REMOTE_READ)))
2285 goto nack_acc;
2286
2287
2288 if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
2289 len))
2290 goto nack_inv_unlock;
2291
2292 qp->r_state = e->opcode;
2293 qp->r_nak_state = 0;
2294
2295
2296
2297
2298
2299 qp->r_msn++;
2300 qp->r_psn += e->lpsn - e->psn + 1;
2301
2302 qp->r_head_ack_queue = next;
2303
2304
2305
2306
2307
2308
2309
2310 qpriv->r_tid_alloc = qp->r_head_ack_queue;
2311
2312
2313 qp->s_flags |= RVT_S_RESP_PENDING;
2314 if (fecn)
2315 qp->s_flags |= RVT_S_ECN;
2316 hfi1_schedule_send(qp);
2317
2318 spin_unlock_irqrestore(&qp->s_lock, flags);
2319 return;
2320
2321 nack_inv_unlock:
2322 spin_unlock_irqrestore(&qp->s_lock, flags);
2323 nack_inv:
2324 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2325 qp->r_nak_state = nack_state;
2326 qp->r_ack_psn = qp->r_psn;
2327
2328 rc_defered_ack(rcd, qp);
2329 return;
2330 nack_acc:
2331 spin_unlock_irqrestore(&qp->s_lock, flags);
2332 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
2333 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2334 qp->r_ack_psn = qp->r_psn;
2335 }
2336
2337 u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
2338 struct ib_other_headers *ohdr, u32 *bth0,
2339 u32 *bth1, u32 *bth2, u32 *len, bool *last)
2340 {
2341 struct hfi1_ack_priv *epriv = e->priv;
2342 struct tid_rdma_request *req = &epriv->tid_req;
2343 struct hfi1_qp_priv *qpriv = qp->priv;
2344 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
2345 u32 tidentry = flow->tid_entry[flow->tid_idx];
2346 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
2347 struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
2348 u32 next_offset, om = KDETH_OM_LARGE;
2349 bool last_pkt;
2350 u32 hdwords = 0;
2351 struct tid_rdma_params *remote;
2352
2353 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
2354 flow->sent += *len;
2355 next_offset = flow->tid_offset + *len;
2356 last_pkt = (flow->sent >= flow->length);
2357
2358 trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
2359 trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);
2360
2361 rcu_read_lock();
2362 remote = rcu_dereference(qpriv->tid_rdma.remote);
2363 if (!remote) {
2364 rcu_read_unlock();
2365 goto done;
2366 }
2367 KDETH_RESET(resp->kdeth0, KVER, 0x1);
2368 KDETH_SET(resp->kdeth0, SH, !last_pkt);
2369 KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
2370 KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
2371 KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
2372 KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
2373 KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
2374 KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
2375 resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
2376 rcu_read_unlock();
2377
2378 resp->aeth = rvt_compute_aeth(qp);
2379 resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
2380 flow->pkt));
2381
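/*
 * The BTH PSN of a TID RDMA packet is a KDETH PSN: the flow generation
 * occupies the bits above HFI1_KDETH_BTH_SEQ_SHIFT and the per-flow
 * packet sequence (spsn + pkt) the bits below, i.e.
 *   bth2 = (generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
 *          ((spsn + pkt) & HFI1_KDETH_BTH_SEQ_MASK)
 */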
2382 *bth0 = TID_OP(READ_RESP) << 24;
2383 *bth1 = flow->tid_qpn;
2384 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
2385 HFI1_KDETH_BTH_SEQ_MASK) |
2386 (flow->flow_state.generation <<
2387 HFI1_KDETH_BTH_SEQ_SHIFT));
2388 *last = last_pkt;
2389 if (last_pkt)
2390 /* Advance to the next flow */
2391 req->clear_tail = (req->clear_tail + 1) &
2392 (MAX_FLOWS - 1);
2393
2394 if (next_offset >= tidlen) {
2395 flow->tid_offset = 0;
2396 flow->tid_idx++;
2397 } else {
2398 flow->tid_offset = next_offset;
2399 }
2400
2401 hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);
2402
2403 done:
2404 return hdwords;
2405 }
2406
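/*
 * find_tid_request() walks the send queue from s_acked through s_cur
 * and returns the TID RDMA request whose PSN range [wqe->psn, wqe->lpsn]
 * contains @psn, provided the WQE carries the requested opcode;
 * otherwise it returns NULL.
 */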
2407 static inline struct tid_rdma_request *
2408 find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
2409 __must_hold(&qp->s_lock)
2410 {
2411 struct rvt_swqe *wqe;
2412 struct tid_rdma_request *req = NULL;
2413 u32 i, end;
2414
2415 end = qp->s_cur + 1;
2416 if (end == qp->s_size)
2417 end = 0;
2418 for (i = qp->s_acked; i != end;) {
2419 wqe = rvt_get_swqe_ptr(qp, i);
2420 if (cmp_psn(psn, wqe->psn) >= 0 &&
2421 cmp_psn(psn, wqe->lpsn) <= 0) {
2422 if (wqe->wr.opcode == opcode)
2423 req = wqe_to_tid_req(wqe);
2424 break;
2425 }
2426 if (++i == qp->s_size)
2427 i = 0;
2428 }
2429
2430 return req;
2431 }
2432
2433 void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
2434 /*
2435  * Handle an incoming TID RDMA READ RESPONSE:
2436  * 1. Find the outstanding request (SWQE) matching the verbs PSN.
2437  * 2. For packets other than the last of a segment, just track the
2438  *    expected KDETH PSN (copying the payload by software when FECN
2439  *    forced an eager delivery).
2440  * 3. On the last packet of a segment, free the TID/flow resources and
2441  *    run normal RC ACK processing.
2442  * 4. Clear the hardware flow at a sync point or when all reads done.
2443  */
2444 struct ib_other_headers *ohdr = packet->ohdr;
2445 struct rvt_qp *qp = packet->qp;
2446 struct hfi1_qp_priv *priv = qp->priv;
2447 struct hfi1_ctxtdata *rcd = packet->rcd;
2448 struct tid_rdma_request *req;
2449 struct tid_rdma_flow *flow;
2450 u32 opcode, aeth;
2451 bool fecn;
2452 unsigned long flags;
2453 u32 kpsn, ipsn;
2454
2455 trace_hfi1_sender_rcv_tid_read_resp(qp);
2456 fecn = process_ecn(qp, packet);
2457 kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2458 aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
2459 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2460
2461 spin_lock_irqsave(&qp->s_lock, flags);
2462 ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
2463 req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
2464 if (unlikely(!req))
2465 goto ack_op_err;
2466
2467 flow = &req->flows[req->clear_tail];
2468
2469 if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
2470 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
2471
2472 if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
2473 goto ack_done;
2474 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2475
2476
2477
2478
2479
2480
2481
2482 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
2483 struct rvt_sge_state ss;
2484 u32 len;
2485 u32 tlen = packet->tlen;
2486 u16 hdrsize = packet->hlen;
2487 u8 pad = packet->pad;
2488 u8 extra_bytes = pad + packet->extra_byte +
2489 (SIZE_OF_CRC << 2);
2490 u32 pmtu = qp->pmtu;
2491
2492 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
2493 goto ack_op_err;
2494 len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
2495 if (unlikely(len < pmtu))
2496 goto ack_op_err;
2497 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
2498 false);
2499
2500 priv->s_flags |= HFI1_R_TID_SW_PSN;
2501 }
2502
2503 goto ack_done;
2504 }
2505 flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
2506 req->ack_pending--;
2507 priv->pending_tid_r_segs--;
2508 qp->s_num_rd_atomic--;
2509 if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
2510 !qp->s_num_rd_atomic) {
2511 qp->s_flags &= ~(RVT_S_WAIT_FENCE |
2512 RVT_S_WAIT_ACK);
2513 hfi1_schedule_send(qp);
2514 }
2515 if (qp->s_flags & RVT_S_WAIT_RDMAR) {
2516 qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
2517 hfi1_schedule_send(qp);
2518 }
2519
2520 trace_hfi1_ack(qp, ipsn);
2521 trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
2522 req->e.swqe->psn, req->e.swqe->lpsn,
2523 req);
2524 trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);
2525
2526 /* Release the TID/flow resources for this segment */
2527 hfi1_kern_exp_rcv_clear(req);
2528
2529 if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
2530 goto ack_done;
2531
2532
2533 if (++req->comp_seg >= req->total_segs) {
2534 priv->tid_r_comp++;
2535 req->state = TID_REQUEST_COMPLETE;
2536 }
2537
2538 /*
2539  * Clear the hardware flow when this request is a sync point and all
2540  * of its segments have completed, or when every requested read has
2541  * completed.
2542  */
2543 if ((req->state == TID_REQUEST_SYNC &&
2544 req->comp_seg == req->cur_seg) ||
2545 priv->tid_r_comp == priv->tid_r_reqs) {
2546 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2547 priv->s_flags &= ~HFI1_R_TID_SW_PSN;
2548 if (req->state == TID_REQUEST_SYNC)
2549 req->state = TID_REQUEST_ACTIVE;
2550 }
2551
2552 hfi1_schedule_send(qp);
2553 goto ack_done;
2554
2555 ack_op_err:
2556
2557
2558
2559
2560
2561
2562
2563
2564 if (qp->s_last == qp->s_acked)
2565 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
2566
2567 ack_done:
2568 spin_unlock_irqrestore(&qp->s_lock, flags);
2569 }
2570
2571 void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
2572 __must_hold(&qp->s_lock)
2573 {
2574 u32 n = qp->s_acked;
2575 struct rvt_swqe *wqe;
2576 struct tid_rdma_request *req;
2577 struct hfi1_qp_priv *priv = qp->priv;
2578
2579 lockdep_assert_held(&qp->s_lock);
2580
2581 while (n != qp->s_tail) {
2582 wqe = rvt_get_swqe_ptr(qp, n);
2583 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2584 req = wqe_to_tid_req(wqe);
2585 hfi1_kern_exp_rcv_clear_all(req);
2586 }
2587
2588 if (++n == qp->s_size)
2589 n = 0;
2590 }
2591
2592 hfi1_kern_clear_hw_flow(priv->rcd, qp);
2593 }
2594
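/*
 * Handle an RHF TID error for a KDETH packet on the requester side:
 * if the packet arrived through the eager receive path, restart the RC
 * request from the last ACKed PSN and reschedule the send engine. The
 * packet itself is always dropped (the function returns true).
 */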
2595 static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
2596 {
2597 struct rvt_qp *qp = packet->qp;
2598
2599 if (rcv_type >= RHF_RCV_TYPE_IB)
2600 goto done;
2601
2602 spin_lock(&qp->s_lock);
2603
2604
2605
2606
2607
2608
2609
2610
2611 if (rcv_type == RHF_RCV_TYPE_EAGER) {
2612 hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
2613 hfi1_schedule_send(qp);
2614 }
2615
2616
2617 spin_unlock(&qp->s_lock);
2618 done:
2619 return true;
2620 }
2621
2622 static void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
2623 struct rvt_qp *qp, struct rvt_swqe *wqe)
2624 {
2625 struct tid_rdma_request *req;
2626 struct tid_rdma_flow *flow;
2627
2628
2629 qp->r_flags |= RVT_R_RDMAR_SEQ;
2630 req = wqe_to_tid_req(wqe);
2631 flow = &req->flows[req->clear_tail];
2632 hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
2633 if (list_empty(&qp->rspwait)) {
2634 qp->r_flags |= RVT_R_RSP_SEND;
2635 rvt_get_qp(qp);
2636 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2637 }
2638 }
2639
2640
2641
2642
2643
2644
2645
2646
2647
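/*
 * Handle KDETH error flags for a TID RDMA READ RESPONSE packet.
 *
 * Returns false only when the packet turns out to be the last,
 * in-sequence packet of the current segment, telling the caller to go
 * on and process it as a normal response; otherwise the error has been
 * handled here and true is returned.
 */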
2648 static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2649 struct hfi1_packet *packet, u8 rcv_type,
2650 u8 rte, u32 psn, u32 ibpsn)
2651 __must_hold(&packet->qp->r_lock) __must_hold(RCU)
2652 {
2653 struct hfi1_pportdata *ppd = rcd->ppd;
2654 struct hfi1_devdata *dd = ppd->dd;
2655 struct hfi1_ibport *ibp;
2656 struct rvt_swqe *wqe;
2657 struct tid_rdma_request *req;
2658 struct tid_rdma_flow *flow;
2659 u32 ack_psn;
2660 struct rvt_qp *qp = packet->qp;
2661 struct hfi1_qp_priv *priv = qp->priv;
2662 bool ret = true;
2663 int diff = 0;
2664 u32 fpsn;
2665
2666 lockdep_assert_held(&qp->r_lock);
2667 trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
2668 trace_hfi1_sender_read_kdeth_eflags(qp);
2669 trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
2670 spin_lock(&qp->s_lock);
2671
2672 if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
2673 cmp_psn(ibpsn, qp->s_psn) > 0)
2674 goto s_unlock;
2675
2676 /*
2677  * The PSN preceding the failing response implicitly ACKs any SEND or
2678  * RDMA WRITE requests issued before it. Complete those WQEs here,
2679  * stopping at the first RDMA READ, TID RDMA READ or atomic request.
2680  */
2681 ack_psn = ibpsn - 1;
2682 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
2683 ibp = to_iport(qp->ibqp.device, qp->port_num);
2684
2685
2686 while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
2687
2688
2689
2690
2691
2692 if (wqe->wr.opcode == IB_WR_RDMA_READ ||
2693 wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
2694 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2695 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2696
2697 if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
2698 qp->r_flags |= RVT_R_RDMAR_SEQ;
2699 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
2700 restart_tid_rdma_read_req(rcd, qp,
2701 wqe);
2702 } else {
2703 hfi1_restart_rc(qp, qp->s_last_psn + 1,
2704 0);
2705 if (list_empty(&qp->rspwait)) {
2706 qp->r_flags |= RVT_R_RSP_SEND;
2707 rvt_get_qp(qp);
2708 list_add_tail(
2709 &qp->rspwait,
2710 &rcd->qp_wait_list);
2711 }
2712 }
2713 }
2714
2715
2716
2717
2718 break;
2719 }
2720
2721 wqe = do_rc_completion(qp, wqe, ibp);
2722 if (qp->s_acked == qp->s_tail)
2723 goto s_unlock;
2724 }
2725
2726 if (qp->s_acked == qp->s_tail)
2727 goto s_unlock;
2728
2729
2730 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
2731 goto s_unlock;
2732
2733 req = wqe_to_tid_req(wqe);
2734 trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
2735 wqe->lpsn, req);
2736 switch (rcv_type) {
2737 case RHF_RCV_TYPE_EXPECTED:
2738 switch (rte) {
2739 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749 flow = &req->flows[req->clear_tail];
2750 trace_hfi1_tid_flow_read_kdeth_eflags(qp,
2751 req->clear_tail,
2752 flow);
2753 if (priv->s_flags & HFI1_R_TID_SW_PSN) {
2754 diff = cmp_psn(psn,
2755 flow->flow_state.r_next_psn);
2756 if (diff > 0) {
2757
2758 goto s_unlock;
2759 } else if (diff < 0) {
2760 /*
2761  * The packet is behind the expected PSN, i.e. a duplicate response
2762  * for a restarted request. Clear the restart flag and drop it.
2763  */
2764
2765 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2766 qp->r_flags &=
2767 ~RVT_R_RDMAR_SEQ;
2768
2769
2770 goto s_unlock;
2771 }
2772 /*
2773  * The PSN matches the software-tracked value. If it is the last
2774  * packet of the flow, let the caller process it normally (return
2775  * false) and clear the restart flag.
2776  */
2777
2778 fpsn = full_flow_psn(flow,
2779 flow->flow_state.lpsn);
2780 if (cmp_psn(fpsn, psn) == 0) {
2781 ret = false;
2782 if (qp->r_flags & RVT_R_RDMAR_SEQ)
2783 qp->r_flags &=
2784 ~RVT_R_RDMAR_SEQ;
2785 }
2786 flow->flow_state.r_next_psn =
2787 mask_psn(psn + 1);
2788 } else {
2789 u32 last_psn;
2790
2791 last_psn = read_r_next_psn(dd, rcd->ctxt,
2792 flow->idx);
2793 flow->flow_state.r_next_psn = last_psn;
2794 priv->s_flags |= HFI1_R_TID_SW_PSN;
2795 /*
2796  * Software PSN tracking is now in effect; restart the request
2797  * unless a restart is already in progress.
2798  */
2799 if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
2800 restart_tid_rdma_read_req(rcd, qp,
2801 wqe);
2802 }
2803
2804 break;
2805
2806 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
2807
2808
2809
2810
2811 break;
2812
2813 default:
2814 break;
2815 }
2816 break;
2817
2818 case RHF_RCV_TYPE_ERROR:
2819 switch (rte) {
2820 case RHF_RTE_ERROR_OP_CODE_ERR:
2821 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
2822 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
2823 case RHF_RTE_ERROR_KHDR_KVER_ERR:
2824 case RHF_RTE_ERROR_CONTEXT_ERR:
2825 case RHF_RTE_ERROR_KHDR_TID_ERR:
2826 default:
2827 break;
2828 }
2829 default:
2830 break;
2831 }
2832 s_unlock:
2833 spin_unlock(&qp->s_lock);
2834 return ret;
2835 }
2836
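/*
 * Top-level handler for KDETH packets flagged with receive errors.
 * The QP is looked up from the KDETH verbs_qp field; RHF TID errors go
 * to tid_rdma_tid_err(), TID RDMA READ RESPONSE errors go to
 * handle_read_kdeth_eflags(), and errors on TID RDMA WRITE DATA are
 * handled against the request at qpriv->r_tid_tail, NAK'ing
 * out-of-sequence KDETH PSNs. A return value of false tells the caller
 * to process the packet normally; true means it was consumed here.
 */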
2837 bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
2838 struct hfi1_pportdata *ppd,
2839 struct hfi1_packet *packet)
2840 {
2841 struct hfi1_ibport *ibp = &ppd->ibport_data;
2842 struct hfi1_devdata *dd = ppd->dd;
2843 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
2844 u8 rcv_type = rhf_rcv_type(packet->rhf);
2845 u8 rte = rhf_rcv_type_err(packet->rhf);
2846 struct ib_header *hdr = packet->hdr;
2847 struct ib_other_headers *ohdr = NULL;
2848 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
2849 u16 lid = be16_to_cpu(hdr->lrh[1]);
2850 u8 opcode;
2851 u32 qp_num, psn, ibpsn;
2852 struct rvt_qp *qp;
2853 struct hfi1_qp_priv *qpriv;
2854 unsigned long flags;
2855 bool ret = true;
2856 struct rvt_ack_entry *e;
2857 struct tid_rdma_request *req;
2858 struct tid_rdma_flow *flow;
2859 int diff = 0;
2860
2861 trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
2862 packet->rhf);
2863 if (packet->rhf & RHF_ICRC_ERR)
2864 return ret;
2865
2866 packet->ohdr = &hdr->u.oth;
2867 ohdr = packet->ohdr;
2868 trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
2869
2870
2871 qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
2872 RVT_QPN_MASK;
2873 if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
2874 goto drop;
2875
2876 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
2877 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
2878
2879 rcu_read_lock();
2880 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
2881 if (!qp)
2882 goto rcu_unlock;
2883
2884 packet->qp = qp;
2885
2886
2887 spin_lock_irqsave(&qp->r_lock, flags);
2888 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
2889 ibp->rvp.n_pkt_drops++;
2890 goto r_unlock;
2891 }
2892
2893 if (packet->rhf & RHF_TID_ERR) {
2894
2895 u32 tlen = rhf_pkt_len(packet->rhf);
2896
2897
2898 if (tlen < 24)
2899 goto r_unlock;
2900
2901
2902
2903
2904
2905 if (lnh == HFI1_LRH_GRH)
2906 goto r_unlock;
2907
2908 if (tid_rdma_tid_err(packet, rcv_type))
2909 goto r_unlock;
2910 }
2911
2912
2913 if (opcode == TID_OP(READ_RESP)) {
2914 ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
2915 ibpsn = mask_psn(ibpsn);
2916 ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
2917 ibpsn);
2918 goto r_unlock;
2919 }
2920
2921
2922
2923
2924
2925
2926 spin_lock(&qp->s_lock);
2927 qpriv = qp->priv;
2928 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
2929 qpriv->r_tid_tail == qpriv->r_tid_head)
2930 goto unlock;
2931 e = &qp->s_ack_queue[qpriv->r_tid_tail];
2932 if (e->opcode != TID_OP(WRITE_REQ))
2933 goto unlock;
2934 req = ack_to_tid_req(e);
2935 if (req->comp_seg == req->cur_seg)
2936 goto unlock;
2937 flow = &req->flows[req->clear_tail];
2938 trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
2939 trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
2940 trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
2941 trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
2942 e->lpsn, req);
2943 trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);
2944
2945 switch (rcv_type) {
2946 case RHF_RCV_TYPE_EXPECTED:
2947 switch (rte) {
2948 case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
2949 if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
2950 qpriv->s_flags |= HFI1_R_TID_SW_PSN;
2951 flow->flow_state.r_next_psn =
2952 read_r_next_psn(dd, rcd->ctxt,
2953 flow->idx);
2954 qpriv->r_next_psn_kdeth =
2955 flow->flow_state.r_next_psn;
2956 goto nak_psn;
2957 } else {
2958
2959
2960
2961
2962
2963
2964
2965
2966 diff = cmp_psn(psn,
2967 flow->flow_state.r_next_psn);
2968 if (diff > 0)
2969 goto nak_psn;
2970 else if (diff < 0)
2971 break;
2972
2973 qpriv->s_nak_state = 0;
2974
2975
2976
2977
2978
2979 if (psn == full_flow_psn(flow,
2980 flow->flow_state.lpsn))
2981 ret = false;
2982 flow->flow_state.r_next_psn =
2983 mask_psn(psn + 1);
2984 qpriv->r_next_psn_kdeth =
2985 flow->flow_state.r_next_psn;
2986 }
2987 break;
2988
2989 case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
2990 goto nak_psn;
2991
2992 default:
2993 break;
2994 }
2995 break;
2996
2997 case RHF_RCV_TYPE_ERROR:
2998 switch (rte) {
2999 case RHF_RTE_ERROR_OP_CODE_ERR:
3000 case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
3001 case RHF_RTE_ERROR_KHDR_HCRC_ERR:
3002 case RHF_RTE_ERROR_KHDR_KVER_ERR:
3003 case RHF_RTE_ERROR_CONTEXT_ERR:
3004 case RHF_RTE_ERROR_KHDR_TID_ERR:
3005 default:
3006 break;
3007 }
3008 default:
3009 break;
3010 }
3011
3012 unlock:
3013 spin_unlock(&qp->s_lock);
3014 r_unlock:
3015 spin_unlock_irqrestore(&qp->r_lock, flags);
3016 rcu_unlock:
3017 rcu_read_unlock();
3018 drop:
3019 return ret;
3020 nak_psn:
3021 ibp->rvp.n_rc_seqnak++;
3022 if (!qpriv->s_nak_state) {
3023 qpriv->s_nak_state = IB_NAK_PSN_ERROR;
3024
3025 qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
3026 tid_rdma_trigger_ack(qp);
3027 }
3028 goto unlock;
3029 }
3030
3031 /*
3032  * "Rewind" the TID RDMA request so it can be resent: locate the flow
3033  * containing the restart PSN (*bth2), reset that flow's packet and
3034  * offset bookkeeping, and mark the request ACTIVE again. For TID RDMA
3035  * WRITE, all following, not-yet-acked flows are reset as well.
3036  */
3037 void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3038 u32 *bth2)
3039 {
3040 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3041 struct tid_rdma_flow *flow;
3042 struct hfi1_qp_priv *qpriv = qp->priv;
3043 int diff, delta_pkts;
3044 u32 tididx = 0, i;
3045 u16 fidx;
3046
3047 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3048 *bth2 = mask_psn(qp->s_psn);
3049 flow = find_flow_ib(req, *bth2, &fidx);
3050 if (!flow) {
3051 trace_hfi1_msg_tid_restart_req(
3052 qp, "!!!!!! Could not find flow to restart: bth2 ",
3053 (u64)*bth2);
3054 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
3055 wqe->psn, wqe->lpsn,
3056 req);
3057 return;
3058 }
3059 } else {
3060 fidx = req->acked_tail;
3061 flow = &req->flows[fidx];
3062 *bth2 = mask_psn(req->r_ack_psn);
3063 }
3064
3065 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3066 delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
3067 else
3068 delta_pkts = delta_psn(*bth2,
3069 full_flow_psn(flow,
3070 flow->flow_state.spsn));
3071
3072 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3073 diff = delta_pkts + flow->resync_npkts;
3074
3075 flow->sent = 0;
3076 flow->pkt = 0;
3077 flow->tid_idx = 0;
3078 flow->tid_offset = 0;
3079 if (diff) {
3080 for (tididx = 0; tididx < flow->tidcnt; tididx++) {
3081 u32 tidentry = flow->tid_entry[tididx], tidlen,
3082 tidnpkts, npkts;
3083
3084 flow->tid_offset = 0;
3085 tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
3086 tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
3087 npkts = min_t(u32, diff, tidnpkts);
3088 flow->pkt += npkts;
3089 flow->sent += (npkts == tidnpkts ? tidlen :
3090 npkts * qp->pmtu);
3091 flow->tid_offset += npkts * qp->pmtu;
3092 diff -= npkts;
3093 if (!diff)
3094 break;
3095 }
3096 }
3097 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3098 rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
3099 flow->sent, 0);
3100
3101
3102
3103
3104
3105
3106
3107 flow->pkt -= flow->resync_npkts;
3108 }
3109
3110 if (flow->tid_offset ==
3111 EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
3112 tididx++;
3113 flow->tid_offset = 0;
3114 }
3115 flow->tid_idx = tididx;
3116 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
3117
3118 req->flow_idx = fidx;
3119 else
3120 req->clear_tail = fidx;
3121
3122 trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
3123 trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
3124 wqe->lpsn, req);
3125 req->state = TID_REQUEST_ACTIVE;
3126 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
3127
3128 fidx = CIRC_NEXT(fidx, MAX_FLOWS);
3129 i = qpriv->s_tid_tail;
3130 do {
3131 for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
3132 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
3133 req->flows[fidx].sent = 0;
3134 req->flows[fidx].pkt = 0;
3135 req->flows[fidx].tid_idx = 0;
3136 req->flows[fidx].tid_offset = 0;
3137 req->flows[fidx].resync_npkts = 0;
3138 }
3139 if (i == qpriv->s_tid_cur)
3140 break;
3141 do {
3142 i = (i + 1 == qp->s_size ? 0 : i + 1);
3143 wqe = rvt_get_swqe_ptr(qp, i);
3144 } while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
3145 req = wqe_to_tid_req(wqe);
3146 req->cur_seg = req->ack_seg;
3147 fidx = req->acked_tail;
3148
3149 req->clear_tail = fidx;
3150 } while (1);
3151 }
3152 }
3153
3154 void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
3155 {
3156 int i, ret;
3157 struct hfi1_qp_priv *qpriv = qp->priv;
3158 struct tid_flow_state *fs;
3159
3160 if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
3161 return;
3162
3163 /*
3164  * First, clear the hardware flow (if one is allocated) to help
3165  * prevent any delayed packets from being delivered.
3166  */
3167 fs = &qpriv->flow_state;
3168 if (fs->index != RXE_NUM_TID_FLOWS)
3169 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
3170
3171 for (i = qp->s_acked; i != qp->s_head;) {
3172 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
3173
3174 if (++i == qp->s_size)
3175 i = 0;
3176
3177 if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
3178 continue;
3179 do {
3180 struct hfi1_swqe_priv *priv = wqe->priv;
3181
3182 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3183 } while (!ret);
3184 }
3185 for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
3186 struct rvt_ack_entry *e = &qp->s_ack_queue[i];
3187
3188 if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
3189 i = 0;
3190
3191 if (e->opcode != TID_OP(WRITE_REQ))
3192 continue;
3193 do {
3194 struct hfi1_ack_priv *priv = e->priv;
3195
3196 ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
3197 } while (!ret);
3198 }
3199 }
3200
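/*
 * Decide whether the WQE about to be processed must wait for the
 * preceding one. A TID RDMA WRITE is only fully acknowledged once all
 * of its segments have been TID ACK'd (ack_seg == total_segs); until
 * then, following requests are held back by setting
 * HFI1_S_TID_WAIT_INTERLCK. A TID RDMA READ that follows a regular
 * RDMA READ likewise waits until that read has been acknowledged.
 * Returns true when the caller should stall the new WQE.
 */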
3201 bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
3202 {
3203 struct rvt_swqe *prev;
3204 struct hfi1_qp_priv *priv = qp->priv;
3205 u32 s_prev;
3206 struct tid_rdma_request *req;
3207
3208 s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
3209 prev = rvt_get_swqe_ptr(qp, s_prev);
3210
3211 switch (wqe->wr.opcode) {
3212 case IB_WR_SEND:
3213 case IB_WR_SEND_WITH_IMM:
3214 case IB_WR_SEND_WITH_INV:
3215 case IB_WR_ATOMIC_CMP_AND_SWP:
3216 case IB_WR_ATOMIC_FETCH_AND_ADD:
3217 case IB_WR_RDMA_WRITE:
3218 switch (prev->wr.opcode) {
3219 case IB_WR_TID_RDMA_WRITE:
3220 req = wqe_to_tid_req(prev);
3221 if (req->ack_seg != req->total_segs)
3222 goto interlock;
3223 default:
3224 break;
3225 }
3226 break;
3227 case IB_WR_RDMA_READ:
3228 if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
3229 break;
3230
3231 case IB_WR_TID_RDMA_READ:
3232 switch (prev->wr.opcode) {
3233 case IB_WR_RDMA_READ:
3234 if (qp->s_acked != qp->s_cur)
3235 goto interlock;
3236 break;
3237 case IB_WR_TID_RDMA_WRITE:
3238 req = wqe_to_tid_req(prev);
3239 if (req->ack_seg != req->total_segs)
3240 goto interlock;
3241 default:
3242 break;
3243 }
3244 default:
3245 break;
3246 }
3247 return false;
3248
3249 interlock:
3250 priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
3251 return true;
3252 }
3253
3254
3255 static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
3256 struct rvt_sge *sge, int num_sge)
3257 {
3258 int i;
3259
3260 for (i = 0; i < num_sge; i++, sge++) {
3261 trace_hfi1_sge_check_align(qp, i, sge);
3262 if ((u64)sge->vaddr & ~PAGE_MASK ||
3263 sge->sge_length & ~PAGE_MASK)
3264 return false;
3265 }
3266 return true;
3267 }
3268
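/*
 * Called when a verbs RDMA READ/WRITE work request is set up for
 * sending: if TID RDMA has been negotiated with the remote end, the
 * destination is not the local port, the connection uses 9B headers,
 * and the buffers are page aligned, the opcode is rewritten to the TID
 * RDMA equivalent, the per-request segment bookkeeping is initialized,
 * and wqe->lpsn is extended to cover every packet (READ) or segment
 * (WRITE) of the request.
 */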
3269 void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
3270 {
3271 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
3272 struct hfi1_swqe_priv *priv = wqe->priv;
3273 struct tid_rdma_params *remote;
3274 enum ib_wr_opcode new_opcode;
3275 bool do_tid_rdma = false;
3276 struct hfi1_pportdata *ppd = qpriv->rcd->ppd;
3277
3278 if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
3279 ppd->lid)
3280 return;
3281 if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
3282 return;
3283
3284 rcu_read_lock();
3285 remote = rcu_dereference(qpriv->tid_rdma.remote);
3286 /*
3287  * If TID RDMA has not been negotiated with the remote end, leave the
3288  * WQE as a regular RDMA READ/WRITE.
3289  */
3290 if (!remote)
3291 goto exit;
3292
3293 if (wqe->wr.opcode == IB_WR_RDMA_READ) {
3294 if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
3295 wqe->wr.num_sge)) {
3296 new_opcode = IB_WR_TID_RDMA_READ;
3297 do_tid_rdma = true;
3298 }
3299 } else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
3300 /*
3301  * Use TID RDMA WRITE only when the remote address is page aligned
3302  * and the length is a whole multiple of the page size; otherwise
3303  * fall back to a regular RDMA WRITE.
3304  */
3305
3306 if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
3307 !(wqe->length & ~PAGE_MASK)) {
3308 new_opcode = IB_WR_TID_RDMA_WRITE;
3309 do_tid_rdma = true;
3310 }
3311 }
3312
3313 if (do_tid_rdma) {
3314 if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
3315 goto exit;
3316 wqe->wr.opcode = new_opcode;
3317 priv->tid_req.seg_len =
3318 min_t(u32, remote->max_len, wqe->length);
3319 priv->tid_req.total_segs =
3320 DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
3321
3322 wqe->lpsn = wqe->psn;
3323 if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
3324 priv->tid_req.n_flows = remote->max_read;
3325 qpriv->tid_r_reqs++;
3326 wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
3327 } else {
3328 wqe->lpsn += priv->tid_req.total_segs - 1;
3329 atomic_inc(&qpriv->n_requests);
3330 }
3331
3332 priv->tid_req.cur_seg = 0;
3333 priv->tid_req.comp_seg = 0;
3334 priv->tid_req.ack_seg = 0;
3335 priv->tid_req.state = TID_REQUEST_INACTIVE;
3336
3337
3338
3339
3340
3341
3342 priv->tid_req.acked_tail = priv->tid_req.setup_head;
3343 trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
3344 wqe->psn, wqe->lpsn,
3345 &priv->tid_req);
3346 }
3347 exit:
3348 rcu_read_unlock();
3349 }
3350
3351
3352
3353 u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
3354 struct ib_other_headers *ohdr,
3355 u32 *bth1, u32 *bth2, u32 *len)
3356 {
3357 struct hfi1_qp_priv *qpriv = qp->priv;
3358 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
3359 struct tid_rdma_params *remote;
3360
3361 rcu_read_lock();
3362 remote = rcu_dereference(qpriv->tid_rdma.remote);
3363
3364
3365
3366
3367 req->n_flows = remote->max_write;
3368 req->state = TID_REQUEST_ACTIVE;
3369
3370 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
3371 KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
3372 ohdr->u.tid_rdma.w_req.reth.vaddr =
3373 cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
3374 ohdr->u.tid_rdma.w_req.reth.rkey =
3375 cpu_to_be32(wqe->rdma_wr.rkey);
3376 ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
3377 ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
3378 *bth1 &= ~RVT_QPN_MASK;
3379 *bth1 |= remote->qp;
3380 qp->s_state = TID_OP(WRITE_REQ);
3381 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
3382 *bth2 |= IB_BTH_REQ_ACK;
3383 *len = 0;
3384
3385 rcu_read_unlock();
3386 return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
3387 }
3388
3389 static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
3390 {
3391 /*
3392  * Number of TID RDMA segments that one hardware flow generation can
3393  * carry: MAX_TID_FLOW_PSN packets of pmtu bytes each, divided by the
3394  * segment size. This is used as a per-queue-position weight when
3395  * estimating how long a queued QP has to wait for a free flow (see
3396  * hfi1_tid_write_alloc_resources()).
3397  */
3398
3399 return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
3400 }
3401
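/*
 * A QP's position in a TID wait queue is the distance between the
 * ticket it took when it was enqueued (qpriv->tid_enqueue) and the
 * queue's current dequeue count.
 */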
3402 static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
3403 struct tid_queue *queue)
3404 {
3405 return qpriv->tid_enqueue - queue->dequeue;
3406 }
3407
3408 /*
3409  * Convert a wait expressed in TID RDMA segments into an IB RNR
3410  * timeout encoding: pick the smallest encoding whose timeout covers
3411  * the estimated wait at the port's current egress rate.
3412  */
3413 static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
3414 {
3415 struct hfi1_qp_priv *qpriv = qp->priv;
3416 u64 timeout;
3417 u32 bytes_per_us;
3418 u8 i;
3419
3420 bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
3421 timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
3422
3423
3424
3425
3426 for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
3427 if (rvt_rnr_tbl_to_usec(i) >= timeout)
3428 return i;
3429 return 0;
3430 }
3431
3432 /*
3433  * Central resource allocator for the TID RDMA WRITE responder.
3434  *
3435  * Walk the ACK queue from r_tid_alloc and, for each queued TID RDMA
3436  * WRITE request, allocate a hardware flow (if none is held) and
3437  * RcvArray/TID entries for its segments, up to local->max_write
3438  * outstanding segments. Allocation pauses at a KDETH PSN sync point
3439  * until the outstanding data has arrived. If allocation fails while
3440  * in interrupt context and no NAK is pending, an RNR NAK with an
3441  * estimated back-off is generated so the requester retries later.
3442  */
3443
3444
3445
3446
3447
3448
3449
3450
3451 static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
3452 {
3453 struct tid_rdma_request *req;
3454 struct hfi1_qp_priv *qpriv = qp->priv;
3455 struct hfi1_ctxtdata *rcd = qpriv->rcd;
3456 struct tid_rdma_params *local = &qpriv->tid_rdma.local;
3457 struct rvt_ack_entry *e;
3458 u32 npkts, to_seg;
3459 bool last;
3460 int ret = 0;
3461
3462 lockdep_assert_held(&qp->s_lock);
3463
3464 while (1) {
3465 trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
3466 trace_hfi1_tid_write_rsp_alloc_res(qp);
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479 if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
3480 break;
3481
3482
3483 if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
3484
3485 if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
3486 !qpriv->alloc_w_segs) {
3487 hfi1_kern_clear_hw_flow(rcd, qp);
3488 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3489 }
3490 break;
3491 }
3492
3493 e = &qp->s_ack_queue[qpriv->r_tid_alloc];
3494 if (e->opcode != TID_OP(WRITE_REQ))
3495 goto next_req;
3496 req = ack_to_tid_req(e);
3497 trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
3498 e->lpsn, req);
3499
3500 if (req->alloc_seg >= req->total_segs)
3501 goto next_req;
3502
3503
3504 if (qpriv->alloc_w_segs >= local->max_write)
3505 break;
3506
3507
3508 if (qpriv->sync_pt && qpriv->alloc_w_segs)
3509 break;
3510
3511
3512 if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
3513 hfi1_kern_clear_hw_flow(rcd, qp);
3514 qpriv->sync_pt = false;
3515 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
3516 }
3517
3518
3519 if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
3520 ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
3521 if (ret) {
3522 to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
3523 position_in_queue(qpriv,
3524 &rcd->flow_queue);
3525 break;
3526 }
3527 }
3528
3529 npkts = rvt_div_round_up_mtu(qp, req->seg_len);
3530 /*
3531  * We are at a sync point if this segment would run past the end of
3532  * the KDETH PSN space; the last PSN of each generation is reserved
3533  * for RESYNC.
3534  */
3535 if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
3536 qpriv->sync_pt = true;
3537 break;
3538 }
3539
3540
3541
3542
3543
3544
3545
3546
3547 if (!CIRC_SPACE(req->setup_head, req->acked_tail,
3548 MAX_FLOWS)) {
3549 ret = -EAGAIN;
3550 to_seg = MAX_FLOWS >> 1;
3551 tid_rdma_trigger_ack(qp);
3552 break;
3553 }
3554
3555
3556 ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
3557 if (ret == -EAGAIN)
3558 to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
3559 if (ret)
3560 break;
3561
3562 qpriv->alloc_w_segs++;
3563 req->alloc_seg++;
3564 continue;
3565 next_req:
3566
3567 if (++qpriv->r_tid_alloc >
3568 rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3569 qpriv->r_tid_alloc = 0;
3570 }
3571
3572
3573
3574
3575
3576
3577 if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
3578 goto send_rnr_nak;
3579
3580 return;
3581
3582 send_rnr_nak:
3583 lockdep_assert_held(&qp->r_lock);
3584
3585
3586 qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;
3587
3588
3589 qp->r_psn = e->psn + req->alloc_seg;
3590 qp->r_ack_psn = qp->r_psn;
3591
3592
3593
3594
3595
3596 qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
3597 if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3598 qp->r_head_ack_queue = 0;
3599 qpriv->r_tid_head = qp->r_head_ack_queue;
3600
3601
3602
3603
3604
3605 qp->s_nak_state = qp->r_nak_state;
3606 qp->s_ack_psn = qp->r_ack_psn;
3607
3608
3609
3610
3611 qp->s_flags &= ~(RVT_S_ACK_PENDING);
3612
3613 trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
3614
3615
3616
3617
3618
3619
3620
3621 qpriv->rnr_nak_state = TID_RNR_NAK_SEND;
3622
3623
3624
3625
3626
3627
3628 rc_defered_ack(rcd, qp);
3629 }
3630
3631 void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
3632 {
3633 /*
3634  * Handle an incoming TID RDMA WRITE REQUEST:
3635  * 1. Verify the header, access rights and length (a page multiple).
3636  * 2. Put the request on the ACK queue, reviving the original entry if
3637  *    this is the retry of a previously RNR NAK'd request.
3638  * 3. Initialize the tid_rdma_request state and its circular flow
3639  *    indices.
3640  * 4. Try to allocate flow/TID resources and schedule the send engine
3641  *    so that TID RDMA WRITE RESPONSE packets can be generated.
3642  */
3643
3644
3645
3646 struct hfi1_ctxtdata *rcd = packet->rcd;
3647 struct rvt_qp *qp = packet->qp;
3648 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
3649 struct ib_other_headers *ohdr = packet->ohdr;
3650 struct rvt_ack_entry *e;
3651 unsigned long flags;
3652 struct ib_reth *reth;
3653 struct hfi1_qp_priv *qpriv = qp->priv;
3654 struct tid_rdma_request *req;
3655 u32 bth0, psn, len, rkey, num_segs;
3656 bool fecn;
3657 u8 next;
3658 u64 vaddr;
3659 int diff;
3660
3661 bth0 = be32_to_cpu(ohdr->bth[0]);
3662 if (hfi1_ruc_check_hdr(ibp, packet))
3663 return;
3664
3665 fecn = process_ecn(qp, packet);
3666 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
3667 trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
3668
3669 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
3670 rvt_comm_est(qp);
3671
3672 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
3673 goto nack_inv;
3674
3675 reth = &ohdr->u.tid_rdma.w_req.reth;
3676 vaddr = be64_to_cpu(reth->vaddr);
3677 len = be32_to_cpu(reth->length);
3678
3679 num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
3680 diff = delta_psn(psn, qp->r_psn);
3681 if (unlikely(diff)) {
3682 tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
3683 return;
3684 }
3685
3686
3687
3688
3689
3690
3691 if (qpriv->rnr_nak_state)
3692 qp->r_head_ack_queue = qp->r_head_ack_queue ?
3693 qp->r_head_ack_queue - 1 :
3694 rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
3695
3696
3697 next = qp->r_head_ack_queue + 1;
3698 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
3699 next = 0;
3700 spin_lock_irqsave(&qp->s_lock, flags);
3701 if (unlikely(next == qp->s_acked_ack_queue)) {
3702 if (!qp->s_ack_queue[next].sent)
3703 goto nack_inv_unlock;
3704 update_ack_queue(qp, next);
3705 }
3706 e = &qp->s_ack_queue[qp->r_head_ack_queue];
3707 req = ack_to_tid_req(e);
3708 /* Bring the previously RNR NAK'd request back to life */
3709
3710 if (qpriv->rnr_nak_state) {
3711 qp->r_nak_state = 0;
3712 qp->s_nak_state = 0;
3713 qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
3714 qp->r_psn = e->lpsn + 1;
3715 req->state = TID_REQUEST_INIT;
3716 goto update_head;
3717 }
3718
3719 release_rdma_sge_mr(e);
3720 /* The length must be a multiple of the page size */
3721
3722 if (!len || len & ~PAGE_MASK)
3723 goto nack_inv_unlock;
3724
3725 rkey = be32_to_cpu(reth->rkey);
3726 qp->r_len = len;
3727
3728 if (e->opcode == TID_OP(WRITE_REQ) &&
3729 (req->setup_head != req->clear_tail ||
3730 req->clear_tail != req->acked_tail))
3731 goto nack_inv_unlock;
3732
3733 if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
3734 rkey, IB_ACCESS_REMOTE_WRITE)))
3735 goto nack_acc;
3736
3737 qp->r_psn += num_segs - 1;
3738
3739 e->opcode = (bth0 >> 24) & 0xff;
3740 e->psn = psn;
3741 e->lpsn = qp->r_psn;
3742 e->sent = 0;
3743
3744 req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
3745 req->state = TID_REQUEST_INIT;
3746 req->cur_seg = 0;
3747 req->comp_seg = 0;
3748 req->ack_seg = 0;
3749 req->alloc_seg = 0;
3750 req->isge = 0;
3751 req->seg_len = qpriv->tid_rdma.local.max_len;
3752 req->total_len = len;
3753 req->total_segs = num_segs;
3754 req->r_flow_psn = e->psn;
3755 req->ss.sge = e->rdma_sge;
3756 req->ss.num_sge = 1;
3757
3758 req->flow_idx = req->setup_head;
3759 req->clear_tail = req->setup_head;
3760 req->acked_tail = req->setup_head;
3761
3762 qp->r_state = e->opcode;
3763 qp->r_nak_state = 0;
3764
3765 /*
3766  * Increment the MSN here instead of when the result is sent so that
3767  * a duplicate request cannot increment it more than once.
3768  */
3769 qp->r_msn++;
3770 qp->r_psn++;
3771
3772 trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
3773 req);
3774
3775 if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
3776 qpriv->r_tid_tail = qp->r_head_ack_queue;
3777 } else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
3778 struct tid_rdma_request *ptr;
3779
3780 e = &qp->s_ack_queue[qpriv->r_tid_tail];
3781 ptr = ack_to_tid_req(e);
3782
3783 if (e->opcode != TID_OP(WRITE_REQ) ||
3784 ptr->comp_seg == ptr->total_segs) {
3785 if (qpriv->r_tid_tail == qpriv->r_tid_ack)
3786 qpriv->r_tid_ack = qp->r_head_ack_queue;
3787 qpriv->r_tid_tail = qp->r_head_ack_queue;
3788 }
3789 }
3790 update_head:
3791 qp->r_head_ack_queue = next;
3792 qpriv->r_tid_head = qp->r_head_ack_queue;
3793
3794 hfi1_tid_write_alloc_resources(qp, true);
3795 trace_hfi1_tid_write_rsp_rcv_req(qp);
3796
3797
3798 qp->s_flags |= RVT_S_RESP_PENDING;
3799 if (fecn)
3800 qp->s_flags |= RVT_S_ECN;
3801 hfi1_schedule_send(qp);
3802
3803 spin_unlock_irqrestore(&qp->s_lock, flags);
3804 return;
3805
3806 nack_inv_unlock:
3807 spin_unlock_irqrestore(&qp->s_lock, flags);
3808 nack_inv:
3809 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
3810 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
3811 qp->r_ack_psn = qp->r_psn;
3812
3813 rc_defered_ack(rcd, qp);
3814 return;
3815 nack_acc:
3816 spin_unlock_irqrestore(&qp->s_lock, flags);
3817 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
3818 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
3819 qp->r_ack_psn = qp->r_psn;
3820 }
3821
3822 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
3823 struct ib_other_headers *ohdr, u32 *bth1,
3824 u32 bth2, u32 *len,
3825 struct rvt_sge_state **ss)
3826 {
3827 struct hfi1_ack_priv *epriv = e->priv;
3828 struct tid_rdma_request *req = &epriv->tid_req;
3829 struct hfi1_qp_priv *qpriv = qp->priv;
3830 struct tid_rdma_flow *flow = NULL;
3831 u32 resp_len = 0, hdwords = 0;
3832 void *resp_addr = NULL;
3833 struct tid_rdma_params *remote;
3834
3835 trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
3836 req);
3837 trace_hfi1_tid_write_rsp_build_resp(qp);
3838 trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
3839 flow = &req->flows[req->flow_idx];
3840 switch (req->state) {
3841 default:
3842 /*
3843  * Try to allocate resources here in case the QP was queued and was
3844  * later scheduled when resources became available.
3845  */
3846 hfi1_tid_write_alloc_resources(qp, false);
3847
3848 /* We've already sent everything that is ready */
3849 if (req->cur_seg >= req->alloc_seg)
3850 goto done;
3851
3852 /*
3853  * Don't generate any more responses until the RNR NAK has been
3854  * processed by the remote side.
3855  */
3856 if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
3857 goto done;
3858
3859 req->state = TID_REQUEST_ACTIVE;
3860 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3861 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3862 hfi1_add_tid_reap_timer(qp);
3863 break;
3864
3865 case TID_REQUEST_RESEND_ACTIVE:
3866 case TID_REQUEST_RESEND:
3867 trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
3868 req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
3869 if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
3870 req->state = TID_REQUEST_ACTIVE;
3871
3872 hfi1_mod_tid_reap_timer(qp);
3873 break;
3874 }
3875 flow->flow_state.resp_ib_psn = bth2;
3876 resp_addr = (void *)flow->tid_entry;
3877 resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
3878 req->cur_seg++;
3879
3880 memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
3881 epriv->ss.sge.vaddr = resp_addr;
3882 epriv->ss.sge.sge_length = resp_len;
3883 epriv->ss.sge.length = epriv->ss.sge.sge_length;
3884
3885
3886
3887
3888 epriv->ss.sge.mr = NULL;
3889 epriv->ss.sge.m = 0;
3890 epriv->ss.sge.n = 0;
3891
3892 epriv->ss.sg_list = NULL;
3893 epriv->ss.total_len = epriv->ss.sge.sge_length;
3894 epriv->ss.num_sge = 1;
3895
3896 *ss = &epriv->ss;
3897 *len = epriv->ss.total_len;
3898
3899
3900 rcu_read_lock();
3901 remote = rcu_dereference(qpriv->tid_rdma.remote);
3902
3903 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
3904 KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
3905 ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
3906 ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
3907 cpu_to_be32((flow->flow_state.generation <<
3908 HFI1_KDETH_BTH_SEQ_SHIFT) |
3909 (flow->flow_state.spsn &
3910 HFI1_KDETH_BTH_SEQ_MASK));
3911 ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
3912 cpu_to_be32(qpriv->tid_rdma.local.qp |
3913 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
3914 TID_RDMA_DESTQP_FLOW_SHIFT) |
3915 qpriv->rcd->ctxt);
3916 ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
3917 *bth1 = remote->qp;
3918 rcu_read_unlock();
3919 hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
3920 qpriv->pending_tid_w_segs++;
3921 done:
3922 return hdwords;
3923 }
3924
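/*
 * The TID "reap" timer bounds how long the responder holds its hardware
 * flow and RcvArray resources while waiting for TID RDMA WRITE DATA.
 * It is armed when a WRITE RESPONSE is built, pushed out as data
 * arrives, and if it expires (hfi1_tid_timeout()) all TID resources are
 * reclaimed and the QP is moved to the error state.
 */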
3925 static void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
3926 {
3927 struct hfi1_qp_priv *qpriv = qp->priv;
3928
3929 lockdep_assert_held(&qp->s_lock);
3930 if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
3931 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3932 qpriv->s_tid_timer.expires = jiffies +
3933 qpriv->tid_timer_timeout_jiffies;
3934 add_timer(&qpriv->s_tid_timer);
3935 }
3936 }
3937
3938 static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
3939 {
3940 struct hfi1_qp_priv *qpriv = qp->priv;
3941
3942 lockdep_assert_held(&qp->s_lock);
3943 qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
3944 mod_timer(&qpriv->s_tid_timer, jiffies +
3945 qpriv->tid_timer_timeout_jiffies);
3946 }
3947
3948 static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
3949 {
3950 struct hfi1_qp_priv *qpriv = qp->priv;
3951 int rval = 0;
3952
3953 lockdep_assert_held(&qp->s_lock);
3954 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
3955 rval = del_timer(&qpriv->s_tid_timer);
3956 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
3957 }
3958 return rval;
3959 }
3960
3961 void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
3962 {
3963 struct hfi1_qp_priv *qpriv = qp->priv;
3964
3965 del_timer_sync(&qpriv->s_tid_timer);
3966 qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
3967 }
3968
3969 static void hfi1_tid_timeout(struct timer_list *t)
3970 {
3971 struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
3972 struct rvt_qp *qp = qpriv->owner;
3973 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
3974 unsigned long flags;
3975 u32 i;
3976
3977 spin_lock_irqsave(&qp->r_lock, flags);
3978 spin_lock(&qp->s_lock);
3979 if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
3980 dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
3981 qp->ibqp.qp_num, __func__, __LINE__);
3982 trace_hfi1_msg_tid_timeout(
3983 qp, "resource timeout = ",
3984 (u64)qpriv->tid_timer_timeout_jiffies);
3985 hfi1_stop_tid_reap_timer(qp);
3986
3987
3988
3989
3990 hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
3991 for (i = 0; i < rvt_max_atomic(rdi); i++) {
3992 struct tid_rdma_request *req =
3993 ack_to_tid_req(&qp->s_ack_queue[i]);
3994
3995 hfi1_kern_exp_rcv_clear_all(req);
3996 }
3997 spin_unlock(&qp->s_lock);
3998 if (qp->ibqp.event_handler) {
3999 struct ib_event ev;
4000
4001 ev.device = qp->ibqp.device;
4002 ev.element.qp = &qp->ibqp;
4003 ev.event = IB_EVENT_QP_FATAL;
4004 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
4005 }
4006 rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
4007 goto unlock_r_lock;
4008 }
4009 spin_unlock(&qp->s_lock);
4010 unlock_r_lock:
4011 spin_unlock_irqrestore(&qp->r_lock, flags);
4012 }
4013
4014 void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
4015 {
4016 /*
4017  * Handle an incoming TID RDMA WRITE RESPONSE on the requester side:
4018  * validate the PSN, run RC ACK processing, then record the TID
4019  * entries, flow PSN and generation advertised by the responder in the
4020  * next free flow slot so that TID RDMA WRITE DATA packets can be
4021  * built against them.
4022  */
4023
4024
4025
4026
4027 struct ib_other_headers *ohdr = packet->ohdr;
4028 struct rvt_qp *qp = packet->qp;
4029 struct hfi1_qp_priv *qpriv = qp->priv;
4030 struct hfi1_ctxtdata *rcd = packet->rcd;
4031 struct rvt_swqe *wqe;
4032 struct tid_rdma_request *req;
4033 struct tid_rdma_flow *flow;
4034 enum ib_wc_status status;
4035 u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
4036 bool fecn;
4037 unsigned long flags;
4038
4039 fecn = process_ecn(qp, packet);
4040 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4041 aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
4042 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4043
4044 spin_lock_irqsave(&qp->s_lock, flags);
4045
4046 /* Ignore responses carrying a PSN we have not yet sent */
4047 if (cmp_psn(psn, qp->s_next_psn) >= 0)
4048 goto ack_done;
4049
4050 /* Ignore duplicate responses */
4051 if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
4052 goto ack_done;
4053
4054 if (unlikely(qp->s_acked == qp->s_tail))
4055 goto ack_done;
4056
4057 /*
4058  * Skip everything other than the PSN we expect, if we are waiting
4059  * for a reply to a restarted RDMA read or atomic op.
4060  */
4061
4062 if (qp->r_flags & RVT_R_RDMAR_SEQ) {
4063 if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
4064 goto ack_done;
4065 qp->r_flags &= ~RVT_R_RDMAR_SEQ;
4066 }
4067
4068 wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
4069 if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
4070 goto ack_op_err;
4071
4072 req = wqe_to_tid_req(wqe);
4073
4074
4075
4076
4077
4078 if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
4079 goto ack_done;
4080
4081 /*
4082  * The call to do_rc_ack() should be last in the chain of packet
4083  * checks because it will end up updating the QP state. Therefore,
4084  * anything that would prevent the packet from being accepted as a
4085  * successful response should come before it.
4086  */
4087
4088 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
4089 goto ack_done;
4090
4091 trace_hfi1_ack(qp, psn);
4092
4093 flow = &req->flows[req->setup_head];
4094 flow->pkt = 0;
4095 flow->tid_idx = 0;
4096 flow->tid_offset = 0;
4097 flow->sent = 0;
4098 flow->resync_npkts = 0;
4099 flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
4100 flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
4101 TID_RDMA_DESTQP_FLOW_MASK;
4102 flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
4103 flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4104 flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
4105 flow->flow_state.resp_ib_psn = psn;
4106 flow->length = min_t(u32, req->seg_len,
4107 (wqe->length - (req->comp_seg * req->seg_len)));
4108
4109 flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
4110 flow->flow_state.lpsn = flow->flow_state.spsn +
4111 flow->npkts - 1;
4112
4113 pktlen = packet->tlen - (packet->hlen + 4);
4114 if (pktlen > sizeof(flow->tid_entry)) {
4115 status = IB_WC_LOC_LEN_ERR;
4116 goto ack_err;
4117 }
4118 memcpy(flow->tid_entry, packet->ebuf, pktlen);
4119 flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
4120 trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
4121
4122 req->comp_seg++;
4123 trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
4124
4125
4126
4127
4128 for (i = 0; i < flow->tidcnt; i++) {
4129 trace_hfi1_tid_entry_rcv_write_resp(
4130 qp, i, flow->tid_entry[i]);
4131 if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
4132 status = IB_WC_LOC_LEN_ERR;
4133 goto ack_err;
4134 }
4135 tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
4136 }
4137 if (tidlen * PAGE_SIZE < flow->length) {
4138 status = IB_WC_LOC_LEN_ERR;
4139 goto ack_err;
4140 }
4141
4142 trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
4143 wqe->lpsn, req);
4144
4145 /*
4146  * On the first response for this request, initialize r_last_acked
4147  * and point acked_tail at this flow.
4148 if (!cmp_psn(psn, wqe->psn)) {
4149 req->r_last_acked = mask_psn(wqe->psn - 1);
4150
4151 req->acked_tail = req->setup_head;
4152 }
4153
4154
4155 req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
4156 req->state = TID_REQUEST_ACTIVE;
4157
4158
4159
4160
4161
4162
4163
4164
4165 if (qpriv->s_tid_cur != qpriv->s_tid_head &&
4166 req->comp_seg == req->total_segs) {
4167 for (i = qpriv->s_tid_cur + 1; ; i++) {
4168 if (i == qp->s_size)
4169 i = 0;
4170 wqe = rvt_get_swqe_ptr(qp, i);
4171 if (i == qpriv->s_tid_head)
4172 break;
4173 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
4174 break;
4175 }
4176 qpriv->s_tid_cur = i;
4177 }
4178 qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
4179 hfi1_schedule_tid_send(qp);
4180 goto ack_done;
4181
4182 ack_op_err:
4183 status = IB_WC_LOC_QP_OP_ERR;
4184 ack_err:
4185 rvt_error_qp(qp, status);
4186 ack_done:
4187 if (fecn)
4188 qp->s_flags |= RVT_S_ECN;
4189 spin_unlock_irqrestore(&qp->s_lock, flags);
4190 }
4191
4192 bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
4193 struct ib_other_headers *ohdr,
4194 u32 *bth1, u32 *bth2, u32 *len)
4195 {
4196 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4197 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
4198 struct tid_rdma_params *remote;
4199 struct rvt_qp *qp = req->qp;
4200 struct hfi1_qp_priv *qpriv = qp->priv;
4201 u32 tidentry = flow->tid_entry[flow->tid_idx];
4202 u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
4203 struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
4204 u32 next_offset, om = KDETH_OM_LARGE;
4205 bool last_pkt;
4206
4207 if (!tidlen) {
4208 hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
4209 rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
4210 }
4211
4212 *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
4213 flow->sent += *len;
4214 next_offset = flow->tid_offset + *len;
4215 last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
4216 next_offset >= tidlen) || (flow->sent >= flow->length);
4217 trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
4218 trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
4219
4220 rcu_read_lock();
4221 remote = rcu_dereference(qpriv->tid_rdma.remote);
4222 KDETH_RESET(wd->kdeth0, KVER, 0x1);
4223 KDETH_SET(wd->kdeth0, SH, !last_pkt);
4224 KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
4225 KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
4226 KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
4227 KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
4228 KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
4229 KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
4230 wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
4231 rcu_read_unlock();
4232
4233 *bth1 = flow->tid_qpn;
4234 *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
4235 HFI1_KDETH_BTH_SEQ_MASK) |
4236 (flow->flow_state.generation <<
4237 HFI1_KDETH_BTH_SEQ_SHIFT));
4238 if (last_pkt) {
4239 /* PSNs are zero-based, so +1 to count the number of packets */
4240 if (flow->flow_state.lpsn + 1 +
4241 rvt_div_round_up_mtu(qp, req->seg_len) >
4242 MAX_TID_FLOW_PSN)
4243 req->state = TID_REQUEST_SYNC;
4244 *bth2 |= IB_BTH_REQ_ACK;
4245 }
4246
4247 if (next_offset >= tidlen) {
4248 flow->tid_offset = 0;
4249 flow->tid_idx++;
4250 } else {
4251 flow->tid_offset = next_offset;
4252 }
4253 return last_pkt;
4254 }
4255
4256 void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
4257 {
4258 struct rvt_qp *qp = packet->qp;
4259 struct hfi1_qp_priv *priv = qp->priv;
4260 struct hfi1_ctxtdata *rcd = priv->rcd;
4261 struct ib_other_headers *ohdr = packet->ohdr;
4262 struct rvt_ack_entry *e;
4263 struct tid_rdma_request *req;
4264 struct tid_rdma_flow *flow;
4265 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4266 unsigned long flags;
4267 u32 psn, next;
4268 u8 opcode;
4269 bool fecn;
4270
4271 fecn = process_ecn(qp, packet);
4272 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4273 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
4274
4275 /*
4276  * All error handling should be done by now. If we are here, the
4277  * packet is either good or has been accepted by the error handler.
4278  */
4279 spin_lock_irqsave(&qp->s_lock, flags);
4280 e = &qp->s_ack_queue[priv->r_tid_tail];
4281 req = ack_to_tid_req(e);
4282 flow = &req->flows[req->clear_tail];
4283 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
4284 update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
4285
4286 if (cmp_psn(psn, flow->flow_state.r_next_psn))
4287 goto send_nak;
4288
4289 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4290
4291
4292
4293
4294
4295
4296
4297 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
4298 struct rvt_sge_state ss;
4299 u32 len;
4300 u32 tlen = packet->tlen;
4301 u16 hdrsize = packet->hlen;
4302 u8 pad = packet->pad;
4303 u8 extra_bytes = pad + packet->extra_byte +
4304 (SIZE_OF_CRC << 2);
4305 u32 pmtu = qp->pmtu;
4306
4307 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
4308 goto send_nak;
4309 len = req->comp_seg * req->seg_len;
4310 len += delta_psn(psn,
4311 full_flow_psn(flow, flow->flow_state.spsn)) *
4312 pmtu;
4313 if (unlikely(req->total_len - len < pmtu))
4314 goto send_nak;
4315
4316
4317
4318
4319
4320 ss.sge = e->rdma_sge;
4321 ss.sg_list = NULL;
4322 ss.num_sge = 1;
4323 ss.total_len = req->total_len;
4324 rvt_skip_sge(&ss, len, false);
4325 rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
4326 false);
4327
4328 priv->r_next_psn_kdeth = mask_psn(psn + 1);
4329 priv->s_flags |= HFI1_R_TID_SW_PSN;
4330 }
4331 goto exit;
4332 }
4333 flow->flow_state.r_next_psn = mask_psn(psn + 1);
4334 hfi1_kern_exp_rcv_clear(req);
4335 priv->alloc_w_segs--;
4336 rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
4337 req->comp_seg++;
4338 priv->s_nak_state = 0;
4339
4340 /*
4341  * The segment has been fully received. On the last data packet of
4342  * the request, release the ACK queue entry's MR and advance
4343  * r_tid_tail to the next TID RDMA WRITE request.
4344  */
4345
4346
4347 trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
4348 trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
4349 req);
4350 trace_hfi1_tid_write_rsp_rcv_data(qp);
4351 validate_r_tid_ack(priv);
4352
4353 if (opcode == TID_OP(WRITE_DATA_LAST)) {
4354 release_rdma_sge_mr(e);
4355 for (next = priv->r_tid_tail + 1; ; next++) {
4356 if (next > rvt_size_atomic(&dev->rdi))
4357 next = 0;
4358 if (next == priv->r_tid_head)
4359 break;
4360 e = &qp->s_ack_queue[next];
4361 if (e->opcode == TID_OP(WRITE_REQ))
4362 break;
4363 }
4364 priv->r_tid_tail = next;
4365 if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
4366 qp->s_acked_ack_queue = 0;
4367 }
4368
4369 hfi1_tid_write_alloc_resources(qp, true);
4370
4371 /*
4372  * If more response packets are needed for this request, or other
4373  * requests are waiting on the ACK queue, schedule the send engine.
4374  */
4375 if (req->cur_seg < req->total_segs ||
4376 qp->s_tail_ack_queue != qp->r_head_ack_queue) {
4377 qp->s_flags |= RVT_S_RESP_PENDING;
4378 hfi1_schedule_send(qp);
4379 }
4380
4381 priv->pending_tid_w_segs--;
4382 if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
4383 if (priv->pending_tid_w_segs)
4384 hfi1_mod_tid_reap_timer(req->qp);
4385 else
4386 hfi1_stop_tid_reap_timer(req->qp);
4387 }
4388
4389 done:
4390 tid_rdma_schedule_ack(qp);
4391 exit:
4392 priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
4393 if (fecn)
4394 qp->s_flags |= RVT_S_ECN;
4395 spin_unlock_irqrestore(&qp->s_lock, flags);
4396 return;
4397
4398 send_nak:
4399 if (!priv->s_nak_state) {
4400 priv->s_nak_state = IB_NAK_PSN_ERROR;
4401 priv->s_nak_psn = flow->flow_state.r_next_psn;
4402 tid_rdma_trigger_ack(qp);
4403 }
4404 goto done;
4405 }
4406
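/*
 * A RESYNC PSN is a KDETH PSN whose sequence bits are all ones, i.e.
 * the last PSN of a flow generation, which is reserved and never
 * carries data (see the sync-point handling in
 * hfi1_tid_write_alloc_resources()).
 */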
4407 static bool hfi1_tid_rdma_is_resync_psn(u32 psn)
4408 {
4409 return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
4410 HFI1_KDETH_BTH_SEQ_MASK);
4411 }
4412
4413 u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
4414 struct ib_other_headers *ohdr, u16 iflow,
4415 u32 *bth1, u32 *bth2)
4416 {
4417 struct hfi1_qp_priv *qpriv = qp->priv;
4418 struct tid_flow_state *fs = &qpriv->flow_state;
4419 struct tid_rdma_request *req = ack_to_tid_req(e);
4420 struct tid_rdma_flow *flow = &req->flows[iflow];
4421 struct tid_rdma_params *remote;
4422
4423 rcu_read_lock();
4424 remote = rcu_dereference(qpriv->tid_rdma.remote);
4425 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4426 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4427 *bth1 = remote->qp;
4428 rcu_read_unlock();
4429
4430 if (qpriv->resync) {
4431 *bth2 = mask_psn((fs->generation <<
4432 HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4433 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4434 } else if (qpriv->s_nak_state) {
4435 *bth2 = mask_psn(qpriv->s_nak_psn);
4436 ohdr->u.tid_rdma.ack.aeth =
4437 cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
4438 (qpriv->s_nak_state <<
4439 IB_AETH_CREDIT_SHIFT));
4440 } else {
4441 *bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
4442 ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
4443 }
4444 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4445 ohdr->u.tid_rdma.ack.tid_flow_qp =
4446 cpu_to_be32(qpriv->tid_rdma.local.qp |
4447 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
4448 TID_RDMA_DESTQP_FLOW_SHIFT) |
4449 qpriv->rcd->ctxt);
4450
4451 ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
4452 ohdr->u.tid_rdma.ack.verbs_psn =
4453 cpu_to_be32(flow->flow_state.resp_ib_psn);
4454
4455 if (qpriv->resync) {
4456
4457
4458
4459
4460
4461
4462 if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
4463 ohdr->u.tid_rdma.ack.tid_flow_psn =
4464 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4465 } else {
4466
4467
4468
4469
4470
4471
4472 qpriv->r_next_psn_kdeth_save =
4473 qpriv->r_next_psn_kdeth - 1;
4474 ohdr->u.tid_rdma.ack.tid_flow_psn =
4475 cpu_to_be32(qpriv->r_next_psn_kdeth_save);
4476 qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
4477 }
4478 qpriv->resync = false;
4479 }
4480
4481 return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
4482 }
4483
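/*
 * Handle a TID RDMA ACK on the TID RDMA WRITE requester side: advance
 * acked_tail/ack_seg for every flow covered by the acknowledged KDETH
 * PSN and complete fully acknowledged WQEs. For a RESYNC-generated ACK,
 * re-number the remaining flows of all pending TID RDMA WRITE requests
 * with the new generation before resuming transmission. A NAK with a
 * PSN error rewinds the request to the last acknowledged segment and
 * reschedules it.
 */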
4484 void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
4485 {
4486 struct ib_other_headers *ohdr = packet->ohdr;
4487 struct rvt_qp *qp = packet->qp;
4488 struct hfi1_qp_priv *qpriv = qp->priv;
4489 struct rvt_swqe *wqe;
4490 struct tid_rdma_request *req;
4491 struct tid_rdma_flow *flow;
4492 u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
4493 unsigned long flags;
4494 u16 fidx;
4495
4496 trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
4497 process_ecn(qp, packet);
4498 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4499 aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
4500 req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
4501 resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
4502
4503 spin_lock_irqsave(&qp->s_lock, flags);
4504 trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
4505
4506
4507 if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
4508 cmp_psn(psn, qpriv->s_resync_psn))
4509 goto ack_op_err;
4510
4511 ack_psn = req_psn;
4512 if (hfi1_tid_rdma_is_resync_psn(psn))
4513 ack_kpsn = resync_psn;
4514 else
4515 ack_kpsn = psn;
4516 if (aeth >> 29) {
4517 ack_psn--;
4518 ack_kpsn--;
4519 }
4520
4521 if (unlikely(qp->s_acked == qp->s_tail))
4522 goto ack_op_err;
4523
4524 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4525
4526 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4527 goto ack_op_err;
4528
4529 req = wqe_to_tid_req(wqe);
4530 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4531 wqe->lpsn, req);
4532 flow = &req->flows[req->acked_tail];
4533 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4534
4535
4536 if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
4537 cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
4538 goto ack_op_err;
4539
4540 while (cmp_psn(ack_kpsn,
4541 full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
4542 req->ack_seg < req->cur_seg) {
4543 req->ack_seg++;
4544
4545 req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
4546 req->r_last_acked = flow->flow_state.resp_ib_psn;
4547 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4548 wqe->lpsn, req);
4549 if (req->ack_seg == req->total_segs) {
4550 req->state = TID_REQUEST_COMPLETE;
4551 wqe = do_rc_completion(qp, wqe,
4552 to_iport(qp->ibqp.device,
4553 qp->port_num));
4554 trace_hfi1_sender_rcv_tid_ack(qp);
4555 atomic_dec(&qpriv->n_tid_requests);
4556 if (qp->s_acked == qp->s_tail)
4557 break;
4558 if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
4559 break;
4560 req = wqe_to_tid_req(wqe);
4561 }
4562 flow = &req->flows[req->acked_tail];
4563 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
4564 }
4565
4566 trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
4567 wqe->lpsn, req);
4568 switch (aeth >> 29) {
4569 case 0:
4570 if (qpriv->s_flags & RVT_S_WAIT_ACK)
4571 qpriv->s_flags &= ~RVT_S_WAIT_ACK;
4572 if (!hfi1_tid_rdma_is_resync_psn(psn)) {
4573
4574 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
4575 req->ack_seg < req->cur_seg)
4576 hfi1_mod_tid_retry_timer(qp);
4577 else
4578 hfi1_stop_tid_retry_timer(qp);
4579 hfi1_schedule_send(qp);
4580 } else {
4581 u32 spsn, fpsn, last_acked, generation;
4582 struct tid_rdma_request *rptr;
4583
4584 /* Handle the ACK to a RESYNC request */
4585 hfi1_stop_tid_retry_timer(qp);
4586 /* Allow new requests (see hfi1_make_tid_rdma_pkt) */
4587 qp->s_flags &= ~HFI1_S_WAIT_HALT;
4588
4589 /*
4590  * Clear RVT_S_SEND_ONE in case the TID RDMA ACK arrives after
4591  * the TID retry timer has fired again; no further TID RESYNC
4592  * request should be sent and no more TID ACKs are expected.
4593  */
4594 qpriv->s_flags &= ~RVT_S_SEND_ONE;
4595 hfi1_schedule_send(qp);
4596
4597 if ((qp->s_acked == qpriv->s_tid_tail &&
4598 req->ack_seg == req->total_segs) ||
4599 qp->s_acked == qp->s_tail) {
4600 qpriv->s_state = TID_OP(WRITE_DATA_LAST);
4601 goto done;
4602 }
4603
4604 if (req->ack_seg == req->comp_seg) {
4605 qpriv->s_state = TID_OP(WRITE_DATA);
4606 goto done;
4607 }
4608
4609 /*
4610  * The PSN to start with is the next PSN after the
4611  * RESYNC PSN.
4612  */
4613 psn = mask_psn(psn + 1);
4614 generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
4615 spsn = 0;
4616
4617 /*
4618  * Update to the correct WQE when we get an ACK(RESYNC)
4619  * in the middle of a request.
4620  */
4621 if (delta_psn(ack_psn, wqe->lpsn))
4622 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4623 req = wqe_to_tid_req(wqe);
4624 flow = &req->flows[req->acked_tail];
4625
4626 /*
4627  * RESYNC re-numbers the PSN ranges of all remaining
4628  * segments. Also, PSNs start from 0 in the middle of a
4629  * segment and the first segment size is less than the
4630  * default number of packets. flow->resync_npkts is used
4631  * to track the number of packets from the start of the
4632  * flow to resync_psn.
4633  */
4634 fpsn = full_flow_psn(flow, flow->flow_state.spsn);
4635 req->r_ack_psn = psn;
4636
4637 /*
4638  * If this flow's generation does not match the one in the
4639  * RESYNC PSN, clamp resync_psn so that no extra resync
4640  * packets are counted for this flow below.
4641  */
4642 if (flow->flow_state.generation !=
4643 (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
4644 resync_psn = mask_psn(fpsn - 1);
4645 flow->resync_npkts +=
4646 delta_psn(mask_psn(resync_psn + 1), fpsn);
4647 /*
4648  * Renumber all packet sequence number ranges
4649  * based on the new generation.
4650  */
4651 last_acked = qp->s_acked;
4652 rptr = req;
4653 while (1) {
4654 /* start from the last acked segment */
4655 for (fidx = rptr->acked_tail;
4656 CIRC_CNT(rptr->setup_head, fidx,
4657 MAX_FLOWS);
4658 fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
4659 u32 lpsn;
4660 u32 gen;
4661
4662 flow = &rptr->flows[fidx];
4663 gen = flow->flow_state.generation;
4664 if (WARN_ON(gen == generation &&
4665 flow->flow_state.spsn !=
4666 spsn))
4667 continue;
4668 lpsn = flow->flow_state.lpsn;
4669 lpsn = full_flow_psn(flow, lpsn);
4670 flow->npkts =
4671 delta_psn(lpsn,
4672 mask_psn(resync_psn)
4673 );
4674 flow->flow_state.generation =
4675 generation;
4676 flow->flow_state.spsn = spsn;
4677 flow->flow_state.lpsn =
4678 flow->flow_state.spsn +
4679 flow->npkts - 1;
4680 flow->pkt = 0;
4681 spsn += flow->npkts;
4682 resync_psn += flow->npkts;
4683 trace_hfi1_tid_flow_rcv_tid_ack(qp,
4684 fidx,
4685 flow);
4686 }
4687 if (++last_acked == qpriv->s_tid_cur + 1)
4688 break;
4689 if (last_acked == qp->s_size)
4690 last_acked = 0;
4691 wqe = rvt_get_swqe_ptr(qp, last_acked);
4692 rptr = wqe_to_tid_req(wqe);
4693 }
4694 req->cur_seg = req->ack_seg;
4695 qpriv->s_tid_tail = qp->s_acked;
4696 qpriv->s_state = TID_OP(WRITE_REQ);
4697 hfi1_schedule_tid_send(qp);
4698 }
4699 done:
4700 qpriv->s_retry = qp->s_retry_cnt;
4701 break;
4702
4703 case 3: /* NAK */
4704 hfi1_stop_tid_retry_timer(qp);
4705 switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
4706 IB_AETH_CREDIT_MASK) {
4707 case 0: /* PSN sequence error */
4708 if (!req->flows)
4709 break;
4710 flow = &req->flows[req->acked_tail];
4711 flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
4712 if (cmp_psn(psn, flpsn) > 0)
4713 break;
4714 trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
4715 flow);
4716 req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4717 req->cur_seg = req->ack_seg;
4718 qpriv->s_tid_tail = qp->s_acked;
4719 qpriv->s_state = TID_OP(WRITE_REQ);
4720 qpriv->s_retry = qp->s_retry_cnt;
4721 hfi1_schedule_tid_send(qp);
4722 break;
4723
4724 default:
4725 break;
4726 }
4727 break;
4728
4729 default:
4730 break;
4731 }
4732
4733 ack_op_err:
4734 spin_unlock_irqrestore(&qp->s_lock, flags);
4735 }
4736
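/*
 * Illustrative sketch, not part of tid_rdma.c: how the AETH tests in
 * hfi1_rc_rcv_tid_rdma_ack() above decode the 32-bit AETH.  The shift by
 * 29 extracts the ACK/NAK opcode (0 = ACK, 3 = NAK), and the
 * IB_AETH_CREDIT_SHIFT/IB_AETH_CREDIT_MASK pair already used above pulls
 * out the NAK code (0 = PSN sequence error).  The helper name is made up
 * for this example.
 */
static inline bool example_tid_aeth_is_seq_nak(u32 aeth)
{
	/* Same tests as the outer and inner switch statements above */
	return (aeth >> 29) == 3 &&
	       ((aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK) == 0;
}
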
4737 void hfi1_add_tid_retry_timer(struct rvt_qp *qp)
4738 {
4739 struct hfi1_qp_priv *priv = qp->priv;
4740 struct ib_qp *ibqp = &qp->ibqp;
4741 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
4742
4743 lockdep_assert_held(&qp->s_lock);
4744 if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
4745 priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
4746 priv->s_tid_retry_timer.expires = jiffies +
4747 priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
4748 add_timer(&priv->s_tid_retry_timer);
4749 }
4750 }
4751
4752 static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
4753 {
4754 struct hfi1_qp_priv *priv = qp->priv;
4755 struct ib_qp *ibqp = &qp->ibqp;
4756 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
4757
4758 lockdep_assert_held(&qp->s_lock);
4759 priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
4760 mod_timer(&priv->s_tid_retry_timer, jiffies +
4761 priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
4762 }
4763
4764 static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
4765 {
4766 struct hfi1_qp_priv *priv = qp->priv;
4767 int rval = 0;
4768
4769 lockdep_assert_held(&qp->s_lock);
4770 if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
4771 rval = del_timer(&priv->s_tid_retry_timer);
4772 priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
4773 }
4774 return rval;
4775 }
4776
4777 void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
4778 {
4779 struct hfi1_qp_priv *priv = qp->priv;
4780
4781 del_timer_sync(&priv->s_tid_retry_timer);
4782 priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
4783 }
4784
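/*
 * Summary of the retry-timer helpers above (derived from the code in this
 * file, not an authoritative description): hfi1_rc_rcv_tid_rdma_ack()
 * re-arms the timer with hfi1_mod_tid_retry_timer() while unacked segments
 * remain and stops it otherwise, and hfi1_tid_retry_timeout() below either
 * flushes the QP when retries are exhausted or switches the sender into
 * TID_OP(RESYNC) with RVT_S_SEND_ONE and HFI1_S_WAIT_HALT set.  All of the
 * helpers run under qp->s_lock, per the lockdep assertions.
 */
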
4785 static void hfi1_tid_retry_timeout(struct timer_list *t)
4786 {
4787 struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
4788 struct rvt_qp *qp = priv->owner;
4789 struct rvt_swqe *wqe;
4790 unsigned long flags;
4791 struct tid_rdma_request *req;
4792
4793 spin_lock_irqsave(&qp->r_lock, flags);
4794 spin_lock(&qp->s_lock);
4795 trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
4796 if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
4797 hfi1_stop_tid_retry_timer(qp);
4798 if (!priv->s_retry) {
4799 trace_hfi1_msg_tid_retry_timeout(
4800 qp,
4801 "Exhausted retries. Tid retry timeout = ",
4802 (u64)priv->tid_retry_timeout_jiffies);
4803
4804 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4805 hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
4806 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
4807 } else {
4808 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
4809 req = wqe_to_tid_req(wqe);
4810 trace_hfi1_tid_req_tid_retry_timeout(
4811 qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);
4812
4813 priv->s_flags &= ~RVT_S_WAIT_ACK;
4814 /* Only send one packet (the RESYNC) */
4815 priv->s_flags |= RVT_S_SEND_ONE;
4816 /*
4817  * No additional request shall be made by this QP until
4818  * the RESYNC has been completed.
4819  */
4820 qp->s_flags |= HFI1_S_WAIT_HALT;
4821 priv->s_state = TID_OP(RESYNC);
4822 priv->s_retry--;
4823 hfi1_schedule_tid_send(qp);
4824 }
4825 }
4826 spin_unlock(&qp->s_lock);
4827 spin_unlock_irqrestore(&qp->r_lock, flags);
4828 }
4829
4830 u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
4831 struct ib_other_headers *ohdr, u32 *bth1,
4832 u32 *bth2, u16 fidx)
4833 {
4834 struct hfi1_qp_priv *qpriv = qp->priv;
4835 struct tid_rdma_params *remote;
4836 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
4837 struct tid_rdma_flow *flow = &req->flows[fidx];
4838 u32 generation;
4839
4840 rcu_read_lock();
4841 remote = rcu_dereference(qpriv->tid_rdma.remote);
4842 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
4843 ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
4844 *bth1 = remote->qp;
4845 rcu_read_unlock();
4846
4847 generation = kern_flow_generation_next(flow->flow_state.generation);
4848 *bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
4849 qpriv->s_resync_psn = *bth2;
4850 *bth2 |= IB_BTH_REQ_ACK;
4851 KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
4852
4853 return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
4854 }
4855
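/*
 * Worked example (illustrative only, not part of the driver): the RESYNC
 * BTH PSN built above is the last PSN of the previous generation, i.e. the
 * low HFI1_KDETH_BTH_SEQ_SHIFT bits are all ones.  Assuming the upstream
 * shift value of 11, generation 5 gives (5 << 11) - 1 = 0x27ff.  This is
 * the all-ones sequence property that hfi1_tid_rdma_is_resync_psn() keys
 * off elsewhere in this file.  The helper name below is hypothetical.
 */
static inline u32 example_resync_bth_psn(u32 generation)
{
	/* mirrors the *bth2 computation in hfi1_build_tid_rdma_resync() */
	return mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
}
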
4856 void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
4857 {
4858 struct ib_other_headers *ohdr = packet->ohdr;
4859 struct rvt_qp *qp = packet->qp;
4860 struct hfi1_qp_priv *qpriv = qp->priv;
4861 struct hfi1_ctxtdata *rcd = qpriv->rcd;
4862 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
4863 struct rvt_ack_entry *e;
4864 struct tid_rdma_request *req;
4865 struct tid_rdma_flow *flow;
4866 struct tid_flow_state *fs = &qpriv->flow_state;
4867 u32 psn, generation, idx, gen_next;
4868 bool fecn;
4869 unsigned long flags;
4870
4871 fecn = process_ecn(qp, packet);
4872 psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
4873
4874 generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
4875 spin_lock_irqsave(&qp->s_lock, flags);
4876
4877 gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
4878 generation : kern_flow_generation_next(fs->generation);
4879 /*
4880  * A RESYNC carries the "next" generation and can only come
4881  * from the current or the previous generation.
4882  */
4883 if (generation != mask_generation(gen_next - 1) &&
4884 generation != gen_next)
4885 goto bail;
4886 /* Already processing a RESYNC */
4887 if (qpriv->resync)
4888 goto bail;
4889
4890 spin_lock(&rcd->exp_lock);
4891 if (fs->index >= RXE_NUM_TID_FLOWS) {
4892 /*
4893  * If we don't have a flow, save the generation so it can be
4894  * applied when a new flow is allocated.
4895  */
4896 fs->generation = generation;
4897 } else {
4898 /* Reprogram the QP flow with the new generation */
4899 rcd->flows[fs->index].generation = generation;
4900 fs->generation = kern_setup_hw_flow(rcd, fs->index);
4901 }
4902 fs->psn = 0;
4903 /*
4904  * Disable SW PSN checking since a RESYNC is equivalent to a
4905  * sync point and the flow has/will be reprogrammed.
4906  */
4907 qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
4908 trace_hfi1_tid_write_rsp_rcv_resync(qp);
4909
4910 /*
4911  * Walk the ACK queue entries that may still have TID WRITE
4912  * data outstanding and renumber every pending flow with the
4913  * new generation, restarting the flow PSNs from fs->psn.
4914  */
4915 for (idx = qpriv->r_tid_tail; ; idx++) {
4916 u16 flow_idx;
4917
4918 if (idx > rvt_size_atomic(&dev->rdi))
4919 idx = 0;
4920 e = &qp->s_ack_queue[idx];
4921 if (e->opcode == TID_OP(WRITE_REQ)) {
4922 req = ack_to_tid_req(e);
4923 trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
4924 e->lpsn, req);
4925
4926 /* start from the first flow that still has data outstanding */
4927 for (flow_idx = req->clear_tail;
4928 CIRC_CNT(req->setup_head, flow_idx,
4929 MAX_FLOWS);
4930 flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
4931 u32 lpsn;
4932 u32 next;
4933
4934 flow = &req->flows[flow_idx];
4935 lpsn = full_flow_psn(flow,
4936 flow->flow_state.lpsn);
4937 next = flow->flow_state.r_next_psn;
4938 flow->npkts = delta_psn(lpsn, next - 1);
4939 flow->flow_state.generation = fs->generation;
4940 flow->flow_state.spsn = fs->psn;
4941 flow->flow_state.lpsn =
4942 flow->flow_state.spsn + flow->npkts - 1;
4943 flow->flow_state.r_next_psn =
4944 full_flow_psn(flow,
4945 flow->flow_state.spsn);
4946 fs->psn += flow->npkts;
4947 trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
4948 flow);
4949 }
4950 }
4951 if (idx == qp->s_tail_ack_queue)
4952 break;
4953 }
4954
4955 spin_unlock(&rcd->exp_lock);
4956 qpriv->resync = true;
4957 /* A RESYNC request always gets a TID RDMA ACK. */
4958 qpriv->s_nak_state = 0;
4959 tid_rdma_trigger_ack(qp);
4960 bail:
4961 if (fecn)
4962 qp->s_flags |= RVT_S_ECN;
4963 spin_unlock_irqrestore(&qp->s_lock, flags);
4964 }
4965
4966 /*
4967  * Call this function when the last TID RDMA WRITE DATA packet for a
4968  * request is built.
4969  */
4970 static void update_tid_tail(struct rvt_qp *qp)
4971 __must_hold(&qp->s_lock)
4972 {
4973 struct hfi1_qp_priv *priv = qp->priv;
4974 u32 i;
4975 struct rvt_swqe *wqe;
4976
4977 lockdep_assert_held(&qp->s_lock);
4978
4979 if (priv->s_tid_tail == priv->s_tid_cur)
4980 return;
4981 for (i = priv->s_tid_tail + 1; ; i++) {
4982 if (i == qp->s_size)
4983 i = 0;
4984
4985 if (i == priv->s_tid_cur)
4986 break;
4987 wqe = rvt_get_swqe_ptr(qp, i);
4988 if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
4989 break;
4990 }
4991 priv->s_tid_tail = i;
4992 priv->s_state = TID_OP(WRITE_RESP);
4993 }
4994
4995 int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
4996 __must_hold(&qp->s_lock)
4997 {
4998 struct hfi1_qp_priv *priv = qp->priv;
4999 struct rvt_swqe *wqe;
5000 u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
5001 struct ib_other_headers *ohdr;
5002 struct rvt_sge_state *ss = &qp->s_sge;
5003 struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
5004 struct tid_rdma_request *req = ack_to_tid_req(e);
5005 bool last = false;
5006 u8 opcode = TID_OP(WRITE_DATA);
5007
5008 lockdep_assert_held(&qp->s_lock);
5009 trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
5010
5011 /*
5012  * Prioritize sending requests and responses over TID RDMA data packets.
5013  */
5014 if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
5015 atomic_read(&priv->n_requests) &&
5016 !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
5017 HFI1_S_ANY_WAIT_IO))) ||
5018 (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
5019 !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
5020 struct iowait_work *iowork;
5021
5022 iowork = iowait_get_ib_work(&priv->s_iowait);
5023 ps->s_txreq = get_waiting_verbs_txreq(iowork);
5024 if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
5025 priv->s_flags |= HFI1_S_TID_BUSY_SET;
5026 return 1;
5027 }
5028 }
5029
5030 ps->s_txreq = get_txreq(ps->dev, qp);
5031 if (!ps->s_txreq)
5032 goto bail_no_tx;
5033
5034 ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
5035
5036 if ((priv->s_flags & RVT_S_ACK_PENDING) &&
5037 make_tid_rdma_ack(qp, ohdr, ps))
5038 return 1;
5039
5040 /*
5041  * Bail out if we can't send data.
5042  * This check must be done after the call to make_tid_rdma_ack()
5043  * because the responding QP could be in RTR state, where it can
5044  * send the TID RDMA ACK but not TID RDMA WRITE DATA.
5045  */
5046 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
5047 goto bail;
5048
5049 if (priv->s_flags & RVT_S_WAIT_ACK)
5050 goto bail;
5051
5052 /* Check whether there is anything to do. */
5053 if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
5054 goto bail;
5055 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
5056 req = wqe_to_tid_req(wqe);
5057 trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
5058 wqe->lpsn, req);
5059 switch (priv->s_state) {
5060 case TID_OP(WRITE_REQ):
5061 case TID_OP(WRITE_RESP):
5062 priv->tid_ss.sge = wqe->sg_list[0];
5063 priv->tid_ss.sg_list = wqe->sg_list + 1;
5064 priv->tid_ss.num_sge = wqe->wr.num_sge;
5065 priv->tid_ss.total_len = wqe->length;
5066
5067 if (priv->s_state == TID_OP(WRITE_REQ))
5068 hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
5069 priv->s_state = TID_OP(WRITE_DATA);
5070 /* fall through */
5071
5072 case TID_OP(WRITE_DATA):
5073 /*
5074  * 1. Check whether a TID RDMA WRITE RESP is available.
5075  * 2. If not:
5076  *    2.1 If there are more segments and no TID RDMA WRITE RESP,
5077  *        set HFI1_S_WAIT_TID_RESP.
5078  *    2.2 Return, indicating no progress was made.
5079  * 3. If yes:
5080  *    3.1 Build the TID RDMA WRITE DATA packet.
5081  *    3.2 If this is the last packet in the segment:
5082  *        3.2.1 Change the KDETH header bits.
5083  *        3.2.2 Advance the RESP pointers.
5084  *    3.3 Return, indicating progress was made.
5085  */
5086 trace_hfi1_sender_make_tid_pkt(qp);
5087 trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
5088 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
5089 req = wqe_to_tid_req(wqe);
5090 len = wqe->length;
5091
5092 if (!req->comp_seg || req->cur_seg == req->comp_seg)
5093 goto bail;
5094
5095 trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
5096 wqe->psn, wqe->lpsn, req);
5097 last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
5098 &len);
5099
5100 if (last) {
5101 /* move the pointer to the next flow */
5102 req->clear_tail = CIRC_NEXT(req->clear_tail,
5103 MAX_FLOWS);
5104 if (++req->cur_seg < req->total_segs) {
5105 if (!CIRC_CNT(req->setup_head, req->clear_tail,
5106 MAX_FLOWS))
5107 qp->s_flags |= HFI1_S_WAIT_TID_RESP;
5108 } else {
5109 priv->s_state = TID_OP(WRITE_DATA_LAST);
5110 opcode = TID_OP(WRITE_DATA_LAST);
5111
5112 /* Advance s_tid_tail now */
5113 update_tid_tail(qp);
5114 }
5115 }
5116 hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
5117 ss = &priv->tid_ss;
5118 break;
5119
5120 case TID_OP(RESYNC):
5121 trace_hfi1_sender_make_tid_pkt(qp);
5122 /* Use the generation from the most recently received response */
5123 wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
5124 req = wqe_to_tid_req(wqe);
5125 /* If no responses for this WQE, look at the previous one */
5126 if (!req->comp_seg) {
5127 wqe = rvt_get_swqe_ptr(qp,
5128 (!priv->s_tid_cur ? qp->s_size :
5129 priv->s_tid_cur) - 1);
5130 req = wqe_to_tid_req(wqe);
5131 }
5132 hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
5133 &bth2,
5134 CIRC_PREV(req->setup_head,
5135 MAX_FLOWS));
5136 ss = NULL;
5137 len = 0;
5138 opcode = TID_OP(RESYNC);
5139 break;
5140
5141 default:
5142 goto bail;
5143 }
5144 if (priv->s_flags & RVT_S_SEND_ONE) {
5145 priv->s_flags &= ~RVT_S_SEND_ONE;
5146 priv->s_flags |= RVT_S_WAIT_ACK;
5147 bth2 |= IB_BTH_REQ_ACK;
5148 }
5149 qp->s_len -= len;
5150 ps->s_txreq->hdr_dwords = hwords;
5151 ps->s_txreq->sde = priv->s_sde;
5152 ps->s_txreq->ss = ss;
5153 ps->s_txreq->s_cur_size = len;
5154 hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
5155 middle, ps);
5156 return 1;
5157 bail:
5158 hfi1_put_txreq(ps->s_txreq);
5159 bail_no_tx:
5160 ps->s_txreq = NULL;
5161 priv->s_flags &= ~RVT_S_BUSY;
5162 /*
5163  * If we didn't get a txreq, the QP will be woken up later to
5164  * try again. Set the flags to indicate which work item to wake
5165  * up.
5166  * (A better algorithm should be found to do this and to
5167  *  generalize the sleep/wakeup flags.)
5168  */
5169 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
5170 return 0;
5171 }
5172
5173 static int make_tid_rdma_ack(struct rvt_qp *qp,
5174 struct ib_other_headers *ohdr,
5175 struct hfi1_pkt_state *ps)
5176 {
5177 struct rvt_ack_entry *e;
5178 struct hfi1_qp_priv *qpriv = qp->priv;
5179 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
5180 u32 hwords, next;
5181 u32 len = 0;
5182 u32 bth1 = 0, bth2 = 0;
5183 int middle = 0;
5184 u16 flow;
5185 struct tid_rdma_request *req, *nreq;
5186
5187 trace_hfi1_tid_write_rsp_make_tid_ack(qp);
5188
5189 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
5190 goto bail;
5191
5192 /* header size in 32-bit words: LRH+BTH = (8+12)/4 */
5193 hwords = 5;
5194
5195 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5196 req = ack_to_tid_req(e);
5197 /*
5198  * In the RESYNC case, we are exactly one segment past the
5199  * previously sent ack or at the previously sent NAK. So to send
5200  * the resync ack, we go back one segment (which might be part of
5201  * the previous request) and let the do-while loop execute again.
5202  * The advantage of executing the do-while loop is that any data
5203  * received after the previous ack is automatically acked in the
5204  * RESYNC ack. It turns out that for the do-while loop we only need
5205  * to pull back the segment pointer, not the request pointer. A
5206  * segment pointer pull-back is sufficient because the request
5207  * pointer could still point to a previous unacked request.
5208  */
5209 if (qpriv->resync) {
5210 if (!req->ack_seg || req->ack_seg == req->total_segs)
5211 qpriv->r_tid_ack = !qpriv->r_tid_ack ?
5212 rvt_size_atomic(&dev->rdi) :
5213 qpriv->r_tid_ack - 1;
5214 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5215 req = ack_to_tid_req(e);
5216 }
5217
5218 trace_hfi1_rsp_make_tid_ack(qp, e->psn);
5219 trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
5220 req);
5221 /*
5222  * If we've sent all the ACKs that we can, we are done until we
5223  * get more segments to acknowledge.
5224  */
5225 if (!qpriv->s_nak_state && !qpriv->resync &&
5226 req->ack_seg == req->comp_seg)
5227 goto bail;
5228
5229 do {
5230 /*
5231  * Every flow between acked_tail and clear_tail has had all of
5232  * its data received, so the corresponding segments can now be
5233  * acknowledged; add them to ack_seg and advance acked_tail
5234  * below.
5235  */
5236 req->ack_seg +=
5237 /* Get the up-to-date value */
5238 CIRC_CNT(req->clear_tail, req->acked_tail,
5239 MAX_FLOWS);
5240 /* Advance the acked index */
5241 req->acked_tail = req->clear_tail;
5242
5243 /*
5244  * req->clear_tail points to the segment currently being
5245  * received. So, when sending an ACK, the previous
5246  * segment is being ACK'ed.
5247  */
5248 flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
5249 if (req->ack_seg != req->total_segs)
5250 break;
5251 req->state = TID_REQUEST_COMPLETE;
5252
5253 next = qpriv->r_tid_ack + 1;
5254 if (next > rvt_size_atomic(&dev->rdi))
5255 next = 0;
5256 qpriv->r_tid_ack = next;
5257 if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
5258 break;
5259 nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
5260 if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
5261 break;
5262
5263 /* Move to the next ack entry now */
5264 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5265 req = ack_to_tid_req(e);
5266 } while (1);
5267
5268 /*
5269  * At this point qpriv->r_tid_ack has been advanced, but e and
5270  * req could still be pointing at the previous ack queue entry.
5271  */
5272 if (qpriv->s_nak_state ||
5273 (qpriv->resync &&
5274 !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
5275 (cmp_psn(qpriv->r_next_psn_kdeth - 1,
5276 full_flow_psn(&req->flows[flow],
5277 req->flows[flow].flow_state.lpsn)) > 0))) {
5278 /*
5279  * A NAK will implicitly acknowledge all previous TID RDMA
5280  * requests. Therefore, we NAK with the first unacked request
5281  * in the request queue instead of the current request, which
5282  * is why the entry at r_tid_ack is reloaded here before
5283  * building the ack.
5284  */
5285 e = &qp->s_ack_queue[qpriv->r_tid_ack];
5286 req = ack_to_tid_req(e);
5287 flow = req->acked_tail;
5288 } else if (req->ack_seg == req->total_segs &&
5289 qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
5290 qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
5291
5292 trace_hfi1_tid_write_rsp_make_tid_ack(qp);
5293 trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
5294 req);
5295 hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
5296 &bth2);
5297 len = 0;
5298 qpriv->s_flags &= ~RVT_S_ACK_PENDING;
5299 ps->s_txreq->hdr_dwords = hwords;
5300 ps->s_txreq->sde = qpriv->s_sde;
5301 ps->s_txreq->s_cur_size = len;
5302 ps->s_txreq->ss = NULL;
5303 hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
5304 ps);
5305 ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
5306 return 1;
5307 bail:
5308 /*
5309  * Ensure the ack queue changes above are committed before
5310  * RVT_S_ACK_PENDING is cleared.
5311  */
5312 smp_wmb();
5313 qpriv->s_flags &= ~RVT_S_ACK_PENDING;
5314 return 0;
5315 }
5316
5317 static int hfi1_send_tid_ok(struct rvt_qp *qp)
5318 {
5319 struct hfi1_qp_priv *priv = qp->priv;
5320
5321 return !(priv->s_flags & RVT_S_BUSY ||
5322 qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
5323 (verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
5324 (priv->s_flags & RVT_S_RESP_PENDING) ||
5325 !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
5326 }
5327
5328 void _hfi1_do_tid_send(struct work_struct *work)
5329 {
5330 struct iowait_work *w = container_of(work, struct iowait_work, iowork);
5331 struct rvt_qp *qp = iowait_to_qp(w->iow);
5332
5333 hfi1_do_tid_send(qp);
5334 }
5335
5336 static void hfi1_do_tid_send(struct rvt_qp *qp)
5337 {
5338 struct hfi1_pkt_state ps;
5339 struct hfi1_qp_priv *priv = qp->priv;
5340
5341 ps.dev = to_idev(qp->ibqp.device);
5342 ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
5343 ps.ppd = ppd_from_ibp(ps.ibp);
5344 ps.wait = iowait_get_tid_work(&priv->s_iowait);
5345 ps.in_thread = false;
5346 ps.timeout_int = qp->timeout_jiffies / 8;
5347
5348 trace_hfi1_rc_do_tid_send(qp, false);
5349 spin_lock_irqsave(&qp->s_lock, ps.flags);
5350
5351 /* Return if we are already busy processing a work request. */
5352 if (!hfi1_send_tid_ok(qp)) {
5353 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
5354 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
5355 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5356 return;
5357 }
5358
5359 priv->s_flags |= RVT_S_BUSY;
5360
5361 ps.timeout = jiffies + ps.timeout_int;
5362 ps.cpu = priv->s_sde ? priv->s_sde->cpu :
5363 cpumask_first(cpumask_of_node(ps.ppd->dd->node));
5364 ps.pkts_sent = false;
5365
5366 /* Ensure a pre-built packet is handled */
5367 ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
5368 do {
5369 /* Check for a constructed packet to be sent. */
5370 if (ps.s_txreq) {
5371 if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
5372 qp->s_flags |= RVT_S_BUSY;
5373 ps.wait = iowait_get_ib_work(&priv->s_iowait);
5374 }
5375 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5376
5377 /*
5378  * If the packet cannot be sent now, return and
5379  * the send engine will be woken up later.
5380  */
5381 if (hfi1_verbs_send(qp, &ps))
5382 return;
5383
5384 /* allow other tasks to run */
5385 if (hfi1_schedule_send_yield(qp, &ps, true))
5386 return;
5387
5388 spin_lock_irqsave(&qp->s_lock, ps.flags);
5389 if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
5390 qp->s_flags &= ~RVT_S_BUSY;
5391 priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
5392 ps.wait = iowait_get_tid_work(&priv->s_iowait);
5393 if (iowait_flag_set(&priv->s_iowait,
5394 IOWAIT_PENDING_IB))
5395 hfi1_schedule_send(qp);
5396 }
5397 }
5398 } while (hfi1_make_tid_rdma_pkt(qp, &ps));
5399 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
5400 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
5401 }
5402
5403 static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
5404 {
5405 struct hfi1_qp_priv *priv = qp->priv;
5406 struct hfi1_ibport *ibp =
5407 to_iport(qp->ibqp.device, qp->port_num);
5408 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
5409 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
5410
5411 return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
5412 priv->s_sde ?
5413 priv->s_sde->cpu :
5414 cpumask_first(cpumask_of_node(dd->node)));
5415 }
5416
5417 /**
5418  * hfi1_schedule_tid_send - schedule progress on the TID RDMA state machine
5419  * @qp: the QP
5420  *
5421  * This schedules qp progress on the TID RDMA state machine. The caller
5422  * should hold the s_lock.
5423  * Unlike hfi1_schedule_send(), this cannot use hfi1_send_ok() because
5424  * the two state machines can step on each other with respect to the
5425  * RVT_S_BUSY flag.
5426  * Therefore, a modified test is used.
5427  *
5428  * Return: %true if the second leg is scheduled, %false otherwise.
5429  */
5430 bool hfi1_schedule_tid_send(struct rvt_qp *qp)
5431 {
5432 lockdep_assert_held(&qp->s_lock);
5433 if (hfi1_send_tid_ok(qp)) {
5434 /*
5435  * The following call returns true if the qp is not on the
5436  * queue and false if the qp is already on the queue before
5437  * this call. Either way, the qp will be on the queue when the
5438  * wakeup is triggered.
5439  */
5440 _hfi1_schedule_tid_send(qp);
5441 return true;
5442 }
5443 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
5444 iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
5445 IOWAIT_PENDING_TID);
5446 return false;
5447 }
5448
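/*
 * Usage sketch (hypothetical caller, not from this file): callers must
 * hold qp->s_lock, as the lockdep assertion in hfi1_schedule_tid_send()
 * requires.  hfi1_tid_retry_timeout() above follows the same pattern.
 */
static void example_kick_tid_engine(struct rvt_qp *qp)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	/* Returns false if only a wakeup flag could be set */
	hfi1_schedule_tid_send(qp);
	spin_unlock_irqrestore(&qp->s_lock, flags);
}
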
5449 bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
5450 {
5451 struct rvt_ack_entry *prev;
5452 struct tid_rdma_request *req;
5453 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
5454 struct hfi1_qp_priv *priv = qp->priv;
5455 u32 s_prev;
5456
5457 s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
5458 (qp->s_tail_ack_queue - 1);
5459 prev = &qp->s_ack_queue[s_prev];
5460
5461 if ((e->opcode == TID_OP(READ_REQ) ||
5462 e->opcode == OP(RDMA_READ_REQUEST)) &&
5463 prev->opcode == TID_OP(WRITE_REQ)) {
5464 req = ack_to_tid_req(prev);
5465 if (req->ack_seg != req->total_segs) {
5466 priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
5467 return true;
5468 }
5469 }
5470 return false;
5471 }
5472
5473 static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
5474 {
5475 u64 reg;
5476
5477 /*
5478  * The only sane way to get the amount of
5479  * progress is to read the HW flow state.
5480  */
5481 reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
5482 return mask_psn(reg);
5483 }
5484
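/*
 * Note on the 8-byte stride above (an assumption about the hardware
 * layout): each RcvTidFlowTable entry appears to be a 64-bit per-context
 * CSR, so flow fidx is read from RCV_TID_FLOW_TABLE + 8 * fidx and
 * mask_psn() keeps only the PSN bits.  update_r_next_psn_fecn() at the
 * end of this file is the FECN/eager-delivery path that depends on this
 * hardware read.
 */
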
5485 static void tid_rdma_rcv_err(struct hfi1_packet *packet,
5486 struct ib_other_headers *ohdr,
5487 struct rvt_qp *qp, u32 psn, int diff, bool fecn)
5488 {
5489 unsigned long flags;
5490
5491 tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
5492 if (fecn) {
5493 spin_lock_irqsave(&qp->s_lock, flags);
5494 qp->s_flags |= RVT_S_ECN;
5495 spin_unlock_irqrestore(&qp->s_lock, flags);
5496 }
5497 }
5498
5499 static void update_r_next_psn_fecn(struct hfi1_packet *packet,
5500 struct hfi1_qp_priv *priv,
5501 struct hfi1_ctxtdata *rcd,
5502 struct tid_rdma_flow *flow,
5503 bool fecn)
5504 {
5505 /*
5506  * If a start/middle packet is delivered here due to
5507  * RSM rule and FECN, we need to update the r_next_psn.
5508  */
5509 if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
5510 !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
5511 struct hfi1_devdata *dd = rcd->dd;
5512
5513 flow->flow_state.r_next_psn =
5514 read_r_next_psn(dd, rcd->ctxt, flow->idx);
5515 }
5516 }