This source file includes the following definitions:
- rds_ib_send_complete
- rds_ib_send_unmap_data
- rds_ib_send_unmap_rdma
- rds_ib_send_unmap_atomic
- rds_ib_send_unmap_op
- rds_ib_send_init_ring
- rds_ib_send_clear_ring
- rds_ib_sub_signaled
- rds_ib_send_cqe_handler
- rds_ib_send_grab_credits
- rds_ib_send_add_credits
- rds_ib_advertise_credits
- rds_ib_set_wr_signal_state
- rds_ib_xmit
- rds_ib_xmit_atomic
- rds_ib_xmit_rdma
- rds_ib_xmit_path_complete
/*
 * Copyright and license header elided from this listing.
 */

#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/ratelimit.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"

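/*
 * Map an IB work-completion status onto the RDS notification status and
 * invoke the supplied completion callback.  Flush errors are skipped:
 * they are generated for queued work when the QP is flushed during
 * teardown, not for a peer-visible failure.
 */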
static void rds_ib_send_complete(struct rds_message *rm,
                                 int wc_status,
                                 void (*complete)(struct rds_message *rm, int status))
{
        int notify_status;

        switch (wc_status) {
        case IB_WC_WR_FLUSH_ERR:
                return;

        case IB_WC_SUCCESS:
                notify_status = RDS_RDMA_SUCCESS;
                break;

        case IB_WC_REM_ACCESS_ERR:
                notify_status = RDS_RDMA_REMOTE_ERROR;
                break;

        default:
                notify_status = RDS_RDMA_OTHER_ERROR;
                break;
        }
        complete(rm, notify_status);
}

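/* Unmap the data payload of an RDS message once its send WR has completed. */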
static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
                                   struct rm_data_op *op,
                                   int wc_status)
{
        if (op->op_nents)
                ib_dma_unmap_sg(ic->i_cm_id->device,
                                op->op_sg, op->op_nents,
                                DMA_TO_DEVICE);
}

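/*
 * Unmap an RDMA op, run its completion notification, and account the
 * number of bytes moved.  Notification here is driven by the local send
 * completion, not by an acknowledgment from the peer.
 */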
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
                                   struct rm_rdma_op *op,
                                   int wc_status)
{
        if (op->op_mapped) {
                ib_dma_unmap_sg(ic->i_cm_id->device,
                                op->op_sg, op->op_nents,
                                op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
                op->op_mapped = 0;
        }

        rds_ib_send_complete(container_of(op, struct rds_message, rdma),
                             wc_status, rds_rdma_send_complete);

        if (op->op_write)
                rds_stats_add(s_send_rdma_bytes, op->op_bytes);
        else
                rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
}

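/*
 * Unmap the single-entry scatterlist that received the atomic result,
 * notify the sender, and bump the per-type atomic statistics.
 */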
static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
                                     struct rm_atomic_op *op,
                                     int wc_status)
{
        if (op->op_mapped) {
                ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
                                DMA_FROM_DEVICE);
                op->op_mapped = 0;
        }

        rds_ib_send_complete(container_of(op, struct rds_message, atomic),
                             wc_status, rds_atomic_send_complete);

        if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
                rds_ib_stats_inc(s_ib_atomic_cswp);
        else
                rds_ib_stats_inc(s_ib_atomic_fadd);
}

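/*
 * Unmap the resources attached to a send work entry based on the opcode
 * it carried, and hand back the owning rds_message (if any) so the CQE
 * handler can drop its reference.
 */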
static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
                                                struct rds_ib_send_work *send,
                                                int wc_status)
{
        struct rds_message *rm = NULL;

        switch (send->s_wr.opcode) {
        case IB_WR_SEND:
                if (send->s_op) {
                        rm = container_of(send->s_op, struct rds_message, data);
                        rds_ib_send_unmap_data(ic, send->s_op, wc_status);
                }
                break;
        case IB_WR_RDMA_WRITE:
        case IB_WR_RDMA_READ:
                if (send->s_op) {
                        rm = container_of(send->s_op, struct rds_message, rdma);
                        rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
                }
                break;
        case IB_WR_ATOMIC_FETCH_AND_ADD:
        case IB_WR_ATOMIC_CMP_AND_SWP:
                if (send->s_op) {
                        rm = container_of(send->s_op, struct rds_message, atomic);
                        rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
                }
                break;
        default:
                printk_ratelimited(KERN_NOTICE
                                   "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
                                   __func__, send->s_wr.opcode);
                break;
        }

        send->s_wr.opcode = 0xdead;

        return rm;
}

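/*
 * Initialize every entry in the send ring: wr_id is the ring index,
 * sge[0] points at the slot's preallocated RDS header, and sge[1] is
 * prepared for payload with the protection domain's local DMA lkey.
 */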
void rds_ib_send_init_ring(struct rds_ib_connection *ic)
{
        struct rds_ib_send_work *send;
        u32 i;

        for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
                struct ib_sge *sge;

                send->s_op = NULL;

                send->s_wr.wr_id = i;
                send->s_wr.sg_list = send->s_sge;
                send->s_wr.ex.imm_data = 0;

                sge = &send->s_sge[0];
                sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
                sge->length = sizeof(struct rds_header);
                sge->lkey = ic->i_pd->local_dma_lkey;

                send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
        }
}

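/*
 * Flush out any ring entries that still hold mapped operations, e.g.
 * when the connection is torn down before their completions arrived.
 */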
void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
{
        struct rds_ib_send_work *send;
        u32 i;

        for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
                if (send->s_op && send->s_wr.opcode != 0xdead)
                        rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
        }
}

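/*
 * Drop the count of signaled sends still outstanding and wake anyone
 * waiting for the ring to drain once it reaches zero.
 */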
static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
{
        if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
            waitqueue_active(&rds_ib_ring_empty_wait))
                wake_up(&rds_ib_ring_empty_wait);
        BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
}

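/*
 * Handle a send completion.  A completion for the dedicated ACK WR is
 * handled separately; for data WRs, retire every ring entry the
 * completion covers, unmap the ops, drop message references, and kick
 * the send worker if transmission had stalled on a full ring or on a
 * queued congestion map update.
 */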
void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
        struct rds_message *rm = NULL;
        struct rds_connection *conn = ic->conn;
        struct rds_ib_send_work *send;
        u32 completed;
        u32 oldest;
        u32 i = 0;
        int nr_sig = 0;

        rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
                 (unsigned long long)wc->wr_id, wc->status,
                 ib_wc_status_msg(wc->status), wc->byte_len,
                 be32_to_cpu(wc->ex.imm_data));
        rds_ib_stats_inc(s_ib_tx_cq_event);

        if (wc->wr_id == RDS_IB_ACK_WR_ID) {
                if (time_after(jiffies, ic->i_ack_queued + HZ / 2))
                        rds_ib_stats_inc(s_ib_tx_stalled);
                rds_ib_ack_send_complete(ic);
                return;
        }

        oldest = rds_ib_ring_oldest(&ic->i_send_ring);

        completed = rds_ib_ring_completed(&ic->i_send_ring, wc->wr_id, oldest);

        for (i = 0; i < completed; i++) {
                send = &ic->i_sends[oldest];
                if (send->s_wr.send_flags & IB_SEND_SIGNALED)
                        nr_sig++;

                rm = rds_ib_send_unmap_op(ic, send, wc->status);

                if (time_after(jiffies, send->s_queued + HZ / 2))
                        rds_ib_stats_inc(s_ib_tx_stalled);

                if (send->s_op) {
                        if (send->s_op == rm->m_final_op) {
                                rds_message_unmapped(rm);
                        }
                        rds_message_put(rm);
                        send->s_op = NULL;
                }

                oldest = (oldest + 1) % ic->i_send_ring.w_nr;
        }

        rds_ib_ring_free(&ic->i_send_ring, completed);
        rds_ib_sub_signaled(ic, nr_sig);
        nr_sig = 0;

        if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
            test_bit(0, &conn->c_map_queued))
                queue_delayed_work(rds_wq, &conn->c_send_w, 0);

        if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
                rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
                                  &conn->c_laddr, &conn->c_faddr,
                                  conn->c_tos, wc->status,
                                  ib_wc_status_msg(wc->status));
        }
}

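/*
 * Credit-based flow control.  ic->i_credits packs two counters into one
 * atomic: the send credits we may still spend on WRs, and the posted
 * credits (freshly posted receive buffers) we owe the peer as a credit
 * update in an outgoing header.  Grab up to @wanted send credits and
 * decide how many posted credits to advertise; when no posted credits
 * are available we hold back one send credit so that a credit update can
 * still be carried to the peer later.
 */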
int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
                             u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
{
        unsigned int avail, posted, got = 0, advertise;
        long oldval, newval;

        *adv_credits = 0;
        if (!ic->i_flowctl)
                return wanted;

try_again:
        advertise = 0;
        oldval = newval = atomic_read(&ic->i_credits);
        posted = IB_GET_POST_CREDITS(oldval);
        avail = IB_GET_SEND_CREDITS(oldval);

        rdsdebug("wanted=%u credits=%u posted=%u\n",
                 wanted, avail, posted);

        if (avail && !posted)
                avail--;

        if (avail < wanted) {
                struct rds_connection *conn = ic->i_cm_id->context;

                set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
                got = avail;
        } else {
                got = wanted;
        }
        newval -= IB_SET_SEND_CREDITS(got);

        if (posted && (got || need_posted)) {
                advertise = min_t(unsigned int, posted, max_posted);
                newval -= IB_SET_POST_CREDITS(advertise);
        }

        if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
                goto try_again;

        *adv_credits = advertise;
        return got;
}

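/*
 * Absorb a credit update carried in the peer's RDS header and kick the
 * send worker if we had previously stalled for lack of credits.
 */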
void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
        struct rds_ib_connection *ic = conn->c_transport_data;

        if (credits == 0)
                return;

        rdsdebug("credits=%u current=%u%s\n",
                 credits,
                 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
                 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");

        atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
        if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
                queue_delayed_work(rds_wq, &conn->c_send_w, 0);

        WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);

        rds_ib_stats_inc(s_ib_rx_credit_updates);
}

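/*
 * Record newly posted receive buffers as posted credits.  Once enough
 * have accumulated, request an ACK so the credit update reaches the peer
 * even if no data traffic is flowing in this direction.
 */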
void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
        struct rds_ib_connection *ic = conn->c_transport_data;

        if (posted == 0)
                return;

        atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);

        if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
                set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}

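/*
 * Only request a signaled completion every rds_ib_sysctl_max_unsig_wrs
 * work requests (or when the caller insists), so the send CQ is not
 * flooded with one event per WR.  Returns 1 if this WR was signaled.
 */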
static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
                                             struct rds_ib_send_work *send,
                                             bool notify)
{
        if (ic->i_unsignaled_wrs-- == 0 || notify) {
                ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
                send->s_wr.send_flags |= IB_SEND_SIGNALED;
                return 1;
        }
        return 0;
}

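/*
 * Transmit (part of) an RDS message over the IB connection.  @hdr_off,
 * @sg and @off say how far into the message earlier calls already got;
 * the data payload is mapped and the header finalized only on the first
 * call for a message.  Returns the number of bytes queued (header bytes
 * included on the first fragment) or a negative errno, e.g. -ENOMEM when
 * the send ring or the peer's credits are exhausted.
 */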
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                unsigned int hdr_off, unsigned int sg, unsigned int off)
{
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct ib_device *dev = ic->i_cm_id->device;
        struct rds_ib_send_work *send = NULL;
        struct rds_ib_send_work *first;
        struct rds_ib_send_work *prev;
        const struct ib_send_wr *failed_wr;
        struct scatterlist *scat;
        u32 pos;
        u32 i;
        u32 work_alloc;
        u32 credit_alloc = 0;
        u32 posted;
        u32 adv_credits = 0;
        int send_flags = 0;
        int bytes_sent = 0;
        int ret;
        int flow_controlled = 0;
        int nr_sig = 0;

        BUG_ON(off % RDS_FRAG_SIZE);
        BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));

        if (conn->c_loopback
            && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
                rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
                scat = &rm->data.op_sg[sg];
                ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length);
                return sizeof(struct rds_header) + ret;
        }

        if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
                i = 1;
        else
                i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);

        work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
        if (work_alloc == 0) {
                set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
                rds_ib_stats_inc(s_ib_tx_ring_full);
                ret = -ENOMEM;
                goto out;
        }

        if (ic->i_flowctl) {
                credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
                adv_credits += posted;
                if (credit_alloc < work_alloc) {
                        rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
                        work_alloc = credit_alloc;
                        flow_controlled = 1;
                }
                if (work_alloc == 0) {
                        set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
                        rds_ib_stats_inc(s_ib_tx_throttle);
                        ret = -ENOMEM;
                        goto out;
                }
        }

        if (!ic->i_data_op) {
                if (rm->data.op_nents) {
                        rm->data.op_count = ib_dma_map_sg(dev,
                                                          rm->data.op_sg,
                                                          rm->data.op_nents,
                                                          DMA_TO_DEVICE);
                        rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
                        if (rm->data.op_count == 0) {
                                rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
                                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                                ret = -ENOMEM;
                                goto out;
                        }
                } else {
                        rm->data.op_count = 0;
                }

                rds_message_addref(rm);
                rm->data.op_dmasg = 0;
                rm->data.op_dmaoff = 0;
                ic->i_data_op = &rm->data;

                if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
                        rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
                if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
                        rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;

                if (rm->rdma.op_active) {
                        struct rds_ext_header_rdma ext_hdr;

                        ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
                        rds_message_add_extension(&rm->m_inc.i_hdr,
                                                  RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
                }
                if (rm->m_rdma_cookie) {
                        rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
                                                            rds_rdma_cookie_key(rm->m_rdma_cookie),
                                                            rds_rdma_cookie_offset(rm->m_rdma_cookie));
                }

                rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic));
                rds_message_make_checksum(&rm->m_inc.i_hdr);

                if (ic->i_flowctl) {
                        rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
                        adv_credits += posted;
                        BUG_ON(adv_credits > 255);
                }
        }

        if (rm->rdma.op_active && rm->rdma.op_fence)
                send_flags = IB_SEND_FENCE;

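        /*
         * Build one work request per fragment: sge[0] always carries the
         * per-slot RDS header, sge[1] carries up to RDS_FRAG_SIZE bytes of
         * payload.  The WRs are chained together and posted as one list.
         */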
        send = &ic->i_sends[pos];
        first = send;
        prev = NULL;
        scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
        i = 0;
        do {
                unsigned int len = 0;

                send->s_wr.send_flags = send_flags;
                send->s_wr.opcode = IB_WR_SEND;
                send->s_wr.num_sge = 1;
                send->s_wr.next = NULL;
                send->s_queued = jiffies;
                send->s_op = NULL;

                send->s_sge[0].addr = ic->i_send_hdrs_dma
                        + (pos * sizeof(struct rds_header));
                send->s_sge[0].length = sizeof(struct rds_header);

                memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));

                if (i < work_alloc
                    && scat != &rm->data.op_sg[rm->data.op_count]) {
                        len = min(RDS_FRAG_SIZE,
                                  sg_dma_len(scat) - rm->data.op_dmaoff);
                        send->s_wr.num_sge = 2;

                        send->s_sge[1].addr = sg_dma_address(scat);
                        send->s_sge[1].addr += rm->data.op_dmaoff;
                        send->s_sge[1].length = len;

                        bytes_sent += len;
                        rm->data.op_dmaoff += len;
                        if (rm->data.op_dmaoff == sg_dma_len(scat)) {
                                scat++;
                                rm->data.op_dmasg++;
                                rm->data.op_dmaoff = 0;
                        }
                }

                rds_ib_set_wr_signal_state(ic, send, false);

                if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
                        rds_ib_set_wr_signal_state(ic, send, true);
                        send->s_wr.send_flags |= IB_SEND_SOLICITED;
                }

                if (send->s_wr.send_flags & IB_SEND_SIGNALED)
                        nr_sig++;

                rdsdebug("send %p wr %p num_sge %u next %p\n", send,
                         &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

                if (ic->i_flowctl && adv_credits) {
                        struct rds_header *hdr = &ic->i_send_hdrs[pos];

                        hdr->h_credit = adv_credits;
                        rds_message_make_checksum(hdr);
                        adv_credits = 0;
                        rds_ib_stats_inc(s_ib_tx_credit_updates);
                }

                if (prev)
                        prev->s_wr.next = &send->s_wr;
                prev = send;

                pos = (pos + 1) % ic->i_send_ring.w_nr;
                send = &ic->i_sends[pos];
                i++;

        } while (i < work_alloc
                 && scat != &rm->data.op_sg[rm->data.op_count]);

        if (hdr_off == 0)
                bytes_sent += sizeof(struct rds_header);

        if (scat == &rm->data.op_sg[rm->data.op_count]) {
                prev->s_op = ic->i_data_op;
                prev->s_wr.send_flags |= IB_SEND_SOLICITED;
                if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
                        nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
                ic->i_data_op = NULL;
        }

        if (i < work_alloc) {
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
                work_alloc = i;
        }
        if (ic->i_flowctl && i < credit_alloc)
                rds_ib_send_add_credits(conn, credit_alloc - i);

        if (nr_sig)
                atomic_add(nr_sig, &ic->i_signaled_sends);

        failed_wr = &first->s_wr;
        ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
        rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
                 first, &first->s_wr, ret, failed_wr);
        BUG_ON(failed_wr != &first->s_wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IB: ib_post_send to %pI6c "
                       "returned %d\n", &conn->c_faddr, ret);
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_sub_signaled(ic, nr_sig);
                if (prev->s_op) {
                        ic->i_data_op = prev->s_op;
                        prev->s_op = NULL;
                }

                rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
                goto out;
        }

        ret = bytes_sent;
out:
        BUG_ON(adv_credits);
        return ret;
}

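/*
 * Issue a single masked atomic work request (compare-and-swap or
 * fetch-and-add) for an RDS atomic op and map the buffer that will
 * receive the old value from the remote address.
 */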
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
{
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_ib_send_work *send = NULL;
        const struct ib_send_wr *failed_wr;
        u32 pos;
        u32 work_alloc;
        int ret;
        int nr_sig = 0;

        work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
        if (work_alloc != 1) {
                rds_ib_stats_inc(s_ib_tx_ring_full);
                ret = -ENOMEM;
                goto out;
        }

        send = &ic->i_sends[pos];
        send->s_queued = jiffies;

        if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
                send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
                send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
                send->s_atomic_wr.swap = op->op_m_cswp.swap;
                send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
                send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
        } else {
                send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
                send->s_atomic_wr.compare_add = op->op_m_fadd.add;
                send->s_atomic_wr.swap = 0;
                send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
                send->s_atomic_wr.swap_mask = 0;
        }
        send->s_wr.send_flags = 0;
        nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
        send->s_atomic_wr.wr.num_sge = 1;
        send->s_atomic_wr.wr.next = NULL;
        send->s_atomic_wr.remote_addr = op->op_remote_addr;
        send->s_atomic_wr.rkey = op->op_rkey;
        send->s_op = op;
        rds_message_addref(container_of(send->s_op, struct rds_message, atomic));

        ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
        rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
        if (ret != 1) {
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
                ret = -ENOMEM;
                goto out;
        }

        send->s_sge[0].addr = sg_dma_address(op->op_sg);
        send->s_sge[0].length = sg_dma_len(op->op_sg);
        send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;

        rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
                 send->s_sge[0].addr, send->s_sge[0].length);

        if (nr_sig)
                atomic_add(nr_sig, &ic->i_signaled_sends);

        failed_wr = &send->s_atomic_wr.wr;
        ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
        rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
                 send, &send->s_atomic_wr, ret, failed_wr);
        BUG_ON(failed_wr != &send->s_atomic_wr.wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI6c "
                       "returned %d\n", &conn->c_faddr, ret);
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_sub_signaled(ic, nr_sig);
                goto out;
        }

        if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
                printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
                BUG_ON(failed_wr != &send->s_atomic_wr.wr);
        }

out:
        return ret;
}

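/*
 * Post the work requests for an RDMA READ or WRITE.  The op is split
 * across as many WRs as its scatterlist requires (at most max_sge entries
 * per WR), chained together and posted in one call.
 */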
int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_ib_send_work *send = NULL;
        struct rds_ib_send_work *first;
        struct rds_ib_send_work *prev;
        const struct ib_send_wr *failed_wr;
        struct scatterlist *scat;
        unsigned long len;
        u64 remote_addr = op->op_remote_addr;
        u32 max_sge = ic->rds_ibdev->max_sge;
        u32 pos;
        u32 work_alloc;
        u32 i;
        u32 j;
        int sent;
        int ret;
        int num_sge;
        int nr_sig = 0;

        if (!op->op_mapped) {
                op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
                                             op->op_sg, op->op_nents, (op->op_write) ?
                                             DMA_TO_DEVICE : DMA_FROM_DEVICE);
                rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
                if (op->op_count == 0) {
                        rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
                        ret = -ENOMEM;
                        goto out;
                }

                op->op_mapped = 1;
        }

        i = DIV_ROUND_UP(op->op_count, max_sge);

        work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
        if (work_alloc != i) {
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_stats_inc(s_ib_tx_ring_full);
                ret = -ENOMEM;
                goto out;
        }

        send = &ic->i_sends[pos];
        first = send;
        prev = NULL;
        scat = &op->op_sg[0];
        sent = 0;
        num_sge = op->op_count;

        for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
                send->s_wr.send_flags = 0;
                send->s_queued = jiffies;
                send->s_op = NULL;

                if (!op->op_notify)
                        nr_sig += rds_ib_set_wr_signal_state(ic, send,
                                                             op->op_notify);

                send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
                send->s_rdma_wr.remote_addr = remote_addr;
                send->s_rdma_wr.rkey = op->op_rkey;

                if (num_sge > max_sge) {
                        send->s_rdma_wr.wr.num_sge = max_sge;
                        num_sge -= max_sge;
                } else {
                        send->s_rdma_wr.wr.num_sge = num_sge;
                }

                send->s_rdma_wr.wr.next = NULL;

                if (prev)
                        prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;

                for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
                     scat != &op->op_sg[op->op_count]; j++) {
                        len = sg_dma_len(scat);
                        send->s_sge[j].addr = sg_dma_address(scat);
                        send->s_sge[j].length = len;
                        send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;

                        sent += len;
                        rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);

                        remote_addr += len;
                        scat++;
                }

                rdsdebug("send %p wr %p num_sge %u next %p\n", send,
                         &send->s_rdma_wr.wr,
                         send->s_rdma_wr.wr.num_sge,
                         send->s_rdma_wr.wr.next);

                prev = send;
                if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
                        send = ic->i_sends;
        }

        if (scat == &op->op_sg[op->op_count]) {
                prev->s_op = op;
                rds_message_addref(container_of(op, struct rds_message, rdma));
        }

        if (i < work_alloc) {
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
                work_alloc = i;
        }

        if (nr_sig)
                atomic_add(nr_sig, &ic->i_signaled_sends);

        failed_wr = &first->s_rdma_wr.wr;
        ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
        rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
                 first, &first->s_rdma_wr.wr, ret, failed_wr);
        BUG_ON(failed_wr != &first->s_rdma_wr.wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI6c "
                       "returned %d\n", &conn->c_faddr, ret);
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_sub_signaled(ic, nr_sig);
                goto out;
        }

        if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
                printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
                BUG_ON(failed_wr != &first->s_rdma_wr.wr);
        }

out:
        return ret;
}

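/*
 * Called by the core send path once it has finished a transmit pass;
 * use the opportunity to push out any ACK still pending on this
 * connection.
 */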
void rds_ib_xmit_path_complete(struct rds_conn_path *cp)
{
        struct rds_connection *conn = cp->cp_conn;
        struct rds_ib_connection *ic = conn->c_transport_data;

        rds_ib_attempt_ack(ic);
}