This source file includes the following definitions:
- svc_rdma_next_recv_ctxt
- svc_rdma_recv_ctxt_alloc
- svc_rdma_recv_ctxt_destroy
- svc_rdma_recv_ctxts_destroy
- svc_rdma_recv_ctxt_get
- svc_rdma_recv_ctxt_put
- svc_rdma_release_rqst
- __svc_rdma_post_recv
- svc_rdma_post_recv
- svc_rdma_post_recvs
- svc_rdma_wc_receive
- svc_rdma_flush_recv_queues
- svc_rdma_build_arg_xdr
- xdr_check_read_list
- xdr_check_write_chunk
- xdr_check_write_list
- xdr_check_reply_chunk
- svc_rdma_get_inv_rkey
- svc_rdma_xdr_decode_req
- rdma_read_complete
- svc_rdma_send_error
- svc_rdma_is_backchannel_reply
- svc_rdma_recvfrom
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#define RPCDBG_FACILITY		RPCDBG_SVCXPRT

static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);

static inline struct svc_rdma_recv_ctxt *
svc_rdma_next_recv_ctxt(struct list_head *list)
{
	return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt,
					rc_list);
}

static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;
	dma_addr_t addr;
	void *buffer;

	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
	if (!ctxt)
		goto fail0;
	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
	if (!buffer)
		goto fail1;
	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
				 rdma->sc_max_req_size, DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
		goto fail2;

	ctxt->rc_recv_wr.next = NULL;
	ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
	ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
	ctxt->rc_recv_wr.num_sge = 1;
	ctxt->rc_cqe.done = svc_rdma_wc_receive;
	ctxt->rc_recv_sge.addr = addr;
	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
	ctxt->rc_recv_buf = buffer;
	ctxt->rc_temp = false;
	return ctxt;

fail2:
	kfree(buffer);
fail1:
	kfree(ctxt);
fail0:
	return NULL;
}

static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
				       struct svc_rdma_recv_ctxt *ctxt)
{
	ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
	kfree(ctxt->rc_recv_buf);
	kfree(ctxt);
}
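
/**
 * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
 * @rdma: svcxprt_rdma being torn down
 */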
void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;
	struct llist_node *node;

	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
	}
}

static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;
	struct llist_node *node;

	node = llist_del_first(&rdma->sc_recv_ctxts);
	if (!node)
		goto out_empty;
	ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);

out:
	ctxt->rc_page_count = 0;
	return ctxt;

out_empty:
	ctxt = svc_rdma_recv_ctxt_alloc(rdma);
	if (!ctxt)
		return NULL;
	goto out;
}
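
/**
 * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list
 * @rdma: controlling svcxprt_rdma
 * @ctxt: object to return to the free list
 */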
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
			    struct svc_rdma_recv_ctxt *ctxt)
{
	unsigned int i;

	for (i = 0; i < ctxt->rc_page_count; i++)
		put_page(ctxt->rc_pages[i]);

	if (!ctxt->rc_temp)
		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
	else
		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
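
/**
 * svc_rdma_release_rqst - Release transport-specific per-rqst resources
 * @rqstp: svc_rqst being released
 *
 * Ensure that the recv_ctxt is released whether or not a Reply
 * was sent. For example, the client could close the connection,
 * or svc_process could drop an RPC, before the Reply is sent.
 */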
void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
	struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	rqstp->rq_xprt_ctxt = NULL;
	if (ctxt)
		svc_rdma_recv_ctxt_put(rdma, ctxt);
}

static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
				struct svc_rdma_recv_ctxt *ctxt)
{
	int ret;

	svc_xprt_get(&rdma->sc_xprt);
	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
	if (ret)
		goto err_post;
	return 0;

err_post:
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	svc_xprt_put(&rdma->sc_xprt);
	return ret;
}

static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;

	ctxt = svc_rdma_recv_ctxt_get(rdma);
	if (!ctxt)
		return -ENOMEM;
	return __svc_rdma_post_recv(rdma, ctxt);
}
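
/**
 * svc_rdma_post_recvs - Post initial set of Recv WRs
 * @rdma: fresh svcxprt_rdma
 *
 * Returns true if successful, otherwise false.
 */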
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;
	unsigned int i;
	int ret;

	for (i = 0; i < rdma->sc_max_requests; i++) {
		ctxt = svc_rdma_recv_ctxt_get(rdma);
		if (!ctxt)
			return false;
		ctxt->rc_temp = true;
		ret = __svc_rdma_post_recv(rdma, ctxt);
		if (ret)
			return false;
	}
	return true;
}
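
/**
 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: Completion Queue context
 * @wc: Work Completion object
 */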
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *rdma = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_recv_ctxt *ctxt;

	trace_svcrdma_wc_receive(wc);

	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);

	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	if (svc_rdma_post_recv(rdma))
		goto post_err;

	/* All wc fields are now known to be valid */
	ctxt->rc_byte_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
				   ctxt->rc_recv_sge.addr,
				   wc->byte_len, DMA_FROM_DEVICE);

	spin_lock(&rdma->sc_rq_dto_lock);
	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
	/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
	spin_unlock(&rdma->sc_rq_dto_lock);
	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
		svc_xprt_enqueue(&rdma->sc_xprt);
	goto out;

flushed:
post_err:
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
	svc_xprt_enqueue(&rdma->sc_xprt);
out:
	svc_xprt_put(&rdma->sc_xprt);
}
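
/**
 * svc_rdma_flush_recv_queues - Drain pending Receive work
 * @rdma: svcxprt_rdma being shut down
 */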
void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;

	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
		list_del(&ctxt->rc_list);
		svc_rdma_recv_ctxt_put(rdma, ctxt);
	}
	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
		list_del(&ctxt->rc_list);
		svc_rdma_recv_ctxt_put(rdma, ctxt);
	}
}

static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
				   struct svc_rdma_recv_ctxt *ctxt)
{
	struct xdr_buf *arg = &rqstp->rq_arg;

	arg->head[0].iov_base = ctxt->rc_recv_buf;
	arg->head[0].iov_len = ctxt->rc_byte_len;
	arg->tail[0].iov_base = NULL;
	arg->tail[0].iov_len = 0;
	arg->page_len = 0;
	arg->page_base = 0;
	arg->buflen = ctxt->rc_byte_len;
	arg->len = ctxt->rc_byte_len;
}

/* This accommodates the largest possible Write chunk,
 * in one segment.
 */
#define MAX_BYTES_WRITE_SEG	((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))

/* This accommodates the largest possible Position-Zero
 * Read chunk or Reply chunk, in one segment.
 */
#define MAX_BYTES_SPECIAL_SEG	((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
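
/* Sanity check the Read list.
 *
 * Implementation limits:
 * - This implementation supports only one Read chunk.
 *
 * Sanity checks:
 * - Read list does not overflow buffer.
 * - Segment size limited by largest NFS data payload.
 *
 * The segment count is limited to how many segments can
 * fit in the transport header without overflowing the
 * buffer. That's about 40 Read segments for a 1KB inline
 * threshold.
 *
 * Returns pointer to the following Write list.
 */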
static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
{
	u32 position;
	bool first;

	first = true;
	while (*p++ != xdr_zero) {
		if (first) {
			position = be32_to_cpup(p++);
			first = false;
		} else if (be32_to_cpup(p++) != position) {
			return NULL;
		}
		p++;	/* handle */
		if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
			return NULL;
		p += 2;	/* offset */

		if (p > end)
			return NULL;
	}
	return p;
}
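
/* The segment count is limited to how many segments can
 * fit in the transport header without overflowing the
 * buffer. That's about 60 Write segments for a 1KB inline
 * threshold.
 */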
static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
				     u32 maxlen)
{
	u32 i, segcount;

	segcount = be32_to_cpup(p++);
	for (i = 0; i < segcount; i++) {
		p++;	/* handle */
		if (be32_to_cpup(p++) > maxlen)
			return NULL;
		p += 2;	/* offset */

		if (p > end)
			return NULL;
	}

	return p;
}
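
/* Sanity check the Write list.
 *
 * Implementation limits:
 * - This implementation supports only one Write chunk.
 *
 * Sanity checks:
 * - Write list does not overflow buffer.
 * - Segment size limited by largest NFS data payload.
 *
 * Returns pointer to the following Reply chunk.
 */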
static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
{
	u32 chcount;

	chcount = 0;
	while (*p++ != xdr_zero) {
		p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
		if (!p)
			return NULL;
		if (chcount++ > 1)
			return NULL;
	}
	return p;
}
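
/* Sanity check the Reply chunk.
 *
 * Sanity checks:
 * - Reply chunk does not overflow buffer.
 * - Segment size limited by largest NFS data payload.
 *
 * Returns pointer to the following RPC header.
 */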
static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
{
	if (*p++ != xdr_zero) {
		p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
		if (!p)
			return NULL;
	}
	return p;
}
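
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
 * Responder's choice: requester signals it can handle Send With
 * Invalidate, and responder chooses one R_key to invalidate.
 *
 * If there is exactly one distinct R_key in the received transport
 * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
 *
 * Perform this operation while the received transport header is
 * still in the CPU cache.
 */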
static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
				  struct svc_rdma_recv_ctxt *ctxt)
{
	__be32 inv_rkey, *p;
	u32 i, segcount;

	ctxt->rc_inv_rkey = 0;

	if (!rdma->sc_snd_w_inv)
		return;

	inv_rkey = xdr_zero;
	p = ctxt->rc_recv_buf;
	p += rpcrdma_fixed_maxsz;

	/* Read list */
	while (*p++ != xdr_zero) {
		p++;	/* position */
		if (inv_rkey == xdr_zero)
			inv_rkey = *p;
		else if (inv_rkey != *p)
			return;
		p += 4;
	}

	/* Write list */
	while (*p++ != xdr_zero) {
		segcount = be32_to_cpup(p++);
		for (i = 0; i < segcount; i++) {
			if (inv_rkey == xdr_zero)
				inv_rkey = *p;
			else if (inv_rkey != *p)
				return;
			p += 4;
		}
	}

	/* Reply chunk */
	if (*p++ != xdr_zero) {
		segcount = be32_to_cpup(p++);
		for (i = 0; i < segcount; i++) {
			if (inv_rkey == xdr_zero)
				inv_rkey = *p;
			else if (inv_rkey != *p)
				return;
			p += 4;
		}
	}

	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
}
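
/* On entry, xdr->head[0].iov_base points to first byte in the
 * RPC-over-RDMA header.
 *
 * On successful exit, head[0] points to first byte past the
 * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
 * The length of the RPC-over-RDMA header is returned.
 *
 * Assumptions:
 * - The transport header is entirely contained in the head iovec.
 */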
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
	__be32 *p, *end, *rdma_argp;
	unsigned int hdr_len;

	/* Verify that there's enough bytes for header + something */
	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
		goto out_short;

	rdma_argp = rq_arg->head[0].iov_base;
	if (*(rdma_argp + 1) != rpcrdma_version)
		goto out_version;

	switch (*(rdma_argp + 3)) {
	case rdma_msg:
		break;
	case rdma_nomsg:
		break;

	case rdma_done:
		goto out_drop;

	case rdma_error:
		goto out_drop;

	default:
		goto out_proc;
	}

	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
	p = xdr_check_read_list(rdma_argp + 4, end);
	if (!p)
		goto out_inval;
	p = xdr_check_write_list(p, end);
	if (!p)
		goto out_inval;
	p = xdr_check_reply_chunk(p, end);
	if (!p)
		goto out_inval;
	if (p > end)
		goto out_inval;

	rq_arg->head[0].iov_base = p;
	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
	rq_arg->head[0].iov_len -= hdr_len;
	rq_arg->len -= hdr_len;
	trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
	return hdr_len;

out_short:
	trace_svcrdma_decode_short(rq_arg->len);
	return -EINVAL;

out_version:
	trace_svcrdma_decode_badvers(rdma_argp);
	return -EPROTONOSUPPORT;

out_drop:
	trace_svcrdma_decode_drop(rdma_argp);
	return 0;

out_proc:
	trace_svcrdma_decode_badproc(rdma_argp);
	return -EINVAL;

out_inval:
	trace_svcrdma_decode_parse(rdma_argp);
	return -EINVAL;
}

static void rdma_read_complete(struct svc_rqst *rqstp,
			       struct svc_rdma_recv_ctxt *head)
{
	int page_no;

	/* Move Read chunk pages to rqstp.rq_pages, which now
	 * owns them.
	 */
	for (page_no = 0; page_no < head->rc_page_count; page_no++) {
		put_page(rqstp->rq_pages[page_no]);
		rqstp->rq_pages[page_no] = head->rc_pages[page_no];
	}
	head->rc_page_count = 0;

	/* Point rq_arg.pages past header */
	rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count];
	rqstp->rq_arg.page_len = head->rc_arg.page_len;

	/* rq_respages starts after the last arg page */
	rqstp->rq_respages = &rqstp->rq_pages[page_no];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* Rebuild rq_arg head and tail. */
	rqstp->rq_arg.head[0] = head->rc_arg.head[0];
	rqstp->rq_arg.tail[0] = head->rc_arg.tail[0];
	rqstp->rq_arg.len = head->rc_arg.len;
	rqstp->rq_arg.buflen = head->rc_arg.buflen;
}

static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
				__be32 *rdma_argp, int status)
{
	struct svc_rdma_send_ctxt *ctxt;
	unsigned int length;
	__be32 *p;
	int ret;

	ctxt = svc_rdma_send_ctxt_get(xprt);
	if (!ctxt)
		return;

	p = ctxt->sc_xprt_buf;
	*p++ = *rdma_argp;
	*p++ = *(rdma_argp + 1);
	*p++ = xprt->sc_fc_credits;
	*p++ = rdma_error;
	switch (status) {
	case -EPROTONOSUPPORT:
		*p++ = err_vers;
		*p++ = rpcrdma_version;
		*p++ = rpcrdma_version;
		trace_svcrdma_err_vers(*rdma_argp);
		break;
	default:
		*p++ = err_chunk;
		trace_svcrdma_err_chunk(*rdma_argp);
	}
	length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf;
	svc_rdma_sync_reply_hdr(xprt, ctxt, length);

	ctxt->sc_send_wr.opcode = IB_WR_SEND;
	ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
	if (ret)
		svc_rdma_send_ctxt_put(xprt, ctxt);
}
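
/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
 * the RPC/RDMA header small and fixed in size, so it is
 * straightforward to check the RPC header's direction field.
 */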
static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
					  __be32 *rdma_resp)
{
	__be32 *p;

	if (!xprt->xpt_bc_xprt)
		return false;

	p = rdma_resp + 3;
	if (*p++ != rdma_msg)
		return false;

	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;

	/* XID sanity */
	if (*p++ != *rdma_resp)
		return false;
	/* call direction */
	if (*p == cpu_to_be32(RPC_CALL))
		return false;

	return true;
}
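
/**
 * svc_rdma_recvfrom - Receive an RPC call
 * @rqstp: request structure into which to receive an RPC Call
 *
 * Returns:
 *	The positive number of bytes in the RPC Call message,
 *	%0 if there were no Calls ready to return,
 *	%-EINVAL if the Read chunk data is too large,
 *	%-ENOMEM if rdma_rw context pool was exhausted,
 *	%-ENOTCONN if posting failed (connection is lost),
 *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
 *
 * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only
 * when there are no remaining ctxt's to process.
 *
 * The next ctxt is removed from the "receive" lists.
 *
 * - If the ctxt completes a Read, then finish assembling the Call
 *   message and return the number of bytes in the message.
 *
 * - If the ctxt completes a Receive, then construct the Call
 *   message from the contents of the Receive buffer.
 *
 *   - If there are no Read chunks in this message, then finish
 *     assembling the Call message and return the number of bytes
 *     in the message.
 *
 *   - If there are Read chunks in this message, post Read WRs to
 *     pull that payload and return 0.
 */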
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma_xprt =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct svc_rdma_recv_ctxt *ctxt;
	__be32 *p;
	int ret;

	rqstp->rq_xprt_ctxt = NULL;

	spin_lock(&rdma_xprt->sc_rq_dto_lock);
	ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
	if (ctxt) {
		list_del(&ctxt->rc_list);
		spin_unlock(&rdma_xprt->sc_rq_dto_lock);
		rdma_read_complete(rqstp, ctxt);
		goto complete;
	}
	ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
	if (!ctxt) {
		/* No new incoming requests, terminate the loop */
		clear_bit(XPT_DATA, &xprt->xpt_flags);
		spin_unlock(&rdma_xprt->sc_rq_dto_lock);
		return 0;
	}
	list_del(&ctxt->rc_list);
	spin_unlock(&rdma_xprt->sc_rq_dto_lock);

	atomic_inc(&rdma_stat_recv);

	svc_rdma_build_arg_xdr(rqstp, ctxt);

	/* Prevent svc_xprt_release from releasing pages in rq_pages
	 * when returning 0 or an error.
	 */
	rqstp->rq_respages = rqstp->rq_pages;
	rqstp->rq_next_page = rqstp->rq_respages;

	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
	if (ret < 0)
		goto out_err;
	if (ret == 0)
		goto out_drop;
	rqstp->rq_xprt_hlen = ret;

	if (svc_rdma_is_backchannel_reply(xprt, p)) {
		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
					       &rqstp->rq_arg);
		svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
		return ret;
	}
	svc_rdma_get_inv_rkey(rdma_xprt, ctxt);

	p += rpcrdma_fixed_maxsz;
	if (*p != xdr_zero)
		goto out_readchunk;

complete:
	rqstp->rq_xprt_ctxt = ctxt;
	rqstp->rq_prot = IPPROTO_MAX;
	svc_xprt_copy_addrs(rqstp, xprt);
	return rqstp->rq_arg.len;

out_readchunk:
	ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
	if (ret < 0)
		goto out_postfail;
	return 0;

out_err:
	svc_rdma_send_error(rdma_xprt, p, ret);
	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
	return 0;

out_postfail:
	if (ret == -EINVAL)
		svc_rdma_send_error(rdma_xprt, p, ret);
	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
	return ret;

out_drop:
	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
	return 0;
}