1 /*
2  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the BSD-type
8  * license below:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  *      Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *
17  *      Redistributions in binary form must reproduce the above
18  *      copyright notice, this list of conditions and the following
19  *      disclaimer in the documentation and/or other materials provided
20  *      with the distribution.
21  *
22  *      Neither the name of the Network Appliance, Inc. nor the names of
23  *      its contributors may be used to endorse or promote products
24  *      derived from this software without specific prior written
25  *      permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  *
39  * Author: Tom Tucker <tom@opengridcomputing.com>
40  */
41 
42 #include <linux/sunrpc/xdr.h>
43 #include <linux/sunrpc/debug.h>
44 #include <asm/unaligned.h>
45 #include <linux/sunrpc/rpc_rdma.h>
46 #include <linux/sunrpc/svc_rdma.h>
47 
48 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
49 
50 /*
51  * Decodes a read chunk list. The expected format is as follows:
52  *    discrim  : xdr_one
53  *    position : u32 offset into XDR stream
54  *    handle   : u32 RKEY
55  *    . . .
56  *  end-of-list: xdr_zero
57  */
/*
 * Walk the read chunk list that starts at @va, never reading at or
 * beyond @vaend.
 *
 * Returns the address of the rc_position slot of the terminating entry
 * (the word that follows the xdr_zero discriminator), or NULL when the
 * list is not terminated inside the buffer.
 */
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
	struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;

	for (;;) {
		/*
		 * The discriminator word has to lie inside the buffer
		 * before it may be inspected; otherwise a list that fills
		 * the buffer exactly would be read one entry too far.
		 */
		if ((unsigned long)&ch->rc_discrim + sizeof(ch->rc_discrim) >
		    (unsigned long)vaend) {
			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
			return NULL;
		}
		if (ch->rc_discrim == xdr_zero)
			break;

		/* A present chunk must fit in the buffer as a whole */
		if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
		    (unsigned long)vaend) {
			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
			return NULL;
		}
		ch++;
	}
	return (u32 *)&ch->rc_position;
}
72 
73 /*
74  * Decodes a write chunk list. The expected format is as follows:
75  *    discrim  : xdr_one
76  *    nchunks  : <count>
77  *       handle   : u32 RKEY              ---+
78  *       length   : u32 <len of segment>     |
79  *       offset   : remote va                + <count>
80  *       . . .                               |
81  *                                        ---+
82  */
/*
 * Validate the write chunk list that starts at @va against the end of
 * the received data, @vaend.
 *
 * Returns the address of the word following the list: just past the
 * discriminator when no list is present, otherwise just past the chunk
 * array and its terminator word. Returns NULL when any part of the list
 * would lie beyond @vaend or the chunk count is not representable.
 */
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
	unsigned long start, end;
	int nchunks;
	u32 *next;
	struct rpcrdma_write_array *ary =
		(struct rpcrdma_write_array *)va;

	end = (unsigned long)vaend;

	/* The discriminator must be inside the buffer before it is read */
	if ((unsigned long)&ary->wc_discrim + sizeof(ary->wc_discrim) > end) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}

	/* Check for not write-array */
	if (ary->wc_discrim == xdr_zero)
		return (u32 *)&ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > end) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}
	nchunks = ntohl(ary->wc_nchunks);

	/*
	 * Reject negative counts, counts whose byte size would wrap the
	 * address computation, and arrays that run past the buffer.
	 */
	start = (unsigned long)&ary->wc_array[0];
	if (nchunks < 0 ||
	    nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
	    (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, nchunks, vaend);
		return NULL;
	}

	/*
	 * rs_length is the 2nd 4B field in wc_target and taking its
	 * address skips the list terminator. The terminator word being
	 * skipped must itself still be inside the buffer.
	 */
	next = (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
	if ((unsigned long)next > end) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, nchunks, vaend);
		return NULL;
	}
	return next;
}
117 
/*
 * Validate the reply chunk array that starts at @va against the end of
 * the received data, @vaend.
 *
 * Returns the address of the word following the array: just past the
 * discriminator when no array is present, otherwise just past the last
 * chunk. Returns NULL when any part of the array would lie beyond
 * @vaend or the chunk count is not representable.
 */
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
	unsigned long start, end;
	int nchunks;
	struct rpcrdma_write_array *ary =
		(struct rpcrdma_write_array *)va;

	end = (unsigned long)vaend;

	/* The discriminator must be inside the buffer before it is read */
	if ((unsigned long)&ary->wc_discrim + sizeof(ary->wc_discrim) > end) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}

	/* Check for no reply-array */
	if (ary->wc_discrim == xdr_zero)
		return (u32 *)&ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > end) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}
	nchunks = ntohl(ary->wc_nchunks);

	/*
	 * Reject negative counts, counts whose byte size would wrap the
	 * address computation, and arrays that run past the buffer.
	 */
	start = (unsigned long)&ary->wc_array[0];
	if (nchunks < 0 ||
	    nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
	    (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, nchunks, vaend);
		return NULL;
	}
	return (u32 *)&ary->wc_array[nchunks];
}
147 
svc_rdma_xdr_decode_req(struct rpcrdma_msg ** rdma_req,struct svc_rqst * rqstp)148 int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
149 			    struct svc_rqst *rqstp)
150 {
151 	struct rpcrdma_msg *rmsgp = NULL;
152 	u32 *va;
153 	u32 *vaend;
154 	u32 hdr_len;
155 
156 	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
157 
158 	/* Verify that there's enough bytes for header + something */
159 	if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
160 		dprintk("svcrdma: header too short = %d\n",
161 			rqstp->rq_arg.len);
162 		return -EINVAL;
163 	}
164 
165 	/* Decode the header */
166 	rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
167 	rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
168 	rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
169 	rmsgp->rm_type = ntohl(rmsgp->rm_type);
170 
171 	if (rmsgp->rm_vers != RPCRDMA_VERSION)
172 		return -ENOSYS;
173 
174 	/* Pull in the extra for the padded case and bump our pointer */
175 	if (rmsgp->rm_type == RDMA_MSGP) {
176 		int hdrlen;
177 		rmsgp->rm_body.rm_padded.rm_align =
178 			ntohl(rmsgp->rm_body.rm_padded.rm_align);
179 		rmsgp->rm_body.rm_padded.rm_thresh =
180 			ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
181 
182 		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
183 		rqstp->rq_arg.head[0].iov_base = va;
184 		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
185 		rqstp->rq_arg.head[0].iov_len -= hdrlen;
186 		if (hdrlen > rqstp->rq_arg.len)
187 			return -EINVAL;
188 		return hdrlen;
189 	}
190 
191 	/* The chunk list may contain either a read chunk list or a write
192 	 * chunk list and a reply chunk list.
193 	 */
194 	va = &rmsgp->rm_body.rm_chunks[0];
195 	vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
196 	va = decode_read_list(va, vaend);
197 	if (!va)
198 		return -EINVAL;
199 	va = decode_write_list(va, vaend);
200 	if (!va)
201 		return -EINVAL;
202 	va = decode_reply_array(va, vaend);
203 	if (!va)
204 		return -EINVAL;
205 
206 	rqstp->rq_arg.head[0].iov_base = va;
207 	hdr_len = (unsigned long)va - (unsigned long)rmsgp;
208 	rqstp->rq_arg.head[0].iov_len -= hdr_len;
209 
210 	*rdma_req = rmsgp;
211 	return hdr_len;
212 }
213 
svc_rdma_xdr_decode_deferred_req(struct svc_rqst * rqstp)214 int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
215 {
216 	struct rpcrdma_msg *rmsgp = NULL;
217 	struct rpcrdma_read_chunk *ch;
218 	struct rpcrdma_write_array *ary;
219 	u32 *va;
220 	u32 hdrlen;
221 
222 	dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
223 		rqstp);
224 	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
225 
226 	/* Pull in the extra for the padded case and bump our pointer */
227 	if (rmsgp->rm_type == RDMA_MSGP) {
228 		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
229 		rqstp->rq_arg.head[0].iov_base = va;
230 		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
231 		rqstp->rq_arg.head[0].iov_len -= hdrlen;
232 		return hdrlen;
233 	}
234 
235 	/*
236 	 * Skip all chunks to find RPC msg. These were previously processed
237 	 */
238 	va = &rmsgp->rm_body.rm_chunks[0];
239 
240 	/* Skip read-list */
241 	for (ch = (struct rpcrdma_read_chunk *)va;
242 	     ch->rc_discrim != xdr_zero; ch++);
243 	va = (u32 *)&ch->rc_position;
244 
245 	/* Skip write-list */
246 	ary = (struct rpcrdma_write_array *)va;
247 	if (ary->wc_discrim == xdr_zero)
248 		va = (u32 *)&ary->wc_nchunks;
249 	else
250 		/*
251 		 * rs_length is the 2nd 4B field in wc_target and taking its
252 		 * address skips the list terminator
253 		 */
254 		va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
255 
256 	/* Skip reply-array */
257 	ary = (struct rpcrdma_write_array *)va;
258 	if (ary->wc_discrim == xdr_zero)
259 		va = (u32 *)&ary->wc_nchunks;
260 	else
261 		va = (u32 *)&ary->wc_array[ary->wc_nchunks];
262 
263 	rqstp->rq_arg.head[0].iov_base = va;
264 	hdrlen = (unsigned long)va - (unsigned long)rmsgp;
265 	rqstp->rq_arg.head[0].iov_len -= hdrlen;
266 
267 	return hdrlen;
268 }
269 
/*
 * Write an RDMA_ERROR header into the buffer at @va.
 *
 * The xid and version are taken from @rmsgp and the credit value from
 * xprt->sc_max_requests; every word is stored in network byte order.
 * For ERR_VERS two extra words, both RPCRDMA_VERSION, are appended.
 *
 * Returns the number of bytes written.
 */
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
			      struct rpcrdma_msg *rmsgp,
			      enum rpcrdma_errcode err, u32 *va)
{
	int nwords = 0;

	va[nwords++] = htonl(rmsgp->rm_xid);
	va[nwords++] = htonl(rmsgp->rm_vers);
	va[nwords++] = htonl(xprt->sc_max_requests);
	va[nwords++] = htonl(RDMA_ERROR);
	va[nwords++] = htonl(err);

	if (err == ERR_VERS) {
		/* Two consecutive words carrying the same version value */
		va[nwords++] = htonl(RPCRDMA_VERSION);
		va[nwords++] = htonl(RPCRDMA_VERSION);
	}

	return (int)(nwords * sizeof(*va));
}
288 
svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg * rmsgp)289 int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
290 {
291 	struct rpcrdma_write_array *wr_ary;
292 
293 	/* There is no read-list in a reply */
294 
295 	/* skip write list */
296 	wr_ary = (struct rpcrdma_write_array *)
297 		&rmsgp->rm_body.rm_chunks[1];
298 	if (wr_ary->wc_discrim)
299 		wr_ary = (struct rpcrdma_write_array *)
300 			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
301 			wc_target.rs_length;
302 	else
303 		wr_ary = (struct rpcrdma_write_array *)
304 			&wr_ary->wc_nchunks;
305 
306 	/* skip reply array */
307 	if (wr_ary->wc_discrim)
308 		wr_ary = (struct rpcrdma_write_array *)
309 			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
310 	else
311 		wr_ary = (struct rpcrdma_write_array *)
312 			&wr_ary->wc_nchunks;
313 
314 	return (unsigned long) wr_ary - (unsigned long) rmsgp;
315 }
316 
svc_rdma_xdr_encode_write_list(struct rpcrdma_msg * rmsgp,int chunks)317 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
318 {
319 	struct rpcrdma_write_array *ary;
320 
321 	/* no read-list */
322 	rmsgp->rm_body.rm_chunks[0] = xdr_zero;
323 
324 	/* write-array discrim */
325 	ary = (struct rpcrdma_write_array *)
326 		&rmsgp->rm_body.rm_chunks[1];
327 	ary->wc_discrim = xdr_one;
328 	ary->wc_nchunks = htonl(chunks);
329 
330 	/* write-list terminator */
331 	ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
332 
333 	/* reply-array discriminator */
334 	ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
335 }
336 
svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array * ary,int chunks)337 void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
338 				 int chunks)
339 {
340 	ary->wc_discrim = xdr_one;
341 	ary->wc_nchunks = htonl(chunks);
342 }
343 
/*
 * Fill in entry @chunk_no of the chunk array in @ary.
 *
 * @rs_handle and @rs_offset are stored unchanged (they are typed as
 * big-endian values); @write_len is converted to network byte order
 * before being stored.
 */
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
				     int chunk_no,
				     __be32 rs_handle,
				     __be64 rs_offset,
				     u32 write_len)
{
	struct rpcrdma_segment *target;

	target = &ary->wc_array[chunk_no].wc_target;

	target->rs_handle = rs_handle;
	target->rs_offset = rs_offset;
	target->rs_length = htonl(write_len);
}
355 
svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma * xprt,struct rpcrdma_msg * rdma_argp,struct rpcrdma_msg * rdma_resp,enum rpcrdma_proc rdma_type)356 void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
357 				  struct rpcrdma_msg *rdma_argp,
358 				  struct rpcrdma_msg *rdma_resp,
359 				  enum rpcrdma_proc rdma_type)
360 {
361 	rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
362 	rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
363 	rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
364 	rdma_resp->rm_type = htonl(rdma_type);
365 
366 	/* Encode <nul> chunks lists */
367 	rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
368 	rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
369 	rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
370 }
371