1/* 2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the BSD-type 8 * license below: 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 17 * Redistributions in binary form must reproduce the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer in the documentation and/or other materials provided 20 * with the distribution. 21 * 22 * Neither the name of the Network Appliance, Inc. nor the names of 23 * its contributors may be used to endorse or promote products 24 * derived from this software without specific prior written 25 * permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 * 39 * Author: Tom Tucker <tom@opengridcomputing.com> 40 */ 41 42#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/debug.h> 44#include <asm/unaligned.h> 45#include <linux/sunrpc/rpc_rdma.h> 46#include <linux/sunrpc/svc_rdma.h> 47 48#define RPCDBG_FACILITY RPCDBG_SVCXPRT 49 50/* 51 * Decodes a read chunk list. The expected format is as follows: 52 * descrim : xdr_one 53 * position : u32 offset into XDR stream 54 * handle : u32 RKEY 55 * . . . 56 * end-of-list: xdr_zero 57 */ 58static u32 *decode_read_list(u32 *va, u32 *vaend) 59{ 60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; 61 62 while (ch->rc_discrim != xdr_zero) { 63 if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > 64 (unsigned long)vaend) { 65 dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); 66 return NULL; 67 } 68 ch++; 69 } 70 return (u32 *)&ch->rc_position; 71} 72 73/* 74 * Decodes a write chunk list. The expected format is as follows: 75 * descrim : xdr_one 76 * nchunks : <count> 77 * handle : u32 RKEY ---+ 78 * length : u32 <len of segment> | 79 * offset : remove va + <count> 80 * . . . | 81 * ---+ 82 */ 83static u32 *decode_write_list(u32 *va, u32 *vaend) 84{ 85 unsigned long start, end; 86 int nchunks; 87 88 struct rpcrdma_write_array *ary = 89 (struct rpcrdma_write_array *)va; 90 91 /* Check for not write-array */ 92 if (ary->wc_discrim == xdr_zero) 93 return (u32 *)&ary->wc_nchunks; 94 95 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 96 (unsigned long)vaend) { 97 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 98 return NULL; 99 } 100 nchunks = ntohl(ary->wc_nchunks); 101 102 start = (unsigned long)&ary->wc_array[0]; 103 end = (unsigned long)vaend; 104 if (nchunks < 0 || 105 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || 106 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { 107 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", 108 ary, nchunks, vaend); 109 return NULL; 110 } 111 /* 112 * rs_length is the 2nd 4B field in wc_target and taking its 113 * address skips the list terminator 114 */ 115 return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length; 116} 117 118static u32 *decode_reply_array(u32 *va, u32 *vaend) 119{ 120 unsigned long start, end; 121 int nchunks; 122 struct rpcrdma_write_array *ary = 123 (struct rpcrdma_write_array *)va; 124 125 /* Check for no reply-array */ 126 if (ary->wc_discrim == xdr_zero) 127 return (u32 *)&ary->wc_nchunks; 128 129 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 130 (unsigned long)vaend) { 131 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 132 return NULL; 133 } 134 nchunks = ntohl(ary->wc_nchunks); 135 136 start = (unsigned long)&ary->wc_array[0]; 137 end = (unsigned long)vaend; 138 if (nchunks < 0 || 139 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || 140 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { 141 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", 142 ary, nchunks, vaend); 143 return NULL; 144 } 145 return (u32 *)&ary->wc_array[nchunks]; 146} 147 148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, 149 struct svc_rqst *rqstp) 150{ 151 struct rpcrdma_msg *rmsgp = NULL; 152 u32 *va; 153 u32 *vaend; 154 u32 hdr_len; 155 156 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 157 158 /* Verify that there's enough bytes for header + something */ 159 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { 160 dprintk("svcrdma: header too short = %d\n", 161 rqstp->rq_arg.len); 162 return -EINVAL; 163 } 164 165 /* Decode the header */ 166 rmsgp->rm_xid = ntohl(rmsgp->rm_xid); 167 rmsgp->rm_vers = ntohl(rmsgp->rm_vers); 168 rmsgp->rm_credit = ntohl(rmsgp->rm_credit); 169 rmsgp->rm_type = ntohl(rmsgp->rm_type); 170 171 if (rmsgp->rm_vers != RPCRDMA_VERSION) 172 return -ENOSYS; 173 174 /* Pull in the extra for the padded case and bump our pointer */ 175 if (rmsgp->rm_type == RDMA_MSGP) { 176 int hdrlen; 177 rmsgp->rm_body.rm_padded.rm_align = 178 ntohl(rmsgp->rm_body.rm_padded.rm_align); 179 rmsgp->rm_body.rm_padded.rm_thresh = 180 ntohl(rmsgp->rm_body.rm_padded.rm_thresh); 181 182 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 183 rqstp->rq_arg.head[0].iov_base = va; 184 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); 185 rqstp->rq_arg.head[0].iov_len -= hdrlen; 186 if (hdrlen > rqstp->rq_arg.len) 187 return -EINVAL; 188 return hdrlen; 189 } 190 191 /* The chunk list may contain either a read chunk list or a write 192 * chunk list and a reply chunk list. 193 */ 194 va = &rmsgp->rm_body.rm_chunks[0]; 195 vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); 196 va = decode_read_list(va, vaend); 197 if (!va) 198 return -EINVAL; 199 va = decode_write_list(va, vaend); 200 if (!va) 201 return -EINVAL; 202 va = decode_reply_array(va, vaend); 203 if (!va) 204 return -EINVAL; 205 206 rqstp->rq_arg.head[0].iov_base = va; 207 hdr_len = (unsigned long)va - (unsigned long)rmsgp; 208 rqstp->rq_arg.head[0].iov_len -= hdr_len; 209 210 *rdma_req = rmsgp; 211 return hdr_len; 212} 213 214int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) 215{ 216 struct rpcrdma_msg *rmsgp = NULL; 217 struct rpcrdma_read_chunk *ch; 218 struct rpcrdma_write_array *ary; 219 u32 *va; 220 u32 hdrlen; 221 222 dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", 223 rqstp); 224 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 225 226 /* Pull in the extra for the padded case and bump our pointer */ 227 if (rmsgp->rm_type == RDMA_MSGP) { 228 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 229 rqstp->rq_arg.head[0].iov_base = va; 230 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); 231 rqstp->rq_arg.head[0].iov_len -= hdrlen; 232 return hdrlen; 233 } 234 235 /* 236 * Skip all chunks to find RPC msg. These were previously processed 237 */ 238 va = &rmsgp->rm_body.rm_chunks[0]; 239 240 /* Skip read-list */ 241 for (ch = (struct rpcrdma_read_chunk *)va; 242 ch->rc_discrim != xdr_zero; ch++); 243 va = (u32 *)&ch->rc_position; 244 245 /* Skip write-list */ 246 ary = (struct rpcrdma_write_array *)va; 247 if (ary->wc_discrim == xdr_zero) 248 va = (u32 *)&ary->wc_nchunks; 249 else 250 /* 251 * rs_length is the 2nd 4B field in wc_target and taking its 252 * address skips the list terminator 253 */ 254 va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; 255 256 /* Skip reply-array */ 257 ary = (struct rpcrdma_write_array *)va; 258 if (ary->wc_discrim == xdr_zero) 259 va = (u32 *)&ary->wc_nchunks; 260 else 261 va = (u32 *)&ary->wc_array[ary->wc_nchunks]; 262 263 rqstp->rq_arg.head[0].iov_base = va; 264 hdrlen = (unsigned long)va - (unsigned long)rmsgp; 265 rqstp->rq_arg.head[0].iov_len -= hdrlen; 266 267 return hdrlen; 268} 269 270int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, 271 struct rpcrdma_msg *rmsgp, 272 enum rpcrdma_errcode err, u32 *va) 273{ 274 u32 *startp = va; 275 276 *va++ = htonl(rmsgp->rm_xid); 277 *va++ = htonl(rmsgp->rm_vers); 278 *va++ = htonl(xprt->sc_max_requests); 279 *va++ = htonl(RDMA_ERROR); 280 *va++ = htonl(err); 281 if (err == ERR_VERS) { 282 *va++ = htonl(RPCRDMA_VERSION); 283 *va++ = htonl(RPCRDMA_VERSION); 284 } 285 286 return (int)((unsigned long)va - (unsigned long)startp); 287} 288 289int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) 290{ 291 struct rpcrdma_write_array *wr_ary; 292 293 /* There is no read-list in a reply */ 294 295 /* skip write list */ 296 wr_ary = (struct rpcrdma_write_array *) 297 &rmsgp->rm_body.rm_chunks[1]; 298 if (wr_ary->wc_discrim) 299 wr_ary = (struct rpcrdma_write_array *) 300 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. 301 wc_target.rs_length; 302 else 303 wr_ary = (struct rpcrdma_write_array *) 304 &wr_ary->wc_nchunks; 305 306 /* skip reply array */ 307 if (wr_ary->wc_discrim) 308 wr_ary = (struct rpcrdma_write_array *) 309 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; 310 else 311 wr_ary = (struct rpcrdma_write_array *) 312 &wr_ary->wc_nchunks; 313 314 return (unsigned long) wr_ary - (unsigned long) rmsgp; 315} 316 317void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) 318{ 319 struct rpcrdma_write_array *ary; 320 321 /* no read-list */ 322 rmsgp->rm_body.rm_chunks[0] = xdr_zero; 323 324 /* write-array discrim */ 325 ary = (struct rpcrdma_write_array *) 326 &rmsgp->rm_body.rm_chunks[1]; 327 ary->wc_discrim = xdr_one; 328 ary->wc_nchunks = htonl(chunks); 329 330 /* write-list terminator */ 331 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; 332 333 /* reply-array discriminator */ 334 ary->wc_array[chunks].wc_target.rs_length = xdr_zero; 335} 336 337void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, 338 int chunks) 339{ 340 ary->wc_discrim = xdr_one; 341 ary->wc_nchunks = htonl(chunks); 342} 343 344void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, 345 int chunk_no, 346 __be32 rs_handle, 347 __be64 rs_offset, 348 u32 write_len) 349{ 350 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; 351 seg->rs_handle = rs_handle; 352 seg->rs_offset = rs_offset; 353 seg->rs_length = htonl(write_len); 354} 355 356void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, 357 struct rpcrdma_msg *rdma_argp, 358 struct rpcrdma_msg *rdma_resp, 359 enum rpcrdma_proc rdma_type) 360{ 361 rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); 362 rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); 363 rdma_resp->rm_credit = htonl(xprt->sc_max_requests); 364 rdma_resp->rm_type = htonl(rdma_type); 365 366 /* Encode <nul> chunks lists */ 367 rdma_resp->rm_body.rm_chunks[0] = xdr_zero; 368 rdma_resp->rm_body.rm_chunks[1] = xdr_zero; 369 rdma_resp->rm_body.rm_chunks[2] = xdr_zero; 370} 371