root/net/sunrpc/svcsock.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. svc_reclassify_socket
  2. svc_reclassify_socket
  3. svc_release_skb
  4. svc_release_udp_skb
  5. svc_send_common
  6. svc_sendto
  7. svc_one_sock_name
  8. svc_recvfrom
  9. svc_sock_setbufsize
  10. svc_sock_secure_port
  11. svc_data_ready
  12. svc_write_space
  13. svc_tcp_has_wspace
  14. svc_tcp_kill_temp_xprt
  15. svc_udp_get_dest_address4
  16. svc_udp_get_dest_address6
  17. svc_udp_get_dest_address
  18. svc_udp_recvfrom
  19. svc_udp_sendto
  20. svc_udp_has_wspace
  21. svc_udp_accept
  22. svc_udp_kill_temp_xprt
  23. svc_udp_create
  24. svc_udp_init
  25. svc_tcp_listen_data_ready
  26. svc_tcp_state_change
  27. svc_tcp_accept
  28. svc_tcp_restore_pages
  29. svc_tcp_save_pages
  30. svc_tcp_clear_pages
  31. svc_tcp_recv_record
  32. receive_cb_reply
  33. copy_pages_to_kvecs
  34. svc_tcp_fragment_received
  35. svc_tcp_recvfrom
  36. svc_tcp_sendto
  37. svc_tcp_create
  38. svc_init_xprt_sock
  39. svc_cleanup_xprt_sock
  40. svc_tcp_init
  41. svc_sock_update_bufs
  42. svc_setup_socket
  43. svc_alien_sock
  44. svc_addsock
  45. svc_create_socket
  46. svc_sock_detach
  47. svc_tcp_sock_detach
  48. svc_sock_free

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * linux/net/sunrpc/svcsock.c
   4  *
   5  * These are the RPC server socket internals.
   6  *
   7  * The server scheduling algorithm does not always distribute the load
   8  * evenly when servicing a single client. May need to modify the
   9  * svc_xprt_enqueue procedure...
  10  *
  11  * TCP support is largely untested and may be a little slow. The problem
  12  * is that we currently do two separate recvfrom's, one for the 4-byte
  13  * record length, and the second for the actual record. This could possibly
  14  * be improved by always reading a minimum size of around 100 bytes and
  15  * tucking any superfluous bytes away in a temporary store. Still, that
  16  * leaves write requests out in the rain. An alternative may be to peek at
  17  * the first skb in the queue, and if it matches the next TCP sequence
  18  * number, to extract the record marker. Yuck.
  19  *
  20  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  21  */
  22 
  23 #include <linux/kernel.h>
  24 #include <linux/sched.h>
  25 #include <linux/module.h>
  26 #include <linux/errno.h>
  27 #include <linux/fcntl.h>
  28 #include <linux/net.h>
  29 #include <linux/in.h>
  30 #include <linux/inet.h>
  31 #include <linux/udp.h>
  32 #include <linux/tcp.h>
  33 #include <linux/unistd.h>
  34 #include <linux/slab.h>
  35 #include <linux/netdevice.h>
  36 #include <linux/skbuff.h>
  37 #include <linux/file.h>
  38 #include <linux/freezer.h>
  39 #include <net/sock.h>
  40 #include <net/checksum.h>
  41 #include <net/ip.h>
  42 #include <net/ipv6.h>
  43 #include <net/udp.h>
  44 #include <net/tcp.h>
  45 #include <net/tcp_states.h>
  46 #include <linux/uaccess.h>
  47 #include <asm/ioctls.h>
  48 #include <trace/events/skb.h>
  49 
  50 #include <linux/sunrpc/types.h>
  51 #include <linux/sunrpc/clnt.h>
  52 #include <linux/sunrpc/xdr.h>
  53 #include <linux/sunrpc/msg_prot.h>
  54 #include <linux/sunrpc/svcsock.h>
  55 #include <linux/sunrpc/stats.h>
  56 #include <linux/sunrpc/xprt.h>
  57 
  58 #include "sunrpc.h"
  59 
  60 #define RPCDBG_FACILITY RPCDBG_SVCXPRT
  61 
  62 
  63 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
  64                                          int flags);
  65 static int              svc_udp_recvfrom(struct svc_rqst *);
  66 static int              svc_udp_sendto(struct svc_rqst *);
  67 static void             svc_sock_detach(struct svc_xprt *);
  68 static void             svc_tcp_sock_detach(struct svc_xprt *);
  69 static void             svc_sock_free(struct svc_xprt *);
  70 
  71 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
  72                                           struct net *, struct sockaddr *,
  73                                           int, int);
  74 #ifdef CONFIG_DEBUG_LOCK_ALLOC
  75 static struct lock_class_key svc_key[2];
  76 static struct lock_class_key svc_slock_key[2];
  77 
  78 static void svc_reclassify_socket(struct socket *sock)
  79 {
  80         struct sock *sk = sock->sk;
  81 
  82         if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
  83                 return;
  84 
  85         switch (sk->sk_family) {
  86         case AF_INET:
  87                 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
  88                                               &svc_slock_key[0],
  89                                               "sk_xprt.xpt_lock-AF_INET-NFSD",
  90                                               &svc_key[0]);
  91                 break;
  92 
  93         case AF_INET6:
  94                 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
  95                                               &svc_slock_key[1],
  96                                               "sk_xprt.xpt_lock-AF_INET6-NFSD",
  97                                               &svc_key[1]);
  98                 break;
  99 
 100         default:
 101                 BUG();
 102         }
 103 }
 104 #else
 105 static void svc_reclassify_socket(struct socket *sock)
 106 {
 107 }
 108 #endif
 109 
 110 /*
 111  * Release an skbuff after use
 112  */
 113 static void svc_release_skb(struct svc_rqst *rqstp)
 114 {
 115         struct sk_buff *skb = rqstp->rq_xprt_ctxt;
 116 
 117         if (skb) {
 118                 struct svc_sock *svsk =
 119                         container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 120                 rqstp->rq_xprt_ctxt = NULL;
 121 
 122                 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
 123                 skb_free_datagram_locked(svsk->sk_sk, skb);
 124         }
 125 }
 126 
 127 static void svc_release_udp_skb(struct svc_rqst *rqstp)
 128 {
 129         struct sk_buff *skb = rqstp->rq_xprt_ctxt;
 130 
 131         if (skb) {
 132                 rqstp->rq_xprt_ctxt = NULL;
 133 
 134                 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
 135                 consume_skb(skb);
 136         }
 137 }
 138 
 139 union svc_pktinfo_u {
 140         struct in_pktinfo pkti;
 141         struct in6_pktinfo pkti6;
 142 };
 143 #define SVC_PKTINFO_SPACE \
 144         CMSG_SPACE(sizeof(union svc_pktinfo_u))
 145 
 146 static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 147 {
 148         struct svc_sock *svsk =
 149                 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 150         switch (svsk->sk_sk->sk_family) {
 151         case AF_INET: {
 152                         struct in_pktinfo *pki = CMSG_DATA(cmh);
 153 
 154                         cmh->cmsg_level = SOL_IP;
 155                         cmh->cmsg_type = IP_PKTINFO;
 156                         pki->ipi_ifindex = 0;
 157                         pki->ipi_spec_dst.s_addr =
 158                                  svc_daddr_in(rqstp)->sin_addr.s_addr;
 159                         cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 160                 }
 161                 break;
 162 
 163         case AF_INET6: {
 164                         struct in6_pktinfo *pki = CMSG_DATA(cmh);
 165                         struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);
 166 
 167                         cmh->cmsg_level = SOL_IPV6;
 168                         cmh->cmsg_type = IPV6_PKTINFO;
 169                         pki->ipi6_ifindex = daddr->sin6_scope_id;
 170                         pki->ipi6_addr = daddr->sin6_addr;
 171                         cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 172                 }
 173                 break;
 174         }
 175 }
 176 
 177 /*
 178  * send routine intended to be shared by the fore- and back-channel
 179  */
 180 int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
 181                     struct page *headpage, unsigned long headoffset,
 182                     struct page *tailpage, unsigned long tailoffset)
 183 {
 184         int             result;
 185         int             size;
 186         struct page     **ppage = xdr->pages;
 187         size_t          base = xdr->page_base;
 188         unsigned int    pglen = xdr->page_len;
 189         unsigned int    flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
 190         int             slen;
 191         int             len = 0;
 192 
 193         slen = xdr->len;
 194 
 195         /* send head */
 196         if (slen == xdr->head[0].iov_len)
 197                 flags = 0;
 198         len = kernel_sendpage(sock, headpage, headoffset,
 199                                   xdr->head[0].iov_len, flags);
 200         if (len != xdr->head[0].iov_len)
 201                 goto out;
 202         slen -= xdr->head[0].iov_len;
 203         if (slen == 0)
 204                 goto out;
 205 
 206         /* send page data */
 207         size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
 208         while (pglen > 0) {
 209                 if (slen == size)
 210                         flags = 0;
 211                 result = kernel_sendpage(sock, *ppage, base, size, flags);
 212                 if (result > 0)
 213                         len += result;
 214                 if (result != size)
 215                         goto out;
 216                 slen -= size;
 217                 pglen -= size;
 218                 size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
 219                 base = 0;
 220                 ppage++;
 221         }
 222 
 223         /* send tail */
 224         if (xdr->tail[0].iov_len) {
 225                 result = kernel_sendpage(sock, tailpage, tailoffset,
 226                                    xdr->tail[0].iov_len, 0);
 227                 if (result > 0)
 228                         len += result;
 229         }
 230 
 231 out:
 232         return len;
 233 }
 234 
 235 
 236 /*
 237  * Generic sendto routine
 238  */
 239 static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 240 {
 241         struct svc_sock *svsk =
 242                 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 243         struct socket   *sock = svsk->sk_sock;
 244         union {
 245                 struct cmsghdr  hdr;
 246                 long            all[SVC_PKTINFO_SPACE / sizeof(long)];
 247         } buffer;
 248         struct cmsghdr *cmh = &buffer.hdr;
 249         int             len = 0;
 250         unsigned long tailoff;
 251         unsigned long headoff;
 252         RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 253 
 254         if (rqstp->rq_prot == IPPROTO_UDP) {
 255                 struct msghdr msg = {
 256                         .msg_name       = &rqstp->rq_addr,
 257                         .msg_namelen    = rqstp->rq_addrlen,
 258                         .msg_control    = cmh,
 259                         .msg_controllen = sizeof(buffer),
 260                         .msg_flags      = MSG_MORE,
 261                 };
 262 
 263                 svc_set_cmsg_data(rqstp, cmh);
 264 
 265                 if (sock_sendmsg(sock, &msg) < 0)
 266                         goto out;
 267         }
 268 
 269         tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
 270         headoff = 0;
 271         len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
 272                                rqstp->rq_respages[0], tailoff);
 273 
 274 out:
 275         dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n",
 276                 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
 277                 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
 278 
 279         return len;
 280 }
 281 
 282 /*
 283  * Report socket names for nfsdfs
 284  */
 285 static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 286 {
 287         const struct sock *sk = svsk->sk_sk;
 288         const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
 289                                                         "udp" : "tcp";
 290         int len;
 291 
 292         switch (sk->sk_family) {
 293         case PF_INET:
 294                 len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
 295                                 proto_name,
 296                                 &inet_sk(sk)->inet_rcv_saddr,
 297                                 inet_sk(sk)->inet_num);
 298                 break;
 299 #if IS_ENABLED(CONFIG_IPV6)
 300         case PF_INET6:
 301                 len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
 302                                 proto_name,
 303                                 &sk->sk_v6_rcv_saddr,
 304                                 inet_sk(sk)->inet_num);
 305                 break;
 306 #endif
 307         default:
 308                 len = snprintf(buf, remaining, "*unknown-%d*\n",
 309                                 sk->sk_family);
 310         }
 311 
 312         if (len >= remaining) {
 313                 *buf = '\0';
 314                 return -ENAMETOOLONG;
 315         }
 316         return len;
 317 }
 318 
 319 /*
 320  * Generic recvfrom routine.
 321  */
 322 static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
 323                             unsigned int nr, size_t buflen, unsigned int base)
 324 {
 325         struct svc_sock *svsk =
 326                 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 327         struct msghdr msg = { NULL };
 328         ssize_t len;
 329 
 330         rqstp->rq_xprt_hlen = 0;
 331 
 332         clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 333         iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
 334         if (base != 0) {
 335                 iov_iter_advance(&msg.msg_iter, base);
 336                 buflen -= base;
 337         }
 338         len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
 339         /* If we read a full record, then assume there may be more
 340          * data to read (stream based sockets only!)
 341          */
 342         if (len == buflen)
 343                 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 344 
 345         dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n",
 346                 svsk, iov[0].iov_base, iov[0].iov_len, len);
 347         return len;
 348 }
 349 
 350 /*
 351  * Set socket snd and rcv buffer lengths
 352  */
 353 static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
 354 {
 355         unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
 356         struct socket *sock = svsk->sk_sock;
 357 
 358         nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
 359 
 360         lock_sock(sock->sk);
 361         sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
 362         sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
 363         sock->sk->sk_write_space(sock->sk);
 364         release_sock(sock->sk);
 365 }
 366 
 367 static void svc_sock_secure_port(struct svc_rqst *rqstp)
 368 {
 369         if (svc_port_is_privileged(svc_addr(rqstp)))
 370                 set_bit(RQ_SECURE, &rqstp->rq_flags);
 371         else
 372                 clear_bit(RQ_SECURE, &rqstp->rq_flags);
 373 }
 374 
 375 /*
 376  * INET callback when data has been received on the socket.
 377  */
 378 static void svc_data_ready(struct sock *sk)
 379 {
 380         struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
 381 
 382         if (svsk) {
 383                 dprintk("svc: socket %p(inet %p), busy=%d\n",
 384                         svsk, sk,
 385                         test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 386 
 387                 /* Refer to svc_setup_socket() for details. */
 388                 rmb();
 389                 svsk->sk_odata(sk);
 390                 if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
 391                         svc_xprt_enqueue(&svsk->sk_xprt);
 392         }
 393 }
 394 
 395 /*
 396  * INET callback when space is newly available on the socket.
 397  */
 398 static void svc_write_space(struct sock *sk)
 399 {
 400         struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
 401 
 402         if (svsk) {
 403                 dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
 404                         svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 405 
 406                 /* Refer to svc_setup_socket() for details. */
 407                 rmb();
 408                 svsk->sk_owspace(sk);
 409                 svc_xprt_enqueue(&svsk->sk_xprt);
 410         }
 411 }
 412 
 413 static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 414 {
 415         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 416 
 417         if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
 418                 return 1;
 419         return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 420 }
 421 
 422 static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
 423 {
 424         struct svc_sock *svsk;
 425         struct socket *sock;
 426         struct linger no_linger = {
 427                 .l_onoff = 1,
 428                 .l_linger = 0,
 429         };
 430 
 431         svsk = container_of(xprt, struct svc_sock, sk_xprt);
 432         sock = svsk->sk_sock;
 433         kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
 434                           (char *)&no_linger, sizeof(no_linger));
 435 }
 436 
 437 /*
 438  * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
 439  */
 440 static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
 441                                      struct cmsghdr *cmh)
 442 {
 443         struct in_pktinfo *pki = CMSG_DATA(cmh);
 444         struct sockaddr_in *daddr = svc_daddr_in(rqstp);
 445 
 446         if (cmh->cmsg_type != IP_PKTINFO)
 447                 return 0;
 448 
 449         daddr->sin_family = AF_INET;
 450         daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr;
 451         return 1;
 452 }
 453 
 454 /*
 455  * See net/ipv6/datagram.c : ip6_datagram_recv_ctl
 456  */
 457 static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
 458                                      struct cmsghdr *cmh)
 459 {
 460         struct in6_pktinfo *pki = CMSG_DATA(cmh);
 461         struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);
 462 
 463         if (cmh->cmsg_type != IPV6_PKTINFO)
 464                 return 0;
 465 
 466         daddr->sin6_family = AF_INET6;
 467         daddr->sin6_addr = pki->ipi6_addr;
 468         daddr->sin6_scope_id = pki->ipi6_ifindex;
 469         return 1;
 470 }
 471 
 472 /*
 473  * Copy the UDP datagram's destination address to the rqstp structure.
 474  * The 'destination' address in this case is the address to which the
 475  * peer sent the datagram, i.e. our local address. For multihomed
 476  * hosts, this can change from msg to msg. Note that only the IP
 477  * address changes, the port number should remain the same.
 478  */
 479 static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
 480                                     struct cmsghdr *cmh)
 481 {
 482         switch (cmh->cmsg_level) {
 483         case SOL_IP:
 484                 return svc_udp_get_dest_address4(rqstp, cmh);
 485         case SOL_IPV6:
 486                 return svc_udp_get_dest_address6(rqstp, cmh);
 487         }
 488 
 489         return 0;
 490 }
 491 
 492 /*
 493  * Receive a datagram from a UDP socket.
 494  */
 495 static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 496 {
 497         struct svc_sock *svsk =
 498                 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 499         struct svc_serv *serv = svsk->sk_xprt.xpt_server;
 500         struct sk_buff  *skb;
 501         union {
 502                 struct cmsghdr  hdr;
 503                 long            all[SVC_PKTINFO_SPACE / sizeof(long)];
 504         } buffer;
 505         struct cmsghdr *cmh = &buffer.hdr;
 506         struct msghdr msg = {
 507                 .msg_name = svc_addr(rqstp),
 508                 .msg_control = cmh,
 509                 .msg_controllen = sizeof(buffer),
 510                 .msg_flags = MSG_DONTWAIT,
 511         };
 512         size_t len;
 513         int err;
 514 
 515         if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 516             /* udp sockets need large rcvbuf as all pending
 517              * requests are still in that buffer.  sndbuf must
 518              * also be large enough that there is enough space
 519              * for one reply per thread.  We count all threads
 520              * rather than threads in a particular pool, which
 521              * provides an upper bound on the number of threads
 522              * which will access the socket.
 523              */
 524             svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
 525 
 526         clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 527         skb = NULL;
 528         err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
 529                              0, 0, MSG_PEEK | MSG_DONTWAIT);
 530         if (err >= 0)
 531                 skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
 532 
 533         if (skb == NULL) {
 534                 if (err != -EAGAIN) {
 535                         /* possibly an icmp error */
 536                         dprintk("svc: recvfrom returned error %d\n", -err);
 537                         set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 538                 }
 539                 return 0;
 540         }
 541         len = svc_addr_len(svc_addr(rqstp));
 542         rqstp->rq_addrlen = len;
 543         if (skb->tstamp == 0) {
 544                 skb->tstamp = ktime_get_real();
 545                 /* Don't enable netstamp, sunrpc doesn't
 546                    need that much accuracy */
 547         }
 548         sock_write_timestamp(svsk->sk_sk, skb->tstamp);
 549         set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
 550 
 551         len  = skb->len;
 552         rqstp->rq_arg.len = len;
 553 
 554         rqstp->rq_prot = IPPROTO_UDP;
 555 
 556         if (!svc_udp_get_dest_address(rqstp, cmh)) {
 557                 net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
 558                                      cmh->cmsg_level, cmh->cmsg_type);
 559                 goto out_free;
 560         }
 561         rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
 562 
 563         if (skb_is_nonlinear(skb)) {
 564                 /* we have to copy */
 565                 local_bh_disable();
 566                 if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
 567                         local_bh_enable();
 568                         /* checksum error */
 569                         goto out_free;
 570                 }
 571                 local_bh_enable();
 572                 consume_skb(skb);
 573         } else {
 574                 /* we can use it in-place */
 575                 rqstp->rq_arg.head[0].iov_base = skb->data;
 576                 rqstp->rq_arg.head[0].iov_len = len;
 577                 if (skb_checksum_complete(skb))
 578                         goto out_free;
 579                 rqstp->rq_xprt_ctxt = skb;
 580         }
 581 
 582         rqstp->rq_arg.page_base = 0;
 583         if (len <= rqstp->rq_arg.head[0].iov_len) {
 584                 rqstp->rq_arg.head[0].iov_len = len;
 585                 rqstp->rq_arg.page_len = 0;
 586                 rqstp->rq_respages = rqstp->rq_pages+1;
 587         } else {
 588                 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
 589                 rqstp->rq_respages = rqstp->rq_pages + 1 +
 590                         DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 591         }
 592         rqstp->rq_next_page = rqstp->rq_respages+1;
 593 
 594         if (serv->sv_stats)
 595                 serv->sv_stats->netudpcnt++;
 596 
 597         return len;
 598 out_free:
 599         kfree_skb(skb);
 600         return 0;
 601 }
 602 
 603 static int
 604 svc_udp_sendto(struct svc_rqst *rqstp)
 605 {
 606         int             error;
 607 
 608         svc_release_udp_skb(rqstp);
 609 
 610         error = svc_sendto(rqstp, &rqstp->rq_res);
 611         if (error == -ECONNREFUSED)
 612                 /* ICMP error on earlier request. */
 613                 error = svc_sendto(rqstp, &rqstp->rq_res);
 614 
 615         return error;
 616 }
 617 
 618 static int svc_udp_has_wspace(struct svc_xprt *xprt)
 619 {
 620         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 621         struct svc_serv *serv = xprt->xpt_server;
 622         unsigned long required;
 623 
 624         /*
 625          * Set the SOCK_NOSPACE flag before checking the available
 626          * sock space.
 627          */
 628         set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 629         required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
 630         if (required*2 > sock_wspace(svsk->sk_sk))
 631                 return 0;
 632         clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 633         return 1;
 634 }
 635 
 636 static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
 637 {
 638         BUG();
 639         return NULL;
 640 }
 641 
 642 static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt)
 643 {
 644 }
 645 
 646 static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
 647                                        struct net *net,
 648                                        struct sockaddr *sa, int salen,
 649                                        int flags)
 650 {
 651         return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
 652 }
 653 
 654 static const struct svc_xprt_ops svc_udp_ops = {
 655         .xpo_create = svc_udp_create,
 656         .xpo_recvfrom = svc_udp_recvfrom,
 657         .xpo_sendto = svc_udp_sendto,
 658         .xpo_release_rqst = svc_release_udp_skb,
 659         .xpo_detach = svc_sock_detach,
 660         .xpo_free = svc_sock_free,
 661         .xpo_has_wspace = svc_udp_has_wspace,
 662         .xpo_accept = svc_udp_accept,
 663         .xpo_secure_port = svc_sock_secure_port,
 664         .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
 665 };
 666 
 667 static struct svc_xprt_class svc_udp_class = {
 668         .xcl_name = "udp",
 669         .xcl_owner = THIS_MODULE,
 670         .xcl_ops = &svc_udp_ops,
 671         .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
 672         .xcl_ident = XPRT_TRANSPORT_UDP,
 673 };
 674 
 675 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 676 {
 677         int err, level, optname, one = 1;
 678 
 679         svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
 680                       &svsk->sk_xprt, serv);
 681         clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
 682         svsk->sk_sk->sk_data_ready = svc_data_ready;
 683         svsk->sk_sk->sk_write_space = svc_write_space;
 684 
 685         /* initialise setting must have enough space to
 686          * receive and respond to one request.
 687          * svc_udp_recvfrom will re-adjust if necessary
 688          */
 689         svc_sock_setbufsize(svsk, 3);
 690 
 691         /* data might have come in before data_ready set up */
 692         set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 693         set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 694 
 695         /* make sure we get destination address info */
 696         switch (svsk->sk_sk->sk_family) {
 697         case AF_INET:
 698                 level = SOL_IP;
 699                 optname = IP_PKTINFO;
 700                 break;
 701         case AF_INET6:
 702                 level = SOL_IPV6;
 703                 optname = IPV6_RECVPKTINFO;
 704                 break;
 705         default:
 706                 BUG();
 707         }
 708         err = kernel_setsockopt(svsk->sk_sock, level, optname,
 709                                         (char *)&one, sizeof(one));
 710         dprintk("svc: kernel_setsockopt returned %d\n", err);
 711 }
 712 
 713 /*
 714  * A data_ready event on a listening socket means there's a connection
 715  * pending. Do not use state_change as a substitute for it.
 716  */
 717 static void svc_tcp_listen_data_ready(struct sock *sk)
 718 {
 719         struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
 720 
 721         dprintk("svc: socket %p TCP (listen) state change %d\n",
 722                 sk, sk->sk_state);
 723 
 724         if (svsk) {
 725                 /* Refer to svc_setup_socket() for details. */
 726                 rmb();
 727                 svsk->sk_odata(sk);
 728         }
 729 
 730         /*
 731          * This callback may called twice when a new connection
 732          * is established as a child socket inherits everything
 733          * from a parent LISTEN socket.
 734          * 1) data_ready method of the parent socket will be called
 735          *    when one of child sockets become ESTABLISHED.
 736          * 2) data_ready method of the child socket may be called
 737          *    when it receives data before the socket is accepted.
 738          * In case of 2, we should ignore it silently.
 739          */
 740         if (sk->sk_state == TCP_LISTEN) {
 741                 if (svsk) {
 742                         set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 743                         svc_xprt_enqueue(&svsk->sk_xprt);
 744                 } else
 745                         printk("svc: socket %p: no user data\n", sk);
 746         }
 747 }
 748 
 749 /*
 750  * A state change on a connected socket means it's dying or dead.
 751  */
 752 static void svc_tcp_state_change(struct sock *sk)
 753 {
 754         struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
 755 
 756         dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
 757                 sk, sk->sk_state, sk->sk_user_data);
 758 
 759         if (!svsk)
 760                 printk("svc: socket %p: no user data\n", sk);
 761         else {
 762                 /* Refer to svc_setup_socket() for details. */
 763                 rmb();
 764                 svsk->sk_ostate(sk);
 765                 if (sk->sk_state != TCP_ESTABLISHED) {
 766                         set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 767                         svc_xprt_enqueue(&svsk->sk_xprt);
 768                 }
 769         }
 770 }
 771 
 772 /*
 773  * Accept a TCP connection
 774  */
 775 static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 776 {
 777         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 778         struct sockaddr_storage addr;
 779         struct sockaddr *sin = (struct sockaddr *) &addr;
 780         struct svc_serv *serv = svsk->sk_xprt.xpt_server;
 781         struct socket   *sock = svsk->sk_sock;
 782         struct socket   *newsock;
 783         struct svc_sock *newsvsk;
 784         int             err, slen;
 785         RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 786 
 787         dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
 788         if (!sock)
 789                 return NULL;
 790 
 791         clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 792         err = kernel_accept(sock, &newsock, O_NONBLOCK);
 793         if (err < 0) {
 794                 if (err == -ENOMEM)
 795                         printk(KERN_WARNING "%s: no more sockets!\n",
 796                                serv->sv_name);
 797                 else if (err != -EAGAIN)
 798                         net_warn_ratelimited("%s: accept failed (err %d)!\n",
 799                                              serv->sv_name, -err);
 800                 return NULL;
 801         }
 802         set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 803 
 804         err = kernel_getpeername(newsock, sin);
 805         if (err < 0) {
 806                 net_warn_ratelimited("%s: peername failed (err %d)!\n",
 807                                      serv->sv_name, -err);
 808                 goto failed;            /* aborted connection or whatever */
 809         }
 810         slen = err;
 811 
 812         /* Ideally, we would want to reject connections from unauthorized
 813          * hosts here, but when we get encryption, the IP of the host won't
 814          * tell us anything.  For now just warn about unpriv connections.
 815          */
 816         if (!svc_port_is_privileged(sin)) {
 817                 dprintk("%s: connect from unprivileged port: %s\n",
 818                         serv->sv_name,
 819                         __svc_print_addr(sin, buf, sizeof(buf)));
 820         }
 821         dprintk("%s: connect from %s\n", serv->sv_name,
 822                 __svc_print_addr(sin, buf, sizeof(buf)));
 823 
 824         /* Reset the inherited callbacks before calling svc_setup_socket */
 825         newsock->sk->sk_state_change = svsk->sk_ostate;
 826         newsock->sk->sk_data_ready = svsk->sk_odata;
 827         newsock->sk->sk_write_space = svsk->sk_owspace;
 828 
 829         /* make sure that a write doesn't block forever when
 830          * low on memory
 831          */
 832         newsock->sk->sk_sndtimeo = HZ*30;
 833 
 834         newsvsk = svc_setup_socket(serv, newsock,
 835                                  (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY));
 836         if (IS_ERR(newsvsk))
 837                 goto failed;
 838         svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
 839         err = kernel_getsockname(newsock, sin);
 840         slen = err;
 841         if (unlikely(err < 0)) {
 842                 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
 843                 slen = offsetof(struct sockaddr, sa_data);
 844         }
 845         svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
 846 
 847         if (sock_is_loopback(newsock->sk))
 848                 set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
 849         else
 850                 clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
 851         if (serv->sv_stats)
 852                 serv->sv_stats->nettcpconn++;
 853 
 854         return &newsvsk->sk_xprt;
 855 
 856 failed:
 857         sock_release(newsock);
 858         return NULL;
 859 }
 860 
 861 static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
 862 {
 863         unsigned int i, len, npages;
 864 
 865         if (svsk->sk_datalen == 0)
 866                 return 0;
 867         len = svsk->sk_datalen;
 868         npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 869         for (i = 0; i < npages; i++) {
 870                 if (rqstp->rq_pages[i] != NULL)
 871                         put_page(rqstp->rq_pages[i]);
 872                 BUG_ON(svsk->sk_pages[i] == NULL);
 873                 rqstp->rq_pages[i] = svsk->sk_pages[i];
 874                 svsk->sk_pages[i] = NULL;
 875         }
 876         rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
 877         return len;
 878 }
 879 
 880 static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
 881 {
 882         unsigned int i, len, npages;
 883 
 884         if (svsk->sk_datalen == 0)
 885                 return;
 886         len = svsk->sk_datalen;
 887         npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 888         for (i = 0; i < npages; i++) {
 889                 svsk->sk_pages[i] = rqstp->rq_pages[i];
 890                 rqstp->rq_pages[i] = NULL;
 891         }
 892 }
 893 
 894 static void svc_tcp_clear_pages(struct svc_sock *svsk)
 895 {
 896         unsigned int i, len, npages;
 897 
 898         if (svsk->sk_datalen == 0)
 899                 goto out;
 900         len = svsk->sk_datalen;
 901         npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 902         for (i = 0; i < npages; i++) {
 903                 if (svsk->sk_pages[i] == NULL) {
 904                         WARN_ON_ONCE(1);
 905                         continue;
 906                 }
 907                 put_page(svsk->sk_pages[i]);
 908                 svsk->sk_pages[i] = NULL;
 909         }
 910 out:
 911         svsk->sk_tcplen = 0;
 912         svsk->sk_datalen = 0;
 913 }
 914 
 915 /*
 916  * Receive fragment record header.
 917  * If we haven't gotten the record length yet, get the next four bytes.
 918  */
 919 static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 920 {
 921         struct svc_serv *serv = svsk->sk_xprt.xpt_server;
 922         unsigned int want;
 923         int len;
 924 
 925         if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
 926                 struct kvec     iov;
 927 
 928                 want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
 929                 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
 930                 iov.iov_len  = want;
 931                 len = svc_recvfrom(rqstp, &iov, 1, want, 0);
 932                 if (len < 0)
 933                         goto error;
 934                 svsk->sk_tcplen += len;
 935 
 936                 if (len < want) {
 937                         dprintk("svc: short recvfrom while reading record "
 938                                 "length (%d of %d)\n", len, want);
 939                         return -EAGAIN;
 940                 }
 941 
 942                 dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
 943                 if (svc_sock_reclen(svsk) + svsk->sk_datalen >
 944                                                         serv->sv_max_mesg) {
 945                         net_notice_ratelimited("RPC: fragment too large: %d\n",
 946                                         svc_sock_reclen(svsk));
 947                         goto err_delete;
 948                 }
 949         }
 950 
 951         return svc_sock_reclen(svsk);
 952 error:
 953         dprintk("RPC: TCP recv_record got %d\n", len);
 954         return len;
 955 err_delete:
 956         set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 957         return -EAGAIN;
 958 }
 959 
 960 static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 961 {
 962         struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
 963         struct rpc_rqst *req = NULL;
 964         struct kvec *src, *dst;
 965         __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 966         __be32 xid;
 967         __be32 calldir;
 968 
 969         xid = *p++;
 970         calldir = *p;
 971 
 972         if (!bc_xprt)
 973                 return -EAGAIN;
 974         spin_lock(&bc_xprt->queue_lock);
 975         req = xprt_lookup_rqst(bc_xprt, xid);
 976         if (!req)
 977                 goto unlock_notfound;
 978 
 979         memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
 980         /*
 981          * XXX!: cheating for now!  Only copying HEAD.
 982          * But we know this is good enough for now (in fact, for any
 983          * callback reply in the forseeable future).
 984          */
 985         dst = &req->rq_private_buf.head[0];
 986         src = &rqstp->rq_arg.head[0];
 987         if (dst->iov_len < src->iov_len)
 988                 goto unlock_eagain; /* whatever; just giving up. */
 989         memcpy(dst->iov_base, src->iov_base, src->iov_len);
 990         xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 991         rqstp->rq_arg.len = 0;
 992         spin_unlock(&bc_xprt->queue_lock);
 993         return 0;
 994 unlock_notfound:
 995         printk(KERN_NOTICE
 996                 "%s: Got unrecognized reply: "
 997                 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
 998                 __func__, ntohl(calldir),
 999                 bc_xprt, ntohl(xid));
1000 unlock_eagain:
1001         spin_unlock(&bc_xprt->queue_lock);
1002         return -EAGAIN;
1003 }
1004 
1005 static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1006 {
1007         int i = 0;
1008         int t = 0;
1009 
1010         while (t < len) {
1011                 vec[i].iov_base = page_address(pages[i]);
1012                 vec[i].iov_len = PAGE_SIZE;
1013                 i++;
1014                 t += PAGE_SIZE;
1015         }
1016         return i;
1017 }
1018 
1019 static void svc_tcp_fragment_received(struct svc_sock *svsk)
1020 {
1021         /* If we have more data, signal svc_xprt_enqueue() to try again */
1022         dprintk("svc: TCP %s record (%d bytes)\n",
1023                 svc_sock_final_rec(svsk) ? "final" : "nonfinal",
1024                 svc_sock_reclen(svsk));
1025         svsk->sk_tcplen = 0;
1026         svsk->sk_reclen = 0;
1027 }
1028 
1029 /*
1030  * Receive data from a TCP socket.
      *
      * Reads the fragment header via svc_tcp_recv_record(), restores any
      * pages saved by an earlier partial receive, and reads the rest of the
      * current fragment into rqstp->rq_pages.
      *
      * Returns rqstp->rq_arg.len once a complete final fragment has been
      * received, and 0 in every other case: -EAGAIN from a lower layer, an
      * incomplete or non-final fragment (pages are saved on the svc_sock for
      * the next call), or an error (XPT_CLOSE is set on the transport).
1031  */
1032 static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1033 {
1034         struct svc_sock *svsk =
1035                 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
1036         struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1037         int             len;
1038         struct kvec *vec;
1039         unsigned int want, base;
1040         __be32 *p;
1041         __be32 calldir;
1042         int pnum;
1043 
1044         dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1045                 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
1046                 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
1047                 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
1048 
             /* Get (or finish getting) the fragment header; < 0 means retry or error. */
1049         len = svc_tcp_recv_record(svsk, rqstp);
1050         if (len < 0)
1051                 goto error;
1052 
             /*
              * base: bytes already received for this message on earlier calls
              *       (svc_tcp_restore_pages() returns sk_datalen).
              * want: bytes of the current fragment that are still missing;
              *       sk_tcplen counts the fragment header as well, hence the
              *       sizeof(rpc_fraghdr) correction.
              */
1053         base = svc_tcp_restore_pages(svsk, rqstp);
1054         want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
1055 
1056         vec = rqstp->rq_vec;
1057 
1058         pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want);
1059 
             /* Response pages begin right after the pages described for receive. */
1060         rqstp->rq_respages = &rqstp->rq_pages[pnum];
1061         rqstp->rq_next_page = rqstp->rq_respages + 1;
1062 
1063         /* Now receive data */
1064         len = svc_recvfrom(rqstp, vec, pnum, base + want, base);
1065         if (len >= 0) {
1066                 svsk->sk_tcplen += len;
1067                 svsk->sk_datalen += len;
1068         }
             /*
              * Anything other than "whole final fragment received" ends here:
              * the pages go back onto the svc_sock, a hard error closes the
              * transport, a completed non-final fragment resets the
              * per-fragment counters, and the function returns 0.
              * Note that len (int) is compared with want (unsigned int).
              */
1069         if (len != want || !svc_sock_final_rec(svsk)) {
1070                 svc_tcp_save_pages(svsk, rqstp);
1071                 if (len < 0 && len != -EAGAIN)
1072                         goto err_delete;
1073                 if (len == want)
1074                         svc_tcp_fragment_received(svsk);
1075                 else
1076                         dprintk("svc: incomplete TCP record (%d of %d)\n",
1077                                 (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
1078                                 svc_sock_reclen(svsk));
1079                 goto err_noclose;
1080         }
1081 
             /*
              * The code below reads the second 32-bit word of the message
              * (p[1]), so at least 8 bytes are required.
              */
1082         if (svsk->sk_datalen < 8) {
1083                 svsk->sk_datalen = 0;
1084                 goto err_delete; /* client is nuts. */
1085         }
1086 
             /* Split the received length between the head kvec and the page data. */
1087         rqstp->rq_arg.len = svsk->sk_datalen;
1088         rqstp->rq_arg.page_base = 0;
1089         if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
1090                 rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
1091                 rqstp->rq_arg.page_len = 0;
1092         } else
1093                 rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1094 
1095         rqstp->rq_xprt_ctxt   = NULL;
1096         rqstp->rq_prot        = IPPROTO_TCP;
1097         if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
1098                 set_bit(RQ_LOCAL, &rqstp->rq_flags);
1099         else
1100                 clear_bit(RQ_LOCAL, &rqstp->rq_flags);
1101 
             /*
              * A non-zero second word (named calldir in receive_cb_reply())
              * routes the message to receive_cb_reply().  When that returns 0
              * it has also zeroed rq_arg.len, so this function then returns 0.
              */
1102         p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
1103         calldir = p[1];
1104         if (calldir)
1105                 len = receive_cb_reply(svsk, rqstp);
1106 
1107         /* Reset TCP read info */
1108         svsk->sk_datalen = 0;
1109         svc_tcp_fragment_received(svsk);
1110 
1111         if (len < 0)
1112                 goto error;
1113 
1114         svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
1115         if (serv->sv_stats)
1116                 serv->sv_stats->nettcpcnt++;
1117 
1118         return rqstp->rq_arg.len;
1119 
1120 error:
1121         if (len != -EAGAIN)
1122                 goto err_delete;
1123         dprintk("RPC: TCP recvfrom got EAGAIN\n");
1124         return 0;
1125 err_delete:
             /*
              * NOTE(review): this label is also reached from the
              * sk_datalen < 8 check, where len is non-negative, so the value
              * printed as "errno" is not an error code in that case.
              */
1126         printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1127                svsk->sk_xprt.xpt_server->sv_name, -len);
1128         set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1129 err_noclose:
1130         return 0;       /* record not complete */
1131 }
1132 
1133 /*
1134  * Send out data on TCP socket.
1135  */
1136 static int svc_tcp_sendto(struct svc_rqst *rqstp)
1137 {
1138         struct xdr_buf  *xbufp = &rqstp->rq_res;
1139         int sent;
1140         __be32 reclen;
1141 
1142         svc_release_skb(rqstp);
1143 
1144         /* Set up the first element of the reply kvec.
1145          * Any other kvecs that may be in use have been taken
1146          * care of by the server implementation itself.
1147          */
1148         reclen = htonl(0x80000000|((xbufp->len ) - 4));
1149         memcpy(xbufp->head[0].iov_base, &reclen, 4);
1150 
1151         sent = svc_sendto(rqstp, &rqstp->rq_res);
1152         if (sent != xbufp->len) {
1153                 printk(KERN_NOTICE
1154                        "rpc-srv/tcp: %s: %s %d when sending %d bytes "
1155                        "- shutting down socket\n",
1156                        rqstp->rq_xprt->xpt_server->sv_name,
1157                        (sent<0)?"got error":"sent only",
1158                        sent, xbufp->len);
1159                 set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
1160                 svc_xprt_enqueue(rqstp->rq_xprt);
1161                 sent = -EAGAIN;
1162         }
1163         return sent;
1164 }
1165 
1166 static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1167                                        struct net *net,
1168                                        struct sockaddr *sa, int salen,
1169                                        int flags)
1170 {
1171         return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1172 }
1173 
1174 static const struct svc_xprt_ops svc_tcp_ops = {
1175         .xpo_create = svc_tcp_create,
1176         .xpo_recvfrom = svc_tcp_recvfrom,
1177         .xpo_sendto = svc_tcp_sendto,
1178         .xpo_release_rqst = svc_release_skb,
1179         .xpo_detach = svc_tcp_sock_detach,
1180         .xpo_free = svc_sock_free,
1181         .xpo_has_wspace = svc_tcp_has_wspace,
1182         .xpo_accept = svc_tcp_accept,
1183         .xpo_secure_port = svc_sock_secure_port,
1184         .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
1185 };
1186 
1187 static struct svc_xprt_class svc_tcp_class = {
1188         .xcl_name = "tcp",
1189         .xcl_owner = THIS_MODULE,
1190         .xcl_ops = &svc_tcp_ops,
1191         .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
1192         .xcl_ident = XPRT_TRANSPORT_TCP,
1193 };
1194 
1195 void svc_init_xprt_sock(void)
1196 {
1197         svc_reg_xprt_class(&svc_tcp_class);
1198         svc_reg_xprt_class(&svc_udp_class);
1199 }
1200 
1201 void svc_cleanup_xprt_sock(void)
1202 {
1203         svc_unreg_xprt_class(&svc_tcp_class);
1204         svc_unreg_xprt_class(&svc_udp_class);
1205 }
1206 
1207 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1208 {
1209         struct sock     *sk = svsk->sk_sk;
1210 
1211         svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
1212                       &svsk->sk_xprt, serv);
1213         set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
1214         set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
1215         if (sk->sk_state == TCP_LISTEN) {
1216                 dprintk("setting up TCP socket for listening\n");
1217                 strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
1218                 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
1219                 sk->sk_data_ready = svc_tcp_listen_data_ready;
1220                 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1221         } else {
1222                 dprintk("setting up TCP socket for reading\n");
1223                 sk->sk_state_change = svc_tcp_state_change;
1224                 sk->sk_data_ready = svc_data_ready;
1225                 sk->sk_write_space = svc_write_space;
1226 
1227                 svsk->sk_reclen = 0;
1228                 svsk->sk_tcplen = 0;
1229                 svsk->sk_datalen = 0;
1230                 memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
1231 
1232                 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1233 
1234                 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1235                 switch (sk->sk_state) {
1236                 case TCP_SYN_RECV:
1237                 case TCP_ESTABLISHED:
1238                         break;
1239                 default:
1240                         set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1241                 }
1242         }
1243 }
1244 
1245 void svc_sock_update_bufs(struct svc_serv *serv)
1246 {
1247         /*
1248          * The number of server threads has changed. Update
1249          * rcvbuf and sndbuf accordingly on all sockets
1250          */
1251         struct svc_sock *svsk;
1252 
1253         spin_lock_bh(&serv->sv_lock);
1254         list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
1255                 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1256         spin_unlock_bh(&serv->sv_lock);
1257 }
1258 EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
1259 
1260 /*
1261  * Initialize socket for RPC use and create svc_sock struct
1262  */
1263 static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1264                                                 struct socket *sock,
1265                                                 int flags)
1266 {
1267         struct svc_sock *svsk;
1268         struct sock     *inet;
1269         int             pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1270         int             err = 0;
1271 
1272         dprintk("svc: svc_setup_socket %p\n", sock);
1273         svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
1274         if (!svsk)
1275                 return ERR_PTR(-ENOMEM);
1276 
1277         inet = sock->sk;
1278 
1279         /* Register socket with portmapper */
1280         if (pmap_register)
1281                 err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
1282                                      inet->sk_protocol,
1283                                      ntohs(inet_sk(inet)->inet_sport));
1284 
1285         if (err < 0) {
1286                 kfree(svsk);
1287                 return ERR_PTR(err);
1288         }
1289 
1290         svsk->sk_sock = sock;
1291         svsk->sk_sk = inet;
1292         svsk->sk_ostate = inet->sk_state_change;
1293         svsk->sk_odata = inet->sk_data_ready;
1294         svsk->sk_owspace = inet->sk_write_space;
1295         /*
1296          * This barrier is necessary in order to prevent race condition
1297          * with svc_data_ready(), svc_listen_data_ready() and others
1298          * when calling callbacks above.
1299          */
1300         wmb();
1301         inet->sk_user_data = svsk;
1302 
1303         /* Initialize the socket */
1304         if (sock->type == SOCK_DGRAM)
1305                 svc_udp_init(svsk, serv);
1306         else
1307                 svc_tcp_init(svsk, serv);
1308 
1309         dprintk("svc: svc_setup_socket created %p (inet %p), "
1310                         "listen %d close %d\n",
1311                         svsk, svsk->sk_sk,
1312                         test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
1313                         test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
1314 
1315         return svsk;
1316 }
1317 
1318 bool svc_alien_sock(struct net *net, int fd)
1319 {
1320         int err;
1321         struct socket *sock = sockfd_lookup(fd, &err);
1322         bool ret = false;
1323 
1324         if (!sock)
1325                 goto out;
1326         if (sock_net(sock->sk) != net)
1327                 ret = true;
1328         sockfd_put(sock);
1329 out:
1330         return ret;
1331 }
1332 EXPORT_SYMBOL_GPL(svc_alien_sock);
1333 
1334 /**
1335  * svc_addsock - add a listener socket to an RPC service
1336  * @serv: pointer to RPC service to which to add a new listener
1337  * @fd: file descriptor of the new listener
1338  * @name_return: pointer to buffer to fill in with name of listener
1339  * @len: size of the buffer
1340  * @cred: credential
1341  *
1342  * Fills in socket name and returns positive length of name if successful.
1343  * Name is terminated with '\n'.  On error, returns a negative errno
1344  * value.
1345  */
1346 int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
1347                 const size_t len, const struct cred *cred)
1348 {
1349         int err = 0;
1350         struct socket *so = sockfd_lookup(fd, &err);
1351         struct svc_sock *svsk = NULL;
1352         struct sockaddr_storage addr;
1353         struct sockaddr *sin = (struct sockaddr *)&addr;
1354         int salen;
1355 
1356         if (!so)
1357                 return err;
1358         err = -EAFNOSUPPORT;
1359         if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
1360                 goto out;
1361         err =  -EPROTONOSUPPORT;
1362         if (so->sk->sk_protocol != IPPROTO_TCP &&
1363             so->sk->sk_protocol != IPPROTO_UDP)
1364                 goto out;
1365         err = -EISCONN;
1366         if (so->state > SS_UNCONNECTED)
1367                 goto out;
1368         err = -ENOENT;
1369         if (!try_module_get(THIS_MODULE))
1370                 goto out;
1371         svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS);
1372         if (IS_ERR(svsk)) {
1373                 module_put(THIS_MODULE);
1374                 err = PTR_ERR(svsk);
1375                 goto out;
1376         }
1377         salen = kernel_getsockname(svsk->sk_sock, sin);
1378         if (salen >= 0)
1379                 svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
1380         svsk->sk_xprt.xpt_cred = get_cred(cred);
1381         svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
1382         return svc_one_sock_name(svsk, name_return, len);
1383 out:
1384         sockfd_put(so);
1385         return err;
1386 }
1387 EXPORT_SYMBOL_GPL(svc_addsock);
1388 
1389 /*
1390  * Create socket for RPC service.
1391  */
1392 static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1393                                           int protocol,
1394                                           struct net *net,
1395                                           struct sockaddr *sin, int len,
1396                                           int flags)
1397 {
1398         struct svc_sock *svsk;
1399         struct socket   *sock;
1400         int             error;
1401         int             type;
1402         struct sockaddr_storage addr;
1403         struct sockaddr *newsin = (struct sockaddr *)&addr;
1404         int             newlen;
1405         int             family;
1406         int             val;
1407         RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
1408 
1409         dprintk("svc: svc_create_socket(%s, %d, %s)\n",
1410                         serv->sv_program->pg_name, protocol,
1411                         __svc_print_addr(sin, buf, sizeof(buf)));
1412 
1413         if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
1414                 printk(KERN_WARNING "svc: only UDP and TCP "
1415                                 "sockets supported\n");
1416                 return ERR_PTR(-EINVAL);
1417         }
1418 
1419         type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1420         switch (sin->sa_family) {
1421         case AF_INET6:
1422                 family = PF_INET6;
1423                 break;
1424         case AF_INET:
1425                 family = PF_INET;
1426                 break;
1427         default:
1428                 return ERR_PTR(-EINVAL);
1429         }
1430 
1431         error = __sock_create(net, family, type, protocol, &sock, 1);
1432         if (error < 0)
1433                 return ERR_PTR(error);
1434 
1435         svc_reclassify_socket(sock);
1436 
1437         /*
1438          * If this is an PF_INET6 listener, we want to avoid
1439          * getting requests from IPv4 remotes.  Those should
1440          * be shunted to a PF_INET listener via rpcbind.
1441          */
1442         val = 1;
1443         if (family == PF_INET6)
1444                 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1445                                         (char *)&val, sizeof(val));
1446 
1447         if (type == SOCK_STREAM)
1448                 sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
1449         error = kernel_bind(sock, sin, len);
1450         if (error < 0)
1451                 goto bummer;
1452 
1453         error = kernel_getsockname(sock, newsin);
1454         if (error < 0)
1455                 goto bummer;
1456         newlen = error;
1457 
1458         if (protocol == IPPROTO_TCP) {
1459                 if ((error = kernel_listen(sock, 64)) < 0)
1460                         goto bummer;
1461         }
1462 
1463         svsk = svc_setup_socket(serv, sock, flags);
1464         if (IS_ERR(svsk)) {
1465                 error = PTR_ERR(svsk);
1466                 goto bummer;
1467         }
1468         svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
1469         return (struct svc_xprt *)svsk;
1470 bummer:
1471         dprintk("svc: svc_create_socket error = %d\n", -error);
1472         sock_release(sock);
1473         return ERR_PTR(error);
1474 }
1475 
1476 /*
1477  * Detach the svc_sock from the socket so that no
1478  * more callbacks occur.
1479  */
1480 static void svc_sock_detach(struct svc_xprt *xprt)
1481 {
1482         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1483         struct sock *sk = svsk->sk_sk;
1484 
1485         dprintk("svc: svc_sock_detach(%p)\n", svsk);
1486 
1487         /* put back the old socket callbacks */
1488         lock_sock(sk);
1489         sk->sk_state_change = svsk->sk_ostate;
1490         sk->sk_data_ready = svsk->sk_odata;
1491         sk->sk_write_space = svsk->sk_owspace;
1492         sk->sk_user_data = NULL;
1493         release_sock(sk);
1494 }
1495 
1496 /*
1497  * Disconnect the socket, and reset the callbacks
1498  */
1499 static void svc_tcp_sock_detach(struct svc_xprt *xprt)
1500 {
1501         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1502 
1503         dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
1504 
1505         svc_sock_detach(xprt);
1506 
1507         if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
1508                 svc_tcp_clear_pages(svsk);
1509                 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
1510         }
1511 }
1512 
1513 /*
1514  * Free the svc_sock's socket resources and the svc_sock itself.
1515  */
1516 static void svc_sock_free(struct svc_xprt *xprt)
1517 {
1518         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1519         dprintk("svc: svc_sock_free(%p)\n", svsk);
1520 
1521         if (svsk->sk_sock->file)
1522                 sockfd_put(svsk->sk_sock);
1523         else
1524                 sock_release(svsk->sk_sock);
1525         kfree(svsk);
1526 }

/* [<][>][^][v][top][bottom][index][help] */