root/net/rds/af_rds.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rds_release
  2. rds_wake_sk_sleep
  3. rds_getname
  4. rds_poll
  5. rds_ioctl
  6. rds_cancel_sent_to
  7. rds_set_bool_option
  8. rds_cong_monitor
  9. rds_set_transport
  10. rds_enable_recvtstamp
  11. rds_recv_track_latency
  12. rds_setsockopt
  13. rds_getsockopt
  14. rds_connect
  15. rds_sock_destruct
  16. __rds_create
  17. rds_create
  18. rds_sock_addref
  19. rds_sock_put
  20. rds_sock_inc_info
  21. rds6_sock_inc_info
  22. rds_sock_info
  23. rds6_sock_info
  24. rds_exit
  25. rds_init

   1 /*
   2  * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  *
  32  */
  33 #include <linux/module.h>
  34 #include <linux/errno.h>
  35 #include <linux/kernel.h>
  36 #include <linux/gfp.h>
  37 #include <linux/in.h>
  38 #include <linux/ipv6.h>
  39 #include <linux/poll.h>
  40 #include <net/sock.h>
  41 
  42 #include "rds.h"
  43 
/*
 * Global bookkeeping of RDS sockets; this is just used for stats gathering :/
 *
 * rds_sock_lock is held (with bottom halves disabled) around every update of
 * rds_sock_list and rds_sock_count in this file; rds_ioctl() also takes it
 * around reads and writes of rs_tos.
 */
static DEFINE_SPINLOCK(rds_sock_lock);
/* Number of sockets currently linked on rds_sock_list. */
static unsigned long rds_sock_count;
/* Sockets created by __rds_create(), linked through rs_item. */
static LIST_HEAD(rds_sock_list);
/* Additional wait queue that rds_poll() registers on when rs_seen_congestion is set. */
DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
  49 
/*
 * This is called as the final descriptor referencing this socket is closed.
 * We have to unbind the socket so that another socket can be bound to the
 * address it was using.
 *
 * We have to be careful about racing with the incoming path.  sock_orphan()
 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
 * messages shouldn't be queued.
 *
 * Always returns 0.  A socket that has no struct sock attached is left
 * untouched.
 */
static int rds_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct rds_sock *rs;

        if (!sk)
                goto out;

        rs = rds_sk_to_rs(sk);

        sock_orphan(sk);
        /* Note - rds_clear_recv_queue grabs rs_recv_lock, so
         * that ensures the recv path has completed messing
         * with the socket. */
        rds_clear_recv_queue(rs);
        rds_cong_remove_socket(rs);

        rds_remove_bound(rs);

        /* Per-socket state owned by the send, RDMA, notification and
         * zerocopy paths is torn down by their respective helpers. */
        rds_send_drop_to(rs, NULL);
        rds_rdma_drop_keys(rs);
        rds_notify_queue_get(rs, NULL);
        rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);

        /* Unlink from the global stats list.  list_del_init() leaves
         * rs_item pointing at itself, which is what rds_sock_destruct()
         * checks for. */
        spin_lock_bh(&rds_sock_lock);
        list_del_init(&rs->rs_item);
        rds_sock_count--;
        spin_unlock_bh(&rds_sock_lock);

        rds_trans_put(rs->rs_transport);

        /* Detach the sock from the socket before dropping our reference. */
        sock->sk = NULL;
        sock_put(sk);
out:
        return 0;
}
  95 
  96 /*
  97  * Careful not to race with rds_release -> sock_orphan which clears sk_sleep.
  98  * _bh() isn't OK here, we're called from interrupt handlers.  It's probably OK
  99  * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
 100  * this seems more conservative.
 101  * NB - normally, one would use sk_callback_lock for this, but we can
 102  * get here from interrupts, whereas the network code grabs sk_callback_lock
 103  * with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
 104  */
 105 void rds_wake_sk_sleep(struct rds_sock *rs)
 106 {
 107         unsigned long flags;
 108 
 109         read_lock_irqsave(&rs->rs_recv_lock, flags);
 110         __rds_wake_sk_sleep(rds_rs_to_sk(rs));
 111         read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 112 }
 113 
 114 static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 115                        int peer)
 116 {
 117         struct rds_sock *rs = rds_sk_to_rs(sock->sk);
 118         struct sockaddr_in6 *sin6;
 119         struct sockaddr_in *sin;
 120         int uaddr_len;
 121 
 122         /* racey, don't care */
 123         if (peer) {
 124                 if (ipv6_addr_any(&rs->rs_conn_addr))
 125                         return -ENOTCONN;
 126 
 127                 if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) {
 128                         sin = (struct sockaddr_in *)uaddr;
 129                         memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 130                         sin->sin_family = AF_INET;
 131                         sin->sin_port = rs->rs_conn_port;
 132                         sin->sin_addr.s_addr = rs->rs_conn_addr_v4;
 133                         uaddr_len = sizeof(*sin);
 134                 } else {
 135                         sin6 = (struct sockaddr_in6 *)uaddr;
 136                         sin6->sin6_family = AF_INET6;
 137                         sin6->sin6_port = rs->rs_conn_port;
 138                         sin6->sin6_addr = rs->rs_conn_addr;
 139                         sin6->sin6_flowinfo = 0;
 140                         /* scope_id is the same as in the bound address. */
 141                         sin6->sin6_scope_id = rs->rs_bound_scope_id;
 142                         uaddr_len = sizeof(*sin6);
 143                 }
 144         } else {
 145                 /* If socket is not yet bound and the socket is connected,
 146                  * set the return address family to be the same as the
 147                  * connected address, but with 0 address value.  If it is not
 148                  * connected, set the family to be AF_UNSPEC (value 0) and
 149                  * the address size to be that of an IPv4 address.
 150                  */
 151                 if (ipv6_addr_any(&rs->rs_bound_addr)) {
 152                         if (ipv6_addr_any(&rs->rs_conn_addr)) {
 153                                 sin = (struct sockaddr_in *)uaddr;
 154                                 memset(sin, 0, sizeof(*sin));
 155                                 sin->sin_family = AF_UNSPEC;
 156                                 return sizeof(*sin);
 157                         }
 158 
 159 #if IS_ENABLED(CONFIG_IPV6)
 160                         if (!(ipv6_addr_type(&rs->rs_conn_addr) &
 161                               IPV6_ADDR_MAPPED)) {
 162                                 sin6 = (struct sockaddr_in6 *)uaddr;
 163                                 memset(sin6, 0, sizeof(*sin6));
 164                                 sin6->sin6_family = AF_INET6;
 165                                 return sizeof(*sin6);
 166                         }
 167 #endif
 168 
 169                         sin = (struct sockaddr_in *)uaddr;
 170                         memset(sin, 0, sizeof(*sin));
 171                         sin->sin_family = AF_INET;
 172                         return sizeof(*sin);
 173                 }
 174                 if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
 175                         sin = (struct sockaddr_in *)uaddr;
 176                         memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 177                         sin->sin_family = AF_INET;
 178                         sin->sin_port = rs->rs_bound_port;
 179                         sin->sin_addr.s_addr = rs->rs_bound_addr_v4;
 180                         uaddr_len = sizeof(*sin);
 181                 } else {
 182                         sin6 = (struct sockaddr_in6 *)uaddr;
 183                         sin6->sin6_family = AF_INET6;
 184                         sin6->sin6_port = rs->rs_bound_port;
 185                         sin6->sin6_addr = rs->rs_bound_addr;
 186                         sin6->sin6_flowinfo = 0;
 187                         sin6->sin6_scope_id = rs->rs_bound_scope_id;
 188                         uaddr_len = sizeof(*sin6);
 189                 }
 190         }
 191 
 192         return uaddr_len;
 193 }
 194 
 195 /*
 196  * RDS' poll is without a doubt the least intuitive part of the interface,
 197  * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
 198  * a network protocol.
 199  *
 200  * EPOLLIN is asserted if
 201  *  -   there is data on the receive queue.
 202  *  -   to signal that a previously congested destination may have become
 203  *      uncongested
 204  *  -   A notification has been queued to the socket (this can be a congestion
 205  *      update, or a RDMA completion, or a MSG_ZEROCOPY completion).
 206  *
 207  * EPOLLOUT is asserted if there is room on the send queue. This does not mean
 208  * however, that the next sendmsg() call will succeed. If the application tries
 209  * to send to a congested destination, the system call may still fail (and
 210  * return ENOBUFS).
 211  */
 212 static __poll_t rds_poll(struct file *file, struct socket *sock,
 213                              poll_table *wait)
 214 {
 215         struct sock *sk = sock->sk;
 216         struct rds_sock *rs = rds_sk_to_rs(sk);
 217         __poll_t mask = 0;
 218         unsigned long flags;
 219 
 220         poll_wait(file, sk_sleep(sk), wait);
 221 
 222         if (rs->rs_seen_congestion)
 223                 poll_wait(file, &rds_poll_waitq, wait);
 224 
 225         read_lock_irqsave(&rs->rs_recv_lock, flags);
 226         if (!rs->rs_cong_monitor) {
 227                 /* When a congestion map was updated, we signal EPOLLIN for
 228                  * "historical" reasons. Applications can also poll for
 229                  * WRBAND instead. */
 230                 if (rds_cong_updated_since(&rs->rs_cong_track))
 231                         mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND);
 232         } else {
 233                 spin_lock(&rs->rs_lock);
 234                 if (rs->rs_cong_notify)
 235                         mask |= (EPOLLIN | EPOLLRDNORM);
 236                 spin_unlock(&rs->rs_lock);
 237         }
 238         if (!list_empty(&rs->rs_recv_queue) ||
 239             !list_empty(&rs->rs_notify_queue) ||
 240             !list_empty(&rs->rs_zcookie_queue.zcookie_head))
 241                 mask |= (EPOLLIN | EPOLLRDNORM);
 242         if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
 243                 mask |= (EPOLLOUT | EPOLLWRNORM);
 244         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 245                 mask |= POLLERR;
 246         read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 247 
 248         /* clear state any time we wake a seen-congested socket */
 249         if (mask)
 250                 rs->rs_seen_congestion = 0;
 251 
 252         return mask;
 253 }
 254 
 255 static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 256 {
 257         struct rds_sock *rs = rds_sk_to_rs(sock->sk);
 258         rds_tos_t utos, tos = 0;
 259 
 260         switch (cmd) {
 261         case SIOCRDSSETTOS:
 262                 if (get_user(utos, (rds_tos_t __user *)arg))
 263                         return -EFAULT;
 264 
 265                 if (rs->rs_transport &&
 266                     rs->rs_transport->get_tos_map)
 267                         tos = rs->rs_transport->get_tos_map(utos);
 268                 else
 269                         return -ENOIOCTLCMD;
 270 
 271                 spin_lock_bh(&rds_sock_lock);
 272                 if (rs->rs_tos || rs->rs_conn) {
 273                         spin_unlock_bh(&rds_sock_lock);
 274                         return -EINVAL;
 275                 }
 276                 rs->rs_tos = tos;
 277                 spin_unlock_bh(&rds_sock_lock);
 278                 break;
 279         case SIOCRDSGETTOS:
 280                 spin_lock_bh(&rds_sock_lock);
 281                 tos = rs->rs_tos;
 282                 spin_unlock_bh(&rds_sock_lock);
 283                 if (put_user(tos, (rds_tos_t __user *)arg))
 284                         return -EFAULT;
 285                 break;
 286         default:
 287                 return -ENOIOCTLCMD;
 288         }
 289 
 290         return 0;
 291 }
 292 
 293 static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
 294                               int len)
 295 {
 296         struct sockaddr_in6 sin6;
 297         struct sockaddr_in sin;
 298         int ret = 0;
 299 
 300         /* racing with another thread binding seems ok here */
 301         if (ipv6_addr_any(&rs->rs_bound_addr)) {
 302                 ret = -ENOTCONN; /* XXX not a great errno */
 303                 goto out;
 304         }
 305 
 306         if (len < sizeof(struct sockaddr_in)) {
 307                 ret = -EINVAL;
 308                 goto out;
 309         } else if (len < sizeof(struct sockaddr_in6)) {
 310                 /* Assume IPv4 */
 311                 if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
 312                         ret = -EFAULT;
 313                         goto out;
 314                 }
 315                 ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
 316                 sin6.sin6_port = sin.sin_port;
 317         } else {
 318                 if (copy_from_user(&sin6, optval,
 319                                    sizeof(struct sockaddr_in6))) {
 320                         ret = -EFAULT;
 321                         goto out;
 322                 }
 323         }
 324 
 325         rds_send_drop_to(rs, &sin6);
 326 out:
 327         return ret;
 328 }
 329 
 330 static int rds_set_bool_option(unsigned char *optvar, char __user *optval,
 331                                int optlen)
 332 {
 333         int value;
 334 
 335         if (optlen < sizeof(int))
 336                 return -EINVAL;
 337         if (get_user(value, (int __user *) optval))
 338                 return -EFAULT;
 339         *optvar = !!value;
 340         return 0;
 341 }
 342 
 343 static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
 344                             int optlen)
 345 {
 346         int ret;
 347 
 348         ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen);
 349         if (ret == 0) {
 350                 if (rs->rs_cong_monitor) {
 351                         rds_cong_add_socket(rs);
 352                 } else {
 353                         rds_cong_remove_socket(rs);
 354                         rs->rs_cong_mask = 0;
 355                         rs->rs_cong_notify = 0;
 356                 }
 357         }
 358         return ret;
 359 }
 360 
 361 static int rds_set_transport(struct rds_sock *rs, char __user *optval,
 362                              int optlen)
 363 {
 364         int t_type;
 365 
 366         if (rs->rs_transport)
 367                 return -EOPNOTSUPP; /* previously attached to transport */
 368 
 369         if (optlen != sizeof(int))
 370                 return -EINVAL;
 371 
 372         if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
 373                 return -EFAULT;
 374 
 375         if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
 376                 return -EINVAL;
 377 
 378         rs->rs_transport = rds_trans_get(t_type);
 379 
 380         return rs->rs_transport ? 0 : -ENOPROTOOPT;
 381 }
 382 
 383 static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
 384                                  int optlen, int optname)
 385 {
 386         int val, valbool;
 387 
 388         if (optlen != sizeof(int))
 389                 return -EFAULT;
 390 
 391         if (get_user(val, (int __user *)optval))
 392                 return -EFAULT;
 393 
 394         valbool = val ? 1 : 0;
 395 
 396         if (optname == SO_TIMESTAMP_NEW)
 397                 sock_set_flag(sk, SOCK_TSTAMP_NEW);
 398 
 399         if (valbool)
 400                 sock_set_flag(sk, SOCK_RCVTSTAMP);
 401         else
 402                 sock_reset_flag(sk, SOCK_RCVTSTAMP);
 403 
 404         return 0;
 405 }
 406 
 407 static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
 408                                   int optlen)
 409 {
 410         struct rds_rx_trace_so trace;
 411         int i;
 412 
 413         if (optlen != sizeof(struct rds_rx_trace_so))
 414                 return -EFAULT;
 415 
 416         if (copy_from_user(&trace, optval, sizeof(trace)))
 417                 return -EFAULT;
 418 
 419         if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
 420                 return -EFAULT;
 421 
 422         rs->rs_rx_traces = trace.rx_traces;
 423         for (i = 0; i < rs->rs_rx_traces; i++) {
 424                 if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
 425                         rs->rs_rx_traces = 0;
 426                         return -EFAULT;
 427                 }
 428                 rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
 429         }
 430 
 431         return 0;
 432 }
 433 
 434 static int rds_setsockopt(struct socket *sock, int level, int optname,
 435                           char __user *optval, unsigned int optlen)
 436 {
 437         struct rds_sock *rs = rds_sk_to_rs(sock->sk);
 438         int ret;
 439 
 440         if (level != SOL_RDS) {
 441                 ret = -ENOPROTOOPT;
 442                 goto out;
 443         }
 444 
 445         switch (optname) {
 446         case RDS_CANCEL_SENT_TO:
 447                 ret = rds_cancel_sent_to(rs, optval, optlen);
 448                 break;
 449         case RDS_GET_MR:
 450                 ret = rds_get_mr(rs, optval, optlen);
 451                 break;
 452         case RDS_GET_MR_FOR_DEST:
 453                 ret = rds_get_mr_for_dest(rs, optval, optlen);
 454                 break;
 455         case RDS_FREE_MR:
 456                 ret = rds_free_mr(rs, optval, optlen);
 457                 break;
 458         case RDS_RECVERR:
 459                 ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen);
 460                 break;
 461         case RDS_CONG_MONITOR:
 462                 ret = rds_cong_monitor(rs, optval, optlen);
 463                 break;
 464         case SO_RDS_TRANSPORT:
 465                 lock_sock(sock->sk);
 466                 ret = rds_set_transport(rs, optval, optlen);
 467                 release_sock(sock->sk);
 468                 break;
 469         case SO_TIMESTAMP_OLD:
 470         case SO_TIMESTAMP_NEW:
 471                 lock_sock(sock->sk);
 472                 ret = rds_enable_recvtstamp(sock->sk, optval, optlen, optname);
 473                 release_sock(sock->sk);
 474                 break;
 475         case SO_RDS_MSG_RXPATH_LATENCY:
 476                 ret = rds_recv_track_latency(rs, optval, optlen);
 477                 break;
 478         default:
 479                 ret = -ENOPROTOOPT;
 480         }
 481 out:
 482         return ret;
 483 }
 484 
 485 static int rds_getsockopt(struct socket *sock, int level, int optname,
 486                           char __user *optval, int __user *optlen)
 487 {
 488         struct rds_sock *rs = rds_sk_to_rs(sock->sk);
 489         int ret = -ENOPROTOOPT, len;
 490         int trans;
 491 
 492         if (level != SOL_RDS)
 493                 goto out;
 494 
 495         if (get_user(len, optlen)) {
 496                 ret = -EFAULT;
 497                 goto out;
 498         }
 499 
 500         switch (optname) {
 501         case RDS_INFO_FIRST ... RDS_INFO_LAST:
 502                 ret = rds_info_getsockopt(sock, optname, optval,
 503                                           optlen);
 504                 break;
 505 
 506         case RDS_RECVERR:
 507                 if (len < sizeof(int))
 508                         ret = -EINVAL;
 509                 else
 510                 if (put_user(rs->rs_recverr, (int __user *) optval) ||
 511                     put_user(sizeof(int), optlen))
 512                         ret = -EFAULT;
 513                 else
 514                         ret = 0;
 515                 break;
 516         case SO_RDS_TRANSPORT:
 517                 if (len < sizeof(int)) {
 518                         ret = -EINVAL;
 519                         break;
 520                 }
 521                 trans = (rs->rs_transport ? rs->rs_transport->t_type :
 522                          RDS_TRANS_NONE); /* unbound */
 523                 if (put_user(trans, (int __user *)optval) ||
 524                     put_user(sizeof(int), optlen))
 525                         ret = -EFAULT;
 526                 else
 527                         ret = 0;
 528                 break;
 529         default:
 530                 break;
 531         }
 532 
 533 out:
 534         return ret;
 535 
 536 }
 537 
 538 static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 539                        int addr_len, int flags)
 540 {
 541         struct sock *sk = sock->sk;
 542         struct sockaddr_in *sin;
 543         struct rds_sock *rs = rds_sk_to_rs(sk);
 544         int ret = 0;
 545 
 546         if (addr_len < offsetofend(struct sockaddr, sa_family))
 547                 return -EINVAL;
 548 
 549         lock_sock(sk);
 550 
 551         switch (uaddr->sa_family) {
 552         case AF_INET:
 553                 sin = (struct sockaddr_in *)uaddr;
 554                 if (addr_len < sizeof(struct sockaddr_in)) {
 555                         ret = -EINVAL;
 556                         break;
 557                 }
 558                 if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
 559                         ret = -EDESTADDRREQ;
 560                         break;
 561                 }
 562                 if (ipv4_is_multicast(sin->sin_addr.s_addr) ||
 563                     sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
 564                         ret = -EINVAL;
 565                         break;
 566                 }
 567                 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr);
 568                 rs->rs_conn_port = sin->sin_port;
 569                 break;
 570 
 571 #if IS_ENABLED(CONFIG_IPV6)
 572         case AF_INET6: {
 573                 struct sockaddr_in6 *sin6;
 574                 int addr_type;
 575 
 576                 sin6 = (struct sockaddr_in6 *)uaddr;
 577                 if (addr_len < sizeof(struct sockaddr_in6)) {
 578                         ret = -EINVAL;
 579                         break;
 580                 }
 581                 addr_type = ipv6_addr_type(&sin6->sin6_addr);
 582                 if (!(addr_type & IPV6_ADDR_UNICAST)) {
 583                         __be32 addr4;
 584 
 585                         if (!(addr_type & IPV6_ADDR_MAPPED)) {
 586                                 ret = -EPROTOTYPE;
 587                                 break;
 588                         }
 589 
 590                         /* It is a mapped address.  Need to do some sanity
 591                          * checks.
 592                          */
 593                         addr4 = sin6->sin6_addr.s6_addr32[3];
 594                         if (addr4 == htonl(INADDR_ANY) ||
 595                             addr4 == htonl(INADDR_BROADCAST) ||
 596                             ipv4_is_multicast(addr4)) {
 597                                 ret = -EPROTOTYPE;
 598                                 break;
 599                         }
 600                 }
 601 
 602                 if (addr_type & IPV6_ADDR_LINKLOCAL) {
 603                         /* If socket is arleady bound to a link local address,
 604                          * the peer address must be on the same link.
 605                          */
 606                         if (sin6->sin6_scope_id == 0 ||
 607                             (!ipv6_addr_any(&rs->rs_bound_addr) &&
 608                              rs->rs_bound_scope_id &&
 609                              sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
 610                                 ret = -EINVAL;
 611                                 break;
 612                         }
 613                         /* Remember the connected address scope ID.  It will
 614                          * be checked against the binding local address when
 615                          * the socket is bound.
 616                          */
 617                         rs->rs_bound_scope_id = sin6->sin6_scope_id;
 618                 }
 619                 rs->rs_conn_addr = sin6->sin6_addr;
 620                 rs->rs_conn_port = sin6->sin6_port;
 621                 break;
 622         }
 623 #endif
 624 
 625         default:
 626                 ret = -EAFNOSUPPORT;
 627                 break;
 628         }
 629 
 630         release_sock(sk);
 631         return ret;
 632 }
 633 
/*
 * Protocol descriptor handed to sk_alloc() by rds_create().  obj_size is the
 * size of struct rds_sock, the per-socket object that rds_sk_to_rs()
 * recovers from a struct sock.
 */
static struct proto rds_proto = {
        .name     = "RDS",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct rds_sock),
};
 639 
/*
 * Socket-layer operations installed on every RDS socket by __rds_create().
 * Operations without an RDS implementation (socketpair, accept, listen,
 * shutdown, mmap, sendpage) point at the generic sock_no_*() stubs.
 * rds_bind, rds_sendmsg and rds_recvmsg are defined outside this file.
 */
static const struct proto_ops rds_proto_ops = {
        .family =       AF_RDS,
        .owner =        THIS_MODULE,
        .release =      rds_release,
        .bind =         rds_bind,
        .connect =      rds_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      rds_getname,
        .poll =         rds_poll,
        .ioctl =        rds_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   rds_setsockopt,
        .getsockopt =   rds_getsockopt,
        .sendmsg =      rds_sendmsg,
        .recvmsg =      rds_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
 660 
 661 static void rds_sock_destruct(struct sock *sk)
 662 {
 663         struct rds_sock *rs = rds_sk_to_rs(sk);
 664 
 665         WARN_ON((&rs->rs_item != rs->rs_item.next ||
 666                  &rs->rs_item != rs->rs_item.prev));
 667 }
 668 
 669 static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 670 {
 671         struct rds_sock *rs;
 672 
 673         sock_init_data(sock, sk);
 674         sock->ops               = &rds_proto_ops;
 675         sk->sk_protocol         = protocol;
 676         sk->sk_destruct         = rds_sock_destruct;
 677 
 678         rs = rds_sk_to_rs(sk);
 679         spin_lock_init(&rs->rs_lock);
 680         rwlock_init(&rs->rs_recv_lock);
 681         INIT_LIST_HEAD(&rs->rs_send_queue);
 682         INIT_LIST_HEAD(&rs->rs_recv_queue);
 683         INIT_LIST_HEAD(&rs->rs_notify_queue);
 684         INIT_LIST_HEAD(&rs->rs_cong_list);
 685         rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
 686         spin_lock_init(&rs->rs_rdma_lock);
 687         rs->rs_rdma_keys = RB_ROOT;
 688         rs->rs_rx_traces = 0;
 689         rs->rs_tos = 0;
 690         rs->rs_conn = NULL;
 691 
 692         spin_lock_bh(&rds_sock_lock);
 693         list_add_tail(&rs->rs_item, &rds_sock_list);
 694         rds_sock_count++;
 695         spin_unlock_bh(&rds_sock_lock);
 696 
 697         return 0;
 698 }
 699 
 700 static int rds_create(struct net *net, struct socket *sock, int protocol,
 701                       int kern)
 702 {
 703         struct sock *sk;
 704 
 705         if (sock->type != SOCK_SEQPACKET || protocol)
 706                 return -ESOCKTNOSUPPORT;
 707 
 708         sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
 709         if (!sk)
 710                 return -ENOMEM;
 711 
 712         return __rds_create(sock, sk, protocol);
 713 }
 714 
 715 void rds_sock_addref(struct rds_sock *rs)
 716 {
 717         sock_hold(rds_rs_to_sk(rs));
 718 }
 719 
 720 void rds_sock_put(struct rds_sock *rs)
 721 {
 722         sock_put(rds_rs_to_sk(rs));
 723 }
 724 
/*
 * Address-family descriptor for AF_RDS: socket creation for this family is
 * routed to rds_create().  Presumably registered from rds_init(), which is
 * not visible in this part of the file.
 */
static const struct net_proto_family rds_family_ops = {
        .family =       AF_RDS,
        .create =       rds_create,
        .owner  =       THIS_MODULE,
};
 730 
 731 static void rds_sock_inc_info(struct socket *sock, unsigned int len,
 732                               struct rds_info_iterator *iter,
 733                               struct rds_info_lengths *lens)
 734 {
 735         struct rds_sock *rs;
 736         struct rds_incoming *inc;
 737         unsigned int total = 0;
 738 
 739         len /= sizeof(struct rds_info_message);
 740 
 741         spin_lock_bh(&rds_sock_lock);
 742 
 743         list_for_each_entry(rs, &rds_sock_list, rs_item) {
 744                 /* This option only supports IPv4 sockets. */
 745                 if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
 746                         continue;
 747 
 748                 read_lock(&rs->rs_recv_lock);
 749 
 750                 /* XXX too lazy to maintain counts.. */
 751                 list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
 752                         total++;
 753                         if (total <= len)
 754                                 rds_inc_info_copy(inc, iter,
 755                                                   inc->i_saddr.s6_addr32[3],
 756                                                   rs->rs_bound_addr_v4,
 757                                                   1);
 758                 }
 759 
 760                 read_unlock(&rs->rs_recv_lock);
 761         }
 762 
 763         spin_unlock_bh(&rds_sock_lock);
 764 
 765         lens->nr = total;
 766         lens->each = sizeof(struct rds_info_message);
 767 }
 768 
 769 #if IS_ENABLED(CONFIG_IPV6)
 770 static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
 771                                struct rds_info_iterator *iter,
 772                                struct rds_info_lengths *lens)
 773 {
 774         struct rds_incoming *inc;
 775         unsigned int total = 0;
 776         struct rds_sock *rs;
 777 
 778         len /= sizeof(struct rds6_info_message);
 779 
 780         spin_lock_bh(&rds_sock_lock);
 781 
 782         list_for_each_entry(rs, &rds_sock_list, rs_item) {
 783                 read_lock(&rs->rs_recv_lock);
 784 
 785                 list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
 786                         total++;
 787                         if (total <= len)
 788                                 rds6_inc_info_copy(inc, iter, &inc->i_saddr,
 789                                                    &rs->rs_bound_addr, 1);
 790                 }
 791 
 792                 read_unlock(&rs->rs_recv_lock);
 793         }
 794 
 795         spin_unlock_bh(&rds_sock_lock);
 796 
 797         lens->nr = total;
 798         lens->each = sizeof(struct rds6_info_message);
 799 }
 800 #endif
 801 
 802 static void rds_sock_info(struct socket *sock, unsigned int len,
 803                           struct rds_info_iterator *iter,
 804                           struct rds_info_lengths *lens)
 805 {
 806         struct rds_info_socket sinfo;
 807         unsigned int cnt = 0;
 808         struct rds_sock *rs;
 809 
 810         len /= sizeof(struct rds_info_socket);
 811 
 812         spin_lock_bh(&rds_sock_lock);
 813 
 814         if (len < rds_sock_count) {
 815                 cnt = rds_sock_count;
 816                 goto out;
 817         }
 818 
 819         list_for_each_entry(rs, &rds_sock_list, rs_item) {
 820                 /* This option only supports IPv4 sockets. */
 821                 if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
 822                         continue;
 823                 sinfo.sndbuf = rds_sk_sndbuf(rs);
 824                 sinfo.rcvbuf = rds_sk_rcvbuf(rs);
 825                 sinfo.bound_addr = rs->rs_bound_addr_v4;
 826                 sinfo.connected_addr = rs->rs_conn_addr_v4;
 827                 sinfo.bound_port = rs->rs_bound_port;
 828                 sinfo.connected_port = rs->rs_conn_port;
 829                 sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
 830 
 831                 rds_info_copy(iter, &sinfo, sizeof(sinfo));
 832                 cnt++;
 833         }
 834 
 835 out:
 836         lens->nr = cnt;
 837         lens->each = sizeof(struct rds_info_socket);
 838 
 839         spin_unlock_bh(&rds_sock_lock);
 840 }
 841 
 842 #if IS_ENABLED(CONFIG_IPV6)
 843 static void rds6_sock_info(struct socket *sock, unsigned int len,
 844                            struct rds_info_iterator *iter,
 845                            struct rds_info_lengths *lens)
 846 {
 847         struct rds6_info_socket sinfo6;
 848         struct rds_sock *rs;
 849 
 850         len /= sizeof(struct rds6_info_socket);
 851 
 852         spin_lock_bh(&rds_sock_lock);
 853 
 854         if (len < rds_sock_count)
 855                 goto out;
 856 
 857         list_for_each_entry(rs, &rds_sock_list, rs_item) {
 858                 sinfo6.sndbuf = rds_sk_sndbuf(rs);
 859                 sinfo6.rcvbuf = rds_sk_rcvbuf(rs);
 860                 sinfo6.bound_addr = rs->rs_bound_addr;
 861                 sinfo6.connected_addr = rs->rs_conn_addr;
 862                 sinfo6.bound_port = rs->rs_bound_port;
 863                 sinfo6.connected_port = rs->rs_conn_port;
 864                 sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs));
 865 
 866                 rds_info_copy(iter, &sinfo6, sizeof(sinfo6));
 867         }
 868 
 869  out:
 870         lens->nr = rds_sock_count;
 871         lens->each = sizeof(struct rds6_info_socket);
 872 
 873         spin_unlock_bh(&rds_sock_lock);
 874 }
 875 #endif
 876 
 877 static void rds_exit(void)
 878 {
 879         sock_unregister(rds_family_ops.family);
 880         proto_unregister(&rds_proto);
 881         rds_conn_exit();
 882         rds_cong_exit();
 883         rds_sysctl_exit();
 884         rds_threads_exit();
 885         rds_stats_exit();
 886         rds_page_exit();
 887         rds_bind_lock_destroy();
 888         rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
 889         rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
 890 #if IS_ENABLED(CONFIG_IPV6)
 891         rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info);
 892         rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
 893 #endif
 894 }
 895 module_exit(rds_exit);
 896 
 897 u32 rds_gen_num;
 898 
 899 static int rds_init(void)
 900 {
 901         int ret;
 902 
 903         net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
 904 
 905         ret = rds_bind_lock_init();
 906         if (ret)
 907                 goto out;
 908 
 909         ret = rds_conn_init();
 910         if (ret)
 911                 goto out_bind;
 912 
 913         ret = rds_threads_init();
 914         if (ret)
 915                 goto out_conn;
 916         ret = rds_sysctl_init();
 917         if (ret)
 918                 goto out_threads;
 919         ret = rds_stats_init();
 920         if (ret)
 921                 goto out_sysctl;
 922         ret = proto_register(&rds_proto, 1);
 923         if (ret)
 924                 goto out_stats;
 925         ret = sock_register(&rds_family_ops);
 926         if (ret)
 927                 goto out_proto;
 928 
 929         rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
 930         rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
 931 #if IS_ENABLED(CONFIG_IPV6)
 932         rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info);
 933         rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
 934 #endif
 935 
 936         goto out;
 937 
 938 out_proto:
 939         proto_unregister(&rds_proto);
 940 out_stats:
 941         rds_stats_exit();
 942 out_sysctl:
 943         rds_sysctl_exit();
 944 out_threads:
 945         rds_threads_exit();
 946 out_conn:
 947         rds_conn_exit();
 948         rds_cong_exit();
 949         rds_page_exit();
 950 out_bind:
 951         rds_bind_lock_destroy();
 952 out:
 953         return ret;
 954 }
 955 module_init(rds_init);
 956 
 957 #define DRV_VERSION     "4.0"
 958 #define DRV_RELDATE     "Feb 12, 2009"
 959 
 960 MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
 961 MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets"
 962                    " v" DRV_VERSION " (" DRV_RELDATE ")");
 963 MODULE_VERSION(DRV_VERSION);
 964 MODULE_LICENSE("Dual BSD/GPL");
 965 MODULE_ALIAS_NETPROTO(PF_RDS);

/* [<][>][^][v][top][bottom][index][help] */