root/net/ipv4/ip_sockglue.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_cmsg_recv_pktinfo
  2. ip_cmsg_recv_ttl
  3. ip_cmsg_recv_tos
  4. ip_cmsg_recv_opts
  5. ip_cmsg_recv_retopts
  6. ip_cmsg_recv_fragsize
  7. ip_cmsg_recv_checksum
  8. ip_cmsg_recv_security
  9. ip_cmsg_recv_dstaddr
  10. ip_cmsg_recv_offset
  11. ip_cmsg_send
  12. ip_ra_destroy_rcu
  13. ip_ra_control
  14. ip_icmp_error
  15. ip_local_error
  16. ipv4_datagram_support_addr
  17. ipv4_datagram_support_cmsg
  18. ip_recv_error
  19. setsockopt_needs_rtnl
  20. do_ip_setsockopt
  21. ipv4_pktinfo_prepare
  22. ip_setsockopt
  23. compat_ip_setsockopt
  24. getsockopt_needs_rtnl
  25. do_ip_getsockopt
  26. ip_getsockopt
  27. compat_ip_getsockopt

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4  *              operating system.  INET is implemented using the  BSD Socket
   5  *              interface as the means of communication with the user level.
   6  *
   7  *              The IP to API glue.
   8  *
   9  * Authors:     see ip.c
  10  *
  11  * Fixes:
  12  *              Many            :       Split from ip.c , see ip.c for history.
  13  *              Martin Mares    :       TOS setting fixed.
  14  *              Alan Cox        :       Fixed a couple of oopses in Martin's
  15  *                                      TOS tweaks.
  16  *              Mike McLagan    :       Routing by source
  17  */
  18 
  19 #include <linux/module.h>
  20 #include <linux/types.h>
  21 #include <linux/mm.h>
  22 #include <linux/skbuff.h>
  23 #include <linux/ip.h>
  24 #include <linux/icmp.h>
  25 #include <linux/inetdevice.h>
  26 #include <linux/netdevice.h>
  27 #include <linux/slab.h>
  28 #include <net/sock.h>
  29 #include <net/ip.h>
  30 #include <net/icmp.h>
  31 #include <net/tcp_states.h>
  32 #include <linux/udp.h>
  33 #include <linux/igmp.h>
  34 #include <linux/netfilter.h>
  35 #include <linux/route.h>
  36 #include <linux/mroute.h>
  37 #include <net/inet_ecn.h>
  38 #include <net/route.h>
  39 #include <net/xfrm.h>
  40 #include <net/compat.h>
  41 #include <net/checksum.h>
  42 #if IS_ENABLED(CONFIG_IPV6)
  43 #include <net/transp_v6.h>
  44 #endif
  45 #include <net/ip_fib.h>
  46 
  47 #include <linux/errqueue.h>
  48 #include <linux/uaccess.h>
  49 
  50 #include <linux/bpfilter.h>
  51 
  52 /*
  53  *      SOL_IP control messages.
  54  */
  55 
  56 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
  57 {
  58         struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
  59 
  60         info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
  61 
  62         put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
  63 }
  64 
  65 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
  66 {
  67         int ttl = ip_hdr(skb)->ttl;
  68         put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
  69 }
  70 
  71 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
  72 {
  73         put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
  74 }
  75 
  76 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
  77 {
  78         if (IPCB(skb)->opt.optlen == 0)
  79                 return;
  80 
  81         put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
  82                  ip_hdr(skb) + 1);
  83 }
  84 
  85 
  86 static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
  87                                  struct sk_buff *skb)
  88 {
  89         unsigned char optbuf[sizeof(struct ip_options) + 40];
  90         struct ip_options *opt = (struct ip_options *)optbuf;
  91 
  92         if (IPCB(skb)->opt.optlen == 0)
  93                 return;
  94 
  95         if (ip_options_echo(net, opt, skb)) {
  96                 msg->msg_flags |= MSG_CTRUNC;
  97                 return;
  98         }
  99         ip_options_undo(opt);
 100 
 101         put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
 102 }
 103 
 104 static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
 105 {
 106         int val;
 107 
 108         if (IPCB(skb)->frag_max_size == 0)
 109                 return;
 110 
 111         val = IPCB(skb)->frag_max_size;
 112         put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
 113 }
 114 
 115 static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
 116                                   int tlen, int offset)
 117 {
 118         __wsum csum = skb->csum;
 119 
 120         if (skb->ip_summed != CHECKSUM_COMPLETE)
 121                 return;
 122 
 123         if (offset != 0) {
 124                 int tend_off = skb_transport_offset(skb) + tlen;
 125                 csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
 126         }
 127 
 128         put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
 129 }
 130 
 131 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 132 {
 133         char *secdata;
 134         u32 seclen, secid;
 135         int err;
 136 
 137         err = security_socket_getpeersec_dgram(NULL, skb, &secid);
 138         if (err)
 139                 return;
 140 
 141         err = security_secid_to_secctx(secid, &secdata, &seclen);
 142         if (err)
 143                 return;
 144 
 145         put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
 146         security_release_secctx(secdata, seclen);
 147 }
 148 
 149 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 150 {
 151         __be16 _ports[2], *ports;
 152         struct sockaddr_in sin;
 153 
 154         /* All current transport protocols have the port numbers in the
 155          * first four bytes of the transport header and this function is
 156          * written with this assumption in mind.
 157          */
 158         ports = skb_header_pointer(skb, skb_transport_offset(skb),
 159                                    sizeof(_ports), &_ports);
 160         if (!ports)
 161                 return;
 162 
 163         sin.sin_family = AF_INET;
 164         sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
 165         sin.sin_port = ports[1];
 166         memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 167 
 168         put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
 169 }
 170 
 171 void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 172                          struct sk_buff *skb, int tlen, int offset)
 173 {
 174         struct inet_sock *inet = inet_sk(sk);
 175         unsigned int flags = inet->cmsg_flags;
 176 
 177         /* Ordered by supposed usage frequency */
 178         if (flags & IP_CMSG_PKTINFO) {
 179                 ip_cmsg_recv_pktinfo(msg, skb);
 180 
 181                 flags &= ~IP_CMSG_PKTINFO;
 182                 if (!flags)
 183                         return;
 184         }
 185 
 186         if (flags & IP_CMSG_TTL) {
 187                 ip_cmsg_recv_ttl(msg, skb);
 188 
 189                 flags &= ~IP_CMSG_TTL;
 190                 if (!flags)
 191                         return;
 192         }
 193 
 194         if (flags & IP_CMSG_TOS) {
 195                 ip_cmsg_recv_tos(msg, skb);
 196 
 197                 flags &= ~IP_CMSG_TOS;
 198                 if (!flags)
 199                         return;
 200         }
 201 
 202         if (flags & IP_CMSG_RECVOPTS) {
 203                 ip_cmsg_recv_opts(msg, skb);
 204 
 205                 flags &= ~IP_CMSG_RECVOPTS;
 206                 if (!flags)
 207                         return;
 208         }
 209 
 210         if (flags & IP_CMSG_RETOPTS) {
 211                 ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
 212 
 213                 flags &= ~IP_CMSG_RETOPTS;
 214                 if (!flags)
 215                         return;
 216         }
 217 
 218         if (flags & IP_CMSG_PASSSEC) {
 219                 ip_cmsg_recv_security(msg, skb);
 220 
 221                 flags &= ~IP_CMSG_PASSSEC;
 222                 if (!flags)
 223                         return;
 224         }
 225 
 226         if (flags & IP_CMSG_ORIGDSTADDR) {
 227                 ip_cmsg_recv_dstaddr(msg, skb);
 228 
 229                 flags &= ~IP_CMSG_ORIGDSTADDR;
 230                 if (!flags)
 231                         return;
 232         }
 233 
 234         if (flags & IP_CMSG_CHECKSUM)
 235                 ip_cmsg_recv_checksum(msg, skb, tlen, offset);
 236 
 237         if (flags & IP_CMSG_RECVFRAGSIZE)
 238                 ip_cmsg_recv_fragsize(msg, skb);
 239 }
 240 EXPORT_SYMBOL(ip_cmsg_recv_offset);
 241 
 242 int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 243                  bool allow_ipv6)
 244 {
 245         int err, val;
 246         struct cmsghdr *cmsg;
 247         struct net *net = sock_net(sk);
 248 
 249         for_each_cmsghdr(cmsg, msg) {
 250                 if (!CMSG_OK(msg, cmsg))
 251                         return -EINVAL;
 252 #if IS_ENABLED(CONFIG_IPV6)
 253                 if (allow_ipv6 &&
 254                     cmsg->cmsg_level == SOL_IPV6 &&
 255                     cmsg->cmsg_type == IPV6_PKTINFO) {
 256                         struct in6_pktinfo *src_info;
 257 
 258                         if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
 259                                 return -EINVAL;
 260                         src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 261                         if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 262                                 return -EINVAL;
 263                         if (src_info->ipi6_ifindex)
 264                                 ipc->oif = src_info->ipi6_ifindex;
 265                         ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 266                         continue;
 267                 }
 268 #endif
 269                 if (cmsg->cmsg_level == SOL_SOCKET) {
 270                         err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
 271                         if (err)
 272                                 return err;
 273                         continue;
 274                 }
 275 
 276                 if (cmsg->cmsg_level != SOL_IP)
 277                         continue;
 278                 switch (cmsg->cmsg_type) {
 279                 case IP_RETOPTS:
 280                         err = cmsg->cmsg_len - sizeof(struct cmsghdr);
 281 
 282                         /* Our caller is responsible for freeing ipc->opt */
 283                         err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
 284                                              err < 40 ? err : 40);
 285                         if (err)
 286                                 return err;
 287                         break;
 288                 case IP_PKTINFO:
 289                 {
 290                         struct in_pktinfo *info;
 291                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 292                                 return -EINVAL;
 293                         info = (struct in_pktinfo *)CMSG_DATA(cmsg);
 294                         if (info->ipi_ifindex)
 295                                 ipc->oif = info->ipi_ifindex;
 296                         ipc->addr = info->ipi_spec_dst.s_addr;
 297                         break;
 298                 }
 299                 case IP_TTL:
 300                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
 301                                 return -EINVAL;
 302                         val = *(int *)CMSG_DATA(cmsg);
 303                         if (val < 1 || val > 255)
 304                                 return -EINVAL;
 305                         ipc->ttl = val;
 306                         break;
 307                 case IP_TOS:
 308                         if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
 309                                 val = *(int *)CMSG_DATA(cmsg);
 310                         else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
 311                                 val = *(u8 *)CMSG_DATA(cmsg);
 312                         else
 313                                 return -EINVAL;
 314                         if (val < 0 || val > 255)
 315                                 return -EINVAL;
 316                         ipc->tos = val;
 317                         ipc->priority = rt_tos2priority(ipc->tos);
 318                         break;
 319 
 320                 default:
 321                         return -EINVAL;
 322                 }
 323         }
 324         return 0;
 325 }
 326 
 327 static void ip_ra_destroy_rcu(struct rcu_head *head)
 328 {
 329         struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
 330 
 331         sock_put(ra->saved_sk);
 332         kfree(ra);
 333 }
 334 
 335 int ip_ra_control(struct sock *sk, unsigned char on,
 336                   void (*destructor)(struct sock *))
 337 {
 338         struct ip_ra_chain *ra, *new_ra;
 339         struct ip_ra_chain __rcu **rap;
 340         struct net *net = sock_net(sk);
 341 
 342         if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
 343                 return -EINVAL;
 344 
 345         new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 346         if (on && !new_ra)
 347                 return -ENOMEM;
 348 
 349         mutex_lock(&net->ipv4.ra_mutex);
 350         for (rap = &net->ipv4.ra_chain;
 351              (ra = rcu_dereference_protected(*rap,
 352                         lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
 353              rap = &ra->next) {
 354                 if (ra->sk == sk) {
 355                         if (on) {
 356                                 mutex_unlock(&net->ipv4.ra_mutex);
 357                                 kfree(new_ra);
 358                                 return -EADDRINUSE;
 359                         }
 360                         /* dont let ip_call_ra_chain() use sk again */
 361                         ra->sk = NULL;
 362                         RCU_INIT_POINTER(*rap, ra->next);
 363                         mutex_unlock(&net->ipv4.ra_mutex);
 364 
 365                         if (ra->destructor)
 366                                 ra->destructor(sk);
 367                         /*
 368                          * Delay sock_put(sk) and kfree(ra) after one rcu grace
 369                          * period. This guarantee ip_call_ra_chain() dont need
 370                          * to mess with socket refcounts.
 371                          */
 372                         ra->saved_sk = sk;
 373                         call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 374                         return 0;
 375                 }
 376         }
 377         if (!new_ra) {
 378                 mutex_unlock(&net->ipv4.ra_mutex);
 379                 return -ENOBUFS;
 380         }
 381         new_ra->sk = sk;
 382         new_ra->destructor = destructor;
 383 
 384         RCU_INIT_POINTER(new_ra->next, ra);
 385         rcu_assign_pointer(*rap, new_ra);
 386         sock_hold(sk);
 387         mutex_unlock(&net->ipv4.ra_mutex);
 388 
 389         return 0;
 390 }
 391 
 392 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 393                    __be16 port, u32 info, u8 *payload)
 394 {
 395         struct sock_exterr_skb *serr;
 396 
 397         skb = skb_clone(skb, GFP_ATOMIC);
 398         if (!skb)
 399                 return;
 400 
 401         serr = SKB_EXT_ERR(skb);
 402         serr->ee.ee_errno = err;
 403         serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
 404         serr->ee.ee_type = icmp_hdr(skb)->type;
 405         serr->ee.ee_code = icmp_hdr(skb)->code;
 406         serr->ee.ee_pad = 0;
 407         serr->ee.ee_info = info;
 408         serr->ee.ee_data = 0;
 409         serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
 410                                    skb_network_header(skb);
 411         serr->port = port;
 412 
 413         if (skb_pull(skb, payload - skb->data)) {
 414                 skb_reset_transport_header(skb);
 415                 if (sock_queue_err_skb(sk, skb) == 0)
 416                         return;
 417         }
 418         kfree_skb(skb);
 419 }
 420 
 421 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
 422 {
 423         struct inet_sock *inet = inet_sk(sk);
 424         struct sock_exterr_skb *serr;
 425         struct iphdr *iph;
 426         struct sk_buff *skb;
 427 
 428         if (!inet->recverr)
 429                 return;
 430 
 431         skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
 432         if (!skb)
 433                 return;
 434 
 435         skb_put(skb, sizeof(struct iphdr));
 436         skb_reset_network_header(skb);
 437         iph = ip_hdr(skb);
 438         iph->daddr = daddr;
 439 
 440         serr = SKB_EXT_ERR(skb);
 441         serr->ee.ee_errno = err;
 442         serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
 443         serr->ee.ee_type = 0;
 444         serr->ee.ee_code = 0;
 445         serr->ee.ee_pad = 0;
 446         serr->ee.ee_info = info;
 447         serr->ee.ee_data = 0;
 448         serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 449         serr->port = port;
 450 
 451         __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 452         skb_reset_transport_header(skb);
 453 
 454         if (sock_queue_err_skb(sk, skb))
 455                 kfree_skb(skb);
 456 }
 457 
 458 /* For some errors we have valid addr_offset even with zero payload and
 459  * zero port. Also, addr_offset should be supported if port is set.
 460  */
 461 static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
 462 {
 463         return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
 464                serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
 465 }
 466 
 467 /* IPv4 supports cmsg on all imcp errors and some timestamps
 468  *
 469  * Timestamp code paths do not initialize the fields expected by cmsg:
 470  * the PKTINFO fields in skb->cb[]. Fill those in here.
 471  */
 472 static bool ipv4_datagram_support_cmsg(const struct sock *sk,
 473                                        struct sk_buff *skb,
 474                                        int ee_origin)
 475 {
 476         struct in_pktinfo *info;
 477 
 478         if (ee_origin == SO_EE_ORIGIN_ICMP)
 479                 return true;
 480 
 481         if (ee_origin == SO_EE_ORIGIN_LOCAL)
 482                 return false;
 483 
 484         /* Support IP_PKTINFO on tstamp packets if requested, to correlate
 485          * timestamp with egress dev. Not possible for packets without iif
 486          * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
 487          */
 488         info = PKTINFO_SKB_CB(skb);
 489         if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
 490             !info->ipi_ifindex)
 491                 return false;
 492 
 493         info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
 494         return true;
 495 }
 496 
 497 /*
 498  *      Handle MSG_ERRQUEUE
 499  */
 500 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 501 {
 502         struct sock_exterr_skb *serr;
 503         struct sk_buff *skb;
 504         DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 505         struct {
 506                 struct sock_extended_err ee;
 507                 struct sockaddr_in       offender;
 508         } errhdr;
 509         int err;
 510         int copied;
 511 
 512         err = -EAGAIN;
 513         skb = sock_dequeue_err_skb(sk);
 514         if (!skb)
 515                 goto out;
 516 
 517         copied = skb->len;
 518         if (copied > len) {
 519                 msg->msg_flags |= MSG_TRUNC;
 520                 copied = len;
 521         }
 522         err = skb_copy_datagram_msg(skb, 0, msg, copied);
 523         if (unlikely(err)) {
 524                 kfree_skb(skb);
 525                 return err;
 526         }
 527         sock_recv_timestamp(msg, sk, skb);
 528 
 529         serr = SKB_EXT_ERR(skb);
 530 
 531         if (sin && ipv4_datagram_support_addr(serr)) {
 532                 sin->sin_family = AF_INET;
 533                 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 534                                                    serr->addr_offset);
 535                 sin->sin_port = serr->port;
 536                 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 537                 *addr_len = sizeof(*sin);
 538         }
 539 
 540         memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 541         sin = &errhdr.offender;
 542         memset(sin, 0, sizeof(*sin));
 543 
 544         if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
 545                 sin->sin_family = AF_INET;
 546                 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 547                 if (inet_sk(sk)->cmsg_flags)
 548                         ip_cmsg_recv(msg, skb);
 549         }
 550 
 551         put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
 552 
 553         /* Now we could try to dump offended packet options */
 554 
 555         msg->msg_flags |= MSG_ERRQUEUE;
 556         err = copied;
 557 
 558         consume_skb(skb);
 559 out:
 560         return err;
 561 }
 562 
 563 
 564 /*
 565  *      Socket option code for IP. This is the end of the line after any
 566  *      TCP,UDP etc options on an IP socket.
 567  */
 568 static bool setsockopt_needs_rtnl(int optname)
 569 {
 570         switch (optname) {
 571         case IP_ADD_MEMBERSHIP:
 572         case IP_ADD_SOURCE_MEMBERSHIP:
 573         case IP_BLOCK_SOURCE:
 574         case IP_DROP_MEMBERSHIP:
 575         case IP_DROP_SOURCE_MEMBERSHIP:
 576         case IP_MSFILTER:
 577         case IP_UNBLOCK_SOURCE:
 578         case MCAST_BLOCK_SOURCE:
 579         case MCAST_MSFILTER:
 580         case MCAST_JOIN_GROUP:
 581         case MCAST_JOIN_SOURCE_GROUP:
 582         case MCAST_LEAVE_GROUP:
 583         case MCAST_LEAVE_SOURCE_GROUP:
 584         case MCAST_UNBLOCK_SOURCE:
 585                 return true;
 586         }
 587         return false;
 588 }
 589 
 590 static int do_ip_setsockopt(struct sock *sk, int level,
 591                             int optname, char __user *optval, unsigned int optlen)
 592 {
 593         struct inet_sock *inet = inet_sk(sk);
 594         struct net *net = sock_net(sk);
 595         int val = 0, err;
 596         bool needs_rtnl = setsockopt_needs_rtnl(optname);
 597 
 598         switch (optname) {
 599         case IP_PKTINFO:
 600         case IP_RECVTTL:
 601         case IP_RECVOPTS:
 602         case IP_RECVTOS:
 603         case IP_RETOPTS:
 604         case IP_TOS:
 605         case IP_TTL:
 606         case IP_HDRINCL:
 607         case IP_MTU_DISCOVER:
 608         case IP_RECVERR:
 609         case IP_ROUTER_ALERT:
 610         case IP_FREEBIND:
 611         case IP_PASSSEC:
 612         case IP_TRANSPARENT:
 613         case IP_MINTTL:
 614         case IP_NODEFRAG:
 615         case IP_BIND_ADDRESS_NO_PORT:
 616         case IP_UNICAST_IF:
 617         case IP_MULTICAST_TTL:
 618         case IP_MULTICAST_ALL:
 619         case IP_MULTICAST_LOOP:
 620         case IP_RECVORIGDSTADDR:
 621         case IP_CHECKSUM:
 622         case IP_RECVFRAGSIZE:
 623                 if (optlen >= sizeof(int)) {
 624                         if (get_user(val, (int __user *) optval))
 625                                 return -EFAULT;
 626                 } else if (optlen >= sizeof(char)) {
 627                         unsigned char ucval;
 628 
 629                         if (get_user(ucval, (unsigned char __user *) optval))
 630                                 return -EFAULT;
 631                         val = (int) ucval;
 632                 }
 633         }
 634 
 635         /* If optlen==0, it is equivalent to val == 0 */
 636 
 637         if (optname == IP_ROUTER_ALERT)
 638                 return ip_ra_control(sk, val ? 1 : 0, NULL);
 639         if (ip_mroute_opt(optname))
 640                 return ip_mroute_setsockopt(sk, optname, optval, optlen);
 641 
 642         err = 0;
 643         if (needs_rtnl)
 644                 rtnl_lock();
 645         lock_sock(sk);
 646 
 647         switch (optname) {
 648         case IP_OPTIONS:
 649         {
 650                 struct ip_options_rcu *old, *opt = NULL;
 651 
 652                 if (optlen > 40)
 653                         goto e_inval;
 654                 err = ip_options_get_from_user(sock_net(sk), &opt,
 655                                                optval, optlen);
 656                 if (err)
 657                         break;
 658                 old = rcu_dereference_protected(inet->inet_opt,
 659                                                 lockdep_sock_is_held(sk));
 660                 if (inet->is_icsk) {
 661                         struct inet_connection_sock *icsk = inet_csk(sk);
 662 #if IS_ENABLED(CONFIG_IPV6)
 663                         if (sk->sk_family == PF_INET ||
 664                             (!((1 << sk->sk_state) &
 665                                (TCPF_LISTEN | TCPF_CLOSE)) &&
 666                              inet->inet_daddr != LOOPBACK4_IPV6)) {
 667 #endif
 668                                 if (old)
 669                                         icsk->icsk_ext_hdr_len -= old->opt.optlen;
 670                                 if (opt)
 671                                         icsk->icsk_ext_hdr_len += opt->opt.optlen;
 672                                 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 673 #if IS_ENABLED(CONFIG_IPV6)
 674                         }
 675 #endif
 676                 }
 677                 rcu_assign_pointer(inet->inet_opt, opt);
 678                 if (old)
 679                         kfree_rcu(old, rcu);
 680                 break;
 681         }
 682         case IP_PKTINFO:
 683                 if (val)
 684                         inet->cmsg_flags |= IP_CMSG_PKTINFO;
 685                 else
 686                         inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
 687                 break;
 688         case IP_RECVTTL:
 689                 if (val)
 690                         inet->cmsg_flags |=  IP_CMSG_TTL;
 691                 else
 692                         inet->cmsg_flags &= ~IP_CMSG_TTL;
 693                 break;
 694         case IP_RECVTOS:
 695                 if (val)
 696                         inet->cmsg_flags |=  IP_CMSG_TOS;
 697                 else
 698                         inet->cmsg_flags &= ~IP_CMSG_TOS;
 699                 break;
 700         case IP_RECVOPTS:
 701                 if (val)
 702                         inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
 703                 else
 704                         inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
 705                 break;
 706         case IP_RETOPTS:
 707                 if (val)
 708                         inet->cmsg_flags |= IP_CMSG_RETOPTS;
 709                 else
 710                         inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
 711                 break;
 712         case IP_PASSSEC:
 713                 if (val)
 714                         inet->cmsg_flags |= IP_CMSG_PASSSEC;
 715                 else
 716                         inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
 717                 break;
 718         case IP_RECVORIGDSTADDR:
 719                 if (val)
 720                         inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
 721                 else
 722                         inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
 723                 break;
 724         case IP_CHECKSUM:
 725                 if (val) {
 726                         if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
 727                                 inet_inc_convert_csum(sk);
 728                                 inet->cmsg_flags |= IP_CMSG_CHECKSUM;
 729                         }
 730                 } else {
 731                         if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
 732                                 inet_dec_convert_csum(sk);
 733                                 inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
 734                         }
 735                 }
 736                 break;
 737         case IP_RECVFRAGSIZE:
 738                 if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
 739                         goto e_inval;
 740                 if (val)
 741                         inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
 742                 else
 743                         inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
 744                 break;
 745         case IP_TOS:    /* This sets both TOS and Precedence */
 746                 if (sk->sk_type == SOCK_STREAM) {
 747                         val &= ~INET_ECN_MASK;
 748                         val |= inet->tos & INET_ECN_MASK;
 749                 }
 750                 if (inet->tos != val) {
 751                         inet->tos = val;
 752                         sk->sk_priority = rt_tos2priority(val);
 753                         sk_dst_reset(sk);
 754                 }
 755                 break;
 756         case IP_TTL:
 757                 if (optlen < 1)
 758                         goto e_inval;
 759                 if (val != -1 && (val < 1 || val > 255))
 760                         goto e_inval;
 761                 inet->uc_ttl = val;
 762                 break;
 763         case IP_HDRINCL:
 764                 if (sk->sk_type != SOCK_RAW) {
 765                         err = -ENOPROTOOPT;
 766                         break;
 767                 }
 768                 inet->hdrincl = val ? 1 : 0;
 769                 break;
 770         case IP_NODEFRAG:
 771                 if (sk->sk_type != SOCK_RAW) {
 772                         err = -ENOPROTOOPT;
 773                         break;
 774                 }
 775                 inet->nodefrag = val ? 1 : 0;
 776                 break;
 777         case IP_BIND_ADDRESS_NO_PORT:
 778                 inet->bind_address_no_port = val ? 1 : 0;
 779                 break;
 780         case IP_MTU_DISCOVER:
 781                 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
 782                         goto e_inval;
 783                 inet->pmtudisc = val;
 784                 break;
 785         case IP_RECVERR:
 786                 inet->recverr = !!val;
 787                 if (!val)
 788                         skb_queue_purge(&sk->sk_error_queue);
 789                 break;
 790         case IP_MULTICAST_TTL:
 791                 if (sk->sk_type == SOCK_STREAM)
 792                         goto e_inval;
 793                 if (optlen < 1)
 794                         goto e_inval;
 795                 if (val == -1)
 796                         val = 1;
 797                 if (val < 0 || val > 255)
 798                         goto e_inval;
 799                 inet->mc_ttl = val;
 800                 break;
 801         case IP_MULTICAST_LOOP:
 802                 if (optlen < 1)
 803                         goto e_inval;
 804                 inet->mc_loop = !!val;
 805                 break;
 806         case IP_UNICAST_IF:
 807         {
 808                 struct net_device *dev = NULL;
 809                 int ifindex;
 810                 int midx;
 811 
 812                 if (optlen != sizeof(int))
 813                         goto e_inval;
 814 
 815                 ifindex = (__force int)ntohl((__force __be32)val);
 816                 if (ifindex == 0) {
 817                         inet->uc_index = 0;
 818                         err = 0;
 819                         break;
 820                 }
 821 
 822                 dev = dev_get_by_index(sock_net(sk), ifindex);
 823                 err = -EADDRNOTAVAIL;
 824                 if (!dev)
 825                         break;
 826 
 827                 midx = l3mdev_master_ifindex(dev);
 828                 dev_put(dev);
 829 
 830                 err = -EINVAL;
 831                 if (sk->sk_bound_dev_if &&
 832                     (!midx || midx != sk->sk_bound_dev_if))
 833                         break;
 834 
 835                 inet->uc_index = ifindex;
 836                 err = 0;
 837                 break;
 838         }
 839         case IP_MULTICAST_IF:
 840         {
 841                 struct ip_mreqn mreq;
 842                 struct net_device *dev = NULL;
 843                 int midx;
 844 
 845                 if (sk->sk_type == SOCK_STREAM)
 846                         goto e_inval;
 847                 /*
 848                  *      Check the arguments are allowable
 849                  */
 850 
 851                 if (optlen < sizeof(struct in_addr))
 852                         goto e_inval;
 853 
 854                 err = -EFAULT;
 855                 if (optlen >= sizeof(struct ip_mreqn)) {
 856                         if (copy_from_user(&mreq, optval, sizeof(mreq)))
 857                                 break;
 858                 } else {
 859                         memset(&mreq, 0, sizeof(mreq));
 860                         if (optlen >= sizeof(struct ip_mreq)) {
 861                                 if (copy_from_user(&mreq, optval,
 862                                                    sizeof(struct ip_mreq)))
 863                                         break;
 864                         } else if (optlen >= sizeof(struct in_addr)) {
 865                                 if (copy_from_user(&mreq.imr_address, optval,
 866                                                    sizeof(struct in_addr)))
 867                                         break;
 868                         }
 869                 }
 870 
 871                 if (!mreq.imr_ifindex) {
 872                         if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
 873                                 inet->mc_index = 0;
 874                                 inet->mc_addr  = 0;
 875                                 err = 0;
 876                                 break;
 877                         }
 878                         dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
 879                         if (dev)
 880                                 mreq.imr_ifindex = dev->ifindex;
 881                 } else
 882                         dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
 883 
 884 
 885                 err = -EADDRNOTAVAIL;
 886                 if (!dev)
 887                         break;
 888 
 889                 midx = l3mdev_master_ifindex(dev);
 890 
 891                 dev_put(dev);
 892 
 893                 err = -EINVAL;
 894                 if (sk->sk_bound_dev_if &&
 895                     mreq.imr_ifindex != sk->sk_bound_dev_if &&
 896                     (!midx || midx != sk->sk_bound_dev_if))
 897                         break;
 898 
 899                 inet->mc_index = mreq.imr_ifindex;
 900                 inet->mc_addr  = mreq.imr_address.s_addr;
 901                 err = 0;
 902                 break;
 903         }
 904 
 905         case IP_ADD_MEMBERSHIP:
 906         case IP_DROP_MEMBERSHIP:
 907         {
 908                 struct ip_mreqn mreq;
 909 
 910                 err = -EPROTO;
 911                 if (inet_sk(sk)->is_icsk)
 912                         break;
 913 
 914                 if (optlen < sizeof(struct ip_mreq))
 915                         goto e_inval;
 916                 err = -EFAULT;
 917                 if (optlen >= sizeof(struct ip_mreqn)) {
 918                         if (copy_from_user(&mreq, optval, sizeof(mreq)))
 919                                 break;
 920                 } else {
 921                         memset(&mreq, 0, sizeof(mreq));
 922                         if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
 923                                 break;
 924                 }
 925 
 926                 if (optname == IP_ADD_MEMBERSHIP)
 927                         err = ip_mc_join_group(sk, &mreq);
 928                 else
 929                         err = ip_mc_leave_group(sk, &mreq);
 930                 break;
 931         }
 932         case IP_MSFILTER:
 933         {
 934                 struct ip_msfilter *msf;
 935 
 936                 if (optlen < IP_MSFILTER_SIZE(0))
 937                         goto e_inval;
 938                 if (optlen > sysctl_optmem_max) {
 939                         err = -ENOBUFS;
 940                         break;
 941                 }
 942                 msf = memdup_user(optval, optlen);
 943                 if (IS_ERR(msf)) {
 944                         err = PTR_ERR(msf);
 945                         break;
 946                 }
 947                 /* numsrc >= (1G-4) overflow in 32 bits */
 948                 if (msf->imsf_numsrc >= 0x3ffffffcU ||
 949                     msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
 950                         kfree(msf);
 951                         err = -ENOBUFS;
 952                         break;
 953                 }
 954                 if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
 955                         kfree(msf);
 956                         err = -EINVAL;
 957                         break;
 958                 }
 959                 err = ip_mc_msfilter(sk, msf, 0);
 960                 kfree(msf);
 961                 break;
 962         }
 963         case IP_BLOCK_SOURCE:
 964         case IP_UNBLOCK_SOURCE:
 965         case IP_ADD_SOURCE_MEMBERSHIP:
 966         case IP_DROP_SOURCE_MEMBERSHIP:
 967         {
 968                 struct ip_mreq_source mreqs;
 969                 int omode, add;
 970 
 971                 if (optlen != sizeof(struct ip_mreq_source))
 972                         goto e_inval;
 973                 if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 974                         err = -EFAULT;
 975                         break;
 976                 }
 977                 if (optname == IP_BLOCK_SOURCE) {
 978                         omode = MCAST_EXCLUDE;
 979                         add = 1;
 980                 } else if (optname == IP_UNBLOCK_SOURCE) {
 981                         omode = MCAST_EXCLUDE;
 982                         add = 0;
 983                 } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
 984                         struct ip_mreqn mreq;
 985 
 986                         mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 987                         mreq.imr_address.s_addr = mreqs.imr_interface;
 988                         mreq.imr_ifindex = 0;
 989                         err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
 990                         if (err && err != -EADDRINUSE)
 991                                 break;
 992                         omode = MCAST_INCLUDE;
 993                         add = 1;
 994                 } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
 995                         omode = MCAST_INCLUDE;
 996                         add = 0;
 997                 }
 998                 err = ip_mc_source(add, omode, sk, &mreqs, 0);
 999                 break;
1000         }
1001         case MCAST_JOIN_GROUP:
1002         case MCAST_LEAVE_GROUP:
1003         {
1004                 struct group_req greq;
1005                 struct sockaddr_in *psin;
1006                 struct ip_mreqn mreq;
1007 
1008                 if (optlen < sizeof(struct group_req))
1009                         goto e_inval;
1010                 err = -EFAULT;
1011                 if (copy_from_user(&greq, optval, sizeof(greq)))
1012                         break;
1013                 psin = (struct sockaddr_in *)&greq.gr_group;
1014                 if (psin->sin_family != AF_INET)
1015                         goto e_inval;
1016                 memset(&mreq, 0, sizeof(mreq));
1017                 mreq.imr_multiaddr = psin->sin_addr;
1018                 mreq.imr_ifindex = greq.gr_interface;
1019 
1020                 if (optname == MCAST_JOIN_GROUP)
1021                         err = ip_mc_join_group(sk, &mreq);
1022                 else
1023                         err = ip_mc_leave_group(sk, &mreq);
1024                 break;
1025         }
1026         case MCAST_JOIN_SOURCE_GROUP:
1027         case MCAST_LEAVE_SOURCE_GROUP:
1028         case MCAST_BLOCK_SOURCE:
1029         case MCAST_UNBLOCK_SOURCE:
1030         {
1031                 struct group_source_req greqs;
1032                 struct ip_mreq_source mreqs;
1033                 struct sockaddr_in *psin;
1034                 int omode, add;
1035 
1036                 if (optlen != sizeof(struct group_source_req))
1037                         goto e_inval;
1038                 if (copy_from_user(&greqs, optval, sizeof(greqs))) {
1039                         err = -EFAULT;
1040                         break;
1041                 }
1042                 if (greqs.gsr_group.ss_family != AF_INET ||
1043                     greqs.gsr_source.ss_family != AF_INET) {
1044                         err = -EADDRNOTAVAIL;
1045                         break;
1046                 }
1047                 psin = (struct sockaddr_in *)&greqs.gsr_group;
1048                 mreqs.imr_multiaddr = psin->sin_addr.s_addr;
1049                 psin = (struct sockaddr_in *)&greqs.gsr_source;
1050                 mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
1051                 mreqs.imr_interface = 0; /* use index for mc_source */
1052 
1053                 if (optname == MCAST_BLOCK_SOURCE) {
1054                         omode = MCAST_EXCLUDE;
1055                         add = 1;
1056                 } else if (optname == MCAST_UNBLOCK_SOURCE) {
1057                         omode = MCAST_EXCLUDE;
1058                         add = 0;
1059                 } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
1060                         struct ip_mreqn mreq;
1061 
1062                         psin = (struct sockaddr_in *)&greqs.gsr_group;
1063                         mreq.imr_multiaddr = psin->sin_addr;
1064                         mreq.imr_address.s_addr = 0;
1065                         mreq.imr_ifindex = greqs.gsr_interface;
1066                         err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
1067                         if (err && err != -EADDRINUSE)
1068                                 break;
1069                         greqs.gsr_interface = mreq.imr_ifindex;
1070                         omode = MCAST_INCLUDE;
1071                         add = 1;
1072                 } else /* MCAST_LEAVE_SOURCE_GROUP */ {
1073                         omode = MCAST_INCLUDE;
1074                         add = 0;
1075                 }
1076                 err = ip_mc_source(add, omode, sk, &mreqs,
1077                                    greqs.gsr_interface);
1078                 break;
1079         }
1080         case MCAST_MSFILTER:
1081         {
1082                 struct sockaddr_in *psin;
1083                 struct ip_msfilter *msf = NULL;
1084                 struct group_filter *gsf = NULL;
1085                 int msize, i, ifindex;
1086 
1087                 if (optlen < GROUP_FILTER_SIZE(0))
1088                         goto e_inval;
1089                 if (optlen > sysctl_optmem_max) {
1090                         err = -ENOBUFS;
1091                         break;
1092                 }
1093                 gsf = memdup_user(optval, optlen);
1094                 if (IS_ERR(gsf)) {
1095                         err = PTR_ERR(gsf);
1096                         break;
1097                 }
1098 
1099                 /* numsrc >= (4G-140)/128 overflow in 32 bits */
1100                 if (gsf->gf_numsrc >= 0x1ffffff ||
1101                     gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1102                         err = -ENOBUFS;
1103                         goto mc_msf_out;
1104                 }
1105                 if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
1106                         err = -EINVAL;
1107                         goto mc_msf_out;
1108                 }
1109                 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
1110                 msf = kmalloc(msize, GFP_KERNEL);
1111                 if (!msf) {
1112                         err = -ENOBUFS;
1113                         goto mc_msf_out;
1114                 }
1115                 ifindex = gsf->gf_interface;
1116                 psin = (struct sockaddr_in *)&gsf->gf_group;
1117                 if (psin->sin_family != AF_INET) {
1118                         err = -EADDRNOTAVAIL;
1119                         goto mc_msf_out;
1120                 }
1121                 msf->imsf_multiaddr = psin->sin_addr.s_addr;
1122                 msf->imsf_interface = 0;
1123                 msf->imsf_fmode = gsf->gf_fmode;
1124                 msf->imsf_numsrc = gsf->gf_numsrc;
1125                 err = -EADDRNOTAVAIL;
1126                 for (i = 0; i < gsf->gf_numsrc; ++i) {
1127                         psin = (struct sockaddr_in *)&gsf->gf_slist[i];
1128 
1129                         if (psin->sin_family != AF_INET)
1130                                 goto mc_msf_out;
1131                         msf->imsf_slist[i] = psin->sin_addr.s_addr;
1132                 }
1133                 kfree(gsf);
1134                 gsf = NULL;
1135 
1136                 err = ip_mc_msfilter(sk, msf, ifindex);
1137 mc_msf_out:
1138                 kfree(msf);
1139                 kfree(gsf);
1140                 break;
1141         }
1142         case IP_MULTICAST_ALL:
1143                 if (optlen < 1)
1144                         goto e_inval;
1145                 if (val != 0 && val != 1)
1146                         goto e_inval;
1147                 inet->mc_all = val;
1148                 break;
1149 
1150         case IP_FREEBIND:
1151                 if (optlen < 1)
1152                         goto e_inval;
1153                 inet->freebind = !!val;
1154                 break;
1155 
1156         case IP_IPSEC_POLICY:
1157         case IP_XFRM_POLICY:
1158                 err = -EPERM;
1159                 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1160                         break;
1161                 err = xfrm_user_policy(sk, optname, optval, optlen);
1162                 break;
1163 
1164         case IP_TRANSPARENT:
1165                 if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1166                     !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1167                         err = -EPERM;
1168                         break;
1169                 }
1170                 if (optlen < 1)
1171                         goto e_inval;
1172                 inet->transparent = !!val;
1173                 break;
1174 
1175         case IP_MINTTL:
1176                 if (optlen < 1)
1177                         goto e_inval;
1178                 if (val < 0 || val > 255)
1179                         goto e_inval;
1180                 inet->min_ttl = val;
1181                 break;
1182 
1183         default:
1184                 err = -ENOPROTOOPT;
1185                 break;
1186         }
1187         release_sock(sk);
1188         if (needs_rtnl)
1189                 rtnl_unlock();
1190         return err;
1191 
1192 e_inval:
1193         release_sock(sk);
1194         if (needs_rtnl)
1195                 rtnl_unlock();
1196         return -EINVAL;
1197 }
1198 
1199 /**
1200  * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1201  * @sk: socket
1202  * @skb: buffer
1203  *
1204  * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1205  * destination in skb->cb[] before dst drop.
1206  * This way, receiver doesn't make cache line misses to read rtable.
1207  */
1208 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1209 {
1210         struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1211         bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
1212                        ipv6_sk_rxinfo(sk);
1213 
1214         if (prepare && skb_rtable(skb)) {
1215                 /* skb->cb is overloaded: prior to this point it is IP{6}CB
1216                  * which has interface index (iif) as the first member of the
1217                  * underlying inet{6}_skb_parm struct. This code then overlays
1218                  * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1219                  * element so the iif is picked up from the prior IPCB. If iif
1220                  * is the loopback interface, then return the sending interface
1221                  * (e.g., process binds socket to eth0 for Tx which is
1222                  * redirected to loopback in the rtable/dst).
1223                  */
1224                 struct rtable *rt = skb_rtable(skb);
1225                 bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
1226 
1227                 if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
1228                         pktinfo->ipi_ifindex = inet_iif(skb);
1229                 else if (l3slave && rt && rt->rt_iif)
1230                         pktinfo->ipi_ifindex = rt->rt_iif;
1231 
1232                 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1233         } else {
1234                 pktinfo->ipi_ifindex = 0;
1235                 pktinfo->ipi_spec_dst.s_addr = 0;
1236         }
1237         skb_dst_drop(skb);
1238 }
1239 
1240 int ip_setsockopt(struct sock *sk, int level,
1241                 int optname, char __user *optval, unsigned int optlen)
1242 {
1243         int err;
1244 
1245         if (level != SOL_IP)
1246                 return -ENOPROTOOPT;
1247 
1248         err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1249 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1250         if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
1251             optname < BPFILTER_IPT_SET_MAX)
1252                 err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
1253 #endif
1254 #ifdef CONFIG_NETFILTER
1255         /* we need to exclude all possible ENOPROTOOPTs except default case */
1256         if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1257                         optname != IP_IPSEC_POLICY &&
1258                         optname != IP_XFRM_POLICY &&
1259                         !ip_mroute_opt(optname))
1260                 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1261 #endif
1262         return err;
1263 }
1264 EXPORT_SYMBOL(ip_setsockopt);
1265 
1266 #ifdef CONFIG_COMPAT
1267 int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1268                          char __user *optval, unsigned int optlen)
1269 {
1270         int err;
1271 
1272         if (level != SOL_IP)
1273                 return -ENOPROTOOPT;
1274 
1275         if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
1276                 return compat_mc_setsockopt(sk, level, optname, optval, optlen,
1277                         ip_setsockopt);
1278 
1279         err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1280 #ifdef CONFIG_NETFILTER
1281         /* we need to exclude all possible ENOPROTOOPTs except default case */
1282         if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1283                         optname != IP_IPSEC_POLICY &&
1284                         optname != IP_XFRM_POLICY &&
1285                         !ip_mroute_opt(optname))
1286                 err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
1287                                            optlen);
1288 #endif
1289         return err;
1290 }
1291 EXPORT_SYMBOL(compat_ip_setsockopt);
1292 #endif
1293 
1294 /*
1295  *      Get the options. Note for future reference. The GET of IP options gets
1296  *      the _received_ ones. The set sets the _sent_ ones.
1297  */
1298 
/*
 * Tell whether reading @optname has to be done with the RTNL lock held.
 * Only the two multicast source-filter options qualify.
 */
static bool getsockopt_needs_rtnl(int optname)
{
        return optname == IP_MSFILTER || optname == MCAST_MSFILTER;
}
1308 
1309 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1310                             char __user *optval, int __user *optlen, unsigned int flags)
1311 {
1312         struct inet_sock *inet = inet_sk(sk);
1313         bool needs_rtnl = getsockopt_needs_rtnl(optname);
1314         int val, err = 0;
1315         int len;
1316 
1317         if (level != SOL_IP)
1318                 return -EOPNOTSUPP;
1319 
1320         if (ip_mroute_opt(optname))
1321                 return ip_mroute_getsockopt(sk, optname, optval, optlen);
1322 
1323         if (get_user(len, optlen))
1324                 return -EFAULT;
1325         if (len < 0)
1326                 return -EINVAL;
1327 
1328         if (needs_rtnl)
1329                 rtnl_lock();
1330         lock_sock(sk);
1331 
1332         switch (optname) {
1333         case IP_OPTIONS:
1334         {
1335                 unsigned char optbuf[sizeof(struct ip_options)+40];
1336                 struct ip_options *opt = (struct ip_options *)optbuf;
1337                 struct ip_options_rcu *inet_opt;
1338 
1339                 inet_opt = rcu_dereference_protected(inet->inet_opt,
1340                                                      lockdep_sock_is_held(sk));
1341                 opt->optlen = 0;
1342                 if (inet_opt)
1343                         memcpy(optbuf, &inet_opt->opt,
1344                                sizeof(struct ip_options) +
1345                                inet_opt->opt.optlen);
1346                 release_sock(sk);
1347 
1348                 if (opt->optlen == 0)
1349                         return put_user(0, optlen);
1350 
1351                 ip_options_undo(opt);
1352 
1353                 len = min_t(unsigned int, len, opt->optlen);
1354                 if (put_user(len, optlen))
1355                         return -EFAULT;
1356                 if (copy_to_user(optval, opt->__data, len))
1357                         return -EFAULT;
1358                 return 0;
1359         }
1360         case IP_PKTINFO:
1361                 val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1362                 break;
1363         case IP_RECVTTL:
1364                 val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1365                 break;
1366         case IP_RECVTOS:
1367                 val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1368                 break;
1369         case IP_RECVOPTS:
1370                 val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1371                 break;
1372         case IP_RETOPTS:
1373                 val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1374                 break;
1375         case IP_PASSSEC:
1376                 val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1377                 break;
1378         case IP_RECVORIGDSTADDR:
1379                 val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1380                 break;
1381         case IP_CHECKSUM:
1382                 val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1383                 break;
1384         case IP_RECVFRAGSIZE:
1385                 val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
1386                 break;
1387         case IP_TOS:
1388                 val = inet->tos;
1389                 break;
1390         case IP_TTL:
1391         {
1392                 struct net *net = sock_net(sk);
1393                 val = (inet->uc_ttl == -1 ?
1394                        net->ipv4.sysctl_ip_default_ttl :
1395                        inet->uc_ttl);
1396                 break;
1397         }
1398         case IP_HDRINCL:
1399                 val = inet->hdrincl;
1400                 break;
1401         case IP_NODEFRAG:
1402                 val = inet->nodefrag;
1403                 break;
1404         case IP_BIND_ADDRESS_NO_PORT:
1405                 val = inet->bind_address_no_port;
1406                 break;
1407         case IP_MTU_DISCOVER:
1408                 val = inet->pmtudisc;
1409                 break;
1410         case IP_MTU:
1411         {
1412                 struct dst_entry *dst;
1413                 val = 0;
1414                 dst = sk_dst_get(sk);
1415                 if (dst) {
1416                         val = dst_mtu(dst);
1417                         dst_release(dst);
1418                 }
1419                 if (!val) {
1420                         release_sock(sk);
1421                         return -ENOTCONN;
1422                 }
1423                 break;
1424         }
1425         case IP_RECVERR:
1426                 val = inet->recverr;
1427                 break;
1428         case IP_MULTICAST_TTL:
1429                 val = inet->mc_ttl;
1430                 break;
1431         case IP_MULTICAST_LOOP:
1432                 val = inet->mc_loop;
1433                 break;
1434         case IP_UNICAST_IF:
1435                 val = (__force int)htonl((__u32) inet->uc_index);
1436                 break;
1437         case IP_MULTICAST_IF:
1438         {
1439                 struct in_addr addr;
1440                 len = min_t(unsigned int, len, sizeof(struct in_addr));
1441                 addr.s_addr = inet->mc_addr;
1442                 release_sock(sk);
1443 
1444                 if (put_user(len, optlen))
1445                         return -EFAULT;
1446                 if (copy_to_user(optval, &addr, len))
1447                         return -EFAULT;
1448                 return 0;
1449         }
1450         case IP_MSFILTER:
1451         {
1452                 struct ip_msfilter msf;
1453 
1454                 if (len < IP_MSFILTER_SIZE(0)) {
1455                         err = -EINVAL;
1456                         goto out;
1457                 }
1458                 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1459                         err = -EFAULT;
1460                         goto out;
1461                 }
1462                 err = ip_mc_msfget(sk, &msf,
1463                                    (struct ip_msfilter __user *)optval, optlen);
1464                 goto out;
1465         }
1466         case MCAST_MSFILTER:
1467         {
1468                 struct group_filter gsf;
1469 
1470                 if (len < GROUP_FILTER_SIZE(0)) {
1471                         err = -EINVAL;
1472                         goto out;
1473                 }
1474                 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
1475                         err = -EFAULT;
1476                         goto out;
1477                 }
1478                 err = ip_mc_gsfget(sk, &gsf,
1479                                    (struct group_filter __user *)optval,
1480                                    optlen);
1481                 goto out;
1482         }
1483         case IP_MULTICAST_ALL:
1484                 val = inet->mc_all;
1485                 break;
1486         case IP_PKTOPTIONS:
1487         {
1488                 struct msghdr msg;
1489 
1490                 release_sock(sk);
1491 
1492                 if (sk->sk_type != SOCK_STREAM)
1493                         return -ENOPROTOOPT;
1494 
1495                 msg.msg_control = (__force void *) optval;
1496                 msg.msg_controllen = len;
1497                 msg.msg_flags = flags;
1498 
1499                 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1500                         struct in_pktinfo info;
1501 
1502                         info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1503                         info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1504                         info.ipi_ifindex = inet->mc_index;
1505                         put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1506                 }
1507                 if (inet->cmsg_flags & IP_CMSG_TTL) {
1508                         int hlim = inet->mc_ttl;
1509                         put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1510                 }
1511                 if (inet->cmsg_flags & IP_CMSG_TOS) {
1512                         int tos = inet->rcv_tos;
1513                         put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1514                 }
1515                 len -= msg.msg_controllen;
1516                 return put_user(len, optlen);
1517         }
1518         case IP_FREEBIND:
1519                 val = inet->freebind;
1520                 break;
1521         case IP_TRANSPARENT:
1522                 val = inet->transparent;
1523                 break;
1524         case IP_MINTTL:
1525                 val = inet->min_ttl;
1526                 break;
1527         default:
1528                 release_sock(sk);
1529                 return -ENOPROTOOPT;
1530         }
1531         release_sock(sk);
1532 
1533         if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1534                 unsigned char ucval = (unsigned char)val;
1535                 len = 1;
1536                 if (put_user(len, optlen))
1537                         return -EFAULT;
1538                 if (copy_to_user(optval, &ucval, 1))
1539                         return -EFAULT;
1540         } else {
1541                 len = min_t(unsigned int, sizeof(int), len);
1542                 if (put_user(len, optlen))
1543                         return -EFAULT;
1544                 if (copy_to_user(optval, &val, len))
1545                         return -EFAULT;
1546         }
1547         return 0;
1548 
1549 out:
1550         release_sock(sk);
1551         if (needs_rtnl)
1552                 rtnl_unlock();
1553         return err;
1554 }
1555 
1556 int ip_getsockopt(struct sock *sk, int level,
1557                   int optname, char __user *optval, int __user *optlen)
1558 {
1559         int err;
1560 
1561         err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1562 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1563         if (optname >= BPFILTER_IPT_SO_GET_INFO &&
1564             optname < BPFILTER_IPT_GET_MAX)
1565                 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
1566 #endif
1567 #ifdef CONFIG_NETFILTER
1568         /* we need to exclude all possible ENOPROTOOPTs except default case */
1569         if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1570                         !ip_mroute_opt(optname)) {
1571                 int len;
1572 
1573                 if (get_user(len, optlen))
1574                         return -EFAULT;
1575 
1576                 err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
1577                 if (err >= 0)
1578                         err = put_user(len, optlen);
1579                 return err;
1580         }
1581 #endif
1582         return err;
1583 }
1584 EXPORT_SYMBOL(ip_getsockopt);
1585 
1586 #ifdef CONFIG_COMPAT
1587 int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1588                          char __user *optval, int __user *optlen)
1589 {
1590         int err;
1591 
1592         if (optname == MCAST_MSFILTER)
1593                 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1594                         ip_getsockopt);
1595 
1596         err = do_ip_getsockopt(sk, level, optname, optval, optlen,
1597                 MSG_CMSG_COMPAT);
1598 
1599 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1600         if (optname >= BPFILTER_IPT_SO_GET_INFO &&
1601             optname < BPFILTER_IPT_GET_MAX)
1602                 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
1603 #endif
1604 #ifdef CONFIG_NETFILTER
1605         /* we need to exclude all possible ENOPROTOOPTs except default case */
1606         if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1607                         !ip_mroute_opt(optname)) {
1608                 int len;
1609 
1610                 if (get_user(len, optlen))
1611                         return -EFAULT;
1612 
1613                 err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
1614                 if (err >= 0)
1615                         err = put_user(len, optlen);
1616                 return err;
1617         }
1618 #endif
1619         return err;
1620 }
1621 EXPORT_SYMBOL(compat_ip_getsockopt);
1622 #endif

/* [<][>][^][v][top][bottom][index][help] */