root/net/ipv4/fou.c

DEFINITIONS

This source file includes the following definitions.
  1. fou_from_sock
  2. fou_recv_pull
  3. fou_udp_recv
  4. gue_remcsum
  5. gue_control_message
  6. gue_udp_recv
  7. fou_gro_receive
  8. fou_gro_complete
  9. gue_gro_remcsum
  10. gue_gro_receive
  11. gue_gro_complete
  12. fou_cfg_cmp
  13. fou_add_to_port_list
  14. fou_release
  15. fou_create
  16. fou_destroy
  17. parse_nl_config
  18. fou_nl_cmd_add_port
  19. fou_nl_cmd_rm_port
  20. fou_fill_info
  21. fou_dump_info
  22. fou_nl_cmd_get_port
  23. fou_nl_dump
  24. fou_encap_hlen
  25. gue_encap_hlen
  26. __fou_build_header
  27. __gue_build_header
  28. fou_build_udp
  29. fou_build_header
  30. gue_build_header
  31. gue_err_proto_handler
  32. gue_err
  33. ip_tunnel_encap_add_fou_ops
  34. ip_tunnel_encap_del_fou_ops
  35. ip_tunnel_encap_add_fou_ops
  36. ip_tunnel_encap_del_fou_ops
  37. fou_init_net
  38. fou_exit_net
  39. fou_init
  40. fou_fini

   1 // SPDX-License-Identifier: GPL-2.0-only
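     /*
      * fou: Foo-over-UDP (FOU) and Generic UDP Encapsulation (GUE).
      * Provides UDP receive and GRO handlers for both encapsulations,
      * netlink configuration of encap ports, and header-building ops
      * used by IP tunnels on transmit.
      */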
   2 #include <linux/module.h>
   3 #include <linux/errno.h>
   4 #include <linux/socket.h>
   5 #include <linux/skbuff.h>
   6 #include <linux/ip.h>
   7 #include <linux/icmp.h>
   8 #include <linux/udp.h>
   9 #include <linux/types.h>
  10 #include <linux/kernel.h>
  11 #include <net/genetlink.h>
  12 #include <net/gue.h>
  13 #include <net/fou.h>
  14 #include <net/ip.h>
  15 #include <net/protocol.h>
  16 #include <net/udp.h>
  17 #include <net/udp_tunnel.h>
  18 #include <net/xfrm.h>
  19 #include <uapi/linux/fou.h>
  20 #include <uapi/linux/genetlink.h>
  21 
  22 struct fou {
  23         struct socket *sock;
  24         u8 protocol;
  25         u8 flags;
  26         __be16 port;
  27         u8 family;
  28         u16 type;
  29         struct list_head list;
  30         struct rcu_head rcu;
  31 };
  32 
  33 #define FOU_F_REMCSUM_NOPARTIAL BIT(0)
  34 
  35 struct fou_cfg {
  36         u16 type;
  37         u8 protocol;
  38         u8 flags;
  39         struct udp_port_cfg udp_config;
  40 };
  41 
  42 static unsigned int fou_net_id;
  43 
  44 struct fou_net {
  45         struct list_head fou_list;
  46         struct mutex fou_lock;
  47 };
  48 
  49 static inline struct fou *fou_from_sock(struct sock *sk)
  50 {
  51         return sk->sk_user_data;
  52 }
  53 
  54 static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
  55 {
  56         /* Remove 'len' bytes from the packet (UDP header and
  57          * FOU header if present).
  58          */
  59         if (fou->family == AF_INET)
  60                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
  61         else
  62                 ipv6_hdr(skb)->payload_len =
  63                     htons(ntohs(ipv6_hdr(skb)->payload_len) - len);
  64 
  65         __skb_pull(skb, len);
  66         skb_postpull_rcsum(skb, udp_hdr(skb), len);
  67         skb_reset_transport_header(skb);
  68         return iptunnel_pull_offloads(skb);
  69 }
  70 
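     /* UDP encap_rcv handler for plain FOU: strip the outer UDP header and
      * return the negated inner protocol number so the IP stack resubmits
      * the packet as that protocol.  Returning 1 falls back to normal UDP
      * processing; returning 0 means the packet was consumed.
      */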
  71 static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
  72 {
  73         struct fou *fou = fou_from_sock(sk);
  74 
  75         if (!fou)
  76                 return 1;
  77 
  78         if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
  79                 goto drop;
  80 
  81         return -fou->protocol;
  82 
  83 drop:
  84         kfree_skb(skb);
  85         return 0;
  86 }
  87 
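     /* Process the GUE remote checksum offload option on receive: pd[0] is
      * the checksum start and pd[1] the offset to fold it into, both
      * relative to the end of the GUE header.
      */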
  88 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
  89                                   void *data, size_t hdrlen, u8 ipproto,
  90                                   bool nopartial)
  91 {
  92         __be16 *pd = data;
  93         size_t start = ntohs(pd[0]);
  94         size_t offset = ntohs(pd[1]);
  95         size_t plen = sizeof(struct udphdr) + hdrlen +
  96             max_t(size_t, offset + sizeof(u16), start);
  97 
  98         if (skb->remcsum_offload)
  99                 return guehdr;
 100 
 101         if (!pskb_may_pull(skb, plen))
 102                 return NULL;
 103         guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 104 
 105         skb_remcsum_process(skb, (void *)guehdr + hdrlen,
 106                             start, offset, nopartial);
 107 
 108         return guehdr;
 109 }
 110 
 111 static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
 112 {
 113         /* No support yet */
 114         kfree_skb(skb);
 115         return 0;
 116 }
 117 
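     /* UDP encap_rcv handler for GUE: validate the GUE header (version 0
      * with optional fields, or version 1 carrying IPv4/IPv6 directly),
      * handle the remote checksum offload option if present, then strip
      * the outer headers and resubmit the inner packet as proto_ctype.
      */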
 118 static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 119 {
 120         struct fou *fou = fou_from_sock(sk);
 121         size_t len, optlen, hdrlen;
 122         struct guehdr *guehdr;
 123         void *data;
 124         u16 doffset = 0;
 125         u8 proto_ctype;
 126 
 127         if (!fou)
 128                 return 1;
 129 
 130         len = sizeof(struct udphdr) + sizeof(struct guehdr);
 131         if (!pskb_may_pull(skb, len))
 132                 goto drop;
 133 
 134         guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 135 
 136         switch (guehdr->version) {
 137         case 0: /* Full GUE header present */
 138                 break;
 139 
 140         case 1: {
 141                 /* Direct encapsulation of IPv4 or IPv6 */
 142 
 143                 int prot;
 144 
 145                 switch (((struct iphdr *)guehdr)->version) {
 146                 case 4:
 147                         prot = IPPROTO_IPIP;
 148                         break;
 149                 case 6:
 150                         prot = IPPROTO_IPV6;
 151                         break;
 152                 default:
 153                         goto drop;
 154                 }
 155 
 156                 if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
 157                         goto drop;
 158 
 159                 return -prot;
 160         }
 161 
 162         default: /* Undefined version */
 163                 goto drop;
 164         }
 165 
 166         optlen = guehdr->hlen << 2;
 167         len += optlen;
 168 
 169         if (!pskb_may_pull(skb, len))
 170                 goto drop;
 171 
 172         /* guehdr may change after pull */
 173         guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 174 
 175         if (validate_gue_flags(guehdr, optlen))
 176                 goto drop;
 177 
 178         hdrlen = sizeof(struct guehdr) + optlen;
 179 
 180         if (fou->family == AF_INET)
 181                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
 182         else
 183                 ipv6_hdr(skb)->payload_len =
 184                     htons(ntohs(ipv6_hdr(skb)->payload_len) - len);
 185 
 186         /* Pull csum through the guehdr now. This can be used if
 187          * there is a remote checksum offload.
 188          */
 189         skb_postpull_rcsum(skb, udp_hdr(skb), len);
 190 
 191         data = &guehdr[1];
 192 
 193         if (guehdr->flags & GUE_FLAG_PRIV) {
 194                 __be32 flags = *(__be32 *)(data + doffset);
 195 
 196                 doffset += GUE_LEN_PRIV;
 197 
 198                 if (flags & GUE_PFLAG_REMCSUM) {
 199                         guehdr = gue_remcsum(skb, guehdr, data + doffset,
 200                                              hdrlen, guehdr->proto_ctype,
 201                                              !!(fou->flags &
 202                                                 FOU_F_REMCSUM_NOPARTIAL));
 203                         if (!guehdr)
 204                                 goto drop;
 205 
 206                         data = &guehdr[1];
 207 
 208                         doffset += GUE_PLEN_REMCSUM;
 209                 }
 210         }
 211 
 212         if (unlikely(guehdr->control))
 213                 return gue_control_message(skb, guehdr);
 214 
 215         proto_ctype = guehdr->proto_ctype;
 216         __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
 217         skb_reset_transport_header(skb);
 218 
 219         if (iptunnel_pull_offloads(skb))
 220                 goto drop;
 221 
 222         return -proto_ctype;
 223 
 224 drop:
 225         kfree_skb(skb);
 226         return 0;
 227 }
 228 
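     /* GRO receive for plain FOU: delegate aggregation to the inner
      * protocol's gro_receive callback.
      */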
 229 static struct sk_buff *fou_gro_receive(struct sock *sk,
 230                                        struct list_head *head,
 231                                        struct sk_buff *skb)
 232 {
 233         u8 proto = fou_from_sock(sk)->protocol;
 234         const struct net_offload **offloads;
 235         const struct net_offload *ops;
 236         struct sk_buff *pp = NULL;
 237 
 238         /* We can clear the encap_mark for FOU as we are essentially doing
 239          * one of two possible things.  We are either adding an L4 tunnel
 240          * header to the outer L3 tunnel header, or we are simply
 241          * treating the FOU tunnel header as though it is a UDP protocol
 242          * specific header such as VXLAN or GENEVE.
 243          */
 244         NAPI_GRO_CB(skb)->encap_mark = 0;
 245 
 246         /* Flag this frame as already having an outer encap header */
 247         NAPI_GRO_CB(skb)->is_fou = 1;
 248 
 249         rcu_read_lock();
 250         offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 251         ops = rcu_dereference(offloads[proto]);
 252         if (!ops || !ops->callbacks.gro_receive)
 253                 goto out_unlock;
 254 
 255         pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 256 
 257 out_unlock:
 258         rcu_read_unlock();
 259 
 260         return pp;
 261 }
 262 
 263 static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
 264                             int nhoff)
 265 {
 266         const struct net_offload *ops;
 267         u8 proto = fou_from_sock(sk)->protocol;
 268         int err = -ENOSYS;
 269         const struct net_offload **offloads;
 270 
 271         rcu_read_lock();
 272         offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 273         ops = rcu_dereference(offloads[proto]);
 274         if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 275                 goto out_unlock;
 276 
 277         err = ops->callbacks.gro_complete(skb, nhoff);
 278 
 279         skb_set_inner_mac_header(skb, nhoff);
 280 
 281 out_unlock:
 282         rcu_read_unlock();
 283 
 284         return err;
 285 }
 286 
 287 static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 288                                       struct guehdr *guehdr, void *data,
 289                                       size_t hdrlen, struct gro_remcsum *grc,
 290                                       bool nopartial)
 291 {
 292         __be16 *pd = data;
 293         size_t start = ntohs(pd[0]);
 294         size_t offset = ntohs(pd[1]);
 295 
 296         if (skb->remcsum_offload)
 297                 return guehdr;
 298 
 299         if (!NAPI_GRO_CB(skb)->csum_valid)
 300                 return NULL;
 301 
 302         guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
 303                                          start, offset, grc, nopartial);
 304 
 305         skb->remcsum_offload = 1;
 306 
 307         return guehdr;
 308 }
 309 
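     /* GRO receive for GUE: parse and validate the GUE header, handle
      * remote checksum offload, match flows on the base header plus any
      * optional fields, then delegate to the inner protocol's gro_receive
      * callback.
      */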
 310 static struct sk_buff *gue_gro_receive(struct sock *sk,
 311                                        struct list_head *head,
 312                                        struct sk_buff *skb)
 313 {
 314         const struct net_offload **offloads;
 315         const struct net_offload *ops;
 316         struct sk_buff *pp = NULL;
 317         struct sk_buff *p;
 318         struct guehdr *guehdr;
 319         size_t len, optlen, hdrlen, off;
 320         void *data;
 321         u16 doffset = 0;
 322         int flush = 1;
 323         struct fou *fou = fou_from_sock(sk);
 324         struct gro_remcsum grc;
 325         u8 proto;
 326 
 327         skb_gro_remcsum_init(&grc);
 328 
 329         off = skb_gro_offset(skb);
 330         len = off + sizeof(*guehdr);
 331 
 332         guehdr = skb_gro_header_fast(skb, off);
 333         if (skb_gro_header_hard(skb, len)) {
 334                 guehdr = skb_gro_header_slow(skb, len, off);
 335                 if (unlikely(!guehdr))
 336                         goto out;
 337         }
 338 
 339         switch (guehdr->version) {
 340         case 0:
 341                 break;
 342         case 1:
 343                 switch (((struct iphdr *)guehdr)->version) {
 344                 case 4:
 345                         proto = IPPROTO_IPIP;
 346                         break;
 347                 case 6:
 348                         proto = IPPROTO_IPV6;
 349                         break;
 350                 default:
 351                         goto out;
 352                 }
 353                 goto next_proto;
 354         default:
 355                 goto out;
 356         }
 357 
 358         optlen = guehdr->hlen << 2;
 359         len += optlen;
 360 
 361         if (skb_gro_header_hard(skb, len)) {
 362                 guehdr = skb_gro_header_slow(skb, len, off);
 363                 if (unlikely(!guehdr))
 364                         goto out;
 365         }
 366 
 367         if (unlikely(guehdr->control) || guehdr->version != 0 ||
 368             validate_gue_flags(guehdr, optlen))
 369                 goto out;
 370 
 371         hdrlen = sizeof(*guehdr) + optlen;
 372 
 373         /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
 374          * this is needed if there is a remote checksum offload.
 375          */
 376         skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
 377 
 378         data = &guehdr[1];
 379 
 380         if (guehdr->flags & GUE_FLAG_PRIV) {
 381                 __be32 flags = *(__be32 *)(data + doffset);
 382 
 383                 doffset += GUE_LEN_PRIV;
 384 
 385                 if (flags & GUE_PFLAG_REMCSUM) {
 386                         guehdr = gue_gro_remcsum(skb, off, guehdr,
 387                                                  data + doffset, hdrlen, &grc,
 388                                                  !!(fou->flags &
 389                                                     FOU_F_REMCSUM_NOPARTIAL));
 390 
 391                         if (!guehdr)
 392                                 goto out;
 393 
 394                         data = &guehdr[1];
 395 
 396                         doffset += GUE_PLEN_REMCSUM;
 397                 }
 398         }
 399 
 400         skb_gro_pull(skb, hdrlen);
 401 
 402         list_for_each_entry(p, head, list) {
 403                 const struct guehdr *guehdr2;
 404 
 405                 if (!NAPI_GRO_CB(p)->same_flow)
 406                         continue;
 407 
 408                 guehdr2 = (struct guehdr *)(p->data + off);
 409 
 410                 /* Base GUE headers must be equal (covers
 411                  * hlen, version, proto_ctype, and flags).
 412                  */
 413                 if (guehdr->word != guehdr2->word) {
 414                         NAPI_GRO_CB(p)->same_flow = 0;
 415                         continue;
 416                 }
 417 
 418                 /* Check that the optional fields are the same. */
 419                 if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
 420                                            guehdr->hlen << 2)) {
 421                         NAPI_GRO_CB(p)->same_flow = 0;
 422                         continue;
 423                 }
 424         }
 425 
 426         proto = guehdr->proto_ctype;
 427 
 428 next_proto:
 429 
 430         /* We can clear the encap_mark for GUE as we are essentially doing
 431          * one of two possible things.  We are either adding an L4 tunnel
 432          * header to the outer L3 tunnel header, or we are simply
 433          * treating the GUE tunnel header as though it is a UDP protocol
 434          * specific header such as VXLAN or GENEVE.
 435          */
 436         NAPI_GRO_CB(skb)->encap_mark = 0;
 437 
 438         /* Flag this frame as already having an outer encap header */
 439         NAPI_GRO_CB(skb)->is_fou = 1;
 440 
 441         rcu_read_lock();
 442         offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 443         ops = rcu_dereference(offloads[proto]);
 444         if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
 445                 goto out_unlock;
 446 
 447         pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 448         flush = 0;
 449 
 450 out_unlock:
 451         rcu_read_unlock();
 452 out:
 453         skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 454 
 455         return pp;
 456 }
 457 
 458 static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 459 {
 460         const struct net_offload **offloads;
 461         struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
 462         const struct net_offload *ops;
 463         unsigned int guehlen = 0;
 464         u8 proto;
 465         int err = -ENOENT;
 466 
 467         switch (guehdr->version) {
 468         case 0:
 469                 proto = guehdr->proto_ctype;
 470                 guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
 471                 break;
 472         case 1:
 473                 switch (((struct iphdr *)guehdr)->version) {
 474                 case 4:
 475                         proto = IPPROTO_IPIP;
 476                         break;
 477                 case 6:
 478                         proto = IPPROTO_IPV6;
 479                         break;
 480                 default:
 481                         return err;
 482                 }
 483                 break;
 484         default:
 485                 return err;
 486         }
 487 
 488         rcu_read_lock();
 489         offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 490         ops = rcu_dereference(offloads[proto]);
 491         if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 492                 goto out_unlock;
 493 
 494         err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
 495 
 496         skb_set_inner_mac_header(skb, nhoff + guehlen);
 497 
 498 out_unlock:
 499         rcu_read_unlock();
 500         return err;
 501 }
 502 
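     /* Return true if an existing FOU socket matches the given config
      * (family, local/peer UDP ports and addresses, bound device).
      */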
 503 static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg)
 504 {
 505         struct sock *sk = fou->sock->sk;
 506         struct udp_port_cfg *udp_cfg = &cfg->udp_config;
 507 
 508         if (fou->family != udp_cfg->family ||
 509             fou->port != udp_cfg->local_udp_port ||
 510             sk->sk_dport != udp_cfg->peer_udp_port ||
 511             sk->sk_bound_dev_if != udp_cfg->bind_ifindex)
 512                 return false;
 513 
 514         if (fou->family == AF_INET) {
 515                 if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr ||
 516                     sk->sk_daddr != udp_cfg->peer_ip.s_addr)
 517                         return false;
 518                 else
 519                         return true;
 520 #if IS_ENABLED(CONFIG_IPV6)
 521         } else {
 522                 if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) ||
 523                     ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6))
 524                         return false;
 525                 else
 526                         return true;
 527 #endif
 528         }
 529 
 530         return false;
 531 }
 532 
 533 static int fou_add_to_port_list(struct net *net, struct fou *fou,
 534                                 struct fou_cfg *cfg)
 535 {
 536         struct fou_net *fn = net_generic(net, fou_net_id);
 537         struct fou *fout;
 538 
 539         mutex_lock(&fn->fou_lock);
 540         list_for_each_entry(fout, &fn->fou_list, list) {
 541                 if (fou_cfg_cmp(fout, cfg)) {
 542                         mutex_unlock(&fn->fou_lock);
 543                         return -EALREADY;
 544                 }
 545         }
 546 
 547         list_add(&fou->list, &fn->fou_list);
 548         mutex_unlock(&fn->fou_lock);
 549 
 550         return 0;
 551 }
 552 
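     /* Unlink a FOU socket from the per-netns list, release the underlying
      * UDP tunnel socket and free the structure after an RCU grace period.
      */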
 553 static void fou_release(struct fou *fou)
 554 {
 555         struct socket *sock = fou->sock;
 556 
 557         list_del(&fou->list);
 558         udp_tunnel_sock_release(sock);
 559 
 560         kfree_rcu(fou, rcu);
 561 }
 562 
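     /* Create a UDP encap socket for the given config, install the FOU or
      * GUE receive/GRO callbacks on it, and add it to the per-netns list.
      */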
 563 static int fou_create(struct net *net, struct fou_cfg *cfg,
 564                       struct socket **sockp)
 565 {
 566         struct socket *sock = NULL;
 567         struct fou *fou = NULL;
 568         struct sock *sk;
 569         struct udp_tunnel_sock_cfg tunnel_cfg;
 570         int err;
 571 
 572         /* Open UDP socket */
 573         err = udp_sock_create(net, &cfg->udp_config, &sock);
 574         if (err < 0)
 575                 goto error;
 576 
 577         /* Allocate FOU port structure */
 578         fou = kzalloc(sizeof(*fou), GFP_KERNEL);
 579         if (!fou) {
 580                 err = -ENOMEM;
 581                 goto error;
 582         }
 583 
 584         sk = sock->sk;
 585 
 586         fou->port = cfg->udp_config.local_udp_port;
 587         fou->family = cfg->udp_config.family;
 588         fou->flags = cfg->flags;
 589         fou->type = cfg->type;
 590         fou->sock = sock;
 591 
 592         memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
 593         tunnel_cfg.encap_type = 1;
 594         tunnel_cfg.sk_user_data = fou;
 595         tunnel_cfg.encap_destroy = NULL;
 596 
 597         /* Initialize receive handlers based on FOU encap type */
 598         switch (cfg->type) {
 599         case FOU_ENCAP_DIRECT:
 600                 tunnel_cfg.encap_rcv = fou_udp_recv;
 601                 tunnel_cfg.gro_receive = fou_gro_receive;
 602                 tunnel_cfg.gro_complete = fou_gro_complete;
 603                 fou->protocol = cfg->protocol;
 604                 break;
 605         case FOU_ENCAP_GUE:
 606                 tunnel_cfg.encap_rcv = gue_udp_recv;
 607                 tunnel_cfg.gro_receive = gue_gro_receive;
 608                 tunnel_cfg.gro_complete = gue_gro_complete;
 609                 break;
 610         default:
 611                 err = -EINVAL;
 612                 goto error;
 613         }
 614 
 615         setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
 616 
 617         sk->sk_allocation = GFP_ATOMIC;
 618 
 619         err = fou_add_to_port_list(net, fou, cfg);
 620         if (err)
 621                 goto error;
 622 
 623         if (sockp)
 624                 *sockp = sock;
 625 
 626         return 0;
 627 
 628 error:
 629         kfree(fou);
 630         if (sock)
 631                 udp_tunnel_sock_release(sock);
 632 
 633         return err;
 634 }
 635 
 636 static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 637 {
 638         struct fou_net *fn = net_generic(net, fou_net_id);
 639         int err = -EINVAL;
 640         struct fou *fou;
 641 
 642         mutex_lock(&fn->fou_lock);
 643         list_for_each_entry(fou, &fn->fou_list, list) {
 644                 if (fou_cfg_cmp(fou, cfg)) {
 645                         fou_release(fou);
 646                         err = 0;
 647                         break;
 648                 }
 649         }
 650         mutex_unlock(&fn->fou_lock);
 651 
 652         return err;
 653 }
 654 
 655 static struct genl_family fou_nl_family;
 656 
 657 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
 658         [FOU_ATTR_PORT]                 = { .type = NLA_U16, },
 659         [FOU_ATTR_AF]                   = { .type = NLA_U8, },
 660         [FOU_ATTR_IPPROTO]              = { .type = NLA_U8, },
 661         [FOU_ATTR_TYPE]                 = { .type = NLA_U8, },
 662         [FOU_ATTR_REMCSUM_NOPARTIAL]    = { .type = NLA_FLAG, },
 663         [FOU_ATTR_LOCAL_V4]             = { .type = NLA_U32, },
 664         [FOU_ATTR_PEER_V4]              = { .type = NLA_U32, },
 665         [FOU_ATTR_LOCAL_V6]             = { .len = sizeof(struct in6_addr), },
 666         [FOU_ATTR_PEER_V6]              = { .len = sizeof(struct in6_addr), },
 667         [FOU_ATTR_PEER_PORT]            = { .type = NLA_U16, },
 668         [FOU_ATTR_IFINDEX]              = { .type = NLA_S32, },
 669 };
 670 
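     /* Build a fou_cfg from FOU_ATTR_* netlink attributes (set from
      * userspace with, e.g., iproute2's "ip fou add").  A peer address
      * requires a peer port, and binding to an ifindex requires a local
      * address.
      */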
 671 static int parse_nl_config(struct genl_info *info,
 672                            struct fou_cfg *cfg)
 673 {
 674         bool has_local = false, has_peer = false;
 675         struct nlattr *attr;
 676         int ifindex;
 677         __be16 port;
 678 
 679         memset(cfg, 0, sizeof(*cfg));
 680 
 681         cfg->udp_config.family = AF_INET;
 682 
 683         if (info->attrs[FOU_ATTR_AF]) {
 684                 u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
 685 
 686                 switch (family) {
 687                 case AF_INET:
 688                         break;
 689                 case AF_INET6:
 690                         cfg->udp_config.ipv6_v6only = 1;
 691                         break;
 692                 default:
 693                         return -EAFNOSUPPORT;
 694                 }
 695 
 696                 cfg->udp_config.family = family;
 697         }
 698 
 699         if (info->attrs[FOU_ATTR_PORT]) {
 700                 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
 701                 cfg->udp_config.local_udp_port = port;
 702         }
 703 
 704         if (info->attrs[FOU_ATTR_IPPROTO])
 705                 cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
 706 
 707         if (info->attrs[FOU_ATTR_TYPE])
 708                 cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
 709 
 710         if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
 711                 cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
 712 
 713         if (cfg->udp_config.family == AF_INET) {
 714                 if (info->attrs[FOU_ATTR_LOCAL_V4]) {
 715                         attr = info->attrs[FOU_ATTR_LOCAL_V4];
 716                         cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr);
 717                         has_local = true;
 718                 }
 719 
 720                 if (info->attrs[FOU_ATTR_PEER_V4]) {
 721                         attr = info->attrs[FOU_ATTR_PEER_V4];
 722                         cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr);
 723                         has_peer = true;
 724                 }
 725 #if IS_ENABLED(CONFIG_IPV6)
 726         } else {
 727                 if (info->attrs[FOU_ATTR_LOCAL_V6]) {
 728                         attr = info->attrs[FOU_ATTR_LOCAL_V6];
 729                         cfg->udp_config.local_ip6 = nla_get_in6_addr(attr);
 730                         has_local = true;
 731                 }
 732 
 733                 if (info->attrs[FOU_ATTR_PEER_V6]) {
 734                         attr = info->attrs[FOU_ATTR_PEER_V6];
 735                         cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr);
 736                         has_peer = true;
 737                 }
 738 #endif
 739         }
 740 
 741         if (has_peer) {
 742                 if (info->attrs[FOU_ATTR_PEER_PORT]) {
 743                         port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]);
 744                         cfg->udp_config.peer_udp_port = port;
 745                 } else {
 746                         return -EINVAL;
 747                 }
 748         }
 749 
 750         if (info->attrs[FOU_ATTR_IFINDEX]) {
 751                 if (!has_local)
 752                         return -EINVAL;
 753 
 754                 ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]);
 755 
 756                 cfg->udp_config.bind_ifindex = ifindex;
 757         }
 758 
 759         return 0;
 760 }
 761 
 762 static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
 763 {
 764         struct net *net = genl_info_net(info);
 765         struct fou_cfg cfg;
 766         int err;
 767 
 768         err = parse_nl_config(info, &cfg);
 769         if (err)
 770                 return err;
 771 
 772         return fou_create(net, &cfg, NULL);
 773 }
 774 
 775 static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
 776 {
 777         struct net *net = genl_info_net(info);
 778         struct fou_cfg cfg;
 779         int err;
 780 
 781         err = parse_nl_config(info, &cfg);
 782         if (err)
 783                 return err;
 784 
 785         return fou_destroy(net, &cfg);
 786 }
 787 
 788 static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
 789 {
 790         struct sock *sk = fou->sock->sk;
 791 
 792         if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
 793             nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
 794             nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) ||
 795             nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
 796             nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) ||
 797             nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if))
 798                 return -1;
 799 
 800         if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
 801                 if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
 802                         return -1;
 803 
 804         if (fou->sock->sk->sk_family == AF_INET) {
 805                 if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr))
 806                         return -1;
 807 
 808                 if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr))
 809                         return -1;
 810 #if IS_ENABLED(CONFIG_IPV6)
 811         } else {
 812                 if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6,
 813                                      &sk->sk_v6_rcv_saddr))
 814                         return -1;
 815 
 816                 if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr))
 817                         return -1;
 818 #endif
 819         }
 820 
 821         return 0;
 822 }
 823 
 824 static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
 825                          u32 flags, struct sk_buff *skb, u8 cmd)
 826 {
 827         void *hdr;
 828 
 829         hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
 830         if (!hdr)
 831                 return -ENOMEM;
 832 
 833         if (fou_fill_info(fou, skb) < 0)
 834                 goto nla_put_failure;
 835 
 836         genlmsg_end(skb, hdr);
 837         return 0;
 838 
 839 nla_put_failure:
 840         genlmsg_cancel(skb, hdr);
 841         return -EMSGSIZE;
 842 }
 843 
 844 static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
 845 {
 846         struct net *net = genl_info_net(info);
 847         struct fou_net *fn = net_generic(net, fou_net_id);
 848         struct sk_buff *msg;
 849         struct fou_cfg cfg;
 850         struct fou *fout;
 851         __be16 port;
 852         u8 family;
 853         int ret;
 854 
 855         ret = parse_nl_config(info, &cfg);
 856         if (ret)
 857                 return ret;
 858         port = cfg.udp_config.local_udp_port;
 859         if (port == 0)
 860                 return -EINVAL;
 861 
 862         family = cfg.udp_config.family;
 863         if (family != AF_INET && family != AF_INET6)
 864                 return -EINVAL;
 865 
 866         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 867         if (!msg)
 868                 return -ENOMEM;
 869 
 870         ret = -ESRCH;
 871         mutex_lock(&fn->fou_lock);
 872         list_for_each_entry(fout, &fn->fou_list, list) {
 873                 if (fou_cfg_cmp(fout, &cfg)) {
 874                         ret = fou_dump_info(fout, info->snd_portid,
 875                                             info->snd_seq, 0, msg,
 876                                             info->genlhdr->cmd);
 877                         break;
 878                 }
 879         }
 880         mutex_unlock(&fn->fou_lock);
 881         if (ret < 0)
 882                 goto out_free;
 883 
 884         return genlmsg_reply(msg, info);
 885 
 886 out_free:
 887         nlmsg_free(msg);
 888         return ret;
 889 }
 890 
 891 static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 892 {
 893         struct net *net = sock_net(skb->sk);
 894         struct fou_net *fn = net_generic(net, fou_net_id);
 895         struct fou *fout;
 896         int idx = 0, ret;
 897 
 898         mutex_lock(&fn->fou_lock);
 899         list_for_each_entry(fout, &fn->fou_list, list) {
 900                 if (idx++ < cb->args[0])
 901                         continue;
 902                 ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
 903                                     cb->nlh->nlmsg_seq, NLM_F_MULTI,
 904                                     skb, FOU_CMD_GET);
 905                 if (ret)
 906                         break;
 907         }
 908         mutex_unlock(&fn->fou_lock);
 909 
 910         cb->args[0] = idx;
 911         return skb->len;
 912 }
 913 
 914 static const struct genl_ops fou_nl_ops[] = {
 915         {
 916                 .cmd = FOU_CMD_ADD,
 917                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 918                 .doit = fou_nl_cmd_add_port,
 919                 .flags = GENL_ADMIN_PERM,
 920         },
 921         {
 922                 .cmd = FOU_CMD_DEL,
 923                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 924                 .doit = fou_nl_cmd_rm_port,
 925                 .flags = GENL_ADMIN_PERM,
 926         },
 927         {
 928                 .cmd = FOU_CMD_GET,
 929                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 930                 .doit = fou_nl_cmd_get_port,
 931                 .dumpit = fou_nl_dump,
 932         },
 933 };
 934 
 935 static struct genl_family fou_nl_family __ro_after_init = {
 936         .hdrsize        = 0,
 937         .name           = FOU_GENL_NAME,
 938         .version        = FOU_GENL_VERSION,
 939         .maxattr        = FOU_ATTR_MAX,
 940         .policy = fou_nl_policy,
 941         .netnsok        = true,
 942         .module         = THIS_MODULE,
 943         .ops            = fou_nl_ops,
 944         .n_ops          = ARRAY_SIZE(fou_nl_ops),
 945 };
 946 
 947 size_t fou_encap_hlen(struct ip_tunnel_encap *e)
 948 {
 949         return sizeof(struct udphdr);
 950 }
 951 EXPORT_SYMBOL(fou_encap_hlen);
 952 
 953 size_t gue_encap_hlen(struct ip_tunnel_encap *e)
 954 {
 955         size_t len;
 956         bool need_priv = false;
 957 
 958         len = sizeof(struct udphdr) + sizeof(struct guehdr);
 959 
 960         if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
 961                 len += GUE_PLEN_REMCSUM;
 962                 need_priv = true;
 963         }
 964 
 965         len += need_priv ? GUE_LEN_PRIV : 0;
 966 
 967         return len;
 968 }
 969 EXPORT_SYMBOL(gue_encap_hlen);
 970 
 971 int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 972                        u8 *protocol, __be16 *sport, int type)
 973 {
 974         int err;
 975 
 976         err = iptunnel_handle_offloads(skb, type);
 977         if (err)
 978                 return err;
 979 
 980         *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
 981                                                 skb, 0, 0, false);
 982 
 983         return 0;
 984 }
 985 EXPORT_SYMBOL(__fou_build_header);
 986 
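     /* Build the GUE header on transmit: reserve space for the private
      * flags block and the remote checksum offload option when needed,
      * pick a source port from the flow hash, and fill in the header.
      */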
 987 int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 988                        u8 *protocol, __be16 *sport, int type)
 989 {
 990         struct guehdr *guehdr;
 991         size_t hdrlen, optlen = 0;
 992         void *data;
 993         bool need_priv = false;
 994         int err;
 995 
 996         if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
 997             skb->ip_summed == CHECKSUM_PARTIAL) {
 998                 optlen += GUE_PLEN_REMCSUM;
 999                 type |= SKB_GSO_TUNNEL_REMCSUM;
1000                 need_priv = true;
1001         }
1002 
1003         optlen += need_priv ? GUE_LEN_PRIV : 0;
1004 
1005         err = iptunnel_handle_offloads(skb, type);
1006         if (err)
1007                 return err;
1008 
1009         /* Get source port (based on flow hash) before skb_push */
1010         *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
1011                                                 skb, 0, 0, false);
1012 
1013         hdrlen = sizeof(struct guehdr) + optlen;
1014 
1015         skb_push(skb, hdrlen);
1016 
1017         guehdr = (struct guehdr *)skb->data;
1018 
1019         guehdr->control = 0;
1020         guehdr->version = 0;
1021         guehdr->hlen = optlen >> 2;
1022         guehdr->flags = 0;
1023         guehdr->proto_ctype = *protocol;
1024 
1025         data = &guehdr[1];
1026 
1027         if (need_priv) {
1028                 __be32 *flags = data;
1029 
1030                 guehdr->flags |= GUE_FLAG_PRIV;
1031                 *flags = 0;
1032                 data += GUE_LEN_PRIV;
1033 
1034                 if (type & SKB_GSO_TUNNEL_REMCSUM) {
1035                         u16 csum_start = skb_checksum_start_offset(skb);
1036                         __be16 *pd = data;
1037 
1038                         if (csum_start < hdrlen)
1039                                 return -EINVAL;
1040 
1041                         csum_start -= hdrlen;
1042                         pd[0] = htons(csum_start);
1043                         pd[1] = htons(csum_start + skb->csum_offset);
1044 
1045                         if (!skb_is_gso(skb)) {
1046                                 skb->ip_summed = CHECKSUM_NONE;
1047                                 skb->encapsulation = 0;
1048                         }
1049 
1050                         *flags |= GUE_PFLAG_REMCSUM;
1051                         data += GUE_PLEN_REMCSUM;
1052                 }
1053 
1054         }
1055 
1056         return 0;
1057 }
1058 EXPORT_SYMBOL(__gue_build_header);
1059 
1060 #ifdef CONFIG_NET_FOU_IP_TUNNELS
1061 
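     /* Push the outer UDP header for FOU/GUE encapsulation and switch the
      * tunnel's outer protocol to UDP.
      */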
1062 static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
1063                           struct flowi4 *fl4, u8 *protocol, __be16 sport)
1064 {
1065         struct udphdr *uh;
1066 
1067         skb_push(skb, sizeof(struct udphdr));
1068         skb_reset_transport_header(skb);
1069 
1070         uh = udp_hdr(skb);
1071 
1072         uh->dest = e->dport;
1073         uh->source = sport;
1074         uh->len = htons(skb->len);
1075         udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
1076                      fl4->saddr, fl4->daddr, skb->len);
1077 
1078         *protocol = IPPROTO_UDP;
1079 }
1080 
1081 static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1082                             u8 *protocol, struct flowi4 *fl4)
1083 {
1084         int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1085                                                        SKB_GSO_UDP_TUNNEL;
1086         __be16 sport;
1087         int err;
1088 
1089         err = __fou_build_header(skb, e, protocol, &sport, type);
1090         if (err)
1091                 return err;
1092 
1093         fou_build_udp(skb, e, fl4, protocol, sport);
1094 
1095         return 0;
1096 }
1097 
1098 static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1099                             u8 *protocol, struct flowi4 *fl4)
1100 {
1101         int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1102                                                        SKB_GSO_UDP_TUNNEL;
1103         __be16 sport;
1104         int err;
1105 
1106         err = __gue_build_header(skb, e, protocol, &sport, type);
1107         if (err)
1108                 return err;
1109 
1110         fou_build_udp(skb, e, fl4, protocol, sport);
1111 
1112         return 0;
1113 }
1114 
1115 static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
1116 {
1117         const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
1118 
1119         if (ipprot && ipprot->err_handler) {
1120                 if (!ipprot->err_handler(skb, info))
1121                         return 0;
1122         }
1123 
1124         return -ENOENT;
1125 }
1126 
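     /* ICMP error handler for GUE: parse the GUE header inside the
      * offending packet and forward the error to the inner protocol's
      * err_handler.
      */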
1127 static int gue_err(struct sk_buff *skb, u32 info)
1128 {
1129         int transport_offset = skb_transport_offset(skb);
1130         struct guehdr *guehdr;
1131         size_t len, optlen;
1132         int ret;
1133 
1134         len = sizeof(struct udphdr) + sizeof(struct guehdr);
1135         if (!pskb_may_pull(skb, transport_offset + len))
1136                 return -EINVAL;
1137 
1138         guehdr = (struct guehdr *)&udp_hdr(skb)[1];
1139 
1140         switch (guehdr->version) {
1141         case 0: /* Full GUE header present */
1142                 break;
1143         case 1: {
1144                 /* Direct encapsulation of IPv4 or IPv6 */
1145                 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
1146 
1147                 switch (((struct iphdr *)guehdr)->version) {
1148                 case 4:
1149                         ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
1150                         goto out;
1151 #if IS_ENABLED(CONFIG_IPV6)
1152                 case 6:
1153                         ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
1154                         goto out;
1155 #endif
1156                 default:
1157                         ret = -EOPNOTSUPP;
1158                         goto out;
1159                 }
1160         }
1161         default: /* Undefined version */
1162                 return -EOPNOTSUPP;
1163         }
1164 
1165         if (guehdr->control)
1166                 return -ENOENT;
1167 
1168         optlen = guehdr->hlen << 2;
1169 
1170         if (!pskb_may_pull(skb, transport_offset + len + optlen))
1171                 return -EINVAL;
1172 
1173         guehdr = (struct guehdr *)&udp_hdr(skb)[1];
1174         if (validate_gue_flags(guehdr, optlen))
1175                 return -EINVAL;
1176 
1177         /* Handling exceptions for direct UDP encapsulation in GUE would lead to
1178          * recursion. Besides, this kind of encapsulation can't even be
1179          * configured currently. Discard this.
1180          */
1181         if (guehdr->proto_ctype == IPPROTO_UDP ||
1182             guehdr->proto_ctype == IPPROTO_UDPLITE)
1183                 return -EOPNOTSUPP;
1184 
1185         skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
1186         ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
1187 
1188 out:
1189         skb_set_transport_header(skb, transport_offset);
1190         return ret;
1191 }
1192 
1193 
1194 static const struct ip_tunnel_encap_ops fou_iptun_ops = {
1195         .encap_hlen = fou_encap_hlen,
1196         .build_header = fou_build_header,
1197         .err_handler = gue_err,
1198 };
1199 
1200 static const struct ip_tunnel_encap_ops gue_iptun_ops = {
1201         .encap_hlen = gue_encap_hlen,
1202         .build_header = gue_build_header,
1203         .err_handler = gue_err,
1204 };
1205 
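     /* Register the FOU and GUE encap ops with the IP tunnel core so
      * tunnels can select them via TUNNEL_ENCAP_FOU / TUNNEL_ENCAP_GUE.
      */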
1206 static int ip_tunnel_encap_add_fou_ops(void)
1207 {
1208         int ret;
1209 
1210         ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1211         if (ret < 0) {
1212                 pr_err("can't add fou ops\n");
1213                 return ret;
1214         }
1215 
1216         ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1217         if (ret < 0) {
1218                 pr_err("can't add gue ops\n");
1219                 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1220                 return ret;
1221         }
1222 
1223         return 0;
1224 }
1225 
1226 static void ip_tunnel_encap_del_fou_ops(void)
1227 {
1228         ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1229         ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1230 }
1231 
1232 #else
1233 
1234 static int ip_tunnel_encap_add_fou_ops(void)
1235 {
1236         return 0;
1237 }
1238 
1239 static void ip_tunnel_encap_del_fou_ops(void)
1240 {
1241 }
1242 
1243 #endif
1244 
1245 static __net_init int fou_init_net(struct net *net)
1246 {
1247         struct fou_net *fn = net_generic(net, fou_net_id);
1248 
1249         INIT_LIST_HEAD(&fn->fou_list);
1250         mutex_init(&fn->fou_lock);
1251         return 0;
1252 }
1253 
1254 static __net_exit void fou_exit_net(struct net *net)
1255 {
1256         struct fou_net *fn = net_generic(net, fou_net_id);
1257         struct fou *fou, *next;
1258 
1259         /* Close all the FOU sockets */
1260         mutex_lock(&fn->fou_lock);
1261         list_for_each_entry_safe(fou, next, &fn->fou_list, list)
1262                 fou_release(fou);
1263         mutex_unlock(&fn->fou_lock);
1264 }
1265 
1266 static struct pernet_operations fou_net_ops = {
1267         .init = fou_init_net,
1268         .exit = fou_exit_net,
1269         .id   = &fou_net_id,
1270         .size = sizeof(struct fou_net),
1271 };
1272 
1273 static int __init fou_init(void)
1274 {
1275         int ret;
1276 
1277         ret = register_pernet_device(&fou_net_ops);
1278         if (ret)
1279                 goto exit;
1280 
1281         ret = genl_register_family(&fou_nl_family);
1282         if (ret < 0)
1283                 goto unregister;
1284 
1285         ret = ip_tunnel_encap_add_fou_ops();
1286         if (ret == 0)
1287                 return 0;
1288 
1289         genl_unregister_family(&fou_nl_family);
1290 unregister:
1291         unregister_pernet_device(&fou_net_ops);
1292 exit:
1293         return ret;
1294 }
1295 
1296 static void __exit fou_fini(void)
1297 {
1298         ip_tunnel_encap_del_fou_ops();
1299         genl_unregister_family(&fou_nl_family);
1300         unregister_pernet_device(&fou_net_ops);
1301 }
1302 
1303 module_init(fou_init);
1304 module_exit(fou_fini);
1305 MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
1306 MODULE_LICENSE("GPL");
