/*
 * VXLAN: Virtual eXtensible Local Area Network
 *
 * Copyright (c) 2012-2013 Vyatta Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/rculist.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/igmp.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/hash.h>
#include <linux/ethtool.h>
#include <net/arp.h>
#include <net/ndisc.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/ip6_checksum.h>
#endif

#define VXLAN_VERSION	"0.1"

#define PORT_HASH_BITS	8
#define PORT_HASH_SIZE	(1<<PORT_HASH_BITS)
#define VNI_HASH_BITS	10
#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
#define FDB_HASH_BITS	8
#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)
#define FDB_AGE_DEFAULT 300 /* 5 min */
#define FDB_AGE_INTERVAL (10 * HZ)	/* rescan interval */

/* UDP port for VXLAN traffic.
 * The IANA assigned port is 4789, but the Linux default is 8472
 * for compatibility with early adopters.
 */
static unsigned short vxlan_port __read_mostly = 8472;
module_param_named(udp_port, vxlan_port, ushort, 0444);
MODULE_PARM_DESC(udp_port, "Destination UDP port");
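/* Example (illustrative only, not part of the driver): the IANA port
 * can be selected either globally at module load time or per device
 * at creation time, e.g.
 *
 *	modprobe vxlan udp_port=4789
 *	ip link add vxlan0 type vxlan id 42 group 239.1.1.1 \
 *		dev eth0 dstport 4789
 *
 * A per-device dstport overrides the module default held in vxlan_port.
 */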
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static int vxlan_net_id;

static const u8 all_zeros_mac[ETH_ALEN];

/* per-network namespace private data for this module */
struct vxlan_net {
	struct list_head  vxlan_list;
	struct hlist_head sock_list[PORT_HASH_SIZE];
	spinlock_t	  sock_lock;
};

union vxlan_addr {
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr sa;
};

struct vxlan_rdst {
	union vxlan_addr remote_ip;
	__be16		 remote_port;
	u32		 remote_vni;
	u32		 remote_ifindex;
	struct list_head list;
	struct rcu_head	 rcu;
};

/* Forwarding table entry */
struct vxlan_fdb {
	struct hlist_node hlist;	/* linked list of entries */
	struct rcu_head	  rcu;
	unsigned long	  updated;	/* jiffies */
	unsigned long	  used;
	struct list_head  remotes;
	u16		  state;	/* see ndm_state */
	u8		  flags;	/* see ndm_flags */
	u8		  eth_addr[ETH_ALEN];
};

/* Pseudo network device */
struct vxlan_dev {
	struct hlist_node hlist;	/* vni hash table */
	struct list_head  next;		/* vxlan's per namespace list */
	struct vxlan_sock *vn_sock;	/* listening socket */
	struct net_device *dev;
	struct net	  *net;		/* netns for packet i/o */
	struct vxlan_rdst default_dst;	/* default destination */
	union vxlan_addr  saddr;	/* source address */
	__be16		  dst_port;
	__u16		  port_min;	/* source port range */
	__u16		  port_max;
	__u8		  tos;		/* TOS override */
	__u8		  ttl;
	u32		  flags;	/* VXLAN_F_* in vxlan.h */

	unsigned long	  age_interval;
	struct timer_list age_timer;
	spinlock_t	  hash_lock;
	unsigned int	  addrcnt;
	unsigned int	  addrmax;

	struct hlist_head fdb_head[FDB_HASH_SIZE];
};

/* salt for hash table */
static u32 vxlan_salt __read_mostly;
static struct workqueue_struct *vxlan_wq;

#if IS_ENABLED(CONFIG_IPV6)
static inline
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
{
	if (a->sa.sa_family != b->sa.sa_family)
		return false;
	if (a->sa.sa_family == AF_INET6)
		return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
	else
		return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}

static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
{
	if (ipa->sa.sa_family == AF_INET6)
		return ipv6_addr_any(&ipa->sin6.sin6_addr);
	else
		return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
}

static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
	if (ipa->sa.sa_family == AF_INET6)
		return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
	else
		return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}

static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
	if (nla_len(nla) >= sizeof(struct in6_addr)) {
		ip->sin6.sin6_addr = nla_get_in6_addr(nla);
		ip->sa.sa_family = AF_INET6;
		return 0;
	} else if (nla_len(nla) >= sizeof(__be32)) {
		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
		ip->sa.sa_family = AF_INET;
		return 0;
	} else {
		return -EAFNOSUPPORT;
	}
}

static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
			      const union vxlan_addr *ip)
{
	if (ip->sa.sa_family == AF_INET6)
		return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
	else
		return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
}

#else /* !CONFIG_IPV6 */

static inline
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
{
	return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}

static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
{
	return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
}

static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
	return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}

static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
	if (nla_len(nla) >= sizeof(struct in6_addr)) {
		return -EAFNOSUPPORT;
	} else if (nla_len(nla) >= sizeof(__be32)) {
		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
		ip->sa.sa_family = AF_INET;
		return 0;
	} else {
		return -EAFNOSUPPORT;
	}
}

static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
			      const union vxlan_addr *ip)
{
	return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
}
#endif
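/* Editorial note: vxlan_nla_get_addr() above discriminates the address
 * family purely by attribute length -- a payload of at least 16 bytes
 * is treated as IPv6, at least 4 bytes as IPv4, and anything shorter
 * is rejected with -EAFNOSUPPORT.
 */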
/* Virtual Network hash table head */
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
{
	return &vs->vni_list[hash_32(id, VNI_HASH_BITS)];
}

/* Socket hash table head */
static inline struct hlist_head *vs_head(struct net *net, __be16 port)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);

	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
}

/* First remote destination for a forwarding entry.
 * Guaranteed to be non-NULL because remotes are never deleted.
 */
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
{
	return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
}

static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
{
	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}

/* Find VXLAN socket based on network namespace, address family, UDP port
 * and enabled unshareable flags.
 */
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
					  __be16 port, u32 flags)
{
	struct vxlan_sock *vs;

	flags &= VXLAN_F_RCV_FLAGS;

	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
		if (inet_sk(vs->sock->sk)->inet_sport == port &&
		    inet_sk(vs->sock->sk)->sk.sk_family == family &&
		    vs->flags == flags)
			return vs;
	}
	return NULL;
}

static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
{
	struct vxlan_dev *vxlan;

	hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
		if (vxlan->default_dst.remote_vni == id)
			return vxlan;
	}

	return NULL;
}

/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
					sa_family_t family, __be16 port,
					u32 flags)
{
	struct vxlan_sock *vs;

	vs = vxlan_find_sock(net, family, port, flags);
	if (!vs)
		return NULL;

	return vxlan_vs_find_vni(vs, id);
}
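/* Editorial note: receive-side lookup is two-level.  vs_head() hashes
 * the UDP port into the per-namespace socket table and vni_head()
 * hashes the 24-bit VNI into the per-socket device table, so an
 * incoming frame resolves to its vxlan_dev with two short hash-chain
 * walks, both under RCU.
 */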
/* Fill in neighbour message in skbuff. */
static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
			  const struct vxlan_fdb *fdb,
			  u32 portid, u32 seq, int type, unsigned int flags,
			  const struct vxlan_rdst *rdst)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;
	bool send_ip, send_eth;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	memset(ndm, 0, sizeof(*ndm));

	send_eth = send_ip = true;

	if (type == RTM_GETNEIGH) {
		ndm->ndm_family	= AF_INET;
		send_ip = !vxlan_addr_any(&rdst->remote_ip);
		send_eth = !is_zero_ether_addr(fdb->eth_addr);
	} else
		ndm->ndm_family	= AF_BRIDGE;
	ndm->ndm_state = fdb->state;
	ndm->ndm_ifindex = vxlan->dev->ifindex;
	ndm->ndm_flags = fdb->flags;
	ndm->ndm_type = RTN_UNICAST;

	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
	    nla_put_s32(skb, NDA_LINK_NETNSID,
			peernet2id(dev_net(vxlan->dev), vxlan->net)))
		goto nla_put_failure;

	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
		goto nla_put_failure;

	if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
		goto nla_put_failure;

	if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
		goto nla_put_failure;
	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
	    nla_put_u32(skb, NDA_VNI, rdst->remote_vni))
		goto nla_put_failure;
	if (rdst->remote_ifindex &&
	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
		goto nla_put_failure;

	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
	ci.ndm_confirmed = 0;
	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
	ci.ndm_refcnt	 = 0;

	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static inline size_t vxlan_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
		+ nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
		+ nla_total_size(sizeof(__be16)) /* NDA_PORT */
		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
		+ nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
		+ nla_total_size(sizeof(struct nda_cacheinfo));
}

static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
			     struct vxlan_rdst *rd, int type)
{
	struct net *net = dev_net(vxlan->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb f = {
		.state = NUD_STALE,
	};
	struct vxlan_rdst remote = {
		.remote_ip = *ipa, /* goes to NDA_DST */
		.remote_vni = VXLAN_N_VID,
	};

	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
}
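/* Editorial note: vxlan_ip_miss() above and vxlan_fdb_miss() below do
 * not touch the forwarding table themselves.  They only send an
 * RTM_GETNEIGH message to RTNLGRP_NEIGH listeners, so a userspace
 * control plane subscribed to that group can resolve the miss and
 * install an entry through the fdb netlink ops further down.
 */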
static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
{
	struct vxlan_fdb f = {
		.state = NUD_STALE,
	};
	struct vxlan_rdst remote = { };

	memcpy(f.eth_addr, eth_addr, ETH_ALEN);

	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
}

/* Hash Ethernet address */
static u32 eth_hash(const unsigned char *addr)
{
	u64 value = get_unaligned((u64 *)addr);

	/* only want 6 bytes */
#ifdef __BIG_ENDIAN
	value >>= 16;
#else
	value <<= 16;
#endif
	return hash_64(value, FDB_HASH_BITS);
}

/* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
						const u8 *mac)
{
	return &vxlan->fdb_head[eth_hash(mac)];
}

/* Look up Ethernet address in forwarding table */
static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
					  const u8 *mac)
{
	struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
	struct vxlan_fdb *f;

	hlist_for_each_entry_rcu(f, head, hlist) {
		if (ether_addr_equal(mac, f->eth_addr))
			return f;
	}

	return NULL;
}

static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
					const u8 *mac)
{
	struct vxlan_fdb *f;

	f = __vxlan_find_mac(vxlan, mac);
	if (f)
		f->used = jiffies;

	return f;
}

/* caller should hold vxlan->hash_lock */
static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
					      union vxlan_addr *ip, __be16 port,
					      __u32 vni, __u32 ifindex)
{
	struct vxlan_rdst *rd;

	list_for_each_entry(rd, &f->remotes, list) {
		if (vxlan_addr_equal(&rd->remote_ip, ip) &&
		    rd->remote_port == port &&
		    rd->remote_vni == vni &&
		    rd->remote_ifindex == ifindex)
			return rd;
	}

	return NULL;
}

/* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f,
			     union vxlan_addr *ip, __be16 port, __u32 vni,
			     __u32 ifindex)
{
	struct vxlan_rdst *rd;

	rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
	if (rd)
		return 0;

	rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
	if (!rd)
		return 0;
	rd->remote_ip = *ip;
	rd->remote_port = port;
	rd->remote_vni = vni;
	rd->remote_ifindex = ifindex;
	return 1;
}

/* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f,
			    union vxlan_addr *ip, __be16 port, __u32 vni,
			    __u32 ifindex, struct vxlan_rdst **rdp)
{
	struct vxlan_rdst *rd;

	rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
	if (rd)
		return 0;

	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
	if (rd == NULL)
		return -ENOBUFS;
	rd->remote_ip = *ip;
	rd->remote_port = port;
	rd->remote_vni = vni;
	rd->remote_ifindex = ifindex;

	list_add_tail_rcu(&rd->list, &f->remotes);

	*rdp = rd;
	return 1;
}

static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
					  unsigned int off,
					  struct vxlanhdr *vh, size_t hdrlen,
					  u32 data, struct gro_remcsum *grc,
					  bool nopartial)
{
	size_t start, offset, plen;

	if (skb->remcsum_offload)
		return NULL;

	if (!NAPI_GRO_CB(skb)->csum_valid)
		return NULL;

	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
	offset = start + ((data & VXLAN_RCO_UDP) ?
			  offsetof(struct udphdr, check) :
			  offsetof(struct tcphdr, check));

	plen = hdrlen + offset + sizeof(u16);

	/* Pull checksum that will be written */
	if (skb_gro_header_hard(skb, off + plen)) {
		vh = skb_gro_header_slow(skb, off + plen, off);
		if (!vh)
			return NULL;
	}

	skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
				start, offset, grc, nopartial);

	skb->remcsum_offload = 1;

	return vh;
}
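/* Worked example (editorial, assuming the VXLAN_RCO_* constants from
 * include/net/vxlan.h: mask 0x7f, shift 1, UDP flag 0x80): for
 * data == 0x8b, start = (0x0b << 1) = 22 bytes past the VXLAN header,
 * and because the UDP bit is set the checksum word to patch sits at
 * offset = 22 + offsetof(struct udphdr, check) = 28.  plen then covers
 * everything that must be linear before skb_gro_remcsum_process() runs.
 */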
static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
					  struct sk_buff *skb,
					  struct udp_offload *uoff)
{
	struct sk_buff *p, **pp = NULL;
	struct vxlanhdr *vh, *vh2;
	unsigned int hlen, off_vx;
	int flush = 1;
	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
					     udp_offloads);
	u32 flags;
	struct gro_remcsum grc;

	skb_gro_remcsum_init(&grc);

	off_vx = skb_gro_offset(skb);
	hlen = off_vx + sizeof(*vh);
	vh = skb_gro_header_fast(skb, off_vx);
	if (skb_gro_header_hard(skb, hlen)) {
		vh = skb_gro_header_slow(skb, hlen, off_vx);
		if (unlikely(!vh))
			goto out;
	}

	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));

	flags = ntohl(vh->vx_flags);

	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
				       ntohl(vh->vx_vni), &grc,
				       !!(vs->flags &
					  VXLAN_F_REMCSUM_NOPARTIAL));

		if (!vh)
			goto out;
	}

	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		vh2 = (struct vxlanhdr *)(p->data + off_vx);
		if (vh->vx_flags != vh2->vx_flags ||
		    vh->vx_vni != vh2->vx_vni) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	pp = eth_gro_receive(head, skb);

out:
	skb_gro_remcsum_cleanup(skb, &grc);
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}

static int vxlan_gro_complete(struct sk_buff *skb, int nhoff,
			      struct udp_offload *uoff)
{
	udp_tunnel_gro_complete(skb, nhoff);

	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}

/* Notify netdevs that UDP port started listening */
static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
{
	struct net_device *dev;
	struct sock *sk = vs->sock->sk;
	struct net *net = sock_net(sk);
	sa_family_t sa_family = sk->sk_family;
	__be16 port = inet_sk(sk)->inet_sport;
	int err;

	if (sa_family == AF_INET) {
		err = udp_add_offload(&vs->udp_offloads);
		if (err)
			pr_warn("vxlan: udp_add_offload failed with status %d\n",
				err);
	}

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (dev->netdev_ops->ndo_add_vxlan_port)
			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
							    port);
	}
	rcu_read_unlock();
}

/* Notify netdevs that UDP port is no longer listening */
static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
{
	struct net_device *dev;
	struct sock *sk = vs->sock->sk;
	struct net *net = sock_net(sk);
	sa_family_t sa_family = sk->sk_family;
	__be16 port = inet_sk(sk)->inet_sport;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (dev->netdev_ops->ndo_del_vxlan_port)
			dev->netdev_ops->ndo_del_vxlan_port(dev, sa_family,
							    port);
	}
	rcu_read_unlock();

	if (sa_family == AF_INET)
		udp_del_offload(&vs->udp_offloads);
}
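/* Editorial note: the ndo_{add,del}_vxlan_port callbacks used above let
 * NICs with protocol-aware offloads (re)program their parsers with the
 * UDP port actually in use; without this, running VXLAN on a
 * non-default port would defeat hardware receive checksum and similar
 * offloads for encapsulated frames.
 */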
/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
			    const u8 *mac, union vxlan_addr *ip,
			    __u16 state, __u16 flags,
			    __be16 port, __u32 vni, __u32 ifindex,
			    __u8 ndm_flags)
{
	struct vxlan_rdst *rd = NULL;
	struct vxlan_fdb *f;
	int notify = 0;

	f = __vxlan_find_mac(vxlan, mac);
	if (f) {
		if (flags & NLM_F_EXCL) {
			netdev_dbg(vxlan->dev,
				   "lost race to create %pM\n", mac);
			return -EEXIST;
		}
		if (f->state != state) {
			f->state = state;
			f->updated = jiffies;
			notify = 1;
		}
		if (f->flags != ndm_flags) {
			f->flags = ndm_flags;
			f->updated = jiffies;
			notify = 1;
		}
		if ((flags & NLM_F_REPLACE)) {
			/* Only change unicasts */
			if (!(is_multicast_ether_addr(f->eth_addr) ||
			      is_zero_ether_addr(f->eth_addr))) {
				notify |= vxlan_fdb_replace(f, ip, port, vni,
							    ifindex);
			} else
				return -EOPNOTSUPP;
		}
		if ((flags & NLM_F_APPEND) &&
		    (is_multicast_ether_addr(f->eth_addr) ||
		     is_zero_ether_addr(f->eth_addr))) {
			int rc = vxlan_fdb_append(f, ip, port, vni, ifindex,
						  &rd);

			if (rc < 0)
				return rc;
			notify |= rc;
		}
	} else {
		if (!(flags & NLM_F_CREATE))
			return -ENOENT;

		if (vxlan->addrmax && vxlan->addrcnt >= vxlan->addrmax)
			return -ENOSPC;

		/* Disallow replace to add a multicast entry */
		if ((flags & NLM_F_REPLACE) &&
		    (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
			return -EOPNOTSUPP;

		netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
		f = kmalloc(sizeof(*f), GFP_ATOMIC);
		if (!f)
			return -ENOMEM;

		notify = 1;
		f->state = state;
		f->flags = ndm_flags;
		f->updated = f->used = jiffies;
		INIT_LIST_HEAD(&f->remotes);
		memcpy(f->eth_addr, mac, ETH_ALEN);

		vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);

		++vxlan->addrcnt;
		hlist_add_head_rcu(&f->hlist,
				   vxlan_fdb_head(vxlan, mac));
	}

	if (notify) {
		if (rd == NULL)
			rd = first_remote_rtnl(f);
		vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH);
	}

	return 0;
}

static void vxlan_fdb_free(struct rcu_head *head)
{
	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
	struct vxlan_rdst *rd, *nd;

	list_for_each_entry_safe(rd, nd, &f->remotes, list)
		kfree(rd);
	kfree(f);
}

static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
{
	netdev_dbg(vxlan->dev,
		   "delete %pM\n", f->eth_addr);

	--vxlan->addrcnt;
	vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);

	hlist_del_rcu(&f->hlist);
	call_rcu(&f->rcu, vxlan_fdb_free);
}

static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
			   union vxlan_addr *ip, __be16 *port, u32 *vni,
			   u32 *ifindex)
{
	struct net *net = dev_net(vxlan->dev);
	int err;

	if (tb[NDA_DST]) {
		err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
		if (err)
			return err;
	} else {
		union vxlan_addr *remote = &vxlan->default_dst.remote_ip;

		if (remote->sa.sa_family == AF_INET) {
			ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
			ip->sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			ip->sin6.sin6_addr = in6addr_any;
			ip->sa.sa_family = AF_INET6;
#endif
		}
	}

	if (tb[NDA_PORT]) {
		if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
			return -EINVAL;
		*port = nla_get_be16(tb[NDA_PORT]);
	} else {
		*port = vxlan->dst_port;
	}

	if (tb[NDA_VNI]) {
		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
			return -EINVAL;
		*vni = nla_get_u32(tb[NDA_VNI]);
	} else {
		*vni = vxlan->default_dst.remote_vni;
	}

	if (tb[NDA_IFINDEX]) {
		struct net_device *tdev;

		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
			return -EINVAL;
		*ifindex = nla_get_u32(tb[NDA_IFINDEX]);
		tdev = __dev_get_by_index(net, *ifindex);
		if (!tdev)
			return -EADDRNOTAVAIL;
	} else {
		*ifindex = 0;
	}

	return 0;
}
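/* Example (illustrative only): the parse/create pair above backs the
 * bridge fdb netlink interface, so a static entry can be installed
 * from userspace with e.g.
 *
 *	bridge fdb add 00:11:22:33:44:55 dev vxlan0 \
 *		dst 10.0.0.2 vni 100 port 4789
 *
 * Attributes left out fall back to the device defaults resolved in
 * vxlan_fdb_parse().
 */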
/* Add static entry (via netlink) */
static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			 struct net_device *dev,
			 const unsigned char *addr, u16 vid, u16 flags)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	/* struct net *net = dev_net(vxlan->dev); */
	union vxlan_addr ip;
	__be16 port;
	u32 vni, ifindex;
	int err;

	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
		pr_info("RTM_NEWNEIGH with invalid state %#x\n",
			ndm->ndm_state);
		return -EINVAL;
	}

	if (tb[NDA_DST] == NULL)
		return -EINVAL;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
	if (err)
		return err;

	if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
		return -EAFNOSUPPORT;

	spin_lock_bh(&vxlan->hash_lock);
	err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
			       port, vni, ifindex, ndm->ndm_flags);
	spin_unlock_bh(&vxlan->hash_lock);

	return err;
}

/* Delete entry (via netlink) */
static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
			    struct net_device *dev,
			    const unsigned char *addr, u16 vid)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f;
	struct vxlan_rdst *rd = NULL;
	union vxlan_addr ip;
	__be16 port;
	u32 vni, ifindex;
	int err;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
	if (err)
		return err;

	err = -ENOENT;

	spin_lock_bh(&vxlan->hash_lock);
	f = vxlan_find_mac(vxlan, addr);
	if (!f)
		goto out;

	if (!vxlan_addr_any(&ip)) {
		rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
		if (!rd)
			goto out;
	}

	err = 0;

	/* remove a destination if it's not the only one on the list,
	 * otherwise destroy the fdb entry
	 */
	if (rd && !list_is_singular(&f->remotes)) {
		list_del_rcu(&rd->list);
		vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
		kfree_rcu(rd, rcu);
		goto out;
	}

	vxlan_fdb_destroy(vxlan, f);

out:
	spin_unlock_bh(&vxlan->hash_lock);

	return err;
}

/* Dump forwarding table */
static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  struct net_device *dev,
			  struct net_device *filter_dev, int idx)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	unsigned int h;

	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct vxlan_fdb *f;
		int err;

		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
			struct vxlan_rdst *rd;

			if (idx < cb->args[0])
				goto skip;

			list_for_each_entry_rcu(rd, &f->remotes, list) {
				err = vxlan_fdb_info(skb, vxlan, f,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGH,
						     NLM_F_MULTI, rd);
				if (err < 0)
					goto out;
			}
skip:
			++idx;
		}
	}
out:
	return idx;
}
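/* Editorial note: the dump above is resumable.  idx counts entries
 * visited so far; when the skb fills up, the rtnetlink core stores the
 * returned idx in cb->args[0] and the next call skips that many
 * entries before emitting again.
 */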
/* Watch incoming packets to learn mapping between Ethernet address
 * and Tunnel endpoint.
 * Return true if packet is bogus and should be dropped.
 */
static bool vxlan_snoop(struct net_device *dev,
			union vxlan_addr *src_ip, const u8 *src_mac)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f;

	f = vxlan_find_mac(vxlan, src_mac);
	if (likely(f)) {
		struct vxlan_rdst *rdst = first_remote_rcu(f);

		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip)))
			return false;

		/* Don't migrate static entries, drop packets */
		if (f->state & NUD_NOARP)
			return true;

		if (net_ratelimit())
			netdev_info(dev,
				    "%pM migrated from %pIS to %pIS\n",
				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);

		rdst->remote_ip = *src_ip;
		f->updated = jiffies;
		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH);
	} else {
		/* learned new entry */
		spin_lock(&vxlan->hash_lock);

		/* close off race between vxlan_flush and incoming packets */
		if (netif_running(dev))
			vxlan_fdb_create(vxlan, src_mac, src_ip,
					 NUD_REACHABLE,
					 NLM_F_EXCL|NLM_F_CREATE,
					 vxlan->dst_port,
					 vxlan->default_dst.remote_vni,
					 0, NTF_SELF);
		spin_unlock(&vxlan->hash_lock);
	}

	return false;
}
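/* Editorial note: vxlan_snoop() is the tunnel analogue of bridge
 * source-MAC learning, except that what is learned is an outer IP
 * endpoint rather than a port.  Learned entries may migrate to a new
 * endpoint; static entries (NUD_NOARP) never do, and traffic that
 * contradicts them is dropped.
 */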
/* See if multicast group is already in use by other ID */
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
{
	struct vxlan_dev *vxlan;

	/* The vxlan_sock is only used by dev, leaving group has
	 * no effect on other vxlan devices.
	 */
	if (atomic_read(&dev->vn_sock->refcnt) == 1)
		return false;

	list_for_each_entry(vxlan, &vn->vxlan_list, next) {
		if (!netif_running(vxlan->dev) || vxlan == dev)
			continue;

		if (vxlan->vn_sock != dev->vn_sock)
			continue;

		if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
				      &dev->default_dst.remote_ip))
			continue;

		if (vxlan->default_dst.remote_ifindex !=
		    dev->default_dst.remote_ifindex)
			continue;

		return true;
	}

	return false;
}

void vxlan_sock_release(struct vxlan_sock *vs)
{
	struct sock *sk = vs->sock->sk;
	struct net *net = sock_net(sk);
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);

	if (!atomic_dec_and_test(&vs->refcnt))
		return;

	spin_lock(&vn->sock_lock);
	hlist_del_rcu(&vs->hlist);
	vxlan_notify_del_rx_port(vs);
	spin_unlock(&vn->sock_lock);

	queue_work(vxlan_wq, &vs->del_work);
}
EXPORT_SYMBOL_GPL(vxlan_sock_release);

/* Update multicast group membership when first VNI on
 * multicast address is brought up
 */
static int vxlan_igmp_join(struct vxlan_dev *vxlan)
{
	struct vxlan_sock *vs = vxlan->vn_sock;
	struct sock *sk = vs->sock->sk;
	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
	int ifindex = vxlan->default_dst.remote_ifindex;
	int ret = -EINVAL;

	lock_sock(sk);
	if (ip->sa.sa_family == AF_INET) {
		struct ip_mreqn mreq = {
			.imr_multiaddr.s_addr	= ip->sin.sin_addr.s_addr,
			.imr_ifindex		= ifindex,
		};

		ret = ip_mc_join_group(sk, &mreq);
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
						   &ip->sin6.sin6_addr);
#endif
	}
	release_sock(sk);

	return ret;
}

/* Inverse of vxlan_igmp_join when last VNI is brought down */
static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
{
	struct vxlan_sock *vs = vxlan->vn_sock;
	struct sock *sk = vs->sock->sk;
	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
	int ifindex = vxlan->default_dst.remote_ifindex;
	int ret = -EINVAL;

	lock_sock(sk);
	if (ip->sa.sa_family == AF_INET) {
		struct ip_mreqn mreq = {
			.imr_multiaddr.s_addr	= ip->sin.sin_addr.s_addr,
			.imr_ifindex		= ifindex,
		};

		ret = ip_mc_leave_group(sk, &mreq);
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
						   &ip->sin6.sin6_addr);
#endif
	}
	release_sock(sk);

	return ret;
}

static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
				      size_t hdrlen, u32 data, bool nopartial)
{
	size_t start, offset, plen;

	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
	offset = start + ((data & VXLAN_RCO_UDP) ?
			  offsetof(struct udphdr, check) :
			  offsetof(struct tcphdr, check));

	plen = hdrlen + offset + sizeof(u16);

	if (!pskb_may_pull(skb, plen))
		return NULL;

	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);

	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset,
			    nopartial);

	return vh;
}
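/* Editorial sketch of the header parsed below (RFC 7348):
 *
 *	|R|R|R|R|I|R|R|R|            Reserved                           |
 *	|                VXLAN Network Identifier (VNI) |   Reserved    |
 *
 * vx_flags holds the flag byte (the I bit is VXLAN_HF_VNI) plus the
 * first reserved field, where the RCO and GBP extensions place their
 * bits; vx_vni holds the VNI in its upper 24 bits.
 */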
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct vxlan_sock *vs;
	struct vxlanhdr *vxh;
	u32 flags, vni;
	struct vxlan_metadata md = {0};

	/* Need VXLAN and inner Ethernet header to be present */
	if (!pskb_may_pull(skb, VXLAN_HLEN))
		goto error;

	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
	flags = ntohl(vxh->vx_flags);
	vni = ntohl(vxh->vx_vni);

	if (flags & VXLAN_HF_VNI) {
		flags &= ~VXLAN_HF_VNI;
	} else {
		/* VNI flag always required to be set */
		goto bad_flags;
	}

	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
		goto drop;
	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);

	vs = rcu_dereference_sk_user_data(sk);
	if (!vs)
		goto drop;

	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
				    !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
		if (!vxh)
			goto drop;

		flags &= ~VXLAN_HF_RCO;
		vni &= VXLAN_VNI_MASK;
	}

	/* For backwards compatibility, only allow reserved fields to be
	 * used by VXLAN extensions if explicitly requested.
	 */
	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
		struct vxlanhdr_gbp *gbp;

		gbp = (struct vxlanhdr_gbp *)vxh;
		md.gbp = ntohs(gbp->policy_id);

		if (gbp->dont_learn)
			md.gbp |= VXLAN_GBP_DONT_LEARN;

		if (gbp->policy_applied)
			md.gbp |= VXLAN_GBP_POLICY_APPLIED;

		flags &= ~VXLAN_GBP_USED_BITS;
	}

	if (flags || vni & ~VXLAN_VNI_MASK) {
		/* If there are any unprocessed flags remaining treat
		 * this as a malformed packet. This behavior diverges from
		 * VXLAN RFC (RFC7348) which stipulates that bits in
		 * reserved fields are to be ignored. The approach here
		 * maintains compatibility with previous stack code, and
		 * also is more robust and provides a little more security
		 * in adding extensions to VXLAN.
		 */

		goto bad_flags;
	}

	md.vni = vxh->vx_vni;
	vs->rcv(vs, skb, &md);
	return 0;

drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;

bad_flags:
	netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
		   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));

error:
	/* Return non vxlan pkt */
	return 1;
}
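/* Editorial note: md.vni is passed to the rcv callback still in network
 * byte order, with the VNI in the upper 24 bits of the field;
 * vxlan_rcv() below recovers the host-order VNI as ntohl(md->vni) >> 8.
 */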
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
		      struct vxlan_metadata *md)
{
	struct iphdr *oip = NULL;
	struct ipv6hdr *oip6 = NULL;
	struct vxlan_dev *vxlan;
	struct pcpu_sw_netstats *stats;
	union vxlan_addr saddr;
	__u32 vni;
	int err = 0;
	union vxlan_addr *remote_ip;

	vni = ntohl(md->vni) >> 8;
	/* Is this VNI defined? */
	vxlan = vxlan_vs_find_vni(vs, vni);
	if (!vxlan)
		goto drop;

	remote_ip = &vxlan->default_dst.remote_ip;
	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
	skb->protocol = eth_type_trans(skb, vxlan->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
		goto drop;

	/* Re-examine inner Ethernet packet */
	if (remote_ip->sa.sa_family == AF_INET) {
		oip = ip_hdr(skb);
		saddr.sin.sin_addr.s_addr = oip->saddr;
		saddr.sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		oip6 = ipv6_hdr(skb);
		saddr.sin6.sin6_addr = oip6->saddr;
		saddr.sa.sa_family = AF_INET6;
#endif
	}

	if ((vxlan->flags & VXLAN_F_LEARN) &&
	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
		goto drop;

	skb_reset_network_header(skb);
	skb->mark = md->gbp;

	if (oip6)
		err = IP6_ECN_decapsulate(oip6, skb);
	if (oip)
		err = IP_ECN_decapsulate(oip, skb);

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (oip6)
				net_info_ratelimited("non-ECT from %pI6\n",
						     &oip6->saddr);
			if (oip)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &oip->saddr, oip->tos);
		}
		if (err > 1) {
			++vxlan->dev->stats.rx_frame_errors;
			++vxlan->dev->stats.rx_errors;
			goto drop;
		}
	}

	stats = this_cpu_ptr(vxlan->dev->tstats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
	u64_stats_update_end(&stats->syncp);

	netif_rx(skb);

	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
}

static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct arphdr *parp;
	u8 *arpptr, *sha;
	__be32 sip, tip;
	struct neighbour *n;

	if (dev->flags & IFF_NOARP)
		goto out;

	if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
		dev->stats.tx_dropped++;
		goto out;
	}
	parp = arp_hdr(skb);

	if ((parp->ar_hrd != htons(ARPHRD_ETHER) &&
	     parp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    parp->ar_pro != htons(ETH_P_IP) ||
	    parp->ar_op != htons(ARPOP_REQUEST) ||
	    parp->ar_hln != dev->addr_len ||
	    parp->ar_pln != 4)
		goto out;
	arpptr = (u8 *)parp + sizeof(struct arphdr);
	sha = arpptr;
	arpptr += dev->addr_len;	/* sha */
	memcpy(&sip, arpptr, sizeof(sip));
	arpptr += sizeof(sip);
	arpptr += dev->addr_len;	/* tha */
	memcpy(&tip, arpptr, sizeof(tip));

	if (ipv4_is_loopback(tip) ||
	    ipv4_is_multicast(tip))
		goto out;

	n = neigh_lookup(&arp_tbl, &tip, dev);

	if (n) {
		struct vxlan_fdb *f;
		struct sk_buff *reply;

		if (!(n->nud_state & NUD_CONNECTED)) {
			neigh_release(n);
			goto out;
		}

		f = vxlan_find_mac(vxlan, n->ha);
		if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
			/* bridge-local neighbor */
			neigh_release(n);
			goto out;
		}

		reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
				   n->ha, sha);

		neigh_release(n);

		if (reply == NULL)
			goto out;

		skb_reset_mac_header(reply);
		__skb_pull(reply, skb_network_offset(reply));
		reply->ip_summed = CHECKSUM_UNNECESSARY;
		reply->pkt_type = PACKET_HOST;

		if (netif_rx_ni(reply) == NET_RX_DROP)
			dev->stats.rx_dropped++;
	} else if (vxlan->flags & VXLAN_F_L3MISS) {
		union vxlan_addr ipa = {
			.sin.sin_addr.s_addr = tip,
			.sin.sin_family = AF_INET,
		};

		vxlan_ip_miss(dev, &ipa);
	}
out:
	consume_skb(skb);
	return NETDEV_TX_OK;
}
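/* Editorial note: arp_reduce() implements proxy mode for IPv4.  When
 * the target is already known via the neighbour table and the fdb, the
 * device synthesizes the ARP reply locally instead of flooding the
 * request across the tunnel; neigh_reduce() below is the IPv6
 * neighbour-discovery counterpart.
 */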
#if IS_ENABLED(CONFIG_IPV6)
static struct sk_buff *vxlan_na_create(struct sk_buff *request,
				       struct neighbour *n, bool isrouter)
{
	struct net_device *dev = request->dev;
	struct sk_buff *reply;
	struct nd_msg *ns, *na;
	struct ipv6hdr *pip6;
	u8 *daddr;
	int na_olen = 8; /* opt hdr + ETH_ALEN for target */
	int ns_olen;
	int i, len;

	if (dev == NULL)
		return NULL;

	len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
		sizeof(*na) + na_olen + dev->needed_tailroom;
	reply = alloc_skb(len, GFP_ATOMIC);
	if (reply == NULL)
		return NULL;

	reply->protocol = htons(ETH_P_IPV6);
	reply->dev = dev;
	skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
	skb_push(reply, sizeof(struct ethhdr));
	skb_set_mac_header(reply, 0);

	ns = (struct nd_msg *)skb_transport_header(request);

	daddr = eth_hdr(request)->h_source;
	ns_olen = request->len - skb_transport_offset(request) - sizeof(*ns);
	for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
		if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
			daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
			break;
		}
	}

	/* Ethernet header */
	ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
	ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
	eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
	reply->protocol = htons(ETH_P_IPV6);

	skb_pull(reply, sizeof(struct ethhdr));
	skb_set_network_header(reply, 0);
	skb_put(reply, sizeof(struct ipv6hdr));

	/* IPv6 header */

	pip6 = ipv6_hdr(reply);
	memset(pip6, 0, sizeof(struct ipv6hdr));
	pip6->version = 6;
	pip6->priority = ipv6_hdr(request)->priority;
	pip6->nexthdr = IPPROTO_ICMPV6;
	pip6->hop_limit = 255;
	pip6->daddr = ipv6_hdr(request)->saddr;
	pip6->saddr = *(struct in6_addr *)n->primary_key;

	skb_pull(reply, sizeof(struct ipv6hdr));
	skb_set_transport_header(reply, 0);

	na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);

	/* Neighbor Advertisement */
	memset(na, 0, sizeof(*na)+na_olen);
	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
	na->icmph.icmp6_router = isrouter;
	na->icmph.icmp6_override = 1;
	na->icmph.icmp6_solicited = 1;
	na->target = ns->target;
	ether_addr_copy(&na->opt[2], n->ha);
	na->opt[0] = ND_OPT_TARGET_LL_ADDR;
	na->opt[1] = na_olen >> 3;

	na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
		&pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
		csum_partial(na, sizeof(*na)+na_olen, 0));

	pip6->payload_len = htons(sizeof(*na)+na_olen);

	skb_push(reply, sizeof(struct ipv6hdr));

	reply->ip_summed = CHECKSUM_UNNECESSARY;

	return reply;
}

static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct nd_msg *msg;
	const struct ipv6hdr *iphdr;
	const struct in6_addr *saddr, *daddr;
	struct neighbour *n;
	struct inet6_dev *in6_dev;

	in6_dev = __in6_dev_get(dev);
	if (!in6_dev)
		goto out;

	iphdr = ipv6_hdr(skb);
	saddr = &iphdr->saddr;
	daddr = &iphdr->daddr;

	msg = (struct nd_msg *)skb_transport_header(skb);
	if (msg->icmph.icmp6_code != 0 ||
	    msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
		goto out;

	if (ipv6_addr_loopback(daddr) ||
	    ipv6_addr_is_multicast(&msg->target))
		goto out;

	n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);

	if (n) {
		struct vxlan_fdb *f;
		struct sk_buff *reply;

		if (!(n->nud_state & NUD_CONNECTED)) {
			neigh_release(n);
			goto out;
		}

		f = vxlan_find_mac(vxlan, n->ha);
		if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
			/* bridge-local neighbor */
			neigh_release(n);
			goto out;
		}

		reply = vxlan_na_create(skb, n,
					!!(f ? f->flags & NTF_ROUTER : 0));

		neigh_release(n);

		if (reply == NULL)
			goto out;

		if (netif_rx_ni(reply) == NET_RX_DROP)
			dev->stats.rx_dropped++;

	} else if (vxlan->flags & VXLAN_F_L3MISS) {
		union vxlan_addr ipa = {
			.sin6.sin6_addr = msg->target,
			.sin6.sin6_family = AF_INET6,
		};

		vxlan_ip_miss(dev, &ipa);
	}

out:
	consume_skb(skb);
	return NETDEV_TX_OK;
}
#endif
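/* Editorial note: route_shortcircuit() below supports RSC (route
 * short-circuiting).  When the destination MAC belongs to a router
 * entry (NTF_ROUTER) the inner frame's MAC header is rewritten to the
 * final destination resolved from the neighbour table, letting the
 * frame tunnel straight to the target VTEP instead of hairpinning
 * through the router.
 */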
static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct neighbour *n;

	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	n = NULL;
	switch (ntohs(eth_hdr(skb)->h_proto)) {
	case ETH_P_IP:
	{
		struct iphdr *pip;

		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return false;
		pip = ip_hdr(skb);
		n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
		if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
			union vxlan_addr ipa = {
				.sin.sin_addr.s_addr = pip->daddr,
				.sin.sin_family = AF_INET,
			};

			vxlan_ip_miss(dev, &ipa);
			return false;
		}

		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case ETH_P_IPV6:
	{
		struct ipv6hdr *pip6;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return false;
		pip6 = ipv6_hdr(skb);
		n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
		if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
			union vxlan_addr ipa = {
				.sin6.sin6_addr = pip6->daddr,
				.sin6.sin6_family = AF_INET6,
			};

			vxlan_ip_miss(dev, &ipa);
			return false;
		}

		break;
	}
#endif
	default:
		return false;
	}

	if (n) {
		bool diff;

		diff = !ether_addr_equal(eth_hdr(skb)->h_dest, n->ha);
		if (diff) {
			memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
			       dev->addr_len);
			memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len);
		}
		neigh_release(n);
		return diff;
	}

	return false;
}

static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
				struct vxlan_metadata *md)
{
	struct vxlanhdr_gbp *gbp;

	if (!md->gbp)
		return;

	gbp = (struct vxlanhdr_gbp *)vxh;
	vxh->vx_flags |= htonl(VXLAN_HF_GBP);

	if (md->gbp & VXLAN_GBP_DONT_LEARN)
		gbp->dont_learn = 1;

	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
		gbp->policy_applied = 1;

	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}

#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb,
			   struct net_device *dev, struct in6_addr *saddr,
			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
			   __be16 src_port, __be16 dst_port,
			   struct vxlan_metadata *md, bool xnet, u32 vxflags)
{
	struct vxlanhdr *vxh;
	int min_headroom;
	int err;
	bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	u16 hdrlen = sizeof(struct vxlanhdr);

	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		int csum_start = skb_checksum_start_offset(skb);

		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
		    (skb->csum_offset == offsetof(struct udphdr, check) ||
		     skb->csum_offset == offsetof(struct tcphdr, check))) {
			udp_sum = false;
			type |= SKB_GSO_TUNNEL_REMCSUM;
		}
	}

	skb_scrub_packet(skb, xnet);

	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
			+ VXLAN_HLEN + sizeof(struct ipv6hdr)
			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);

	/* Need space for new headers (invalidates iph ptr) */
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err)) {
		kfree_skb(skb);
		goto err;
	}

	skb = vlan_hwaccel_push_inside(skb);
	if (WARN_ON(!skb)) {
		err = -ENOMEM;
		goto err;
	}

	skb = iptunnel_handle_offloads(skb, udp_sum, type);
	if (IS_ERR(skb)) {
		err = -EINVAL;
		goto err;
	}

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = md->vni;

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
			   VXLAN_RCO_SHIFT;

		if (skb->csum_offset == offsetof(struct udphdr, check))
			data |= VXLAN_RCO_UDP;

		vxh->vx_vni |= htonl(data);
		vxh->vx_flags |= htonl(VXLAN_HF_RCO);

		if (!skb_is_gso(skb)) {
			skb->ip_summed = CHECKSUM_NONE;
			skb->encapsulation = 0;
		}
	}

	if (vxflags & VXLAN_F_GBP)
		vxlan_build_gbp_hdr(vxh, vxflags, md);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));

	udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
			     ttl, src_port, dst_port,
			     !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
	return 0;
err:
	dst_release(dst);
	return err;
}
#endif
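/* Editorial note: vxlan_xmit_skb() below is the IPv4 twin of
 * vxlan6_xmit_skb() above.  Both size their headroom request as the
 * link-layer reserve plus any cached dst header, plus VXLAN_HLEN
 * (outer UDP + VXLAN headers, 16 bytes), the outer IP header, and 4
 * more bytes if a VLAN tag must be pushed back into the payload.
 */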
int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port,
		   struct vxlan_metadata *md, bool xnet, u32 vxflags)
{
	struct vxlanhdr *vxh;
	int min_headroom;
	int err;
	bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	u16 hdrlen = sizeof(struct vxlanhdr);

	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		int csum_start = skb_checksum_start_offset(skb);

		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
		    (skb->csum_offset == offsetof(struct udphdr, check) ||
		     skb->csum_offset == offsetof(struct tcphdr, check))) {
			udp_sum = false;
			type |= SKB_GSO_TUNNEL_REMCSUM;
		}
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ VXLAN_HLEN + sizeof(struct iphdr)
			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);

	/* Need space for new headers (invalidates iph ptr) */
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}

	skb = vlan_hwaccel_push_inside(skb);
	if (WARN_ON(!skb))
		return -ENOMEM;

	skb = iptunnel_handle_offloads(skb, udp_sum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = md->vni;

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
			   VXLAN_RCO_SHIFT;

		if (skb->csum_offset == offsetof(struct udphdr, check))
			data |= VXLAN_RCO_UDP;

		vxh->vx_vni |= htonl(data);
		vxh->vx_flags |= htonl(VXLAN_HF_RCO);

		if (!skb_is_gso(skb)) {
			skb->ip_summed = CHECKSUM_NONE;
			skb->encapsulation = 0;
		}
	}

	if (vxflags & VXLAN_F_GBP)
		vxlan_build_gbp_hdr(vxh, vxflags, md);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));

	return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos,
				   ttl, df, src_port, dst_port, xnet,
				   !(vxflags & VXLAN_F_UDP_CSUM));
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
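/* Editorial note: when both VTEPs live on this host the packet never
 * reaches the wire.  vxlan_encap_bypass() below hands it directly to
 * the destination vxlan_dev, charging tx stats to the source device
 * and rx stats to the destination.
 */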
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
			       struct vxlan_dev *dst_vxlan)
{
	struct pcpu_sw_netstats *tx_stats, *rx_stats;
	union vxlan_addr loopback;
	union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
	struct net_device *dev = skb->dev;
	int len = skb->len;

	tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
	rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
	skb->pkt_type = PACKET_HOST;
	skb->encapsulation = 0;
	skb->dev = dst_vxlan->dev;
	__skb_pull(skb, skb_network_offset(skb));

	if (remote_ip->sa.sa_family == AF_INET) {
		loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		loopback.sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		loopback.sin6.sin6_addr = in6addr_loopback;
		loopback.sa.sa_family = AF_INET6;
#endif
	}

	if (dst_vxlan->flags & VXLAN_F_LEARN)
		vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source);

	u64_stats_update_begin(&tx_stats->syncp);
	tx_stats->tx_packets++;
	tx_stats->tx_bytes += len;
	u64_stats_update_end(&tx_stats->syncp);

	if (netif_rx(skb) == NET_RX_SUCCESS) {
		u64_stats_update_begin(&rx_stats->syncp);
		rx_stats->rx_packets++;
		rx_stats->rx_bytes += len;
		u64_stats_update_end(&rx_stats->syncp);
	} else {
		dev->stats.rx_dropped++;
	}
}
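/* Editorial note: in vxlan_xmit_one() below the outer UDP source port
 * is chosen by udp_flow_src_port() from a hash of the inner flow,
 * constrained to [port_min, port_max].  This keeps a single flow on
 * one path under ECMP/RSS while spreading distinct flows.
 */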
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
			   struct vxlan_rdst *rdst, bool did_rsc)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct sock *sk = vxlan->vn_sock->sock->sk;
	struct rtable *rt = NULL;
	const struct iphdr *old_iph;
	struct flowi4 fl4;
	union vxlan_addr *dst;
	struct vxlan_metadata md;
	__be16 src_port = 0, dst_port;
	u32 vni;
	__be16 df = 0;
	__u8 tos, ttl;
	int err;

	dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port;
	vni = rdst->remote_vni;
	dst = &rdst->remote_ip;

	if (vxlan_addr_any(dst)) {
		if (did_rsc) {
			/* short-circuited back to local bridge */
			vxlan_encap_bypass(skb, vxlan, vxlan);
			return;
		}
		goto drop;
	}

	old_iph = ip_hdr(skb);

	ttl = vxlan->ttl;
	if (!ttl && vxlan_addr_multicast(dst))
		ttl = 1;

	tos = vxlan->tos;
	if (tos == 1)
		tos = ip_tunnel_get_dsfield(old_iph, skb);

	src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->port_min,
				     vxlan->port_max, true);

	if (dst->sa.sa_family == AF_INET) {
		memset(&fl4, 0, sizeof(fl4));
		fl4.flowi4_oif = rdst->remote_ifindex;
		fl4.flowi4_tos = RT_TOS(tos);
		fl4.daddr = dst->sin.sin_addr.s_addr;
		fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;

		rt = ip_route_output_key(vxlan->net, &fl4);
		if (IS_ERR(rt)) {
			netdev_dbg(dev, "no route to %pI4\n",
				   &dst->sin.sin_addr.s_addr);
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}

		if (rt->dst.dev == dev) {
			netdev_dbg(dev, "circular route to %pI4\n",
				   &dst->sin.sin_addr.s_addr);
			dev->stats.collisions++;
			goto rt_tx_error;
		}

		/* Bypass encapsulation if the destination is local */
		if (rt->rt_flags & RTCF_LOCAL &&
		    !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
			struct vxlan_dev *dst_vxlan;

			ip_rt_put(rt);
			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
						   dst->sa.sa_family, dst_port,
						   vxlan->flags);
			if (!dst_vxlan)
				goto tx_error;
			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
			return;
		}

		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
		md.vni = htonl(vni << 8);
		md.gbp = skb->mark;

		err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
				     dst->sin.sin_addr.s_addr, tos, ttl, df,
				     src_port, dst_port, &md,
				     !net_eq(vxlan->net, dev_net(vxlan->dev)),
				     vxlan->flags);
		if (err < 0) {
			/* skb is already freed. */
			skb = NULL;
			goto rt_tx_error;
		}

		iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		struct dst_entry *ndst;
		struct flowi6 fl6;
		u32 flags;

		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_oif = rdst->remote_ifindex;
		fl6.daddr = dst->sin6.sin6_addr;
		fl6.saddr = vxlan->saddr.sin6.sin6_addr;
		fl6.flowi6_proto = IPPROTO_UDP;

		if (ipv6_stub->ipv6_dst_lookup(sk, &ndst, &fl6)) {
			netdev_dbg(dev, "no route to %pI6\n",
				   &dst->sin6.sin6_addr);
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}

		if (ndst->dev == dev) {
			netdev_dbg(dev, "circular route to %pI6\n",
				   &dst->sin6.sin6_addr);
			dst_release(ndst);
			dev->stats.collisions++;
			goto tx_error;
		}

		/* Bypass encapsulation if the destination is local */
		flags = ((struct rt6_info *)ndst)->rt6i_flags;
		if (flags & RTF_LOCAL &&
		    !(flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
			struct vxlan_dev *dst_vxlan;

			dst_release(ndst);
			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
						   dst->sa.sa_family, dst_port,
						   vxlan->flags);
			if (!dst_vxlan)
				goto tx_error;
			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
			return;
		}

		ttl = ttl ? : ip6_dst_hoplimit(ndst);
		md.vni = htonl(vni << 8);
		md.gbp = skb->mark;

		err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
				      0, ttl, src_port, dst_port, &md,
				      !net_eq(vxlan->net, dev_net(vxlan->dev)),
				      vxlan->flags);
#endif
	}

	return;

drop:
	dev->stats.tx_dropped++;
	goto tx_free;

rt_tx_error:
	ip_rt_put(rt);
tx_error:
	dev->stats.tx_errors++;
tx_free:
	dev_kfree_skb(skb);
}
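/* Editorial note: an fdb entry can carry several remotes (typically the
 * all-zeros default entry listing every peer VTEP).  vxlan_xmit() below
 * clones the skb for each additional remote and sends the original to
 * the first, so unknown-unicast and multicast frames are head-end
 * replicated.
 */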
/* Transmit local packets over VXLAN
 *
 * Outer IP header inherits ECN and DF from inner header.
 * Outer UDP destination is the VXLAN assigned port.
 * Source port is based on a hash of the flow.
 */
static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct ethhdr *eth;
	bool did_rsc = false;
	struct vxlan_rdst *rdst, *fdst = NULL;
	struct vxlan_fdb *f;

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);

	if ((vxlan->flags & VXLAN_F_PROXY)) {
		if (ntohs(eth->h_proto) == ETH_P_ARP)
			return arp_reduce(dev, skb);
#if IS_ENABLED(CONFIG_IPV6)
		else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
			 pskb_may_pull(skb, sizeof(struct ipv6hdr)
				       + sizeof(struct nd_msg)) &&
			 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
			struct nd_msg *msg;

			msg = (struct nd_msg *)skb_transport_header(skb);
			if (msg->icmph.icmp6_code == 0 &&
			    msg->icmph.icmp6_type ==
			    NDISC_NEIGHBOUR_SOLICITATION)
				return neigh_reduce(dev, skb);
		}
		eth = eth_hdr(skb);
#endif
	}

	f = vxlan_find_mac(vxlan, eth->h_dest);
	did_rsc = false;

	if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
	    (ntohs(eth->h_proto) == ETH_P_IP ||
	     ntohs(eth->h_proto) == ETH_P_IPV6)) {
		did_rsc = route_shortcircuit(dev, skb);
		if (did_rsc)
			f = vxlan_find_mac(vxlan, eth->h_dest);
	}

	if (f == NULL) {
		f = vxlan_find_mac(vxlan, all_zeros_mac);
		if (f == NULL) {
			if ((vxlan->flags & VXLAN_F_L2MISS) &&
			    !is_multicast_ether_addr(eth->h_dest))
				vxlan_fdb_miss(vxlan, eth->h_dest);

			dev->stats.tx_dropped++;
			kfree_skb(skb);
			return NETDEV_TX_OK;
		}
	}

	list_for_each_entry_rcu(rdst, &f->remotes, list) {
		struct sk_buff *skb1;

		if (!fdst) {
			fdst = rdst;
			continue;
		}
		skb1 = skb_clone(skb, GFP_ATOMIC);
		if (skb1)
			vxlan_xmit_one(skb1, dev, rdst, did_rsc);
	}

	if (fdst)
		vxlan_xmit_one(skb, dev, fdst, did_rsc);
	else
		kfree_skb(skb);
	return NETDEV_TX_OK;
}

/* Walk the forwarding table and purge stale entries */
static void vxlan_cleanup(unsigned long arg)
{
	struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
	unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
	unsigned int h;

	if (!netif_running(vxlan->dev))
		return;

	spin_lock_bh(&vxlan->hash_lock);
	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct hlist_node *p, *n;

		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
			struct vxlan_fdb *f
				= container_of(p, struct vxlan_fdb, hlist);
			unsigned long timeout;

			if (f->state & NUD_PERMANENT)
				continue;

			timeout = f->used + vxlan->age_interval * HZ;
			if (time_before_eq(timeout, jiffies)) {
				netdev_dbg(vxlan->dev,
					   "garbage collect %pM\n",
					   f->eth_addr);
				f->state = NUD_STALE;
				vxlan_fdb_destroy(vxlan, f);
			} else if (time_before(timeout, next_timer))
				next_timer = timeout;
		}
	}
	spin_unlock_bh(&vxlan->hash_lock);

	mod_timer(&vxlan->age_timer, next_timer);
}
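/* Editorial note on ageing: vxlan_cleanup() above evicts a learned
 * entry once jiffies passes f->used + age_interval * HZ (300 seconds
 * with FDB_AGE_DEFAULT), then re-arms the timer at the earliest
 * remaining timeout, never more than FDB_AGE_INTERVAL away.
 */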
/* Walk the forwarding table and purge stale entries */
static void vxlan_cleanup(unsigned long arg)
{
	struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
	unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
	unsigned int h;

	if (!netif_running(vxlan->dev))
		return;

	spin_lock_bh(&vxlan->hash_lock);
	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct hlist_node *p, *n;

		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
			struct vxlan_fdb *f
				= container_of(p, struct vxlan_fdb, hlist);
			unsigned long timeout;

			if (f->state & NUD_PERMANENT)
				continue;

			timeout = f->used + vxlan->age_interval * HZ;
			if (time_before_eq(timeout, jiffies)) {
				netdev_dbg(vxlan->dev,
					   "garbage collect %pM\n",
					   f->eth_addr);
				f->state = NUD_STALE;
				vxlan_fdb_destroy(vxlan, f);
			} else if (time_before(timeout, next_timer))
				next_timer = timeout;
		}
	}
	spin_unlock_bh(&vxlan->hash_lock);

	mod_timer(&vxlan->age_timer, next_timer);
}

static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	__u32 vni = vxlan->default_dst.remote_vni;

	vxlan->vn_sock = vs;
	spin_lock(&vn->sock_lock);
	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
	spin_unlock(&vn->sock_lock);
}

/* Set up per-cpu stats when the device is created */
static int vxlan_init(struct net_device *dev)
{
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
{
	struct vxlan_fdb *f;

	spin_lock_bh(&vxlan->hash_lock);
	f = __vxlan_find_mac(vxlan, all_zeros_mac);
	if (f)
		vxlan_fdb_destroy(vxlan, f);
	spin_unlock_bh(&vxlan->hash_lock);
}

static void vxlan_uninit(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);

	vxlan_fdb_delete_default(vxlan);

	free_percpu(dev->tstats);
}

/* Start the ageing timer and join the multicast group when the device
 * is brought up.
 */
static int vxlan_open(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_sock *vs;
	int ret = 0;

	vs = vxlan_sock_add(vxlan->net, vxlan->dst_port, vxlan_rcv, NULL,
			    false, vxlan->flags);
	if (IS_ERR(vs))
		return PTR_ERR(vs);

	vxlan_vs_add_dev(vs, vxlan);

	if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
		ret = vxlan_igmp_join(vxlan);
		/* another vxlan device may already have joined the group */
		if (ret == -EADDRINUSE)
			ret = 0;
		if (ret) {
			vxlan_sock_release(vs);
			return ret;
		}
	}

	if (vxlan->age_interval)
		mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);

	return ret;
}

/* Purge the forwarding table */
static void vxlan_flush(struct vxlan_dev *vxlan)
{
	unsigned int h;

	spin_lock_bh(&vxlan->hash_lock);
	for (h = 0; h < FDB_HASH_SIZE; ++h) {
		struct hlist_node *p, *n;

		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
			struct vxlan_fdb *f
				= container_of(p, struct vxlan_fdb, hlist);
			/* the all_zeros_mac entry is deleted in vxlan_uninit */
			if (!is_zero_ether_addr(f->eth_addr))
				vxlan_fdb_destroy(vxlan, f);
		}
	}
	spin_unlock_bh(&vxlan->hash_lock);
}

/* Clean up the timer and forwarding table on shutdown */
static int vxlan_stop(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_sock *vs = vxlan->vn_sock;
	int ret = 0;

	if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
	    !vxlan_group_used(vn, vxlan))
		ret = vxlan_igmp_leave(vxlan);

	del_timer_sync(&vxlan->age_timer);

	vxlan_flush(vxlan);
	vxlan_sock_release(vs);

	return ret;
}

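/* Editorial note: vxlan_open() and vxlan_stop() bracket the UDP socket
 * lifetime.  Open takes a reference on a shared per-(family, port)
 * socket, creating it on first use; stop drops the reference, and the
 * socket itself is torn down only once its last user is gone.
 */
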
/* Stub, nothing needs to be done. */
static void vxlan_set_multicast_list(struct net_device *dev)
{
}

static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	struct net_device *lowerdev;
	int max_mtu;

	lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
	if (lowerdev == NULL)
		return eth_change_mtu(dev, new_mtu);

	if (dst->remote_ip.sa.sa_family == AF_INET6)
		max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
	else
		max_mtu = lowerdev->mtu - VXLAN_HEADROOM;

	if (new_mtu < 68 || new_mtu > max_mtu)
		return -EINVAL;

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops vxlan_netdev_ops = {
	.ndo_init		= vxlan_init,
	.ndo_uninit		= vxlan_uninit,
	.ndo_open		= vxlan_open,
	.ndo_stop		= vxlan_stop,
	.ndo_start_xmit		= vxlan_xmit,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_set_rx_mode	= vxlan_set_multicast_list,
	.ndo_change_mtu		= vxlan_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_fdb_add		= vxlan_fdb_add,
	.ndo_fdb_del		= vxlan_fdb_delete,
	.ndo_fdb_dump		= vxlan_fdb_dump,
};

/* Info for udev that this is a virtual tunnel endpoint */
static struct device_type vxlan_type = {
	.name = "vxlan",
};

/* Call the caller's ndo_add_vxlan_port() to supply the listening VXLAN
 * UDP ports.  Callers are expected to implement ndo_add_vxlan_port.
 */
void vxlan_get_rx_port(struct net_device *dev)
{
	struct vxlan_sock *vs;
	struct net *net = dev_net(dev);
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	sa_family_t sa_family;
	__be16 port;
	unsigned int i;

	spin_lock(&vn->sock_lock);
	for (i = 0; i < PORT_HASH_SIZE; ++i) {
		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
			port = inet_sk(vs->sock->sk)->inet_sport;
			sa_family = vs->sock->sk->sk_family;
			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
							    port);
		}
	}
	spin_unlock(&vn->sock_lock);
}
EXPORT_SYMBOL_GPL(vxlan_get_rx_port);

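/* Illustrative sketch (hypothetical driver, not part of this file):
 * a NIC driver that offloads VXLAN receive processing would implement
 * the callback roughly as
 *
 *	static void foo_add_vxlan_port(struct net_device *dev,
 *				       sa_family_t sa_family, __be16 port)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		foo_program_rx_parser(priv, ntohs(port));
 *	}
 *
 * and point .ndo_add_vxlan_port at it; all foo_* names above are
 * placeholders.
 */
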
/* Initialize the device structure. */
static void vxlan_setup(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	unsigned int h;

	eth_hw_addr_random(dev);
	ether_setup(dev);
	if (vxlan->default_dst.remote_ip.sa.sa_family == AF_INET6)
		dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
	else
		dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;

	dev->netdev_ops = &vxlan_netdev_ops;
	dev->destructor = free_netdev;
	SET_NETDEV_DEVTYPE(dev, &vxlan_type);

	dev->tx_queue_len = 0;
	dev->features	|= NETIF_F_LLTX;
	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM;
	dev->features	|= NETIF_F_RXCSUM;
	dev->features	|= NETIF_F_GSO_SOFTWARE;

	dev->vlan_features = dev->features;
	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
	netif_keep_dst(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	INIT_LIST_HEAD(&vxlan->next);
	spin_lock_init(&vxlan->hash_lock);

	init_timer_deferrable(&vxlan->age_timer);
	vxlan->age_timer.function = vxlan_cleanup;
	vxlan->age_timer.data = (unsigned long) vxlan;

	vxlan->dst_port = htons(vxlan_port);

	vxlan->dev = dev;

	for (h = 0; h < FDB_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
}

static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_VXLAN_GROUP6]	= { .len = sizeof(struct in6_addr) },
	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
	[IFLA_VXLAN_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_VXLAN_LOCAL6]	= { .len = sizeof(struct in6_addr) },
	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
	[IFLA_VXLAN_TTL]	= { .type = NLA_U8 },
	[IFLA_VXLAN_LEARNING]	= { .type = NLA_U8 },
	[IFLA_VXLAN_AGEING]	= { .type = NLA_U32 },
	[IFLA_VXLAN_LIMIT]	= { .type = NLA_U32 },
	[IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) },
	[IFLA_VXLAN_PROXY]	= { .type = NLA_U8 },
	[IFLA_VXLAN_RSC]	= { .type = NLA_U8 },
	[IFLA_VXLAN_L2MISS]	= { .type = NLA_U8 },
	[IFLA_VXLAN_L3MISS]	= { .type = NLA_U8 },
	[IFLA_VXLAN_PORT]	= { .type = NLA_U16 },
	[IFLA_VXLAN_UDP_CSUM]	= { .type = NLA_U8 },
	[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
};

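/* For illustration (not from the original source): these attributes are
 * what iproute2 fills in for a command such as
 *
 *	ip link add vxlan0 type vxlan id 42 group 239.1.1.1 \
 *		dev eth0 dstport 4789
 *
 * which sets IFLA_VXLAN_ID, IFLA_VXLAN_GROUP, IFLA_VXLAN_LINK and
 * IFLA_VXLAN_PORT; names and addresses above are placeholders.
 */
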
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
			pr_debug("invalid link address (not ethernet)\n");
			return -EINVAL;
		}

		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
			pr_debug("invalid all zero ethernet address\n");
			return -EADDRNOTAVAIL;
		}
	}

	if (!data)
		return -EINVAL;

	if (data[IFLA_VXLAN_ID]) {
		__u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);

		if (id >= VXLAN_VID_MASK)
			return -ERANGE;
	}

	if (data[IFLA_VXLAN_PORT_RANGE]) {
		const struct ifla_vxlan_port_range *p
			= nla_data(data[IFLA_VXLAN_PORT_RANGE]);

		if (ntohs(p->high) < ntohs(p->low)) {
			pr_debug("port range %u .. %u not valid\n",
				 ntohs(p->low), ntohs(p->high));
			return -EINVAL;
		}
	}

	return 0;
}

static void vxlan_get_drvinfo(struct net_device *netdev,
			      struct ethtool_drvinfo *drvinfo)
{
	strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
	strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
}

static const struct ethtool_ops vxlan_ethtool_ops = {
	.get_drvinfo	= vxlan_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

static void vxlan_del_work(struct work_struct *work)
{
	struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);

	udp_tunnel_sock_release(vs->sock);
	kfree_rcu(vs, rcu);
}

static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
					__be16 port, u32 flags)
{
	struct socket *sock;
	struct udp_port_cfg udp_conf;
	int err;

	memset(&udp_conf, 0, sizeof(udp_conf));

	if (ipv6) {
		udp_conf.family = AF_INET6;
		udp_conf.use_udp6_rx_checksums =
			!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
	} else {
		udp_conf.family = AF_INET;
	}

	udp_conf.local_udp_port = port;

	/* Open UDP socket */
	err = udp_sock_create(net, &udp_conf, &sock);
	if (err < 0)
		return ERR_PTR(err);

	return sock;
}

/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
					      vxlan_rcv_t *rcv, void *data,
					      u32 flags)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *vs;
	struct socket *sock;
	unsigned int h;
	bool ipv6 = !!(flags & VXLAN_F_IPV6);
	struct udp_tunnel_sock_cfg tunnel_cfg;

	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
	if (!vs)
		return ERR_PTR(-ENOMEM);

	for (h = 0; h < VNI_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vs->vni_list[h]);

	INIT_WORK(&vs->del_work, vxlan_del_work);

	sock = vxlan_create_sock(net, ipv6, port, flags);
	if (IS_ERR(sock)) {
		pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
			PTR_ERR(sock));
		kfree(vs);
		return ERR_CAST(sock);
	}

	vs->sock = sock;
	atomic_set(&vs->refcnt, 1);
	vs->rcv = rcv;
	vs->data = data;
	vs->flags = (flags & VXLAN_F_RCV_FLAGS);

	/* Initialize the vxlan udp offloads structure */
	vs->udp_offloads.port = port;
	vs->udp_offloads.callbacks.gro_receive = vxlan_gro_receive;
	vs->udp_offloads.callbacks.gro_complete = vxlan_gro_complete;

	spin_lock(&vn->sock_lock);
	hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
	vxlan_notify_add_rx_port(vs);
	spin_unlock(&vn->sock_lock);

	/* Mark socket as an encapsulation socket. */
	tunnel_cfg.sk_user_data = vs;
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.encap_rcv = vxlan_udp_encap_recv;
	tunnel_cfg.encap_destroy = NULL;

	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);

	return vs;
}

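/* Editorial note: vxlan_sock_add() below reuses an existing socket for
 * the same (net, family, port) when its rcv handler matches and sharing
 * is allowed; otherwise a fresh socket is created, which will fail to
 * bind if the port is already taken.
 */
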
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
				  bool no_share, u32 flags)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *vs;
	bool ipv6 = flags & VXLAN_F_IPV6;

	if (!no_share) {
		spin_lock(&vn->sock_lock);
		vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
				     flags);
		if (vs && vs->rcv == rcv) {
			if (!atomic_add_unless(&vs->refcnt, 1, 0))
				vs = ERR_PTR(-EBUSY);
			spin_unlock(&vn->sock_lock);
			return vs;
		}
		spin_unlock(&vn->sock_lock);
	}

	return vxlan_socket_create(net, port, rcv, data, flags);
}
EXPORT_SYMBOL_GPL(vxlan_sock_add);

static int vxlan_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
	struct vxlan_dev *vxlan = netdev_priv(dev), *tmp;
	struct vxlan_rdst *dst = &vxlan->default_dst;
	__u32 vni;
	int err;
	bool use_ipv6 = false;

	if (!data[IFLA_VXLAN_ID])
		return -EINVAL;

	vxlan->net = src_net;

	vni = nla_get_u32(data[IFLA_VXLAN_ID]);
	dst->remote_vni = vni;

	/* Unless IPv6 is explicitly requested, assume IPv4 */
	dst->remote_ip.sa.sa_family = AF_INET;
	if (data[IFLA_VXLAN_GROUP]) {
		dst->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
	} else if (data[IFLA_VXLAN_GROUP6]) {
		if (!IS_ENABLED(CONFIG_IPV6))
			return -EPFNOSUPPORT;

		dst->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
		dst->remote_ip.sa.sa_family = AF_INET6;
		use_ipv6 = true;
	}

	if (data[IFLA_VXLAN_LOCAL]) {
		vxlan->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
		vxlan->saddr.sa.sa_family = AF_INET;
	} else if (data[IFLA_VXLAN_LOCAL6]) {
		if (!IS_ENABLED(CONFIG_IPV6))
			return -EPFNOSUPPORT;

		/* TODO: respect scope id */
		vxlan->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
		vxlan->saddr.sa.sa_family = AF_INET6;
		use_ipv6 = true;
	}

	if (data[IFLA_VXLAN_LINK] &&
	    (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) {
		struct net_device *lowerdev
			 = __dev_get_by_index(src_net, dst->remote_ifindex);

		if (!lowerdev) {
			pr_info("ifindex %d does not exist\n", dst->remote_ifindex);
			return -ENODEV;
		}

#if IS_ENABLED(CONFIG_IPV6)
		if (use_ipv6) {
			struct inet6_dev *idev = __in6_dev_get(lowerdev);

			if (idev && idev->cnf.disable_ipv6) {
				pr_info("IPv6 is disabled via sysctl\n");
				return -EPERM;
			}
			vxlan->flags |= VXLAN_F_IPV6;
		}
#endif

		if (!tb[IFLA_MTU])
			dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);

		dev->needed_headroom = lowerdev->hard_header_len +
				       (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
	} else if (use_ipv6)
		vxlan->flags |= VXLAN_F_IPV6;

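	/* Note (editorial): VXLAN_HEADROOM (50 bytes) and VXLAN6_HEADROOM
	 * (70 bytes) used above cover the outer IPv4/IPv6, UDP and VXLAN
	 * headers plus the inner Ethernet header.
	 */
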
	if (data[IFLA_VXLAN_TOS])
		vxlan->tos = nla_get_u8(data[IFLA_VXLAN_TOS]);

	if (data[IFLA_VXLAN_TTL])
		vxlan->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);

	if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
		vxlan->flags |= VXLAN_F_LEARN;

	if (data[IFLA_VXLAN_AGEING])
		vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
	else
		vxlan->age_interval = FDB_AGE_DEFAULT;

	if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY]))
		vxlan->flags |= VXLAN_F_PROXY;

	if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC]))
		vxlan->flags |= VXLAN_F_RSC;

	if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS]))
		vxlan->flags |= VXLAN_F_L2MISS;

	if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS]))
		vxlan->flags |= VXLAN_F_L3MISS;

	if (data[IFLA_VXLAN_LIMIT])
		vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);

	if (data[IFLA_VXLAN_PORT_RANGE]) {
		const struct ifla_vxlan_port_range *p
			= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
		vxlan->port_min = ntohs(p->low);
		vxlan->port_max = ntohs(p->high);
	}

	if (data[IFLA_VXLAN_PORT])
		vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);

	if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
		vxlan->flags |= VXLAN_F_UDP_CSUM;

	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;

	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;

	if (data[IFLA_VXLAN_REMCSUM_TX] &&
	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
		vxlan->flags |= VXLAN_F_REMCSUM_TX;

	if (data[IFLA_VXLAN_REMCSUM_RX] &&
	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
		vxlan->flags |= VXLAN_F_REMCSUM_RX;

	if (data[IFLA_VXLAN_GBP])
		vxlan->flags |= VXLAN_F_GBP;

	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
		vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL;

	list_for_each_entry(tmp, &vn->vxlan_list, next) {
		if (tmp->default_dst.remote_vni == vni &&
		    (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 ||
		     tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
		    tmp->dst_port == vxlan->dst_port &&
		    (tmp->flags & VXLAN_F_RCV_FLAGS) ==
			(vxlan->flags & VXLAN_F_RCV_FLAGS))
			return -EEXIST;
	}

	dev->ethtool_ops = &vxlan_ethtool_ops;

	/* create an fdb entry for a valid default destination */
	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
		err = vxlan_fdb_create(vxlan, all_zeros_mac,
				       &vxlan->default_dst.remote_ip,
				       NUD_REACHABLE|NUD_PERMANENT,
				       NLM_F_EXCL|NLM_F_CREATE,
				       vxlan->dst_port,
				       vxlan->default_dst.remote_vni,
				       vxlan->default_dst.remote_ifindex,
				       NTF_SELF);
		if (err)
			return err;
	}

	err = register_netdevice(dev);
	if (err) {
		vxlan_fdb_delete_default(vxlan);
		return err;
	}

	list_add(&vxlan->next, &vn->vxlan_list);

	return 0;
}

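/* Editorial note: the -EEXIST check in vxlan_newlink() mirrors the
 * receive path, which demultiplexes on (VNI, UDP port, address family,
 * RCV flags); two devices matching on all of those would be
 * indistinguishable to incoming traffic.
 */
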
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);

	spin_lock(&vn->sock_lock);
	if (!hlist_unhashed(&vxlan->hlist))
		hlist_del_rcu(&vxlan->hlist);
	spin_unlock(&vn->sock_lock);

	list_del(&vxlan->next);
	unregister_netdevice_queue(dev, head);
}

static size_t vxlan_get_size(const struct net_device *dev)
{
	return nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_ID */
		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LINK */
		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TTL */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TOS */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_LEARNING */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_PROXY */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_RSC */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_L2MISS */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_L3MISS */
		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_AGEING */
		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LIMIT */
		nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
		nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_UDP_CSUM */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_REMCSUM_TX */
		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_REMCSUM_RX */
		0;
}

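/* Editorial note: vxlan_fill_info() below must stay in sync with
 * vxlan_get_size() above; every attribute emitted needs a matching
 * nla_total_size() term, or the netlink message may be sized too small.
 */
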
static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	const struct vxlan_dev *vxlan = netdev_priv(dev);
	const struct vxlan_rdst *dst = &vxlan->default_dst;
	struct ifla_vxlan_port_range ports = {
		.low =  htons(vxlan->port_min),
		.high = htons(vxlan->port_max),
	};

	if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni))
		goto nla_put_failure;

	if (!vxlan_addr_any(&dst->remote_ip)) {
		if (dst->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
					    dst->remote_ip.sin.sin_addr.s_addr))
				goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
					     &dst->remote_ip.sin6.sin6_addr))
				goto nla_put_failure;
#endif
		}
	}

	if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
		goto nla_put_failure;

	if (!vxlan_addr_any(&vxlan->saddr)) {
		if (vxlan->saddr.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
					    vxlan->saddr.sin.sin_addr.s_addr))
				goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
					     &vxlan->saddr.sin6.sin6_addr))
				goto nla_put_failure;
#endif
		}
	}

	if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) ||
	    nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) ||
	    nla_put_u8(skb, IFLA_VXLAN_LEARNING,
		       !!(vxlan->flags & VXLAN_F_LEARN)) ||
	    nla_put_u8(skb, IFLA_VXLAN_PROXY,
		       !!(vxlan->flags & VXLAN_F_PROXY)) ||
	    nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) ||
	    nla_put_u8(skb, IFLA_VXLAN_L2MISS,
		       !!(vxlan->flags & VXLAN_F_L2MISS)) ||
	    nla_put_u8(skb, IFLA_VXLAN_L3MISS,
		       !!(vxlan->flags & VXLAN_F_L3MISS)) ||
	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) ||
	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) ||
	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
		       !!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
		       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
		       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
		       !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
		       !!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
		goto nla_put_failure;

	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
		goto nla_put_failure;

	if (vxlan->flags & VXLAN_F_GBP &&
	    nla_put_flag(skb, IFLA_VXLAN_GBP))
		goto nla_put_failure;

	if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static struct net *vxlan_get_link_net(const struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);

	return vxlan->net;
}

static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
	.kind		= "vxlan",
	.maxtype	= IFLA_VXLAN_MAX,
	.policy		= vxlan_policy,
	.priv_size	= sizeof(struct vxlan_dev),
	.setup		= vxlan_setup,
	.validate	= vxlan_validate,
	.newlink	= vxlan_newlink,
	.dellink	= vxlan_dellink,
	.get_size	= vxlan_get_size,
	.fill_info	= vxlan_fill_info,
	.get_link_net	= vxlan_get_link_net,
};

static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
					     struct net_device *dev)
{
	struct vxlan_dev *vxlan, *next;
	LIST_HEAD(list_kill);

	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
		struct vxlan_rdst *dst = &vxlan->default_dst;

		/* If we created the vxlan device with a carrier and
		 * lose that carrier due to a module unload, we also
		 * need to remove the vxlan device.  In other cases it
		 * is not necessary: remote_ifindex is 0 there, so
		 * nothing matches.
		 */
		if (dst->remote_ifindex == dev->ifindex)
			vxlan_dellink(vxlan->dev, &list_kill);
	}

	unregister_netdevice_many(&list_kill);
}

static int vxlan_lowerdev_event(struct notifier_block *unused,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);

	if (event == NETDEV_UNREGISTER)
		vxlan_handle_lowerdev_unregister(vn, dev);

	return NOTIFY_DONE;
}

static struct notifier_block vxlan_notifier_block __read_mostly = {
	.notifier_call = vxlan_lowerdev_event,
};

static __net_init int vxlan_init_net(struct net *net)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	unsigned int h;

	INIT_LIST_HEAD(&vn->vxlan_list);
	spin_lock_init(&vn->sock_lock);

	for (h = 0; h < PORT_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vn->sock_list[h]);

	return 0;
}

static void __net_exit vxlan_exit_net(struct net *net)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_dev *vxlan, *next;
	struct net_device *dev, *aux;
	LIST_HEAD(list);

	rtnl_lock();
	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &vxlan_link_ops)
			unregister_netdevice_queue(dev, &list);

	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
		/* If vxlan->dev is in the same netns, it has already
		 * been added to the list by the previous loop.
		 */
		if (!net_eq(dev_net(vxlan->dev), net))
			unregister_netdevice_queue(vxlan->dev, &list);
	}

	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations vxlan_net_ops = {
	.init = vxlan_init_net,
	.exit = vxlan_exit_net,
	.id   = &vxlan_net_id,
	.size = sizeof(struct vxlan_net),
};

static int __init vxlan_init_module(void)
{
	int rc;

	vxlan_wq = alloc_workqueue("vxlan", 0, 0);
	if (!vxlan_wq)
		return -ENOMEM;

	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));

	rc = register_pernet_subsys(&vxlan_net_ops);
	if (rc)
		goto out1;

	rc = register_netdevice_notifier(&vxlan_notifier_block);
	if (rc)
		goto out2;

	rc = rtnl_link_register(&vxlan_link_ops);
	if (rc)
		goto out3;

	return 0;
out3:
	unregister_netdevice_notifier(&vxlan_notifier_block);
out2:
	unregister_pernet_subsys(&vxlan_net_ops);
out1:
	destroy_workqueue(vxlan_wq);
	return rc;
}
late_initcall(vxlan_init_module);

static void __exit vxlan_cleanup_module(void)
{
	rtnl_link_unregister(&vxlan_link_ops);
	unregister_netdevice_notifier(&vxlan_notifier_block);
	destroy_workqueue(vxlan_wq);
	unregister_pernet_subsys(&vxlan_net_ops);
	/* rcu_barrier() is called by netns */
}
module_exit(vxlan_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_VERSION(VXLAN_VERSION);
MODULE_AUTHOR("Stephen Hemminger <stephen@networkplumber.org>");
MODULE_DESCRIPTION("Driver for VXLAN encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("vxlan");