1/*
2 * VXLAN: Virtual eXtensible Local Area Network
3 *
4 * Copyright (c) 2012-2013 Vyatta Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <linux/kernel.h>
14#include <linux/types.h>
15#include <linux/module.h>
16#include <linux/errno.h>
17#include <linux/slab.h>
18#include <linux/skbuff.h>
19#include <linux/rculist.h>
20#include <linux/netdevice.h>
21#include <linux/in.h>
22#include <linux/ip.h>
23#include <linux/udp.h>
24#include <linux/igmp.h>
25#include <linux/etherdevice.h>
26#include <linux/if_ether.h>
27#include <linux/if_vlan.h>
28#include <linux/hash.h>
29#include <linux/ethtool.h>
30#include <net/arp.h>
31#include <net/ndisc.h>
32#include <net/ip.h>
33#include <net/ip_tunnels.h>
34#include <net/icmp.h>
35#include <net/udp.h>
36#include <net/udp_tunnel.h>
37#include <net/rtnetlink.h>
38#include <net/route.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/net_namespace.h>
42#include <net/netns/generic.h>
43#include <net/vxlan.h>
44#include <net/protocol.h>
45#include <net/udp_tunnel.h>
46#if IS_ENABLED(CONFIG_IPV6)
47#include <net/ipv6.h>
48#include <net/addrconf.h>
49#include <net/ip6_tunnel.h>
50#include <net/ip6_checksum.h>
51#endif
52
53#define VXLAN_VERSION	"0.1"
54
55#define PORT_HASH_BITS	8
56#define PORT_HASH_SIZE  (1<<PORT_HASH_BITS)
57#define VNI_HASH_BITS	10
58#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
59#define FDB_HASH_BITS	8
60#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)
61#define FDB_AGE_DEFAULT 300 /* 5 min */
62#define FDB_AGE_INTERVAL (10 * HZ)	/* rescan interval */
63
64/* UDP port for VXLAN traffic.
65 * The IANA assigned port is 4789, but the Linux default is 8472
66 * for compatibility with early adopters.
67 */
68static unsigned short vxlan_port __read_mostly = 8472;
69module_param_named(udp_port, vxlan_port, ushort, 0444);
70MODULE_PARM_DESC(udp_port, "Destination UDP port");
71
72static bool log_ecn_error = true;
73module_param(log_ecn_error, bool, 0644);
74MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
75
76static int vxlan_net_id;
77
78static const u8 all_zeros_mac[ETH_ALEN];
79
80/* per-network namespace private data for this module */
81struct vxlan_net {
82	struct list_head  vxlan_list;
83	struct hlist_head sock_list[PORT_HASH_SIZE];
84	spinlock_t	  sock_lock;
85};
86
87union vxlan_addr {
88	struct sockaddr_in sin;
89	struct sockaddr_in6 sin6;
90	struct sockaddr sa;
91};
92
93struct vxlan_rdst {
94	union vxlan_addr	 remote_ip;
95	__be16			 remote_port;
96	u32			 remote_vni;
97	u32			 remote_ifindex;
98	struct list_head	 list;
99	struct rcu_head		 rcu;
100};
101
102/* Forwarding table entry */
103struct vxlan_fdb {
104	struct hlist_node hlist;	/* linked list of entries */
105	struct rcu_head	  rcu;
106	unsigned long	  updated;	/* jiffies */
107	unsigned long	  used;
108	struct list_head  remotes;
109	u16		  state;	/* see ndm_state */
110	u8		  flags;	/* see ndm_flags */
111	u8		  eth_addr[ETH_ALEN];
112};
113
114/* Pseudo network device */
115struct vxlan_dev {
116	struct hlist_node hlist;	/* vni hash table */
117	struct list_head  next;		/* vxlan's per namespace list */
118	struct vxlan_sock *vn_sock;	/* listening socket */
119	struct net_device *dev;
120	struct net	  *net;		/* netns for packet i/o */
121	struct vxlan_rdst default_dst;	/* default destination */
122	union vxlan_addr  saddr;	/* source address */
123	__be16		  dst_port;
124	__u16		  port_min;	/* source port range */
125	__u16		  port_max;
126	__u8		  tos;		/* TOS override */
127	__u8		  ttl;
128	u32		  flags;	/* VXLAN_F_* in vxlan.h */
129
130	unsigned long	  age_interval;
131	struct timer_list age_timer;
132	spinlock_t	  hash_lock;
133	unsigned int	  addrcnt;
134	unsigned int	  addrmax;
135
136	struct hlist_head fdb_head[FDB_HASH_SIZE];
137};
138
139/* salt for hash table */
140static u32 vxlan_salt __read_mostly;
141static struct workqueue_struct *vxlan_wq;
142
143#if IS_ENABLED(CONFIG_IPV6)
144static inline
145bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
146{
147	if (a->sa.sa_family != b->sa.sa_family)
148		return false;
149	if (a->sa.sa_family == AF_INET6)
150		return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
151	else
152		return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
153}
154
155static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
156{
157	if (ipa->sa.sa_family == AF_INET6)
158		return ipv6_addr_any(&ipa->sin6.sin6_addr);
159	else
160		return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
161}
162
163static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
164{
165	if (ipa->sa.sa_family == AF_INET6)
166		return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
167	else
168		return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
169}
170
171static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
172{
173	if (nla_len(nla) >= sizeof(struct in6_addr)) {
174		ip->sin6.sin6_addr = nla_get_in6_addr(nla);
175		ip->sa.sa_family = AF_INET6;
176		return 0;
177	} else if (nla_len(nla) >= sizeof(__be32)) {
178		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
179		ip->sa.sa_family = AF_INET;
180		return 0;
181	} else {
182		return -EAFNOSUPPORT;
183	}
184}
185
186static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
187			      const union vxlan_addr *ip)
188{
189	if (ip->sa.sa_family == AF_INET6)
190		return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
191	else
192		return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
193}
194
195#else /* !CONFIG_IPV6 */
196
197static inline
198bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
199{
200	return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
201}
202
203static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
204{
205	return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
206}
207
208static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
209{
210	return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
211}
212
213static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
214{
215	if (nla_len(nla) >= sizeof(struct in6_addr)) {
216		return -EAFNOSUPPORT;
217	} else if (nla_len(nla) >= sizeof(__be32)) {
218		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
219		ip->sa.sa_family = AF_INET;
220		return 0;
221	} else {
222		return -EAFNOSUPPORT;
223	}
224}
225
226static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
227			      const union vxlan_addr *ip)
228{
229	return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
230}
231#endif
232
233/* Virtual Network hash table head */
234static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
235{
236	return &vs->vni_list[hash_32(id, VNI_HASH_BITS)];
237}
238
239/* Socket hash table head */
240static inline struct hlist_head *vs_head(struct net *net, __be16 port)
241{
242	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
243
244	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
245}
246
247/* First remote destination for a forwarding entry.
248 * Guaranteed to be non-NULL because remotes are never deleted.
249 */
250static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
251{
252	return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
253}
254
255static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
256{
257	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
258}
259
260/* Find VXLAN socket based on network namespace, address family and UDP port
261 * and enabled unshareable flags.
262 */
263static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
264					  __be16 port, u32 flags)
265{
266	struct vxlan_sock *vs;
267
268	flags &= VXLAN_F_RCV_FLAGS;
269
270	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
271		if (inet_sk(vs->sock->sk)->inet_sport == port &&
272		    inet_sk(vs->sock->sk)->sk.sk_family == family &&
273		    vs->flags == flags)
274			return vs;
275	}
276	return NULL;
277}
278
279static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
280{
281	struct vxlan_dev *vxlan;
282
283	hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
284		if (vxlan->default_dst.remote_vni == id)
285			return vxlan;
286	}
287
288	return NULL;
289}
290
291/* Look up VNI in a per net namespace table */
292static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
293					sa_family_t family, __be16 port,
294					u32 flags)
295{
296	struct vxlan_sock *vs;
297
298	vs = vxlan_find_sock(net, family, port, flags);
299	if (!vs)
300		return NULL;
301
302	return vxlan_vs_find_vni(vs, id);
303}
304
305/* Fill in neighbour message in skbuff. */
306static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
307			  const struct vxlan_fdb *fdb,
308			  u32 portid, u32 seq, int type, unsigned int flags,
309			  const struct vxlan_rdst *rdst)
310{
311	unsigned long now = jiffies;
312	struct nda_cacheinfo ci;
313	struct nlmsghdr *nlh;
314	struct ndmsg *ndm;
315	bool send_ip, send_eth;
316
317	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
318	if (nlh == NULL)
319		return -EMSGSIZE;
320
321	ndm = nlmsg_data(nlh);
322	memset(ndm, 0, sizeof(*ndm));
323
324	send_eth = send_ip = true;
325
326	if (type == RTM_GETNEIGH) {
327		ndm->ndm_family	= AF_INET;
328		send_ip = !vxlan_addr_any(&rdst->remote_ip);
329		send_eth = !is_zero_ether_addr(fdb->eth_addr);
330	} else
331		ndm->ndm_family	= AF_BRIDGE;
332	ndm->ndm_state = fdb->state;
333	ndm->ndm_ifindex = vxlan->dev->ifindex;
334	ndm->ndm_flags = fdb->flags;
335	ndm->ndm_type = RTN_UNICAST;
336
337	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
338	    nla_put_s32(skb, NDA_LINK_NETNSID,
339			peernet2id(dev_net(vxlan->dev), vxlan->net)))
340		goto nla_put_failure;
341
342	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
343		goto nla_put_failure;
344
345	if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
346		goto nla_put_failure;
347
348	if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
349	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
350		goto nla_put_failure;
351	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
352	    nla_put_u32(skb, NDA_VNI, rdst->remote_vni))
353		goto nla_put_failure;
354	if (rdst->remote_ifindex &&
355	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
356		goto nla_put_failure;
357
358	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
359	ci.ndm_confirmed = 0;
360	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
361	ci.ndm_refcnt	 = 0;
362
363	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
364		goto nla_put_failure;
365
366	nlmsg_end(skb, nlh);
367	return 0;
368
369nla_put_failure:
370	nlmsg_cancel(skb, nlh);
371	return -EMSGSIZE;
372}
373
374static inline size_t vxlan_nlmsg_size(void)
375{
376	return NLMSG_ALIGN(sizeof(struct ndmsg))
377		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
378		+ nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
379		+ nla_total_size(sizeof(__be16)) /* NDA_PORT */
380		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
381		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
382		+ nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
383		+ nla_total_size(sizeof(struct nda_cacheinfo));
384}
385
386static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
387			     struct vxlan_rdst *rd, int type)
388{
389	struct net *net = dev_net(vxlan->dev);
390	struct sk_buff *skb;
391	int err = -ENOBUFS;
392
393	skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
394	if (skb == NULL)
395		goto errout;
396
397	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
398	if (err < 0) {
399		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
400		WARN_ON(err == -EMSGSIZE);
401		kfree_skb(skb);
402		goto errout;
403	}
404
405	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
406	return;
407errout:
408	if (err < 0)
409		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
410}
411
412static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
413{
414	struct vxlan_dev *vxlan = netdev_priv(dev);
415	struct vxlan_fdb f = {
416		.state = NUD_STALE,
417	};
418	struct vxlan_rdst remote = {
419		.remote_ip = *ipa, /* goes to NDA_DST */
420		.remote_vni = VXLAN_N_VID,
421	};
422
423	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
424}
425
426static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
427{
428	struct vxlan_fdb f = {
429		.state = NUD_STALE,
430	};
431	struct vxlan_rdst remote = { };
432
433	memcpy(f.eth_addr, eth_addr, ETH_ALEN);
434
435	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
436}
437
438/* Hash Ethernet address */
439static u32 eth_hash(const unsigned char *addr)
440{
441	u64 value = get_unaligned((u64 *)addr);
442
443	/* only want 6 bytes */
444#ifdef __BIG_ENDIAN
445	value >>= 16;
446#else
447	value <<= 16;
448#endif
449	return hash_64(value, FDB_HASH_BITS);
450}
451
452/* Hash chain to use given mac address */
453static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
454						const u8 *mac)
455{
456	return &vxlan->fdb_head[eth_hash(mac)];
457}
458
459/* Look up Ethernet address in forwarding table */
460static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
461					const u8 *mac)
462{
463	struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
464	struct vxlan_fdb *f;
465
466	hlist_for_each_entry_rcu(f, head, hlist) {
467		if (ether_addr_equal(mac, f->eth_addr))
468			return f;
469	}
470
471	return NULL;
472}
473
474static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
475					const u8 *mac)
476{
477	struct vxlan_fdb *f;
478
479	f = __vxlan_find_mac(vxlan, mac);
480	if (f)
481		f->used = jiffies;
482
483	return f;
484}
485
486/* caller should hold vxlan->hash_lock */
487static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
488					      union vxlan_addr *ip, __be16 port,
489					      __u32 vni, __u32 ifindex)
490{
491	struct vxlan_rdst *rd;
492
493	list_for_each_entry(rd, &f->remotes, list) {
494		if (vxlan_addr_equal(&rd->remote_ip, ip) &&
495		    rd->remote_port == port &&
496		    rd->remote_vni == vni &&
497		    rd->remote_ifindex == ifindex)
498			return rd;
499	}
500
501	return NULL;
502}
503
504/* Replace destination of unicast mac */
505static int vxlan_fdb_replace(struct vxlan_fdb *f,
506			     union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex)
507{
508	struct vxlan_rdst *rd;
509
510	rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
511	if (rd)
512		return 0;
513
514	rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
515	if (!rd)
516		return 0;
517	rd->remote_ip = *ip;
518	rd->remote_port = port;
519	rd->remote_vni = vni;
520	rd->remote_ifindex = ifindex;
521	return 1;
522}
523
524/* Add/update destinations for multicast */
525static int vxlan_fdb_append(struct vxlan_fdb *f,
526			    union vxlan_addr *ip, __be16 port, __u32 vni,
527			    __u32 ifindex, struct vxlan_rdst **rdp)
528{
529	struct vxlan_rdst *rd;
530
531	rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
532	if (rd)
533		return 0;
534
535	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
536	if (rd == NULL)
537		return -ENOBUFS;
538	rd->remote_ip = *ip;
539	rd->remote_port = port;
540	rd->remote_vni = vni;
541	rd->remote_ifindex = ifindex;
542
543	list_add_tail_rcu(&rd->list, &f->remotes);
544
545	*rdp = rd;
546	return 1;
547}
548
549static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
550					  unsigned int off,
551					  struct vxlanhdr *vh, size_t hdrlen,
552					  u32 data, struct gro_remcsum *grc,
553					  bool nopartial)
554{
555	size_t start, offset, plen;
556
557	if (skb->remcsum_offload)
558		return NULL;
559
560	if (!NAPI_GRO_CB(skb)->csum_valid)
561		return NULL;
562
563	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
564	offset = start + ((data & VXLAN_RCO_UDP) ?
565			  offsetof(struct udphdr, check) :
566			  offsetof(struct tcphdr, check));
567
568	plen = hdrlen + offset + sizeof(u16);
569
570	/* Pull checksum that will be written */
571	if (skb_gro_header_hard(skb, off + plen)) {
572		vh = skb_gro_header_slow(skb, off + plen, off);
573		if (!vh)
574			return NULL;
575	}
576
577	skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
578				start, offset, grc, nopartial);
579
580	skb->remcsum_offload = 1;
581
582	return vh;
583}
584
585static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
586					  struct sk_buff *skb,
587					  struct udp_offload *uoff)
588{
589	struct sk_buff *p, **pp = NULL;
590	struct vxlanhdr *vh, *vh2;
591	unsigned int hlen, off_vx;
592	int flush = 1;
593	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
594					     udp_offloads);
595	u32 flags;
596	struct gro_remcsum grc;
597
598	skb_gro_remcsum_init(&grc);
599
600	off_vx = skb_gro_offset(skb);
601	hlen = off_vx + sizeof(*vh);
602	vh   = skb_gro_header_fast(skb, off_vx);
603	if (skb_gro_header_hard(skb, hlen)) {
604		vh = skb_gro_header_slow(skb, hlen, off_vx);
605		if (unlikely(!vh))
606			goto out;
607	}
608
609	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
610	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
611
612	flags = ntohl(vh->vx_flags);
613
614	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
615		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
616				       ntohl(vh->vx_vni), &grc,
617				       !!(vs->flags &
618					  VXLAN_F_REMCSUM_NOPARTIAL));
619
620		if (!vh)
621			goto out;
622	}
623
624	flush = 0;
625
626	for (p = *head; p; p = p->next) {
627		if (!NAPI_GRO_CB(p)->same_flow)
628			continue;
629
630		vh2 = (struct vxlanhdr *)(p->data + off_vx);
631		if (vh->vx_flags != vh2->vx_flags ||
632		    vh->vx_vni != vh2->vx_vni) {
633			NAPI_GRO_CB(p)->same_flow = 0;
634			continue;
635		}
636	}
637
638	pp = eth_gro_receive(head, skb);
639
640out:
641	skb_gro_remcsum_cleanup(skb, &grc);
642	NAPI_GRO_CB(skb)->flush |= flush;
643
644	return pp;
645}
646
647static int vxlan_gro_complete(struct sk_buff *skb, int nhoff,
648			      struct udp_offload *uoff)
649{
650	udp_tunnel_gro_complete(skb, nhoff);
651
652	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
653}
654
655/* Notify netdevs that UDP port started listening */
656static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
657{
658	struct net_device *dev;
659	struct sock *sk = vs->sock->sk;
660	struct net *net = sock_net(sk);
661	sa_family_t sa_family = sk->sk_family;
662	__be16 port = inet_sk(sk)->inet_sport;
663	int err;
664
665	if (sa_family == AF_INET) {
666		err = udp_add_offload(&vs->udp_offloads);
667		if (err)
668			pr_warn("vxlan: udp_add_offload failed with status %d\n", err);
669	}
670
671	rcu_read_lock();
672	for_each_netdev_rcu(net, dev) {
673		if (dev->netdev_ops->ndo_add_vxlan_port)
674			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
675							    port);
676	}
677	rcu_read_unlock();
678}
679
680/* Notify netdevs that UDP port is no more listening */
681static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
682{
683	struct net_device *dev;
684	struct sock *sk = vs->sock->sk;
685	struct net *net = sock_net(sk);
686	sa_family_t sa_family = sk->sk_family;
687	__be16 port = inet_sk(sk)->inet_sport;
688
689	rcu_read_lock();
690	for_each_netdev_rcu(net, dev) {
691		if (dev->netdev_ops->ndo_del_vxlan_port)
692			dev->netdev_ops->ndo_del_vxlan_port(dev, sa_family,
693							    port);
694	}
695	rcu_read_unlock();
696
697	if (sa_family == AF_INET)
698		udp_del_offload(&vs->udp_offloads);
699}
700
701/* Add new entry to forwarding table -- assumes lock held */
702static int vxlan_fdb_create(struct vxlan_dev *vxlan,
703			    const u8 *mac, union vxlan_addr *ip,
704			    __u16 state, __u16 flags,
705			    __be16 port, __u32 vni, __u32 ifindex,
706			    __u8 ndm_flags)
707{
708	struct vxlan_rdst *rd = NULL;
709	struct vxlan_fdb *f;
710	int notify = 0;
711
712	f = __vxlan_find_mac(vxlan, mac);
713	if (f) {
714		if (flags & NLM_F_EXCL) {
715			netdev_dbg(vxlan->dev,
716				   "lost race to create %pM\n", mac);
717			return -EEXIST;
718		}
719		if (f->state != state) {
720			f->state = state;
721			f->updated = jiffies;
722			notify = 1;
723		}
724		if (f->flags != ndm_flags) {
725			f->flags = ndm_flags;
726			f->updated = jiffies;
727			notify = 1;
728		}
729		if ((flags & NLM_F_REPLACE)) {
730			/* Only change unicasts */
731			if (!(is_multicast_ether_addr(f->eth_addr) ||
732			     is_zero_ether_addr(f->eth_addr))) {
733				notify |= vxlan_fdb_replace(f, ip, port, vni,
734							   ifindex);
735			} else
736				return -EOPNOTSUPP;
737		}
738		if ((flags & NLM_F_APPEND) &&
739		    (is_multicast_ether_addr(f->eth_addr) ||
740		     is_zero_ether_addr(f->eth_addr))) {
741			int rc = vxlan_fdb_append(f, ip, port, vni, ifindex,
742						  &rd);
743
744			if (rc < 0)
745				return rc;
746			notify |= rc;
747		}
748	} else {
749		if (!(flags & NLM_F_CREATE))
750			return -ENOENT;
751
752		if (vxlan->addrmax && vxlan->addrcnt >= vxlan->addrmax)
753			return -ENOSPC;
754
755		/* Disallow replace to add a multicast entry */
756		if ((flags & NLM_F_REPLACE) &&
757		    (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
758			return -EOPNOTSUPP;
759
760		netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
761		f = kmalloc(sizeof(*f), GFP_ATOMIC);
762		if (!f)
763			return -ENOMEM;
764
765		notify = 1;
766		f->state = state;
767		f->flags = ndm_flags;
768		f->updated = f->used = jiffies;
769		INIT_LIST_HEAD(&f->remotes);
770		memcpy(f->eth_addr, mac, ETH_ALEN);
771
772		vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
773
774		++vxlan->addrcnt;
775		hlist_add_head_rcu(&f->hlist,
776				   vxlan_fdb_head(vxlan, mac));
777	}
778
779	if (notify) {
780		if (rd == NULL)
781			rd = first_remote_rtnl(f);
782		vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH);
783	}
784
785	return 0;
786}
787
788static void vxlan_fdb_free(struct rcu_head *head)
789{
790	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
791	struct vxlan_rdst *rd, *nd;
792
793	list_for_each_entry_safe(rd, nd, &f->remotes, list)
794		kfree(rd);
795	kfree(f);
796}
797
798static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
799{
800	netdev_dbg(vxlan->dev,
801		    "delete %pM\n", f->eth_addr);
802
803	--vxlan->addrcnt;
804	vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
805
806	hlist_del_rcu(&f->hlist);
807	call_rcu(&f->rcu, vxlan_fdb_free);
808}
809
810static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
811			   union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex)
812{
813	struct net *net = dev_net(vxlan->dev);
814	int err;
815
816	if (tb[NDA_DST]) {
817		err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
818		if (err)
819			return err;
820	} else {
821		union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
822		if (remote->sa.sa_family == AF_INET) {
823			ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
824			ip->sa.sa_family = AF_INET;
825#if IS_ENABLED(CONFIG_IPV6)
826		} else {
827			ip->sin6.sin6_addr = in6addr_any;
828			ip->sa.sa_family = AF_INET6;
829#endif
830		}
831	}
832
833	if (tb[NDA_PORT]) {
834		if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
835			return -EINVAL;
836		*port = nla_get_be16(tb[NDA_PORT]);
837	} else {
838		*port = vxlan->dst_port;
839	}
840
841	if (tb[NDA_VNI]) {
842		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
843			return -EINVAL;
844		*vni = nla_get_u32(tb[NDA_VNI]);
845	} else {
846		*vni = vxlan->default_dst.remote_vni;
847	}
848
849	if (tb[NDA_IFINDEX]) {
850		struct net_device *tdev;
851
852		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
853			return -EINVAL;
854		*ifindex = nla_get_u32(tb[NDA_IFINDEX]);
855		tdev = __dev_get_by_index(net, *ifindex);
856		if (!tdev)
857			return -EADDRNOTAVAIL;
858	} else {
859		*ifindex = 0;
860	}
861
862	return 0;
863}
864
865/* Add static entry (via netlink) */
866static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
867			 struct net_device *dev,
868			 const unsigned char *addr, u16 vid, u16 flags)
869{
870	struct vxlan_dev *vxlan = netdev_priv(dev);
871	/* struct net *net = dev_net(vxlan->dev); */
872	union vxlan_addr ip;
873	__be16 port;
874	u32 vni, ifindex;
875	int err;
876
877	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
878		pr_info("RTM_NEWNEIGH with invalid state %#x\n",
879			ndm->ndm_state);
880		return -EINVAL;
881	}
882
883	if (tb[NDA_DST] == NULL)
884		return -EINVAL;
885
886	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
887	if (err)
888		return err;
889
890	if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
891		return -EAFNOSUPPORT;
892
893	spin_lock_bh(&vxlan->hash_lock);
894	err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
895			       port, vni, ifindex, ndm->ndm_flags);
896	spin_unlock_bh(&vxlan->hash_lock);
897
898	return err;
899}
900
901/* Delete entry (via netlink) */
902static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
903			    struct net_device *dev,
904			    const unsigned char *addr, u16 vid)
905{
906	struct vxlan_dev *vxlan = netdev_priv(dev);
907	struct vxlan_fdb *f;
908	struct vxlan_rdst *rd = NULL;
909	union vxlan_addr ip;
910	__be16 port;
911	u32 vni, ifindex;
912	int err;
913
914	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
915	if (err)
916		return err;
917
918	err = -ENOENT;
919
920	spin_lock_bh(&vxlan->hash_lock);
921	f = vxlan_find_mac(vxlan, addr);
922	if (!f)
923		goto out;
924
925	if (!vxlan_addr_any(&ip)) {
926		rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
927		if (!rd)
928			goto out;
929	}
930
931	err = 0;
932
933	/* remove a destination if it's not the only one on the list,
934	 * otherwise destroy the fdb entry
935	 */
936	if (rd && !list_is_singular(&f->remotes)) {
937		list_del_rcu(&rd->list);
938		vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
939		kfree_rcu(rd, rcu);
940		goto out;
941	}
942
943	vxlan_fdb_destroy(vxlan, f);
944
945out:
946	spin_unlock_bh(&vxlan->hash_lock);
947
948	return err;
949}
950
951/* Dump forwarding table */
952static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
953			  struct net_device *dev,
954			  struct net_device *filter_dev, int idx)
955{
956	struct vxlan_dev *vxlan = netdev_priv(dev);
957	unsigned int h;
958
959	for (h = 0; h < FDB_HASH_SIZE; ++h) {
960		struct vxlan_fdb *f;
961		int err;
962
963		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
964			struct vxlan_rdst *rd;
965
966			if (idx < cb->args[0])
967				goto skip;
968
969			list_for_each_entry_rcu(rd, &f->remotes, list) {
970				err = vxlan_fdb_info(skb, vxlan, f,
971						     NETLINK_CB(cb->skb).portid,
972						     cb->nlh->nlmsg_seq,
973						     RTM_NEWNEIGH,
974						     NLM_F_MULTI, rd);
975				if (err < 0)
976					goto out;
977			}
978skip:
979			++idx;
980		}
981	}
982out:
983	return idx;
984}
985
986/* Watch incoming packets to learn mapping between Ethernet address
987 * and Tunnel endpoint.
988 * Return true if packet is bogus and should be dropped.
989 */
990static bool vxlan_snoop(struct net_device *dev,
991			union vxlan_addr *src_ip, const u8 *src_mac)
992{
993	struct vxlan_dev *vxlan = netdev_priv(dev);
994	struct vxlan_fdb *f;
995
996	f = vxlan_find_mac(vxlan, src_mac);
997	if (likely(f)) {
998		struct vxlan_rdst *rdst = first_remote_rcu(f);
999
1000		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip)))
1001			return false;
1002
1003		/* Don't migrate static entries, drop packets */
1004		if (f->state & NUD_NOARP)
1005			return true;
1006
1007		if (net_ratelimit())
1008			netdev_info(dev,
1009				    "%pM migrated from %pIS to %pIS\n",
1010				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);
1011
1012		rdst->remote_ip = *src_ip;
1013		f->updated = jiffies;
1014		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH);
1015	} else {
1016		/* learned new entry */
1017		spin_lock(&vxlan->hash_lock);
1018
1019		/* close off race between vxlan_flush and incoming packets */
1020		if (netif_running(dev))
1021			vxlan_fdb_create(vxlan, src_mac, src_ip,
1022					 NUD_REACHABLE,
1023					 NLM_F_EXCL|NLM_F_CREATE,
1024					 vxlan->dst_port,
1025					 vxlan->default_dst.remote_vni,
1026					 0, NTF_SELF);
1027		spin_unlock(&vxlan->hash_lock);
1028	}
1029
1030	return false;
1031}
1032
1033/* See if multicast group is already in use by other ID */
1034static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
1035{
1036	struct vxlan_dev *vxlan;
1037
1038	/* The vxlan_sock is only used by dev, leaving group has
1039	 * no effect on other vxlan devices.
1040	 */
1041	if (atomic_read(&dev->vn_sock->refcnt) == 1)
1042		return false;
1043
1044	list_for_each_entry(vxlan, &vn->vxlan_list, next) {
1045		if (!netif_running(vxlan->dev) || vxlan == dev)
1046			continue;
1047
1048		if (vxlan->vn_sock != dev->vn_sock)
1049			continue;
1050
1051		if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
1052				      &dev->default_dst.remote_ip))
1053			continue;
1054
1055		if (vxlan->default_dst.remote_ifindex !=
1056		    dev->default_dst.remote_ifindex)
1057			continue;
1058
1059		return true;
1060	}
1061
1062	return false;
1063}
1064
1065void vxlan_sock_release(struct vxlan_sock *vs)
1066{
1067	struct sock *sk = vs->sock->sk;
1068	struct net *net = sock_net(sk);
1069	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1070
1071	if (!atomic_dec_and_test(&vs->refcnt))
1072		return;
1073
1074	spin_lock(&vn->sock_lock);
1075	hlist_del_rcu(&vs->hlist);
1076	vxlan_notify_del_rx_port(vs);
1077	spin_unlock(&vn->sock_lock);
1078
1079	queue_work(vxlan_wq, &vs->del_work);
1080}
1081EXPORT_SYMBOL_GPL(vxlan_sock_release);
1082
1083/* Update multicast group membership when first VNI on
1084 * multicast address is brought up
1085 */
1086static int vxlan_igmp_join(struct vxlan_dev *vxlan)
1087{
1088	struct vxlan_sock *vs = vxlan->vn_sock;
1089	struct sock *sk = vs->sock->sk;
1090	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
1091	int ifindex = vxlan->default_dst.remote_ifindex;
1092	int ret = -EINVAL;
1093
1094	lock_sock(sk);
1095	if (ip->sa.sa_family == AF_INET) {
1096		struct ip_mreqn mreq = {
1097			.imr_multiaddr.s_addr	= ip->sin.sin_addr.s_addr,
1098			.imr_ifindex		= ifindex,
1099		};
1100
1101		ret = ip_mc_join_group(sk, &mreq);
1102#if IS_ENABLED(CONFIG_IPV6)
1103	} else {
1104		ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
1105						   &ip->sin6.sin6_addr);
1106#endif
1107	}
1108	release_sock(sk);
1109
1110	return ret;
1111}
1112
1113/* Inverse of vxlan_igmp_join when last VNI is brought down */
1114static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
1115{
1116	struct vxlan_sock *vs = vxlan->vn_sock;
1117	struct sock *sk = vs->sock->sk;
1118	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
1119	int ifindex = vxlan->default_dst.remote_ifindex;
1120	int ret = -EINVAL;
1121
1122	lock_sock(sk);
1123	if (ip->sa.sa_family == AF_INET) {
1124		struct ip_mreqn mreq = {
1125			.imr_multiaddr.s_addr	= ip->sin.sin_addr.s_addr,
1126			.imr_ifindex		= ifindex,
1127		};
1128
1129		ret = ip_mc_leave_group(sk, &mreq);
1130#if IS_ENABLED(CONFIG_IPV6)
1131	} else {
1132		ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
1133						   &ip->sin6.sin6_addr);
1134#endif
1135	}
1136	release_sock(sk);
1137
1138	return ret;
1139}
1140
1141static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
1142				      size_t hdrlen, u32 data, bool nopartial)
1143{
1144	size_t start, offset, plen;
1145
1146	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
1147	offset = start + ((data & VXLAN_RCO_UDP) ?
1148			  offsetof(struct udphdr, check) :
1149			  offsetof(struct tcphdr, check));
1150
1151	plen = hdrlen + offset + sizeof(u16);
1152
1153	if (!pskb_may_pull(skb, plen))
1154		return NULL;
1155
1156	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
1157
1158	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset,
1159			    nopartial);
1160
1161	return vh;
1162}
1163
1164/* Callback from net/ipv4/udp.c to receive packets */
1165static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
1166{
1167	struct vxlan_sock *vs;
1168	struct vxlanhdr *vxh;
1169	u32 flags, vni;
1170	struct vxlan_metadata md = {0};
1171
1172	/* Need Vxlan and inner Ethernet header to be present */
1173	if (!pskb_may_pull(skb, VXLAN_HLEN))
1174		goto error;
1175
1176	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
1177	flags = ntohl(vxh->vx_flags);
1178	vni = ntohl(vxh->vx_vni);
1179
1180	if (flags & VXLAN_HF_VNI) {
1181		flags &= ~VXLAN_HF_VNI;
1182	} else {
1183		/* VNI flag always required to be set */
1184		goto bad_flags;
1185	}
1186
1187	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
1188		goto drop;
1189	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
1190
1191	vs = rcu_dereference_sk_user_data(sk);
1192	if (!vs)
1193		goto drop;
1194
1195	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
1196		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
1197				    !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
1198		if (!vxh)
1199			goto drop;
1200
1201		flags &= ~VXLAN_HF_RCO;
1202		vni &= VXLAN_VNI_MASK;
1203	}
1204
1205	/* For backwards compatibility, only allow reserved fields to be
1206	 * used by VXLAN extensions if explicitly requested.
1207	 */
1208	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
1209		struct vxlanhdr_gbp *gbp;
1210
1211		gbp = (struct vxlanhdr_gbp *)vxh;
1212		md.gbp = ntohs(gbp->policy_id);
1213
1214		if (gbp->dont_learn)
1215			md.gbp |= VXLAN_GBP_DONT_LEARN;
1216
1217		if (gbp->policy_applied)
1218			md.gbp |= VXLAN_GBP_POLICY_APPLIED;
1219
1220		flags &= ~VXLAN_GBP_USED_BITS;
1221	}
1222
1223	if (flags || vni & ~VXLAN_VNI_MASK) {
1224		/* If there are any unprocessed flags remaining treat
1225		 * this as a malformed packet. This behavior diverges from
1226		 * VXLAN RFC (RFC7348) which stipulates that bits in reserved
1227		 * in reserved fields are to be ignored. The approach here
1228		 * maintains compatibility with previous stack code, and also
1229		 * is more robust and provides a little more security in
1230		 * adding extensions to VXLAN.
1231		 */
1232
1233		goto bad_flags;
1234	}
1235
1236	md.vni = vxh->vx_vni;
1237	vs->rcv(vs, skb, &md);
1238	return 0;
1239
1240drop:
1241	/* Consume bad packet */
1242	kfree_skb(skb);
1243	return 0;
1244
1245bad_flags:
1246	netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
1247		   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
1248
1249error:
1250	/* Return non vxlan pkt */
1251	return 1;
1252}
1253
1254static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
1255		      struct vxlan_metadata *md)
1256{
1257	struct iphdr *oip = NULL;
1258	struct ipv6hdr *oip6 = NULL;
1259	struct vxlan_dev *vxlan;
1260	struct pcpu_sw_netstats *stats;
1261	union vxlan_addr saddr;
1262	__u32 vni;
1263	int err = 0;
1264	union vxlan_addr *remote_ip;
1265
1266	vni = ntohl(md->vni) >> 8;
1267	/* Is this VNI defined? */
1268	vxlan = vxlan_vs_find_vni(vs, vni);
1269	if (!vxlan)
1270		goto drop;
1271
1272	remote_ip = &vxlan->default_dst.remote_ip;
1273	skb_reset_mac_header(skb);
1274	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
1275	skb->protocol = eth_type_trans(skb, vxlan->dev);
1276	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1277
1278	/* Ignore packet loops (and multicast echo) */
1279	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
1280		goto drop;
1281
1282	/* Re-examine inner Ethernet packet */
1283	if (remote_ip->sa.sa_family == AF_INET) {
1284		oip = ip_hdr(skb);
1285		saddr.sin.sin_addr.s_addr = oip->saddr;
1286		saddr.sa.sa_family = AF_INET;
1287#if IS_ENABLED(CONFIG_IPV6)
1288	} else {
1289		oip6 = ipv6_hdr(skb);
1290		saddr.sin6.sin6_addr = oip6->saddr;
1291		saddr.sa.sa_family = AF_INET6;
1292#endif
1293	}
1294
1295	if ((vxlan->flags & VXLAN_F_LEARN) &&
1296	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
1297		goto drop;
1298
1299	skb_reset_network_header(skb);
1300	skb->mark = md->gbp;
1301
1302	if (oip6)
1303		err = IP6_ECN_decapsulate(oip6, skb);
1304	if (oip)
1305		err = IP_ECN_decapsulate(oip, skb);
1306
1307	if (unlikely(err)) {
1308		if (log_ecn_error) {
1309			if (oip6)
1310				net_info_ratelimited("non-ECT from %pI6\n",
1311						     &oip6->saddr);
1312			if (oip)
1313				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
1314						     &oip->saddr, oip->tos);
1315		}
1316		if (err > 1) {
1317			++vxlan->dev->stats.rx_frame_errors;
1318			++vxlan->dev->stats.rx_errors;
1319			goto drop;
1320		}
1321	}
1322
1323	stats = this_cpu_ptr(vxlan->dev->tstats);
1324	u64_stats_update_begin(&stats->syncp);
1325	stats->rx_packets++;
1326	stats->rx_bytes += skb->len;
1327	u64_stats_update_end(&stats->syncp);
1328
1329	netif_rx(skb);
1330
1331	return;
1332drop:
1333	/* Consume bad packet */
1334	kfree_skb(skb);
1335}
1336
1337static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
1338{
1339	struct vxlan_dev *vxlan = netdev_priv(dev);
1340	struct arphdr *parp;
1341	u8 *arpptr, *sha;
1342	__be32 sip, tip;
1343	struct neighbour *n;
1344
1345	if (dev->flags & IFF_NOARP)
1346		goto out;
1347
1348	if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
1349		dev->stats.tx_dropped++;
1350		goto out;
1351	}
1352	parp = arp_hdr(skb);
1353
1354	if ((parp->ar_hrd != htons(ARPHRD_ETHER) &&
1355	     parp->ar_hrd != htons(ARPHRD_IEEE802)) ||
1356	    parp->ar_pro != htons(ETH_P_IP) ||
1357	    parp->ar_op != htons(ARPOP_REQUEST) ||
1358	    parp->ar_hln != dev->addr_len ||
1359	    parp->ar_pln != 4)
1360		goto out;
1361	arpptr = (u8 *)parp + sizeof(struct arphdr);
1362	sha = arpptr;
1363	arpptr += dev->addr_len;	/* sha */
1364	memcpy(&sip, arpptr, sizeof(sip));
1365	arpptr += sizeof(sip);
1366	arpptr += dev->addr_len;	/* tha */
1367	memcpy(&tip, arpptr, sizeof(tip));
1368
1369	if (ipv4_is_loopback(tip) ||
1370	    ipv4_is_multicast(tip))
1371		goto out;
1372
1373	n = neigh_lookup(&arp_tbl, &tip, dev);
1374
1375	if (n) {
1376		struct vxlan_fdb *f;
1377		struct sk_buff	*reply;
1378
1379		if (!(n->nud_state & NUD_CONNECTED)) {
1380			neigh_release(n);
1381			goto out;
1382		}
1383
1384		f = vxlan_find_mac(vxlan, n->ha);
1385		if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
1386			/* bridge-local neighbor */
1387			neigh_release(n);
1388			goto out;
1389		}
1390
1391		reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1392				n->ha, sha);
1393
1394		neigh_release(n);
1395
1396		if (reply == NULL)
1397			goto out;
1398
1399		skb_reset_mac_header(reply);
1400		__skb_pull(reply, skb_network_offset(reply));
1401		reply->ip_summed = CHECKSUM_UNNECESSARY;
1402		reply->pkt_type = PACKET_HOST;
1403
1404		if (netif_rx_ni(reply) == NET_RX_DROP)
1405			dev->stats.rx_dropped++;
1406	} else if (vxlan->flags & VXLAN_F_L3MISS) {
1407		union vxlan_addr ipa = {
1408			.sin.sin_addr.s_addr = tip,
1409			.sin.sin_family = AF_INET,
1410		};
1411
1412		vxlan_ip_miss(dev, &ipa);
1413	}
1414out:
1415	consume_skb(skb);
1416	return NETDEV_TX_OK;
1417}
1418
1419#if IS_ENABLED(CONFIG_IPV6)
1420static struct sk_buff *vxlan_na_create(struct sk_buff *request,
1421	struct neighbour *n, bool isrouter)
1422{
1423	struct net_device *dev = request->dev;
1424	struct sk_buff *reply;
1425	struct nd_msg *ns, *na;
1426	struct ipv6hdr *pip6;
1427	u8 *daddr;
1428	int na_olen = 8; /* opt hdr + ETH_ALEN for target */
1429	int ns_olen;
1430	int i, len;
1431
1432	if (dev == NULL)
1433		return NULL;
1434
1435	len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
1436		sizeof(*na) + na_olen + dev->needed_tailroom;
1437	reply = alloc_skb(len, GFP_ATOMIC);
1438	if (reply == NULL)
1439		return NULL;
1440
1441	reply->protocol = htons(ETH_P_IPV6);
1442	reply->dev = dev;
1443	skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
1444	skb_push(reply, sizeof(struct ethhdr));
1445	skb_set_mac_header(reply, 0);
1446
1447	ns = (struct nd_msg *)skb_transport_header(request);
1448
1449	daddr = eth_hdr(request)->h_source;
1450	ns_olen = request->len - skb_transport_offset(request) - sizeof(*ns);
1451	for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
1452		if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
1453			daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
1454			break;
1455		}
1456	}
1457
1458	/* Ethernet header */
1459	ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
1460	ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
1461	eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
1462	reply->protocol = htons(ETH_P_IPV6);
1463
1464	skb_pull(reply, sizeof(struct ethhdr));
1465	skb_set_network_header(reply, 0);
1466	skb_put(reply, sizeof(struct ipv6hdr));
1467
1468	/* IPv6 header */
1469
1470	pip6 = ipv6_hdr(reply);
1471	memset(pip6, 0, sizeof(struct ipv6hdr));
1472	pip6->version = 6;
1473	pip6->priority = ipv6_hdr(request)->priority;
1474	pip6->nexthdr = IPPROTO_ICMPV6;
1475	pip6->hop_limit = 255;
1476	pip6->daddr = ipv6_hdr(request)->saddr;
1477	pip6->saddr = *(struct in6_addr *)n->primary_key;
1478
1479	skb_pull(reply, sizeof(struct ipv6hdr));
1480	skb_set_transport_header(reply, 0);
1481
1482	na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
1483
1484	/* Neighbor Advertisement */
1485	memset(na, 0, sizeof(*na)+na_olen);
1486	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
1487	na->icmph.icmp6_router = isrouter;
1488	na->icmph.icmp6_override = 1;
1489	na->icmph.icmp6_solicited = 1;
1490	na->target = ns->target;
1491	ether_addr_copy(&na->opt[2], n->ha);
1492	na->opt[0] = ND_OPT_TARGET_LL_ADDR;
1493	na->opt[1] = na_olen >> 3;
1494
1495	na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
1496		&pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
1497		csum_partial(na, sizeof(*na)+na_olen, 0));
1498
1499	pip6->payload_len = htons(sizeof(*na)+na_olen);
1500
1501	skb_push(reply, sizeof(struct ipv6hdr));
1502
1503	reply->ip_summed = CHECKSUM_UNNECESSARY;
1504
1505	return reply;
1506}
1507
1508static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
1509{
1510	struct vxlan_dev *vxlan = netdev_priv(dev);
1511	struct nd_msg *msg;
1512	const struct ipv6hdr *iphdr;
1513	const struct in6_addr *saddr, *daddr;
1514	struct neighbour *n;
1515	struct inet6_dev *in6_dev;
1516
1517	in6_dev = __in6_dev_get(dev);
1518	if (!in6_dev)
1519		goto out;
1520
1521	iphdr = ipv6_hdr(skb);
1522	saddr = &iphdr->saddr;
1523	daddr = &iphdr->daddr;
1524
1525	msg = (struct nd_msg *)skb_transport_header(skb);
1526	if (msg->icmph.icmp6_code != 0 ||
1527	    msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
1528		goto out;
1529
1530	if (ipv6_addr_loopback(daddr) ||
1531	    ipv6_addr_is_multicast(&msg->target))
1532		goto out;
1533
1534	n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
1535
1536	if (n) {
1537		struct vxlan_fdb *f;
1538		struct sk_buff *reply;
1539
1540		if (!(n->nud_state & NUD_CONNECTED)) {
1541			neigh_release(n);
1542			goto out;
1543		}
1544
1545		f = vxlan_find_mac(vxlan, n->ha);
1546		if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
1547			/* bridge-local neighbor */
1548			neigh_release(n);
1549			goto out;
1550		}
1551
1552		reply = vxlan_na_create(skb, n,
1553					!!(f ? f->flags & NTF_ROUTER : 0));
1554
1555		neigh_release(n);
1556
1557		if (reply == NULL)
1558			goto out;
1559
1560		if (netif_rx_ni(reply) == NET_RX_DROP)
1561			dev->stats.rx_dropped++;
1562
1563	} else if (vxlan->flags & VXLAN_F_L3MISS) {
1564		union vxlan_addr ipa = {
1565			.sin6.sin6_addr = msg->target,
1566			.sin6.sin6_family = AF_INET6,
1567		};
1568
1569		vxlan_ip_miss(dev, &ipa);
1570	}
1571
1572out:
1573	consume_skb(skb);
1574	return NETDEV_TX_OK;
1575}
1576#endif
1577
1578static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
1579{
1580	struct vxlan_dev *vxlan = netdev_priv(dev);
1581	struct neighbour *n;
1582
1583	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
1584		return false;
1585
1586	n = NULL;
1587	switch (ntohs(eth_hdr(skb)->h_proto)) {
1588	case ETH_P_IP:
1589	{
1590		struct iphdr *pip;
1591
1592		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
1593			return false;
1594		pip = ip_hdr(skb);
1595		n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
1596		if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
1597			union vxlan_addr ipa = {
1598				.sin.sin_addr.s_addr = pip->daddr,
1599				.sin.sin_family = AF_INET,
1600			};
1601
1602			vxlan_ip_miss(dev, &ipa);
1603			return false;
1604		}
1605
1606		break;
1607	}
1608#if IS_ENABLED(CONFIG_IPV6)
1609	case ETH_P_IPV6:
1610	{
1611		struct ipv6hdr *pip6;
1612
1613		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
1614			return false;
1615		pip6 = ipv6_hdr(skb);
1616		n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
1617		if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
1618			union vxlan_addr ipa = {
1619				.sin6.sin6_addr = pip6->daddr,
1620				.sin6.sin6_family = AF_INET6,
1621			};
1622
1623			vxlan_ip_miss(dev, &ipa);
1624			return false;
1625		}
1626
1627		break;
1628	}
1629#endif
1630	default:
1631		return false;
1632	}
1633
1634	if (n) {
1635		bool diff;
1636
1637		diff = !ether_addr_equal(eth_hdr(skb)->h_dest, n->ha);
1638		if (diff) {
1639			memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
1640				dev->addr_len);
1641			memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len);
1642		}
1643		neigh_release(n);
1644		return diff;
1645	}
1646
1647	return false;
1648}
1649
1650static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
1651				struct vxlan_metadata *md)
1652{
1653	struct vxlanhdr_gbp *gbp;
1654
1655	if (!md->gbp)
1656		return;
1657
1658	gbp = (struct vxlanhdr_gbp *)vxh;
1659	vxh->vx_flags |= htonl(VXLAN_HF_GBP);
1660
1661	if (md->gbp & VXLAN_GBP_DONT_LEARN)
1662		gbp->dont_learn = 1;
1663
1664	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
1665		gbp->policy_applied = 1;
1666
1667	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
1668}
1669
1670#if IS_ENABLED(CONFIG_IPV6)
1671static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
1672			   struct sk_buff *skb,
1673			   struct net_device *dev, struct in6_addr *saddr,
1674			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
1675			   __be16 src_port, __be16 dst_port,
1676			   struct vxlan_metadata *md, bool xnet, u32 vxflags)
1677{
1678	struct vxlanhdr *vxh;
1679	int min_headroom;
1680	int err;
1681	bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
1682	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
1683	u16 hdrlen = sizeof(struct vxlanhdr);
1684
1685	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
1686	    skb->ip_summed == CHECKSUM_PARTIAL) {
1687		int csum_start = skb_checksum_start_offset(skb);
1688
1689		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
1690		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
1691		    (skb->csum_offset == offsetof(struct udphdr, check) ||
1692		     skb->csum_offset == offsetof(struct tcphdr, check))) {
1693			udp_sum = false;
1694			type |= SKB_GSO_TUNNEL_REMCSUM;
1695		}
1696	}
1697
1698	skb_scrub_packet(skb, xnet);
1699
1700	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
1701			+ VXLAN_HLEN + sizeof(struct ipv6hdr)
1702			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
1703
1704	/* Need space for new headers (invalidates iph ptr) */
1705	err = skb_cow_head(skb, min_headroom);
1706	if (unlikely(err)) {
1707		kfree_skb(skb);
1708		goto err;
1709	}
1710
1711	skb = vlan_hwaccel_push_inside(skb);
1712	if (WARN_ON(!skb)) {
1713		err = -ENOMEM;
1714		goto err;
1715	}
1716
1717	skb = iptunnel_handle_offloads(skb, udp_sum, type);
1718	if (IS_ERR(skb)) {
1719		err = -EINVAL;
1720		goto err;
1721	}
1722
1723	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
1724	vxh->vx_flags = htonl(VXLAN_HF_VNI);
1725	vxh->vx_vni = md->vni;
1726
1727	if (type & SKB_GSO_TUNNEL_REMCSUM) {
1728		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
1729			   VXLAN_RCO_SHIFT;
1730
1731		if (skb->csum_offset == offsetof(struct udphdr, check))
1732			data |= VXLAN_RCO_UDP;
1733
1734		vxh->vx_vni |= htonl(data);
1735		vxh->vx_flags |= htonl(VXLAN_HF_RCO);
1736
1737		if (!skb_is_gso(skb)) {
1738			skb->ip_summed = CHECKSUM_NONE;
1739			skb->encapsulation = 0;
1740		}
1741	}
1742
1743	if (vxflags & VXLAN_F_GBP)
1744		vxlan_build_gbp_hdr(vxh, vxflags, md);
1745
1746	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
1747
1748	udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
1749			     ttl, src_port, dst_port,
1750			     !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
1751	return 0;
1752err:
1753	dst_release(dst);
1754	return err;
1755}
1756#endif
1757
1758int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
1759		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
1760		   __be16 src_port, __be16 dst_port,
1761		   struct vxlan_metadata *md, bool xnet, u32 vxflags)
1762{
1763	struct vxlanhdr *vxh;
1764	int min_headroom;
1765	int err;
1766	bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
1767	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
1768	u16 hdrlen = sizeof(struct vxlanhdr);
1769
1770	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
1771	    skb->ip_summed == CHECKSUM_PARTIAL) {
1772		int csum_start = skb_checksum_start_offset(skb);
1773
1774		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
1775		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
1776		    (skb->csum_offset == offsetof(struct udphdr, check) ||
1777		     skb->csum_offset == offsetof(struct tcphdr, check))) {
1778			udp_sum = false;
1779			type |= SKB_GSO_TUNNEL_REMCSUM;
1780		}
1781	}
1782
1783	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
1784			+ VXLAN_HLEN + sizeof(struct iphdr)
1785			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
1786
1787	/* Need space for new headers (invalidates iph ptr) */
1788	err = skb_cow_head(skb, min_headroom);
1789	if (unlikely(err)) {
1790		kfree_skb(skb);
1791		return err;
1792	}
1793
1794	skb = vlan_hwaccel_push_inside(skb);
1795	if (WARN_ON(!skb))
1796		return -ENOMEM;
1797
1798	skb = iptunnel_handle_offloads(skb, udp_sum, type);
1799	if (IS_ERR(skb))
1800		return PTR_ERR(skb);
1801
1802	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
1803	vxh->vx_flags = htonl(VXLAN_HF_VNI);
1804	vxh->vx_vni = md->vni;
1805
1806	if (type & SKB_GSO_TUNNEL_REMCSUM) {
1807		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
1808			   VXLAN_RCO_SHIFT;
1809
1810		if (skb->csum_offset == offsetof(struct udphdr, check))
1811			data |= VXLAN_RCO_UDP;
1812
1813		vxh->vx_vni |= htonl(data);
1814		vxh->vx_flags |= htonl(VXLAN_HF_RCO);
1815
1816		if (!skb_is_gso(skb)) {
1817			skb->ip_summed = CHECKSUM_NONE;
1818			skb->encapsulation = 0;
1819		}
1820	}
1821
1822	if (vxflags & VXLAN_F_GBP)
1823		vxlan_build_gbp_hdr(vxh, vxflags, md);
1824
1825	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
1826
1827	return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos,
1828				   ttl, df, src_port, dst_port, xnet,
1829				   !(vxflags & VXLAN_F_UDP_CSUM));
1830}
1831EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
1832
1833/* Bypass encapsulation if the destination is local */
1834static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
1835			       struct vxlan_dev *dst_vxlan)
1836{
1837	struct pcpu_sw_netstats *tx_stats, *rx_stats;
1838	union vxlan_addr loopback;
1839	union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
1840	struct net_device *dev = skb->dev;
1841	int len = skb->len;
1842
1843	tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
1844	rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
1845	skb->pkt_type = PACKET_HOST;
1846	skb->encapsulation = 0;
1847	skb->dev = dst_vxlan->dev;
1848	__skb_pull(skb, skb_network_offset(skb));
1849
1850	if (remote_ip->sa.sa_family == AF_INET) {
1851		loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1852		loopback.sa.sa_family =  AF_INET;
1853#if IS_ENABLED(CONFIG_IPV6)
1854	} else {
1855		loopback.sin6.sin6_addr = in6addr_loopback;
1856		loopback.sa.sa_family =  AF_INET6;
1857#endif
1858	}
1859
1860	if (dst_vxlan->flags & VXLAN_F_LEARN)
1861		vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source);
1862
1863	u64_stats_update_begin(&tx_stats->syncp);
1864	tx_stats->tx_packets++;
1865	tx_stats->tx_bytes += len;
1866	u64_stats_update_end(&tx_stats->syncp);
1867
1868	if (netif_rx(skb) == NET_RX_SUCCESS) {
1869		u64_stats_update_begin(&rx_stats->syncp);
1870		rx_stats->rx_packets++;
1871		rx_stats->rx_bytes += len;
1872		u64_stats_update_end(&rx_stats->syncp);
1873	} else {
1874		dev->stats.rx_dropped++;
1875	}
1876}
1877
1878static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1879			   struct vxlan_rdst *rdst, bool did_rsc)
1880{
1881	struct vxlan_dev *vxlan = netdev_priv(dev);
1882	struct sock *sk = vxlan->vn_sock->sock->sk;
1883	struct rtable *rt = NULL;
1884	const struct iphdr *old_iph;
1885	struct flowi4 fl4;
1886	union vxlan_addr *dst;
1887	struct vxlan_metadata md;
1888	__be16 src_port = 0, dst_port;
1889	u32 vni;
1890	__be16 df = 0;
1891	__u8 tos, ttl;
1892	int err;
1893
1894	dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port;
1895	vni = rdst->remote_vni;
1896	dst = &rdst->remote_ip;
1897
1898	if (vxlan_addr_any(dst)) {
1899		if (did_rsc) {
1900			/* short-circuited back to local bridge */
1901			vxlan_encap_bypass(skb, vxlan, vxlan);
1902			return;
1903		}
1904		goto drop;
1905	}
1906
1907	old_iph = ip_hdr(skb);
1908
1909	ttl = vxlan->ttl;
1910	if (!ttl && vxlan_addr_multicast(dst))
1911		ttl = 1;
1912
1913	tos = vxlan->tos;
1914	if (tos == 1)
1915		tos = ip_tunnel_get_dsfield(old_iph, skb);
1916
1917	src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->port_min,
1918				     vxlan->port_max, true);
1919
1920	if (dst->sa.sa_family == AF_INET) {
1921		memset(&fl4, 0, sizeof(fl4));
1922		fl4.flowi4_oif = rdst->remote_ifindex;
1923		fl4.flowi4_tos = RT_TOS(tos);
1924		fl4.daddr = dst->sin.sin_addr.s_addr;
1925		fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;
1926
1927		rt = ip_route_output_key(vxlan->net, &fl4);
1928		if (IS_ERR(rt)) {
1929			netdev_dbg(dev, "no route to %pI4\n",
1930				   &dst->sin.sin_addr.s_addr);
1931			dev->stats.tx_carrier_errors++;
1932			goto tx_error;
1933		}
1934
1935		if (rt->dst.dev == dev) {
1936			netdev_dbg(dev, "circular route to %pI4\n",
1937				   &dst->sin.sin_addr.s_addr);
1938			dev->stats.collisions++;
1939			goto rt_tx_error;
1940		}
1941
1942		/* Bypass encapsulation if the destination is local */
1943		if (rt->rt_flags & RTCF_LOCAL &&
1944		    !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
1945			struct vxlan_dev *dst_vxlan;
1946
1947			ip_rt_put(rt);
1948			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
1949						   dst->sa.sa_family, dst_port,
1950						   vxlan->flags);
1951			if (!dst_vxlan)
1952				goto tx_error;
1953			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
1954			return;
1955		}
1956
1957		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
1958		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
1959		md.vni = htonl(vni << 8);
1960		md.gbp = skb->mark;
1961
1962		err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
1963				     dst->sin.sin_addr.s_addr, tos, ttl, df,
1964				     src_port, dst_port, &md,
1965				     !net_eq(vxlan->net, dev_net(vxlan->dev)),
1966				     vxlan->flags);
1967		if (err < 0) {
1968			/* skb is already freed. */
1969			skb = NULL;
1970			goto rt_tx_error;
1971		}
1972
1973		iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
1974#if IS_ENABLED(CONFIG_IPV6)
1975	} else {
1976		struct dst_entry *ndst;
1977		struct flowi6 fl6;
1978		u32 flags;
1979
1980		memset(&fl6, 0, sizeof(fl6));
1981		fl6.flowi6_oif = rdst->remote_ifindex;
1982		fl6.daddr = dst->sin6.sin6_addr;
1983		fl6.saddr = vxlan->saddr.sin6.sin6_addr;
1984		fl6.flowi6_proto = IPPROTO_UDP;
1985
1986		if (ipv6_stub->ipv6_dst_lookup(sk, &ndst, &fl6)) {
1987			netdev_dbg(dev, "no route to %pI6\n",
1988				   &dst->sin6.sin6_addr);
1989			dev->stats.tx_carrier_errors++;
1990			goto tx_error;
1991		}
1992
1993		if (ndst->dev == dev) {
1994			netdev_dbg(dev, "circular route to %pI6\n",
1995				   &dst->sin6.sin6_addr);
1996			dst_release(ndst);
1997			dev->stats.collisions++;
1998			goto tx_error;
1999		}
2000
2001		/* Bypass encapsulation if the destination is local */
2002		flags = ((struct rt6_info *)ndst)->rt6i_flags;
2003		if (flags & RTF_LOCAL &&
2004		    !(flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
2005			struct vxlan_dev *dst_vxlan;
2006
2007			dst_release(ndst);
2008			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
2009						   dst->sa.sa_family, dst_port,
2010						   vxlan->flags);
2011			if (!dst_vxlan)
2012				goto tx_error;
2013			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
2014			return;
2015		}
2016
2017		ttl = ttl ? : ip6_dst_hoplimit(ndst);
2018		md.vni = htonl(vni << 8);
2019		md.gbp = skb->mark;
2020
2021		err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
2022				      0, ttl, src_port, dst_port, &md,
2023				      !net_eq(vxlan->net, dev_net(vxlan->dev)),
2024				      vxlan->flags);
2025#endif
2026	}
2027
2028	return;
2029
2030drop:
2031	dev->stats.tx_dropped++;
2032	goto tx_free;
2033
2034rt_tx_error:
2035	ip_rt_put(rt);
2036tx_error:
2037	dev->stats.tx_errors++;
2038tx_free:
2039	dev_kfree_skb(skb);
2040}
2041
2042/* Transmit local packets over Vxlan
2043 *
2044 * Outer IP header inherits ECN and DF from inner header.
2045 * Outer UDP destination is the VXLAN assigned port.
2046 *           source port is based on hash of flow
2047 */
2048static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
2049{
2050	struct vxlan_dev *vxlan = netdev_priv(dev);
2051	struct ethhdr *eth;
2052	bool did_rsc = false;
2053	struct vxlan_rdst *rdst, *fdst = NULL;
2054	struct vxlan_fdb *f;
2055
2056	skb_reset_mac_header(skb);
2057	eth = eth_hdr(skb);
2058
2059	if ((vxlan->flags & VXLAN_F_PROXY)) {
2060		if (ntohs(eth->h_proto) == ETH_P_ARP)
2061			return arp_reduce(dev, skb);
2062#if IS_ENABLED(CONFIG_IPV6)
2063		else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
2064			 pskb_may_pull(skb, sizeof(struct ipv6hdr)
2065				       + sizeof(struct nd_msg)) &&
2066			 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
2067				struct nd_msg *msg;
2068
2069				msg = (struct nd_msg *)skb_transport_header(skb);
2070				if (msg->icmph.icmp6_code == 0 &&
2071				    msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
2072					return neigh_reduce(dev, skb);
2073		}
2074		eth = eth_hdr(skb);
2075#endif
2076	}
2077
2078	f = vxlan_find_mac(vxlan, eth->h_dest);
2079	did_rsc = false;
2080
2081	if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
2082	    (ntohs(eth->h_proto) == ETH_P_IP ||
2083	     ntohs(eth->h_proto) == ETH_P_IPV6)) {
2084		did_rsc = route_shortcircuit(dev, skb);
2085		if (did_rsc)
2086			f = vxlan_find_mac(vxlan, eth->h_dest);
2087	}
2088
2089	if (f == NULL) {
2090		f = vxlan_find_mac(vxlan, all_zeros_mac);
2091		if (f == NULL) {
2092			if ((vxlan->flags & VXLAN_F_L2MISS) &&
2093			    !is_multicast_ether_addr(eth->h_dest))
2094				vxlan_fdb_miss(vxlan, eth->h_dest);
2095
2096			dev->stats.tx_dropped++;
2097			kfree_skb(skb);
2098			return NETDEV_TX_OK;
2099		}
2100	}
2101
2102	list_for_each_entry_rcu(rdst, &f->remotes, list) {
2103		struct sk_buff *skb1;
2104
2105		if (!fdst) {
2106			fdst = rdst;
2107			continue;
2108		}
2109		skb1 = skb_clone(skb, GFP_ATOMIC);
2110		if (skb1)
2111			vxlan_xmit_one(skb1, dev, rdst, did_rsc);
2112	}
2113
2114	if (fdst)
2115		vxlan_xmit_one(skb, dev, fdst, did_rsc);
2116	else
2117		kfree_skb(skb);
2118	return NETDEV_TX_OK;
2119}
2120
2121/* Walk the forwarding table and purge stale entries */
2122static void vxlan_cleanup(unsigned long arg)
2123{
2124	struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
2125	unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
2126	unsigned int h;
2127
2128	if (!netif_running(vxlan->dev))
2129		return;
2130
2131	spin_lock_bh(&vxlan->hash_lock);
2132	for (h = 0; h < FDB_HASH_SIZE; ++h) {
2133		struct hlist_node *p, *n;
2134		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
2135			struct vxlan_fdb *f
2136				= container_of(p, struct vxlan_fdb, hlist);
2137			unsigned long timeout;
2138
2139			if (f->state & NUD_PERMANENT)
2140				continue;
2141
2142			timeout = f->used + vxlan->age_interval * HZ;
2143			if (time_before_eq(timeout, jiffies)) {
2144				netdev_dbg(vxlan->dev,
2145					   "garbage collect %pM\n",
2146					   f->eth_addr);
2147				f->state = NUD_STALE;
2148				vxlan_fdb_destroy(vxlan, f);
2149			} else if (time_before(timeout, next_timer))
2150				next_timer = timeout;
2151		}
2152	}
2153	spin_unlock_bh(&vxlan->hash_lock);
2154
2155	mod_timer(&vxlan->age_timer, next_timer);
2156}
2157
2158static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
2159{
2160	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2161	__u32 vni = vxlan->default_dst.remote_vni;
2162
2163	vxlan->vn_sock = vs;
2164	spin_lock(&vn->sock_lock);
2165	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
2166	spin_unlock(&vn->sock_lock);
2167}
2168
2169/* Setup stats when device is created */
2170static int vxlan_init(struct net_device *dev)
2171{
2172	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
2173	if (!dev->tstats)
2174		return -ENOMEM;
2175
2176	return 0;
2177}
2178
2179static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
2180{
2181	struct vxlan_fdb *f;
2182
2183	spin_lock_bh(&vxlan->hash_lock);
2184	f = __vxlan_find_mac(vxlan, all_zeros_mac);
2185	if (f)
2186		vxlan_fdb_destroy(vxlan, f);
2187	spin_unlock_bh(&vxlan->hash_lock);
2188}
2189
2190static void vxlan_uninit(struct net_device *dev)
2191{
2192	struct vxlan_dev *vxlan = netdev_priv(dev);
2193
2194	vxlan_fdb_delete_default(vxlan);
2195
2196	free_percpu(dev->tstats);
2197}
2198
2199/* Start ageing timer and join group when device is brought up */
2200static int vxlan_open(struct net_device *dev)
2201{
2202	struct vxlan_dev *vxlan = netdev_priv(dev);
2203	struct vxlan_sock *vs;
2204	int ret = 0;
2205
2206	vs = vxlan_sock_add(vxlan->net, vxlan->dst_port, vxlan_rcv, NULL,
2207			    false, vxlan->flags);
2208	if (IS_ERR(vs))
2209		return PTR_ERR(vs);
2210
2211	vxlan_vs_add_dev(vs, vxlan);
2212
2213	if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
2214		ret = vxlan_igmp_join(vxlan);
2215		if (ret == -EADDRINUSE)
2216			ret = 0;
2217		if (ret) {
2218			vxlan_sock_release(vs);
2219			return ret;
2220		}
2221	}
2222
2223	if (vxlan->age_interval)
2224		mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
2225
2226	return ret;
2227}
2228
2229/* Purge the forwarding table */
2230static void vxlan_flush(struct vxlan_dev *vxlan)
2231{
2232	unsigned int h;
2233
2234	spin_lock_bh(&vxlan->hash_lock);
2235	for (h = 0; h < FDB_HASH_SIZE; ++h) {
2236		struct hlist_node *p, *n;
2237		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
2238			struct vxlan_fdb *f
2239				= container_of(p, struct vxlan_fdb, hlist);
2240			/* the all_zeros_mac entry is deleted at vxlan_uninit */
2241			if (!is_zero_ether_addr(f->eth_addr))
2242				vxlan_fdb_destroy(vxlan, f);
2243		}
2244	}
2245	spin_unlock_bh(&vxlan->hash_lock);
2246}
2247
2248/* Cleanup timer and forwarding table on shutdown */
2249static int vxlan_stop(struct net_device *dev)
2250{
2251	struct vxlan_dev *vxlan = netdev_priv(dev);
2252	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2253	struct vxlan_sock *vs = vxlan->vn_sock;
2254	int ret = 0;
2255
2256	if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
2257	    !vxlan_group_used(vn, vxlan))
2258		ret = vxlan_igmp_leave(vxlan);
2259
2260	del_timer_sync(&vxlan->age_timer);
2261
2262	vxlan_flush(vxlan);
2263	vxlan_sock_release(vs);
2264
2265	return ret;
2266}
2267
2268/* Stub, nothing needs to be done. */
2269static void vxlan_set_multicast_list(struct net_device *dev)
2270{
2271}
2272
2273static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
2274{
2275	struct vxlan_dev *vxlan = netdev_priv(dev);
2276	struct vxlan_rdst *dst = &vxlan->default_dst;
2277	struct net_device *lowerdev;
2278	int max_mtu;
2279
2280	lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
2281	if (lowerdev == NULL)
2282		return eth_change_mtu(dev, new_mtu);
2283
2284	if (dst->remote_ip.sa.sa_family == AF_INET6)
2285		max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
2286	else
2287		max_mtu = lowerdev->mtu - VXLAN_HEADROOM;
2288
2289	if (new_mtu < 68 || new_mtu > max_mtu)
2290		return -EINVAL;
2291
2292	dev->mtu = new_mtu;
2293	return 0;
2294}
2295
2296static const struct net_device_ops vxlan_netdev_ops = {
2297	.ndo_init		= vxlan_init,
2298	.ndo_uninit		= vxlan_uninit,
2299	.ndo_open		= vxlan_open,
2300	.ndo_stop		= vxlan_stop,
2301	.ndo_start_xmit		= vxlan_xmit,
2302	.ndo_get_stats64	= ip_tunnel_get_stats64,
2303	.ndo_set_rx_mode	= vxlan_set_multicast_list,
2304	.ndo_change_mtu		= vxlan_change_mtu,
2305	.ndo_validate_addr	= eth_validate_addr,
2306	.ndo_set_mac_address	= eth_mac_addr,
2307	.ndo_fdb_add		= vxlan_fdb_add,
2308	.ndo_fdb_del		= vxlan_fdb_delete,
2309	.ndo_fdb_dump		= vxlan_fdb_dump,
2310};
2311
2312/* Info for udev, that this is a virtual tunnel endpoint */
2313static struct device_type vxlan_type = {
2314	.name = "vxlan",
2315};
2316
2317/* Calls the ndo_add_vxlan_port of the caller in order to
2318 * supply the listening VXLAN udp ports. Callers are expected
2319 * to implement the ndo_add_vxlan_port.
2320 */
2321void vxlan_get_rx_port(struct net_device *dev)
2322{
2323	struct vxlan_sock *vs;
2324	struct net *net = dev_net(dev);
2325	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2326	sa_family_t sa_family;
2327	__be16 port;
2328	unsigned int i;
2329
2330	spin_lock(&vn->sock_lock);
2331	for (i = 0; i < PORT_HASH_SIZE; ++i) {
2332		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
2333			port = inet_sk(vs->sock->sk)->inet_sport;
2334			sa_family = vs->sock->sk->sk_family;
2335			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
2336							    port);
2337		}
2338	}
2339	spin_unlock(&vn->sock_lock);
2340}
2341EXPORT_SYMBOL_GPL(vxlan_get_rx_port);
2342
2343/* Initialize the device structure. */
2344static void vxlan_setup(struct net_device *dev)
2345{
2346	struct vxlan_dev *vxlan = netdev_priv(dev);
2347	unsigned int h;
2348
2349	eth_hw_addr_random(dev);
2350	ether_setup(dev);
2351	if (vxlan->default_dst.remote_ip.sa.sa_family == AF_INET6)
2352		dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
2353	else
2354		dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
2355
2356	dev->netdev_ops = &vxlan_netdev_ops;
2357	dev->destructor = free_netdev;
2358	SET_NETDEV_DEVTYPE(dev, &vxlan_type);
2359
2360	dev->tx_queue_len = 0;
2361	dev->features	|= NETIF_F_LLTX;
2362	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM;
2363	dev->features   |= NETIF_F_RXCSUM;
2364	dev->features   |= NETIF_F_GSO_SOFTWARE;
2365
2366	dev->vlan_features = dev->features;
2367	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
2368	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
2369	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
2370	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
2371	netif_keep_dst(dev);
2372	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
2373
2374	INIT_LIST_HEAD(&vxlan->next);
2375	spin_lock_init(&vxlan->hash_lock);
2376
2377	init_timer_deferrable(&vxlan->age_timer);
2378	vxlan->age_timer.function = vxlan_cleanup;
2379	vxlan->age_timer.data = (unsigned long) vxlan;
2380
2381	vxlan->dst_port = htons(vxlan_port);
2382
2383	vxlan->dev = dev;
2384
2385	for (h = 0; h < FDB_HASH_SIZE; ++h)
2386		INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
2387}
2388
2389static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
2390	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
2391	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
2392	[IFLA_VXLAN_GROUP6]	= { .len = sizeof(struct in6_addr) },
2393	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
2394	[IFLA_VXLAN_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
2395	[IFLA_VXLAN_LOCAL6]	= { .len = sizeof(struct in6_addr) },
2396	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
2397	[IFLA_VXLAN_TTL]	= { .type = NLA_U8 },
2398	[IFLA_VXLAN_LEARNING]	= { .type = NLA_U8 },
2399	[IFLA_VXLAN_AGEING]	= { .type = NLA_U32 },
2400	[IFLA_VXLAN_LIMIT]	= { .type = NLA_U32 },
2401	[IFLA_VXLAN_PORT_RANGE] = { .len  = sizeof(struct ifla_vxlan_port_range) },
2402	[IFLA_VXLAN_PROXY]	= { .type = NLA_U8 },
2403	[IFLA_VXLAN_RSC]	= { .type = NLA_U8 },
2404	[IFLA_VXLAN_L2MISS]	= { .type = NLA_U8 },
2405	[IFLA_VXLAN_L3MISS]	= { .type = NLA_U8 },
2406	[IFLA_VXLAN_PORT]	= { .type = NLA_U16 },
2407	[IFLA_VXLAN_UDP_CSUM]	= { .type = NLA_U8 },
2408	[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
2409	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
2410	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
2411	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
2412	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
2413	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
2414};
2415
2416static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
2417{
2418	if (tb[IFLA_ADDRESS]) {
2419		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
2420			pr_debug("invalid link address (not ethernet)\n");
2421			return -EINVAL;
2422		}
2423
2424		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
2425			pr_debug("invalid all zero ethernet address\n");
2426			return -EADDRNOTAVAIL;
2427		}
2428	}
2429
2430	if (!data)
2431		return -EINVAL;
2432
2433	if (data[IFLA_VXLAN_ID]) {
2434		__u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
2435		if (id >= VXLAN_VID_MASK)
2436			return -ERANGE;
2437	}
2438
2439	if (data[IFLA_VXLAN_PORT_RANGE]) {
2440		const struct ifla_vxlan_port_range *p
2441			= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
2442
2443		if (ntohs(p->high) < ntohs(p->low)) {
2444			pr_debug("port range %u .. %u not valid\n",
2445				 ntohs(p->low), ntohs(p->high));
2446			return -EINVAL;
2447		}
2448	}
2449
2450	return 0;
2451}
2452
2453static void vxlan_get_drvinfo(struct net_device *netdev,
2454			      struct ethtool_drvinfo *drvinfo)
2455{
2456	strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
2457	strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
2458}
2459
2460static const struct ethtool_ops vxlan_ethtool_ops = {
2461	.get_drvinfo	= vxlan_get_drvinfo,
2462	.get_link	= ethtool_op_get_link,
2463};
2464
2465static void vxlan_del_work(struct work_struct *work)
2466{
2467	struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
2468	udp_tunnel_sock_release(vs->sock);
2469	kfree_rcu(vs, rcu);
2470}
2471
2472static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
2473					__be16 port, u32 flags)
2474{
2475	struct socket *sock;
2476	struct udp_port_cfg udp_conf;
2477	int err;
2478
2479	memset(&udp_conf, 0, sizeof(udp_conf));
2480
2481	if (ipv6) {
2482		udp_conf.family = AF_INET6;
2483		udp_conf.use_udp6_rx_checksums =
2484		    !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
2485	} else {
2486		udp_conf.family = AF_INET;
2487	}
2488
2489	udp_conf.local_udp_port = port;
2490
2491	/* Open UDP socket */
2492	err = udp_sock_create(net, &udp_conf, &sock);
2493	if (err < 0)
2494		return ERR_PTR(err);
2495
2496	return sock;
2497}
2498
2499/* Create new listen socket if needed */
2500static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
2501					      vxlan_rcv_t *rcv, void *data,
2502					      u32 flags)
2503{
2504	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2505	struct vxlan_sock *vs;
2506	struct socket *sock;
2507	unsigned int h;
2508	bool ipv6 = !!(flags & VXLAN_F_IPV6);
2509	struct udp_tunnel_sock_cfg tunnel_cfg;
2510
2511	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
2512	if (!vs)
2513		return ERR_PTR(-ENOMEM);
2514
2515	for (h = 0; h < VNI_HASH_SIZE; ++h)
2516		INIT_HLIST_HEAD(&vs->vni_list[h]);
2517
2518	INIT_WORK(&vs->del_work, vxlan_del_work);
2519
2520	sock = vxlan_create_sock(net, ipv6, port, flags);
2521	if (IS_ERR(sock)) {
2522		pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
2523			PTR_ERR(sock));
2524		kfree(vs);
2525		return ERR_CAST(sock);
2526	}
2527
2528	vs->sock = sock;
2529	atomic_set(&vs->refcnt, 1);
2530	vs->rcv = rcv;
2531	vs->data = data;
2532	vs->flags = (flags & VXLAN_F_RCV_FLAGS);
2533
2534	/* Initialize the vxlan udp offloads structure */
2535	vs->udp_offloads.port = port;
2536	vs->udp_offloads.callbacks.gro_receive  = vxlan_gro_receive;
2537	vs->udp_offloads.callbacks.gro_complete = vxlan_gro_complete;
2538
2539	spin_lock(&vn->sock_lock);
2540	hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
2541	vxlan_notify_add_rx_port(vs);
2542	spin_unlock(&vn->sock_lock);
2543
2544	/* Mark socket as an encapsulation socket. */
2545	tunnel_cfg.sk_user_data = vs;
2546	tunnel_cfg.encap_type = 1;
2547	tunnel_cfg.encap_rcv = vxlan_udp_encap_recv;
2548	tunnel_cfg.encap_destroy = NULL;
2549
2550	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
2551
2552	return vs;
2553}
2554
2555struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
2556				  vxlan_rcv_t *rcv, void *data,
2557				  bool no_share, u32 flags)
2558{
2559	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2560	struct vxlan_sock *vs;
2561	bool ipv6 = flags & VXLAN_F_IPV6;
2562
2563	if (!no_share) {
2564		spin_lock(&vn->sock_lock);
2565		vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
2566				     flags);
2567		if (vs && vs->rcv == rcv) {
2568			if (!atomic_add_unless(&vs->refcnt, 1, 0))
2569				vs = ERR_PTR(-EBUSY);
2570			spin_unlock(&vn->sock_lock);
2571			return vs;
2572		}
2573		spin_unlock(&vn->sock_lock);
2574	}
2575
2576	return vxlan_socket_create(net, port, rcv, data, flags);
2577}
2578EXPORT_SYMBOL_GPL(vxlan_sock_add);
2579
2580static int vxlan_newlink(struct net *src_net, struct net_device *dev,
2581			 struct nlattr *tb[], struct nlattr *data[])
2582{
2583	struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
2584	struct vxlan_dev *vxlan = netdev_priv(dev), *tmp;
2585	struct vxlan_rdst *dst = &vxlan->default_dst;
2586	__u32 vni;
2587	int err;
2588	bool use_ipv6 = false;
2589
2590	if (!data[IFLA_VXLAN_ID])
2591		return -EINVAL;
2592
2593	vxlan->net = src_net;
2594
2595	vni = nla_get_u32(data[IFLA_VXLAN_ID]);
2596	dst->remote_vni = vni;
2597
2598	/* Unless IPv6 is explicitly requested, assume IPv4 */
2599	dst->remote_ip.sa.sa_family = AF_INET;
2600	if (data[IFLA_VXLAN_GROUP]) {
2601		dst->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
2602	} else if (data[IFLA_VXLAN_GROUP6]) {
2603		if (!IS_ENABLED(CONFIG_IPV6))
2604			return -EPFNOSUPPORT;
2605
2606		dst->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
2607		dst->remote_ip.sa.sa_family = AF_INET6;
2608		use_ipv6 = true;
2609	}
2610
2611	if (data[IFLA_VXLAN_LOCAL]) {
2612		vxlan->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
2613		vxlan->saddr.sa.sa_family = AF_INET;
2614	} else if (data[IFLA_VXLAN_LOCAL6]) {
2615		if (!IS_ENABLED(CONFIG_IPV6))
2616			return -EPFNOSUPPORT;
2617
2618		/* TODO: respect scope id */
2619		vxlan->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
2620		vxlan->saddr.sa.sa_family = AF_INET6;
2621		use_ipv6 = true;
2622	}
2623
2624	if (data[IFLA_VXLAN_LINK] &&
2625	    (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) {
2626		struct net_device *lowerdev
2627			 = __dev_get_by_index(src_net, dst->remote_ifindex);
2628
2629		if (!lowerdev) {
2630			pr_info("ifindex %d does not exist\n", dst->remote_ifindex);
2631			return -ENODEV;
2632		}
2633
2634#if IS_ENABLED(CONFIG_IPV6)
2635		if (use_ipv6) {
2636			struct inet6_dev *idev = __in6_dev_get(lowerdev);
2637			if (idev && idev->cnf.disable_ipv6) {
2638				pr_info("IPv6 is disabled via sysctl\n");
2639				return -EPERM;
2640			}
2641			vxlan->flags |= VXLAN_F_IPV6;
2642		}
2643#endif
2644
2645		if (!tb[IFLA_MTU])
2646			dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
2647
2648		dev->needed_headroom = lowerdev->hard_header_len +
2649				       (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
2650	} else if (use_ipv6)
2651		vxlan->flags |= VXLAN_F_IPV6;
2652
2653	if (data[IFLA_VXLAN_TOS])
2654		vxlan->tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
2655
2656	if (data[IFLA_VXLAN_TTL])
2657		vxlan->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
2658
2659	if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
2660		vxlan->flags |= VXLAN_F_LEARN;
2661
2662	if (data[IFLA_VXLAN_AGEING])
2663		vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
2664	else
2665		vxlan->age_interval = FDB_AGE_DEFAULT;
2666
2667	if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY]))
2668		vxlan->flags |= VXLAN_F_PROXY;
2669
2670	if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC]))
2671		vxlan->flags |= VXLAN_F_RSC;
2672
2673	if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS]))
2674		vxlan->flags |= VXLAN_F_L2MISS;
2675
2676	if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS]))
2677		vxlan->flags |= VXLAN_F_L3MISS;
2678
2679	if (data[IFLA_VXLAN_LIMIT])
2680		vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
2681
2682	if (data[IFLA_VXLAN_PORT_RANGE]) {
2683		const struct ifla_vxlan_port_range *p
2684			= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
2685		vxlan->port_min = ntohs(p->low);
2686		vxlan->port_max = ntohs(p->high);
2687	}
2688
2689	if (data[IFLA_VXLAN_PORT])
2690		vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
2691
2692	if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
2693		vxlan->flags |= VXLAN_F_UDP_CSUM;
2694
2695	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
2696	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
2697		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
2698
2699	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
2700	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
2701		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
2702
2703	if (data[IFLA_VXLAN_REMCSUM_TX] &&
2704	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
2705		vxlan->flags |= VXLAN_F_REMCSUM_TX;
2706
2707	if (data[IFLA_VXLAN_REMCSUM_RX] &&
2708	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
2709		vxlan->flags |= VXLAN_F_REMCSUM_RX;
2710
2711	if (data[IFLA_VXLAN_GBP])
2712		vxlan->flags |= VXLAN_F_GBP;
2713
2714	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
2715		vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
2716
2717	list_for_each_entry(tmp, &vn->vxlan_list, next) {
2718		if (tmp->default_dst.remote_vni == vni &&
2719		    (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 ||
2720		     tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
2721		    tmp->dst_port == vxlan->dst_port &&
2722		    (tmp->flags & VXLAN_F_RCV_FLAGS) ==
2723		    (vxlan->flags & VXLAN_F_RCV_FLAGS))
2724		return -EEXIST;
2725	}
2726
2727	dev->ethtool_ops = &vxlan_ethtool_ops;
2728
2729	/* create an fdb entry for a valid default destination */
2730	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
2731		err = vxlan_fdb_create(vxlan, all_zeros_mac,
2732				       &vxlan->default_dst.remote_ip,
2733				       NUD_REACHABLE|NUD_PERMANENT,
2734				       NLM_F_EXCL|NLM_F_CREATE,
2735				       vxlan->dst_port,
2736				       vxlan->default_dst.remote_vni,
2737				       vxlan->default_dst.remote_ifindex,
2738				       NTF_SELF);
2739		if (err)
2740			return err;
2741	}
2742
2743	err = register_netdevice(dev);
2744	if (err) {
2745		vxlan_fdb_delete_default(vxlan);
2746		return err;
2747	}
2748
2749	list_add(&vxlan->next, &vn->vxlan_list);
2750
2751	return 0;
2752}
2753
2754static void vxlan_dellink(struct net_device *dev, struct list_head *head)
2755{
2756	struct vxlan_dev *vxlan = netdev_priv(dev);
2757	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2758
2759	spin_lock(&vn->sock_lock);
2760	if (!hlist_unhashed(&vxlan->hlist))
2761		hlist_del_rcu(&vxlan->hlist);
2762	spin_unlock(&vn->sock_lock);
2763
2764	list_del(&vxlan->next);
2765	unregister_netdevice_queue(dev, head);
2766}
2767
2768static size_t vxlan_get_size(const struct net_device *dev)
2769{
2770
2771	return nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_ID */
2772		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
2773		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LINK */
2774		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
2775		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TTL */
2776		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TOS */
2777		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_LEARNING */
2778		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_PROXY */
2779		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_RSC */
2780		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_L2MISS */
2781		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_L3MISS */
2782		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_AGEING */
2783		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LIMIT */
2784		nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
2785		nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
2786		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
2787		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
2788		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
2789		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
2790		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
2791		0;
2792}
2793
2794static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
2795{
2796	const struct vxlan_dev *vxlan = netdev_priv(dev);
2797	const struct vxlan_rdst *dst = &vxlan->default_dst;
2798	struct ifla_vxlan_port_range ports = {
2799		.low =  htons(vxlan->port_min),
2800		.high = htons(vxlan->port_max),
2801	};
2802
2803	if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni))
2804		goto nla_put_failure;
2805
2806	if (!vxlan_addr_any(&dst->remote_ip)) {
2807		if (dst->remote_ip.sa.sa_family == AF_INET) {
2808			if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
2809					    dst->remote_ip.sin.sin_addr.s_addr))
2810				goto nla_put_failure;
2811#if IS_ENABLED(CONFIG_IPV6)
2812		} else {
2813			if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
2814					     &dst->remote_ip.sin6.sin6_addr))
2815				goto nla_put_failure;
2816#endif
2817		}
2818	}
2819
2820	if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
2821		goto nla_put_failure;
2822
2823	if (!vxlan_addr_any(&vxlan->saddr)) {
2824		if (vxlan->saddr.sa.sa_family == AF_INET) {
2825			if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
2826					    vxlan->saddr.sin.sin_addr.s_addr))
2827				goto nla_put_failure;
2828#if IS_ENABLED(CONFIG_IPV6)
2829		} else {
2830			if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
2831					     &vxlan->saddr.sin6.sin6_addr))
2832				goto nla_put_failure;
2833#endif
2834		}
2835	}
2836
2837	if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) ||
2838	    nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) ||
2839	    nla_put_u8(skb, IFLA_VXLAN_LEARNING,
2840			!!(vxlan->flags & VXLAN_F_LEARN)) ||
2841	    nla_put_u8(skb, IFLA_VXLAN_PROXY,
2842			!!(vxlan->flags & VXLAN_F_PROXY)) ||
2843	    nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) ||
2844	    nla_put_u8(skb, IFLA_VXLAN_L2MISS,
2845			!!(vxlan->flags & VXLAN_F_L2MISS)) ||
2846	    nla_put_u8(skb, IFLA_VXLAN_L3MISS,
2847			!!(vxlan->flags & VXLAN_F_L3MISS)) ||
2848	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
2849	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) ||
2850	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) ||
2851	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
2852			!!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
2853	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
2854			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
2855	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
2856			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
2857	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
2858			!!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
2859	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
2860			!!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
2861		goto nla_put_failure;
2862
2863	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
2864		goto nla_put_failure;
2865
2866	if (vxlan->flags & VXLAN_F_GBP &&
2867	    nla_put_flag(skb, IFLA_VXLAN_GBP))
2868		goto nla_put_failure;
2869
2870	if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
2871	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
2872		goto nla_put_failure;
2873
2874	return 0;
2875
2876nla_put_failure:
2877	return -EMSGSIZE;
2878}
2879
2880static struct net *vxlan_get_link_net(const struct net_device *dev)
2881{
2882	struct vxlan_dev *vxlan = netdev_priv(dev);
2883
2884	return vxlan->net;
2885}
2886
2887static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
2888	.kind		= "vxlan",
2889	.maxtype	= IFLA_VXLAN_MAX,
2890	.policy		= vxlan_policy,
2891	.priv_size	= sizeof(struct vxlan_dev),
2892	.setup		= vxlan_setup,
2893	.validate	= vxlan_validate,
2894	.newlink	= vxlan_newlink,
2895	.dellink	= vxlan_dellink,
2896	.get_size	= vxlan_get_size,
2897	.fill_info	= vxlan_fill_info,
2898	.get_link_net	= vxlan_get_link_net,
2899};
2900
2901static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
2902					     struct net_device *dev)
2903{
2904	struct vxlan_dev *vxlan, *next;
2905	LIST_HEAD(list_kill);
2906
2907	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
2908		struct vxlan_rdst *dst = &vxlan->default_dst;
2909
2910		/* In case we created vxlan device with carrier
2911		 * and we loose the carrier due to module unload
2912		 * we also need to remove vxlan device. In other
2913		 * cases, it's not necessary and remote_ifindex
2914		 * is 0 here, so no matches.
2915		 */
2916		if (dst->remote_ifindex == dev->ifindex)
2917			vxlan_dellink(vxlan->dev, &list_kill);
2918	}
2919
2920	unregister_netdevice_many(&list_kill);
2921}
2922
2923static int vxlan_lowerdev_event(struct notifier_block *unused,
2924				unsigned long event, void *ptr)
2925{
2926	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2927	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
2928
2929	if (event == NETDEV_UNREGISTER)
2930		vxlan_handle_lowerdev_unregister(vn, dev);
2931
2932	return NOTIFY_DONE;
2933}
2934
2935static struct notifier_block vxlan_notifier_block __read_mostly = {
2936	.notifier_call = vxlan_lowerdev_event,
2937};
2938
2939static __net_init int vxlan_init_net(struct net *net)
2940{
2941	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2942	unsigned int h;
2943
2944	INIT_LIST_HEAD(&vn->vxlan_list);
2945	spin_lock_init(&vn->sock_lock);
2946
2947	for (h = 0; h < PORT_HASH_SIZE; ++h)
2948		INIT_HLIST_HEAD(&vn->sock_list[h]);
2949
2950	return 0;
2951}
2952
2953static void __net_exit vxlan_exit_net(struct net *net)
2954{
2955	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2956	struct vxlan_dev *vxlan, *next;
2957	struct net_device *dev, *aux;
2958	LIST_HEAD(list);
2959
2960	rtnl_lock();
2961	for_each_netdev_safe(net, dev, aux)
2962		if (dev->rtnl_link_ops == &vxlan_link_ops)
2963			unregister_netdevice_queue(dev, &list);
2964
2965	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
2966		/* If vxlan->dev is in the same netns, it has already been added
2967		 * to the list by the previous loop.
2968		 */
2969		if (!net_eq(dev_net(vxlan->dev), net))
2970			unregister_netdevice_queue(vxlan->dev, &list);
2971	}
2972
2973	unregister_netdevice_many(&list);
2974	rtnl_unlock();
2975}
2976
2977static struct pernet_operations vxlan_net_ops = {
2978	.init = vxlan_init_net,
2979	.exit = vxlan_exit_net,
2980	.id   = &vxlan_net_id,
2981	.size = sizeof(struct vxlan_net),
2982};
2983
2984static int __init vxlan_init_module(void)
2985{
2986	int rc;
2987
2988	vxlan_wq = alloc_workqueue("vxlan", 0, 0);
2989	if (!vxlan_wq)
2990		return -ENOMEM;
2991
2992	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
2993
2994	rc = register_pernet_subsys(&vxlan_net_ops);
2995	if (rc)
2996		goto out1;
2997
2998	rc = register_netdevice_notifier(&vxlan_notifier_block);
2999	if (rc)
3000		goto out2;
3001
3002	rc = rtnl_link_register(&vxlan_link_ops);
3003	if (rc)
3004		goto out3;
3005
3006	return 0;
3007out3:
3008	unregister_netdevice_notifier(&vxlan_notifier_block);
3009out2:
3010	unregister_pernet_subsys(&vxlan_net_ops);
3011out1:
3012	destroy_workqueue(vxlan_wq);
3013	return rc;
3014}
3015late_initcall(vxlan_init_module);
3016
3017static void __exit vxlan_cleanup_module(void)
3018{
3019	rtnl_link_unregister(&vxlan_link_ops);
3020	unregister_netdevice_notifier(&vxlan_notifier_block);
3021	destroy_workqueue(vxlan_wq);
3022	unregister_pernet_subsys(&vxlan_net_ops);
3023	/* rcu_barrier() is called by netns */
3024}
3025module_exit(vxlan_cleanup_module);
3026
3027MODULE_LICENSE("GPL");
3028MODULE_VERSION(VXLAN_VERSION);
3029MODULE_AUTHOR("Stephen Hemminger <stephen@networkplumber.org>");
3030MODULE_DESCRIPTION("Driver for VXLAN encapsulated traffic");
3031MODULE_ALIAS_RTNL_LINK("vxlan");
3032