This source file includes the following definitions:
- ip_tunnel_hash
- ip_tunnel_key_match
- ip_tunnel_lookup
- ip_bucket
- ip_tunnel_add
- ip_tunnel_del
- ip_tunnel_find
- __ip_tunnel_create
- ip_tunnel_bind_dev
- ip_tunnel_create
- ip_tunnel_rcv
- ip_tunnel_encap_add_ops
- ip_tunnel_encap_del_ops
- ip_tunnel_encap_setup
- tnl_update_pmtu
- ip_md_tunnel_xmit
- ip_tunnel_xmit
- ip_tunnel_update
- ip_tunnel_ioctl
- __ip_tunnel_change_mtu
- ip_tunnel_change_mtu
- ip_tunnel_dev_free
- ip_tunnel_dellink
- ip_tunnel_get_link_net
- ip_tunnel_get_iflink
- ip_tunnel_init_net
- ip_tunnel_destroy
- ip_tunnel_delete_nets
- ip_tunnel_newlink
- ip_tunnel_changelink
- ip_tunnel_init
- ip_tunnel_uninit
- ip_tunnel_setup
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options
 *
 * Tunnel hash table:
 * We require an exact key match: if a key is present in the packet
 * it will match only a tunnel with the same key; if it is not
 * present, it will match only a keyless tunnel.
 *
 * All keyless packets, if not matched against a configured keyless
 * tunnel, will match the fallback tunnel.
 * Given src, dst and key, find the appropriate tunnel for input.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
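
/* Usage sketch (illustrative, not part of the original file): receive
 * handlers run under RCU and resolve the tunnel device before handing the
 * packet to ip_tunnel_rcv(). Modeled loosely on a keyless protocol such as
 * ipip; "foo_net_id" and "foo_tpi" are hypothetical driver symbols.
 *
 *	static const struct tnl_ptk_info foo_tpi = {
 *		.proto = htons(ETH_P_IP),
 *	};
 *
 *	static int foo_rcv(struct sk_buff *skb)
 *	{
 *		struct net *net = dev_net(skb->dev);
 *		struct ip_tunnel_net *itn = net_generic(net, foo_net_id);
 *		const struct iphdr *iph = ip_hdr(skb);
 *		struct ip_tunnel *tunnel;
 *
 *		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
 *					  TUNNEL_NO_KEY,
 *					  iph->saddr, iph->daddr, 0);
 *		if (tunnel)
 *			return ip_tunnel_rcv(tunnel, skb, &foo_tpi,
 *					     NULL, true);
 *		return -1;
 *	}
 */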

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}
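
/* Worked example (not from the original file): for a plain GRE device over
 * a 1500-byte Ethernet underlay, tunnel->hlen is 4 (base GRE header), so
 * t_hlen = 4 + 20 = 24; with hard_header_len == 0 this yields
 * mtu = 1500 - 24 = 1476, the familiar default GRE tunnel MTU.
 */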

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
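
/* Usage sketch (illustrative, not part of the original file): for a
 * collect_md (external-mode) device, the caller builds a metadata dst from
 * the outer header and passes it as @tun_dst, so the decapsulated skb
 * carries the tunnel key up the stack:
 *
 *	struct metadata_dst *tun_dst = NULL;
 *
 *	if (tunnel->collect_md) {
 *		tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
 *		if (!tun_dst)
 *			return 0;
 *	}
 *	return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 */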

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
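
/* Usage sketch (illustrative, not part of the original file): an
 * encapsulation module claims one slot of the iptun_encaps[] array at load
 * time and releases it on unload, in the way the FOU module occupies
 * TUNNEL_ENCAP_FOU. The callback names here are hypothetical.
 *
 *	static const struct ip_tunnel_encap_ops foo_encap_ops = {
 *		.encap_hlen	= foo_encap_hlen,
 *		.build_header	= foo_build_header,
 *	};
 *
 *	err = ip_tunnel_encap_add_ops(&foo_encap_ops, TUNNEL_ENCAP_FOU);
 *	...
 *	ip_tunnel_encap_del_ops(&foo_encap_ops, TUNNEL_ENCAP_FOU);
 */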

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph,
			   int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel_hlen;
	else
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	if (!df && skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;
	struct rtable *rt = NULL;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
			    0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
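
/* Usage sketch (illustrative, not part of the original file): a driver's
 * ndo_start_xmit typically takes the metadata path for collect_md devices
 * and the classic path otherwise; modeled loosely on the ipip driver, with
 * error handling elided.
 *
 *	static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
 *					  struct net_device *dev)
 *	{
 *		struct ip_tunnel *tunnel = netdev_priv(dev);
 *		const struct iphdr *tiph = &tunnel->parms.iph;
 *
 *		if (tunnel->collect_md)
 *			ip_md_tunnel_xmit(skb, dev, IPPROTO_IPIP, 0);
 *		else
 *			ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
 *		return NETDEV_TX_OK;
 *	}
 */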

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
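
/* Usage sketch (illustrative, not part of the original file): drivers wrap
 * ip_tunnel_ioctl() behind ndo_do_ioctl, copying the parameter block from
 * and back to user space (sketched against the classic ifreq-based
 * interface):
 *
 *	static int foo_netdev_ioctl(struct net_device *dev,
 *				    struct ifreq *ifr, int cmd)
 *	{
 *		struct ip_tunnel_parm p;
 *		int err;
 *
 *		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 *			return -EFAULT;
 *		err = ip_tunnel_ioctl(dev, &p, cmd);
 *		if (err)
 *			return err;
 *		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 *			return -EFAULT;
 *		return 0;
 *	}
 */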

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
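
/* Usage sketch (illustrative, not part of the original file): the two
 * helpers above are usually wired into a driver's pernet_operations;
 * modeled on the ipip driver, with "foo" placeholders for the driver's
 * own symbols.
 *
 *	static unsigned int foo_net_id __read_mostly;
 *
 *	static int __net_init foo_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, foo_net_id,
 *					  &foo_link_ops, "foo0");
 *	}
 *
 *	static void __net_exit foo_exit_batch_net(struct list_head *list_net)
 *	{
 *		ip_tunnel_delete_nets(list_net, foo_net_id, &foo_link_ops);
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init		= foo_init_net,
 *		.exit_batch	= foo_exit_batch_net,
 *		.id		= &foo_net_id,
 *		.size		= sizeof(struct ip_tunnel_net),
 *	};
 */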

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(max - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
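
/* Usage sketch (illustrative, not part of the original file): an
 * rtnl_link_ops ->newlink callback parses its netlink attributes into an
 * ip_tunnel_parm and defers the rest to ip_tunnel_newlink().
 * "foo_netlink_parms" is a hypothetical attribute parser.
 *
 *	static int foo_newlink(struct net *src_net, struct net_device *dev,
 *			       struct nlattr *tb[], struct nlattr *data[],
 *			       struct netlink_ext_ack *extack)
 *	{
 *		struct ip_tunnel_parm p;
 *		__u32 fwmark = 0;
 *
 *		foo_netlink_parms(data, &p, &fwmark);
 *		return ip_tunnel_newlink(dev, tb, &p, fwmark);
 *	}
 */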

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
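
/* Usage sketch (illustrative, not part of the original file): a driver's
 * rtnl ->setup callback fixes the link-level identity of the device and
 * records its pernet id via ip_tunnel_setup(); modeled loosely on the
 * ipip driver.
 *
 *	static void foo_tunnel_setup(struct net_device *dev)
 *	{
 *		dev->netdev_ops = &foo_netdev_ops;
 *		dev->type = ARPHRD_TUNNEL;
 *		dev->flags = IFF_NOARP;
 *		dev->addr_len = 4;
 *		ip_tunnel_setup(dev, foo_net_id);
 *	}
 */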

MODULE_LICENSE("GPL");