/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; it is safe because cpu migration is forbidden once we enter
   the first ndo_xmit(). We force an exit if this counter reaches
   RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least, in my
     neighbourhood) return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. In short, ttl is not
   a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it is your gated that injected the
   fatal route to the network, even if it were you who configured the
   fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
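
/* A minimal sketch of the percpu recursion guard described above; this
 * is illustrative only and intentionally not compiled: the real counter
 * lives in the core dev_queue_xmit() path, and every name below is made
 * up for the example.
 */
#if 0
#define SKETCH_RECURSION_LIMIT	8

static DEFINE_PER_CPU(unsigned int, sketch_xmit_recursion);

static int sketch_xmit(struct sk_buff *skb)
{
	/* A percpu counter is safe here: cpu migration is forbidden
	 * once we are inside ndo_start_xmit().
	 */
	if (__this_cpu_read(sketch_xmit_recursion) > SKETCH_RECURSION_LIMIT) {
		kfree_skb(skb);		/* break the local dead loop */
		return NET_XMIT_DROP;
	}
	__this_cpu_inc(sketch_xmit_recursion);
	/* ... hand the skb to the underlying device here ... */
	__this_cpu_dec(sketch_xmit_recursion);
	return NET_XMIT_SUCCESS;
}
#endif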

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;

static int ip_gre_calc_hlen(__be16 o_flags)
{
	int addend = 4;

	if (o_flags & TUNNEL_CSUM)
		addend += 4;
	if (o_flags & TUNNEL_KEY)
		addend += 4;
	if (o_flags & TUNNEL_SEQ)
		addend += 4;
	return addend;
}
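
/* Example: with TUNNEL_CSUM and TUNNEL_KEY set (and TUNNEL_SEQ clear),
 * ip_gre_calc_hlen() returns 4 + 4 + 4 = 12: the 4-byte base header,
 * 4 bytes of checksum + reserved, and the 4-byte key.
 */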

static __be16 gre_flags_to_tnl_flags(__be16 flags)
{
	__be16 tflags = 0;

	if (flags & GRE_CSUM)
		tflags |= TUNNEL_CSUM;
	if (flags & GRE_ROUTING)
		tflags |= TUNNEL_ROUTING;
	if (flags & GRE_KEY)
		tflags |= TUNNEL_KEY;
	if (flags & GRE_SEQ)
		tflags |= TUNNEL_SEQ;
	if (flags & GRE_STRICT)
		tflags |= TUNNEL_STRICT;
	if (flags & GRE_REC)
		tflags |= TUNNEL_REC;
	if (flags & GRE_VERSION)
		tflags |= TUNNEL_VERSION;

	return tflags;
}

static __be16 tnl_flags_to_gre_flags(__be16 tflags)
{
	__be16 flags = 0;

	if (tflags & TUNNEL_CSUM)
		flags |= GRE_CSUM;
	if (tflags & TUNNEL_ROUTING)
		flags |= GRE_ROUTING;
	if (tflags & TUNNEL_KEY)
		flags |= GRE_KEY;
	if (tflags & TUNNEL_SEQ)
		flags |= GRE_SEQ;
	if (tflags & TUNNEL_STRICT)
		flags |= GRE_STRICT;
	if (tflags & TUNNEL_REC)
		flags |= GRE_REC;
	if (tflags & TUNNEL_VERSION)
		flags |= GRE_VERSION;

	return flags;
}

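/* GRE wire format handled below (RFC 2784/2890), for reference:
 *
 *	bytes 0-1: flags + version	(struct gre_base_hdr)
 *	bytes 2-3: protocol type	(struct gre_base_hdr)
 *	+4 bytes:  checksum + reserved	if GRE_CSUM is set
 *	+4 bytes:  key			if GRE_KEY is set
 *	+4 bytes:  sequence number	if GRE_SEQ is set
 */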
/* Fills in tpi and returns header length to be pulled. */
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
			    bool *csum_err)
{
	const struct gre_base_hdr *greh;
	__be32 *options;
	int hdr_len;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
		return -EINVAL;

	greh = (struct gre_base_hdr *)skb_transport_header(skb);
	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
		return -EINVAL;

	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
	hdr_len = ip_gre_calc_hlen(tpi->flags);

	if (!pskb_may_pull(skb, hdr_len))
		return -EINVAL;

	greh = (struct gre_base_hdr *)skb_transport_header(skb);
	tpi->proto = greh->protocol;

	options = (__be32 *)(greh + 1);
	if (greh->flags & GRE_CSUM) {
		if (skb_checksum_simple_validate(skb)) {
			*csum_err = true;
			return -EINVAL;
		}

		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
					 null_compute_pseudo);
		options++;
	}

	if (greh->flags & GRE_KEY) {
		tpi->key = *options;
		options++;
	} else {
		tpi->key = 0;
	}
	if (unlikely(greh->flags & GRE_SEQ)) {
		tpi->seq = *options;
		options++;
	} else {
		tpi->seq = 0;
	}
	/* WCCP version 1 and 2 protocol decoding.
	 * - Change the protocol to IP.
	 * - When dealing with WCCPv2, skip the extra 4 bytes in the
	 *   GRE header.
	 */
	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
		tpi->proto = htons(ETH_P_IP);
		if ((*(u8 *)options & 0xF0) != 0x40) {
			hdr_len += 4;
			if (!pskb_may_pull(skb, hdr_len))
				return -EINVAL;
		}
	}
	return hdr_len;
}

static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{
	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee,
	   so what the hell makes these idiots break the standards
	   established by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* See the comment at the top of ipgre_err() for why precise
	 * relaying of ICMP errors is infeasible here.
	 */
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
		if (!csum_err)	/* csum errors are ignored; bail otherwise */
			return;
	}

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static __be64 key_to_tunnel_id(__be32 key)
{
#ifdef __BIG_ENDIAN
	return (__force __be64)((__force u32)key);
#else
	return (__force __be64)((__force u64)key << 32);
#endif
}

/* Returns the least-significant 32 bits of a __be64. */
static __be32 tunnel_id_to_key(__be64 x)
{
#ifdef __BIG_ENDIAN
	return (__force __be32)x;
#else
	return (__force __be32)((__force u64)x >> 32);
#endif
}
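
/* For example, key_to_tunnel_id(htonl(42)) yields cpu_to_be64(42) on
 * both big- and little-endian hosts; the two #ifdef branches above
 * differ only because the 64-bit value is stored in host byte order
 * internally.
 */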

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		skb_pop_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = parse_gre_header(skb, &tpi, &csum_err);
	if (hdr_len < 0)
		goto drop;
	if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0)
		goto drop;

	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
		return 0;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
			 __be16 proto, __be32 key, __be32 seq)
{
	struct gre_base_hdr *greh;

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	greh = (struct gre_base_hdr *)skb->data;
	greh->flags = tnl_flags_to_gre_flags(flags);
	greh->protocol = proto;

	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);

		if (flags & TUNNEL_SEQ) {
			*ptr = seq;
			ptr--;
		}
		if (flags & TUNNEL_KEY) {
			*ptr = key;
			ptr--;
		}
		if (flags & TUNNEL_CSUM &&
		    !(skb_shinfo(skb)->gso_type &
		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
			*ptr = 0;
			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
								 skb->len, 0));
		}
	}
}
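
/* Note the ordering that falls out of walking ptr backwards from the
 * end of the header in build_header(): the checksum word (if any) lands
 * right after the base header, then the key, then the sequence number,
 * matching the GRE wire format. E.g. with TUNNEL_KEY | TUNNEL_SEQ and
 * hdr_len == 12:
 *
 *	base(4) | key(4) | seq(4)
 */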

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));

	skb_set_inner_protocol(skb, proto);
	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
					   bool csum)
{
	return iptunnel_handle_offloads(skb, csum,
					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct flowi4 fl;
	struct rtable *rt;
	int min_headroom;
	int tunnel_hlen;
	__be16 df, flags;
	int err;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	rt = gre_get_rt(skb, dev, &fl, key);
	if (IS_ERR(rt))
		goto err_free_skb;

	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}

	/* Push tunnel header. */
	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
	if (IS_ERR(skb)) {
		skb = NULL;
		goto err_free_rt;
	}

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
		     tunnel_id_to_key(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
	err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
			    key->u.ipv4.dst, IPPROTO_GRE,
			    key->tos, key->ttl, df, false);
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to the GRE header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing virtual multiprotocol broadcast "LAN"s
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported. */
		dev->features    |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		/* Can use a lockless transmit, unless we generate
		 * output sequences.
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->collect_md = true;
	}
}

/* This function returns true when ENCAP attributes are present in the
 * netlink message.
 */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops		= &gre_tap_netdev_ops;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(dev, data, tb, &p);
	return ip_tunnel_newlink(dev, tb, &p);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(dev, data, tb, &p);
	return ip_tunnel_changelink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL);
	if (err < 0)
		goto out;

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	return dev;
out:
	free_netdev(dev);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	return 0;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");