1/*
2 *	Linux NET3:	IP/IP protocol decoder.
3 *
4 *	Authors:
5 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6 *
7 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph
 *					to keep ip_forward happy).
12 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
14 *              David Woodhouse :       Perform some basic ICMP handling.
15 *                                      IPIP Routing without decapsulation.
16 *              Carlos Picoto   :       GRE over IP support
17 *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 *					I do not want to merge them together.
19 *
20 *	This program is free software; you can redistribute it and/or
21 *	modify it under the terms of the GNU General Public License
22 *	as published by the Free Software Foundation; either version
23 *	2 of the License, or (at your option) any later version.
24 *
25 */
26
27/* tunnel.c: an IP tunnel driver
28
29	The purpose of this driver is to provide an IP tunnel through
30	which you can tunnel network traffic transparently across subnets.
31
32	This was written by looking at Nick Holloway's dummy driver
33	Thanks for the great code!
34
35		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
36
37	Minor tweaks:
38		Cleaned up the code a little and added some pre-1.3.0 tweaks.
39		dev->hard_header/hard_header_len changed to use no headers.
40		Comments/bracketing tweaked.
41		Made the tunnels use dev->name not tunnel: when error reporting.
42		Added tx_dropped stat
43
44		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
45
46	Reworked:
47		Changed to tunnel to destination gateway in addition to the
48			tunnel's pointopoint address
49		Almost completely rewritten
50		Note:  There is currently no firewall or ICMP handling done.
51
52		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
53
54*/
55
56/* Things I wish I had known when writing the tunnel driver:
57
58	When the tunnel_xmit() function is called, the skb contains the
59	packet to be sent (plus a great deal of extra info), and dev
60	contains the tunnel device that _we_ are.
61
62	When we are passed a packet, we are expected to fill in the
63	source address with our source IP address.
64
65	What is the proper way to allocate, copy and free a buffer?
66	After you allocate it, it is a "0 length" chunk of memory
67	starting at zero.  If you want to add headers to the buffer
68	later, you'll have to call "skb_reserve(skb, amount)" with
69	the amount of memory you want reserved.  Then, you call
70	"skb_put(skb, amount)" with the amount of space you want in
71	the buffer.  skb_put() returns a pointer to the top (#0) of
72	that buffer.  skb->len is set to the amount of space you have
73	"allocated" with skb_put().  You can then write up to skb->len
74	bytes to that buffer.  If you need more, you can call skb_put()
75	again with the additional amount of space you need.  You can
76	find out how much more space you can allocate by calling
77	"skb_tailroom(skb)".
78	Now, to add header space, call "skb_push(skb, header_len)".
79	This creates space at the beginning of the buffer and returns
80	a pointer to this new space.  If later you need to strip a
81	header from a buffer, call "skb_pull(skb, header_len)".
82	skb_headroom() will return how much space is left at the top
83	of the buffer (before the main data).  Remember, this headroom
84	space must be reserved before the skb_put() function is called.
85	*/
86
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
92
93
94#include <linux/capability.h>
95#include <linux/module.h>
96#include <linux/types.h>
97#include <linux/kernel.h>
98#include <linux/slab.h>
99#include <asm/uaccess.h>
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
106#include <linux/mroute.h>
107#include <linux/init.h>
108#include <linux/netfilter_ipv4.h>
109#include <linux/if_ether.h>
110
111#include <net/sock.h>
112#include <net/ip.h>
113#include <net/icmp.h>
114#include <net/ip_tunnels.h>
115#include <net/inet_ecn.h>
116#include <net/xfrm.h>
117#include <net/net_namespace.h>
118#include <net/netns/generic.h>
119
120static bool log_ecn_error = true;
121module_param(log_ecn_error, bool, 0644);
122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
123
124static int ipip_net_id __read_mostly;
125
126static int ipip_tunnel_init(struct net_device *dev);
127static struct rtnl_link_ops ipip_link_ops __read_mostly;
128
129static int ipip_err(struct sk_buff *skb, u32 info)
130{
131
132/* All the routers (except for Linux) return only
133   8 bytes of packet payload. It means, that precise relaying of
134   ICMP in the real Internet is absolutely infeasible.
135 */
136	struct net *net = dev_net(skb->dev);
137	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
138	const struct iphdr *iph = (const struct iphdr *)skb->data;
139	struct ip_tunnel *t;
140	int err;
141	const int type = icmp_hdr(skb)->type;
142	const int code = icmp_hdr(skb)->code;
143
144	err = -ENOENT;
145	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
146			     iph->daddr, iph->saddr, 0);
147	if (!t)
148		goto out;
149
150	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
151		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
152				 t->parms.link, 0, IPPROTO_IPIP, 0);
153		err = 0;
154		goto out;
155	}
156
157	if (type == ICMP_REDIRECT) {
158		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
159			      IPPROTO_IPIP, 0);
160		err = 0;
161		goto out;
162	}
163
164	if (t->parms.iph.daddr == 0)
165		goto out;
166
167	err = 0;
168	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
169		goto out;
170
171	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
172		t->err_count++;
173	else
174		t->err_count = 1;
175	t->err_time = jiffies;
176
177out:
178	return err;
179}
180
181static const struct tnl_ptk_info tpi = {
182	/* no tunnel info required for ipip. */
183	.proto = htons(ETH_P_IP),
184};
185
186static int ipip_rcv(struct sk_buff *skb)
187{
188	struct net *net = dev_net(skb->dev);
189	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
190	struct ip_tunnel *tunnel;
191	const struct iphdr *iph;
192
193	iph = ip_hdr(skb);
194	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
195			iph->saddr, iph->daddr, 0);
196	if (tunnel) {
197		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
198			goto drop;
199		if (iptunnel_pull_header(skb, 0, tpi.proto))
200			goto drop;
201		return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
202	}
203
204	return -1;
205
206drop:
207	kfree_skb(skb);
208	return 0;
209}
210
211/*
212 *	This function assumes it is being called from dev_queue_xmit()
213 *	and that skb is filled properly by that function.
214 */
215static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
216{
217	struct ip_tunnel *tunnel = netdev_priv(dev);
218	const struct iphdr  *tiph = &tunnel->parms.iph;
219
220	if (unlikely(skb->protocol != htons(ETH_P_IP)))
221		goto tx_error;
222
223	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
224	if (IS_ERR(skb))
225		goto out;
226
227	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
228
229	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
230	return NETDEV_TX_OK;
231
232tx_error:
233	kfree_skb(skb);
234out:
235	dev->stats.tx_errors++;
236	return NETDEV_TX_OK;
237}
238
239static int
240ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
241{
242	int err = 0;
243	struct ip_tunnel_parm p;
244
245	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
246		return -EFAULT;
247
248	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
249		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
250		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
251			return -EINVAL;
252	}
253
254	p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
255	if (p.iph.ttl)
256		p.iph.frag_off |= htons(IP_DF);
257
258	err = ip_tunnel_ioctl(dev, &p, cmd);
259	if (err)
260		return err;
261
262	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
263		return -EFAULT;
264
265	return 0;
266}
267
268static const struct net_device_ops ipip_netdev_ops = {
269	.ndo_init       = ipip_tunnel_init,
270	.ndo_uninit     = ip_tunnel_uninit,
271	.ndo_start_xmit	= ipip_tunnel_xmit,
272	.ndo_do_ioctl	= ipip_tunnel_ioctl,
273	.ndo_change_mtu = ip_tunnel_change_mtu,
274	.ndo_get_stats64 = ip_tunnel_get_stats64,
275	.ndo_get_iflink = ip_tunnel_get_iflink,
276};
277
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES (NETIF_F_HW_CSUM |	\
		       NETIF_F_GSO_SOFTWARE |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_SG)
283
284static void ipip_tunnel_setup(struct net_device *dev)
285{
286	dev->netdev_ops		= &ipip_netdev_ops;
287
288	dev->type		= ARPHRD_TUNNEL;
289	dev->flags		= IFF_NOARP;
290	dev->addr_len		= 4;
291	dev->features		|= NETIF_F_LLTX;
292	netif_keep_dst(dev);
293
294	dev->features		|= IPIP_FEATURES;
295	dev->hw_features	|= IPIP_FEATURES;
296	ip_tunnel_setup(dev, ipip_net_id);
297}
298
299static int ipip_tunnel_init(struct net_device *dev)
300{
301	struct ip_tunnel *tunnel = netdev_priv(dev);
302
303	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
304	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
305
306	tunnel->tun_hlen = 0;
307	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
308	tunnel->parms.iph.protocol = IPPROTO_IPIP;
309	return ip_tunnel_init(dev);
310}
311
312static void ipip_netlink_parms(struct nlattr *data[],
313			       struct ip_tunnel_parm *parms)
314{
315	memset(parms, 0, sizeof(*parms));
316
317	parms->iph.version = 4;
318	parms->iph.protocol = IPPROTO_IPIP;
319	parms->iph.ihl = 5;
320
321	if (!data)
322		return;
323
324	if (data[IFLA_IPTUN_LINK])
325		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
326
327	if (data[IFLA_IPTUN_LOCAL])
328		parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
329
330	if (data[IFLA_IPTUN_REMOTE])
331		parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
332
333	if (data[IFLA_IPTUN_TTL]) {
334		parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
335		if (parms->iph.ttl)
336			parms->iph.frag_off = htons(IP_DF);
337	}
338
339	if (data[IFLA_IPTUN_TOS])
340		parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
341
342	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
343		parms->iph.frag_off = htons(IP_DF);
344}
345
346/* This function returns true when ENCAP attributes are present in the nl msg */
347static bool ipip_netlink_encap_parms(struct nlattr *data[],
348				     struct ip_tunnel_encap *ipencap)
349{
350	bool ret = false;
351
352	memset(ipencap, 0, sizeof(*ipencap));
353
354	if (!data)
355		return ret;
356
357	if (data[IFLA_IPTUN_ENCAP_TYPE]) {
358		ret = true;
359		ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
360	}
361
362	if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
363		ret = true;
364		ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
365	}
366
367	if (data[IFLA_IPTUN_ENCAP_SPORT]) {
368		ret = true;
369		ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
370	}
371
372	if (data[IFLA_IPTUN_ENCAP_DPORT]) {
373		ret = true;
374		ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
375	}
376
377	return ret;
378}
379
380static int ipip_newlink(struct net *src_net, struct net_device *dev,
381			struct nlattr *tb[], struct nlattr *data[])
382{
383	struct ip_tunnel_parm p;
384	struct ip_tunnel_encap ipencap;
385
386	if (ipip_netlink_encap_parms(data, &ipencap)) {
387		struct ip_tunnel *t = netdev_priv(dev);
388		int err = ip_tunnel_encap_setup(t, &ipencap);
389
390		if (err < 0)
391			return err;
392	}
393
394	ipip_netlink_parms(data, &p);
395	return ip_tunnel_newlink(dev, tb, &p);
396}
397
398static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
399			   struct nlattr *data[])
400{
401	struct ip_tunnel_parm p;
402	struct ip_tunnel_encap ipencap;
403
404	if (ipip_netlink_encap_parms(data, &ipencap)) {
405		struct ip_tunnel *t = netdev_priv(dev);
406		int err = ip_tunnel_encap_setup(t, &ipencap);
407
408		if (err < 0)
409			return err;
410	}
411
412	ipip_netlink_parms(data, &p);
413
414	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
415	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
416		return -EINVAL;
417
418	return ip_tunnel_changelink(dev, tb, &p);
419}
420
/*
 * rtnl_link_ops->get_size callback: space needed for the attributes
 * emitted by ipip_fill_info(). @dev is not consulted; the size is fixed.
 */
static size_t ipip_get_size(const struct net_device *dev)
{
	/* 4-byte payloads: IFLA_IPTUN_LINK, _LOCAL, _REMOTE */
	const size_t four_byte_attrs = 3 * nla_total_size(4);
	/* 1-byte payloads: IFLA_IPTUN_TTL, _TOS, _PMTUDISC */
	const size_t one_byte_attrs = 3 * nla_total_size(1);
	/* 2-byte payloads: IFLA_IPTUN_ENCAP_TYPE, _FLAGS, _SPORT, _DPORT */
	const size_t two_byte_attrs = 4 * nla_total_size(2);

	return four_byte_attrs + one_byte_attrs + two_byte_attrs;
}
446
447static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
448{
449	struct ip_tunnel *tunnel = netdev_priv(dev);
450	struct ip_tunnel_parm *parm = &tunnel->parms;
451
452	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
453	    nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
454	    nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
455	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
456	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
457	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
458		       !!(parm->iph.frag_off & htons(IP_DF))))
459		goto nla_put_failure;
460
461	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
462			tunnel->encap.type) ||
463	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
464			 tunnel->encap.sport) ||
465	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
466			 tunnel->encap.dport) ||
467	    nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
468			tunnel->encap.flags))
469		goto nla_put_failure;
470
471	return 0;
472
473nla_put_failure:
474	return -EMSGSIZE;
475}
476
477static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
478	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
479	[IFLA_IPTUN_LOCAL]		= { .type = NLA_U32 },
480	[IFLA_IPTUN_REMOTE]		= { .type = NLA_U32 },
481	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
482	[IFLA_IPTUN_TOS]		= { .type = NLA_U8 },
483	[IFLA_IPTUN_PMTUDISC]		= { .type = NLA_U8 },
484	[IFLA_IPTUN_ENCAP_TYPE]		= { .type = NLA_U16 },
485	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
486	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
487	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
488};
489
490static struct rtnl_link_ops ipip_link_ops __read_mostly = {
491	.kind		= "ipip",
492	.maxtype	= IFLA_IPTUN_MAX,
493	.policy		= ipip_policy,
494	.priv_size	= sizeof(struct ip_tunnel),
495	.setup		= ipip_tunnel_setup,
496	.newlink	= ipip_newlink,
497	.changelink	= ipip_changelink,
498	.dellink	= ip_tunnel_dellink,
499	.get_size	= ipip_get_size,
500	.fill_info	= ipip_fill_info,
501	.get_link_net	= ip_tunnel_get_link_net,
502};
503
504static struct xfrm_tunnel ipip_handler __read_mostly = {
505	.handler	=	ipip_rcv,
506	.err_handler	=	ipip_err,
507	.priority	=	1,
508};
509
510static int __net_init ipip_init_net(struct net *net)
511{
512	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
513}
514
515static void __net_exit ipip_exit_net(struct net *net)
516{
517	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
518	ip_tunnel_delete_net(itn, &ipip_link_ops);
519}
520
521static struct pernet_operations ipip_net_ops = {
522	.init = ipip_init_net,
523	.exit = ipip_exit_net,
524	.id   = &ipip_net_id,
525	.size = sizeof(struct ip_tunnel_net),
526};
527
528static int __init ipip_init(void)
529{
530	int err;
531
532	pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
533
534	err = register_pernet_device(&ipip_net_ops);
535	if (err < 0)
536		return err;
537	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
538	if (err < 0) {
539		pr_info("%s: can't register tunnel\n", __func__);
540		goto xfrm_tunnel_failed;
541	}
542	err = rtnl_link_register(&ipip_link_ops);
543	if (err < 0)
544		goto rtnl_link_failed;
545
546out:
547	return err;
548
549rtnl_link_failed:
550	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
551xfrm_tunnel_failed:
552	unregister_pernet_device(&ipip_net_ops);
553	goto out;
554}
555
556static void __exit ipip_fini(void)
557{
558	rtnl_link_unregister(&ipip_link_ops);
559	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
560		pr_info("%s: can't deregister tunnel\n", __func__);
561
562	unregister_pernet_device(&ipip_net_ops);
563}
564
565module_init(ipip_init);
566module_exit(ipip_fini);
567MODULE_LICENSE("GPL");
568MODULE_ALIAS_RTNL_LINK("ipip");
569MODULE_ALIAS_NETDEV("tunl0");
570