1/*
2 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
3 * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation (or any later at your option).
8 */
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/kernel.h>
12#include <linux/rculist.h>
13#include <linux/rculist_nulls.h>
14#include <linux/types.h>
15#include <linux/timer.h>
16#include <linux/security.h>
17#include <linux/skbuff.h>
18#include <linux/errno.h>
19#include <linux/netlink.h>
20#include <linux/spinlock.h>
21#include <linux/interrupt.h>
22#include <linux/slab.h>
23
24#include <linux/netfilter.h>
25#include <net/netlink.h>
26#include <net/sock.h>
27#include <net/netfilter/nf_conntrack.h>
28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/nf_conntrack_l3proto.h>
30#include <net/netfilter/nf_conntrack_l4proto.h>
31#include <net/netfilter/nf_conntrack_tuple.h>
32#include <net/netfilter/nf_conntrack_timeout.h>
33
34#include <linux/netfilter/nfnetlink.h>
35#include <linux/netfilter/nfnetlink_cttimeout.h>
36
37MODULE_LICENSE("GPL");
38MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
39MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
40
41static LIST_HEAD(cttimeout_list);
42
43static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
44	[CTA_TIMEOUT_NAME]	= { .type = NLA_NUL_STRING,
45				    .len  = CTNL_TIMEOUT_NAME_MAX - 1},
46	[CTA_TIMEOUT_L3PROTO]	= { .type = NLA_U16 },
47	[CTA_TIMEOUT_L4PROTO]	= { .type = NLA_U8 },
48	[CTA_TIMEOUT_DATA]	= { .type = NLA_NESTED },
49};
50
51static int
52ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
53			  struct net *net, const struct nlattr *attr)
54{
55	int ret = 0;
56
57	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
58		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
59
60		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
61				       attr, l4proto->ctnl_timeout.nla_policy);
62		if (ret < 0)
63			return ret;
64
65		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
66	}
67	return ret;
68}
69
70static int
71cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
72		      const struct nlmsghdr *nlh,
73		      const struct nlattr * const cda[])
74{
75	__u16 l3num;
76	__u8 l4num;
77	struct nf_conntrack_l4proto *l4proto;
78	struct ctnl_timeout *timeout, *matching = NULL;
79	struct net *net = sock_net(skb->sk);
80	char *name;
81	int ret;
82
83	if (!cda[CTA_TIMEOUT_NAME] ||
84	    !cda[CTA_TIMEOUT_L3PROTO] ||
85	    !cda[CTA_TIMEOUT_L4PROTO] ||
86	    !cda[CTA_TIMEOUT_DATA])
87		return -EINVAL;
88
89	name = nla_data(cda[CTA_TIMEOUT_NAME]);
90	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
91	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
92
93	list_for_each_entry(timeout, &cttimeout_list, head) {
94		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
95			continue;
96
97		if (nlh->nlmsg_flags & NLM_F_EXCL)
98			return -EEXIST;
99
100		matching = timeout;
101		break;
102	}
103
104	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
105
106	/* This protocol is not supportted, skip. */
107	if (l4proto->l4proto != l4num) {
108		ret = -EOPNOTSUPP;
109		goto err_proto_put;
110	}
111
112	if (matching) {
113		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
114			/* You cannot replace one timeout policy by another of
115			 * different kind, sorry.
116			 */
117			if (matching->l3num != l3num ||
118			    matching->l4proto->l4proto != l4num) {
119				ret = -EINVAL;
120				goto err_proto_put;
121			}
122
123			ret = ctnl_timeout_parse_policy(&matching->data,
124							l4proto, net,
125							cda[CTA_TIMEOUT_DATA]);
126			return ret;
127		}
128		ret = -EBUSY;
129		goto err_proto_put;
130	}
131
132	timeout = kzalloc(sizeof(struct ctnl_timeout) +
133			  l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
134	if (timeout == NULL) {
135		ret = -ENOMEM;
136		goto err_proto_put;
137	}
138
139	ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net,
140					cda[CTA_TIMEOUT_DATA]);
141	if (ret < 0)
142		goto err;
143
144	strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
145	timeout->l3num = l3num;
146	timeout->l4proto = l4proto;
147	atomic_set(&timeout->refcnt, 1);
148	list_add_tail_rcu(&timeout->head, &cttimeout_list);
149
150	return 0;
151err:
152	kfree(timeout);
153err_proto_put:
154	nf_ct_l4proto_put(l4proto);
155	return ret;
156}
157
158static int
159ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
160		       int event, struct ctnl_timeout *timeout)
161{
162	struct nlmsghdr *nlh;
163	struct nfgenmsg *nfmsg;
164	unsigned int flags = portid ? NLM_F_MULTI : 0;
165	struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
166
167	event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
168	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
169	if (nlh == NULL)
170		goto nlmsg_failure;
171
172	nfmsg = nlmsg_data(nlh);
173	nfmsg->nfgen_family = AF_UNSPEC;
174	nfmsg->version = NFNETLINK_V0;
175	nfmsg->res_id = 0;
176
177	if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
178	    nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
179	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
180	    nla_put_be32(skb, CTA_TIMEOUT_USE,
181			 htonl(atomic_read(&timeout->refcnt))))
182		goto nla_put_failure;
183
184	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
185		struct nlattr *nest_parms;
186		int ret;
187
188		nest_parms = nla_nest_start(skb,
189					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
190		if (!nest_parms)
191			goto nla_put_failure;
192
193		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
194		if (ret < 0)
195			goto nla_put_failure;
196
197		nla_nest_end(skb, nest_parms);
198	}
199
200	nlmsg_end(skb, nlh);
201	return skb->len;
202
203nlmsg_failure:
204nla_put_failure:
205	nlmsg_cancel(skb, nlh);
206	return -1;
207}
208
209static int
210ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
211{
212	struct ctnl_timeout *cur, *last;
213
214	if (cb->args[2])
215		return 0;
216
217	last = (struct ctnl_timeout *)cb->args[1];
218	if (cb->args[1])
219		cb->args[1] = 0;
220
221	rcu_read_lock();
222	list_for_each_entry_rcu(cur, &cttimeout_list, head) {
223		if (last) {
224			if (cur != last)
225				continue;
226
227			last = NULL;
228		}
229		if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
230					   cb->nlh->nlmsg_seq,
231					   NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
232					   IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
233			cb->args[1] = (unsigned long)cur;
234			break;
235		}
236	}
237	if (!cb->args[1])
238		cb->args[2] = 1;
239	rcu_read_unlock();
240	return skb->len;
241}
242
243static int
244cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
245		      const struct nlmsghdr *nlh,
246		      const struct nlattr * const cda[])
247{
248	int ret = -ENOENT;
249	char *name;
250	struct ctnl_timeout *cur;
251
252	if (nlh->nlmsg_flags & NLM_F_DUMP) {
253		struct netlink_dump_control c = {
254			.dump = ctnl_timeout_dump,
255		};
256		return netlink_dump_start(ctnl, skb, nlh, &c);
257	}
258
259	if (!cda[CTA_TIMEOUT_NAME])
260		return -EINVAL;
261	name = nla_data(cda[CTA_TIMEOUT_NAME]);
262
263	list_for_each_entry(cur, &cttimeout_list, head) {
264		struct sk_buff *skb2;
265
266		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
267			continue;
268
269		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
270		if (skb2 == NULL) {
271			ret = -ENOMEM;
272			break;
273		}
274
275		ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
276					     nlh->nlmsg_seq,
277					     NFNL_MSG_TYPE(nlh->nlmsg_type),
278					     IPCTNL_MSG_TIMEOUT_NEW, cur);
279		if (ret <= 0) {
280			kfree_skb(skb2);
281			break;
282		}
283		ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
284					MSG_DONTWAIT);
285		if (ret > 0)
286			ret = 0;
287
288		/* this avoids a loop in nfnetlink. */
289		return ret == -EAGAIN ? -ENOBUFS : ret;
290	}
291	return ret;
292}
293
294static void untimeout(struct nf_conntrack_tuple_hash *i,
295		      struct ctnl_timeout *timeout)
296{
297	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
298	struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
299
300	if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
301		RCU_INIT_POINTER(timeout_ext->timeout, NULL);
302}
303
304static void ctnl_untimeout(struct ctnl_timeout *timeout)
305{
306	struct nf_conntrack_tuple_hash *h;
307	const struct hlist_nulls_node *nn;
308	int i;
309
310	local_bh_disable();
311	for (i = 0; i < init_net.ct.htable_size; i++) {
312		spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
313		if (i < init_net.ct.htable_size) {
314			hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
315				untimeout(h, timeout);
316		}
317		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
318	}
319	local_bh_enable();
320}
321
322/* try to delete object, fail if it is still in use. */
323static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
324{
325	int ret = 0;
326
327	/* we want to avoid races with nf_ct_timeout_find_get. */
328	if (atomic_dec_and_test(&timeout->refcnt)) {
329		/* We are protected by nfnl mutex. */
330		list_del_rcu(&timeout->head);
331		nf_ct_l4proto_put(timeout->l4proto);
332		ctnl_untimeout(timeout);
333		kfree_rcu(timeout, rcu_head);
334	} else {
335		/* still in use, restore reference counter. */
336		atomic_inc(&timeout->refcnt);
337		ret = -EBUSY;
338	}
339	return ret;
340}
341
342static int
343cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
344		      const struct nlmsghdr *nlh,
345		      const struct nlattr * const cda[])
346{
347	char *name;
348	struct ctnl_timeout *cur;
349	int ret = -ENOENT;
350
351	if (!cda[CTA_TIMEOUT_NAME]) {
352		list_for_each_entry(cur, &cttimeout_list, head)
353			ctnl_timeout_try_del(cur);
354
355		return 0;
356	}
357	name = nla_data(cda[CTA_TIMEOUT_NAME]);
358
359	list_for_each_entry(cur, &cttimeout_list, head) {
360		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
361			continue;
362
363		ret = ctnl_timeout_try_del(cur);
364		if (ret < 0)
365			return ret;
366
367		break;
368	}
369	return ret;
370}
371
372static int
373cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb,
374		      const struct nlmsghdr *nlh,
375		      const struct nlattr * const cda[])
376{
377	__u16 l3num;
378	__u8 l4num;
379	struct nf_conntrack_l4proto *l4proto;
380	struct net *net = sock_net(skb->sk);
381	unsigned int *timeouts;
382	int ret;
383
384	if (!cda[CTA_TIMEOUT_L3PROTO] ||
385	    !cda[CTA_TIMEOUT_L4PROTO] ||
386	    !cda[CTA_TIMEOUT_DATA])
387		return -EINVAL;
388
389	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
390	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
391	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
392
393	/* This protocol is not supported, skip. */
394	if (l4proto->l4proto != l4num) {
395		ret = -EOPNOTSUPP;
396		goto err;
397	}
398
399	timeouts = l4proto->get_timeouts(net);
400
401	ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
402					cda[CTA_TIMEOUT_DATA]);
403	if (ret < 0)
404		goto err;
405
406	nf_ct_l4proto_put(l4proto);
407	return 0;
408err:
409	nf_ct_l4proto_put(l4proto);
410	return ret;
411}
412
413static int
414cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
415			    u32 seq, u32 type, int event,
416			    struct nf_conntrack_l4proto *l4proto)
417{
418	struct nlmsghdr *nlh;
419	struct nfgenmsg *nfmsg;
420	unsigned int flags = portid ? NLM_F_MULTI : 0;
421
422	event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
423	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
424	if (nlh == NULL)
425		goto nlmsg_failure;
426
427	nfmsg = nlmsg_data(nlh);
428	nfmsg->nfgen_family = AF_UNSPEC;
429	nfmsg->version = NFNETLINK_V0;
430	nfmsg->res_id = 0;
431
432	if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
433	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
434		goto nla_put_failure;
435
436	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
437		struct nlattr *nest_parms;
438		unsigned int *timeouts = l4proto->get_timeouts(net);
439		int ret;
440
441		nest_parms = nla_nest_start(skb,
442					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
443		if (!nest_parms)
444			goto nla_put_failure;
445
446		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
447		if (ret < 0)
448			goto nla_put_failure;
449
450		nla_nest_end(skb, nest_parms);
451	}
452
453	nlmsg_end(skb, nlh);
454	return skb->len;
455
456nlmsg_failure:
457nla_put_failure:
458	nlmsg_cancel(skb, nlh);
459	return -1;
460}
461
462static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb,
463				 const struct nlmsghdr *nlh,
464				 const struct nlattr * const cda[])
465{
466	__u16 l3num;
467	__u8 l4num;
468	struct nf_conntrack_l4proto *l4proto;
469	struct net *net = sock_net(skb->sk);
470	struct sk_buff *skb2;
471	int ret, err;
472
473	if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
474		return -EINVAL;
475
476	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
477	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
478	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
479
480	/* This protocol is not supported, skip. */
481	if (l4proto->l4proto != l4num) {
482		err = -EOPNOTSUPP;
483		goto err;
484	}
485
486	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
487	if (skb2 == NULL) {
488		err = -ENOMEM;
489		goto err;
490	}
491
492	ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid,
493					  nlh->nlmsg_seq,
494					  NFNL_MSG_TYPE(nlh->nlmsg_type),
495					  IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
496					  l4proto);
497	if (ret <= 0) {
498		kfree_skb(skb2);
499		err = -ENOMEM;
500		goto err;
501	}
502	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
503	if (ret > 0)
504		ret = 0;
505
506	/* this avoids a loop in nfnetlink. */
507	return ret == -EAGAIN ? -ENOBUFS : ret;
508err:
509	nf_ct_l4proto_put(l4proto);
510	return err;
511}
512
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
/*
 * Look up a timeout object by @name and take a reference on both this
 * module and the object.
 *
 * Only the first object whose name matches is considered. Returns that
 * object, or NULL if there is no match or either reference could not be
 * obtained.
 */
static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
{
	struct ctnl_timeout *cur, *found = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(cur, &cttimeout_list, head) {
		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX))
			continue;

		if (try_module_get(THIS_MODULE)) {
			if (atomic_inc_not_zero(&cur->refcnt))
				found = cur;
			else
				/* Object is going away: undo the module ref. */
				module_put(THIS_MODULE);
		}
		break;
	}
	rcu_read_unlock();

	return found;
}

/* Release the object and module references taken by ctnl_timeout_find_get(). */
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
	atomic_dec(&timeout->refcnt);
	module_put(THIS_MODULE);
}
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
544
545static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
546	[IPCTNL_MSG_TIMEOUT_NEW]	= { .call = cttimeout_new_timeout,
547					    .attr_count = CTA_TIMEOUT_MAX,
548					    .policy = cttimeout_nla_policy },
549	[IPCTNL_MSG_TIMEOUT_GET]	= { .call = cttimeout_get_timeout,
550					    .attr_count = CTA_TIMEOUT_MAX,
551					    .policy = cttimeout_nla_policy },
552	[IPCTNL_MSG_TIMEOUT_DELETE]	= { .call = cttimeout_del_timeout,
553					    .attr_count = CTA_TIMEOUT_MAX,
554					    .policy = cttimeout_nla_policy },
555	[IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
556					    .attr_count = CTA_TIMEOUT_MAX,
557					    .policy = cttimeout_nla_policy },
558	[IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
559					    .attr_count = CTA_TIMEOUT_MAX,
560					    .policy = cttimeout_nla_policy },
561};
562
563static const struct nfnetlink_subsystem cttimeout_subsys = {
564	.name				= "conntrack_timeout",
565	.subsys_id			= NFNL_SUBSYS_CTNETLINK_TIMEOUT,
566	.cb_count			= IPCTNL_MSG_TIMEOUT_MAX,
567	.cb				= cttimeout_cb,
568};
569
570MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
571
572static int __init cttimeout_init(void)
573{
574	int ret;
575
576	ret = nfnetlink_subsys_register(&cttimeout_subsys);
577	if (ret < 0) {
578		pr_err("cttimeout_init: cannot register cttimeout with "
579			"nfnetlink.\n");
580		goto err_out;
581	}
582#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
583	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get);
584	RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put);
585#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
586	return 0;
587
588err_out:
589	return ret;
590}
591
592static void __exit cttimeout_exit(void)
593{
594	struct ctnl_timeout *cur, *tmp;
595
596	pr_info("cttimeout: unregistering from nfnetlink.\n");
597
598	nfnetlink_subsys_unregister(&cttimeout_subsys);
599
600	/* Make sure no conntrack objects refer to custom timeouts anymore. */
601	ctnl_untimeout(NULL);
602
603	list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
604		list_del_rcu(&cur->head);
605		/* We are sure that our objects have no clients at this point,
606		 * it's safe to release them all without checking refcnt.
607		 */
608		nf_ct_l4proto_put(cur->l4proto);
609		kfree_rcu(cur, rcu_head);
610	}
611#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
612	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
613	RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
614#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
615	rcu_barrier();
616}
617
/* Module entry and exit points. */
module_exit(cttimeout_exit);
module_init(cttimeout_init);
620