1/* L3/L4 protocol support for nf_conntrack. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/types.h>
14#include <linux/netfilter.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/mutex.h>
18#include <linux/vmalloc.h>
19#include <linux/stddef.h>
20#include <linux/err.h>
21#include <linux/percpu.h>
22#include <linux/notifier.h>
23#include <linux/kernel.h>
24#include <linux/netdevice.h>
25
26#include <net/netfilter/nf_conntrack.h>
27#include <net/netfilter/nf_conntrack_l3proto.h>
28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_core.h>
30
/*
 * Per-L3-family tables of L4 protocol trackers, indexed as
 * nf_ct_protos[l3proto][l4proto].  A family's table is allocated on the
 * first nf_ct_l4proto_register() call for that family; until then the
 * entry is NULL.
 */
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
/*
 * L3 protocol trackers, indexed by l3proto.  nf_conntrack_proto_init()
 * points every slot at the generic L3 tracker.
 */
struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_l3protos);

/* Held by the register/unregister paths while they update the two tables. */
static DEFINE_MUTEX(nf_ct_proto_mutex);
36
37#ifdef CONFIG_SYSCTL
38static int
39nf_ct_register_sysctl(struct net *net,
40		      struct ctl_table_header **header,
41		      const char *path,
42		      struct ctl_table *table)
43{
44	if (*header == NULL) {
45		*header = register_net_sysctl(net, path, table);
46		if (*header == NULL)
47			return -ENOMEM;
48	}
49
50	return 0;
51}
52
53static void
54nf_ct_unregister_sysctl(struct ctl_table_header **header,
55			struct ctl_table **table,
56			unsigned int users)
57{
58	if (users > 0)
59		return;
60
61	unregister_net_sysctl_table(*header);
62	kfree(*table);
63	*header = NULL;
64	*table = NULL;
65}
66#endif
67
68struct nf_conntrack_l4proto *
69__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
70{
71	if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
72		return &nf_conntrack_l4proto_generic;
73
74	return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
75}
76EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
77
78/* this is guaranteed to always return a valid protocol helper, since
79 * it falls back to generic_protocol */
80struct nf_conntrack_l3proto *
81nf_ct_l3proto_find_get(u_int16_t l3proto)
82{
83	struct nf_conntrack_l3proto *p;
84
85	rcu_read_lock();
86	p = __nf_ct_l3proto_find(l3proto);
87	if (!try_module_get(p->me))
88		p = &nf_conntrack_l3proto_generic;
89	rcu_read_unlock();
90
91	return p;
92}
93EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
94
95int
96nf_ct_l3proto_try_module_get(unsigned short l3proto)
97{
98	int ret;
99	struct nf_conntrack_l3proto *p;
100
101retry:	p = nf_ct_l3proto_find_get(l3proto);
102	if (p == &nf_conntrack_l3proto_generic) {
103		ret = request_module("nf_conntrack-%d", l3proto);
104		if (!ret)
105			goto retry;
106
107		return -EPROTOTYPE;
108	}
109
110	return 0;
111}
112EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
113
114void nf_ct_l3proto_module_put(unsigned short l3proto)
115{
116	struct nf_conntrack_l3proto *p;
117
118	/* rcu_read_lock not necessary since the caller holds a reference, but
119	 * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
120	 */
121	rcu_read_lock();
122	p = __nf_ct_l3proto_find(l3proto);
123	module_put(p->me);
124	rcu_read_unlock();
125}
126EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
127
128struct nf_conntrack_l4proto *
129nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
130{
131	struct nf_conntrack_l4proto *p;
132
133	rcu_read_lock();
134	p = __nf_ct_l4proto_find(l3num, l4num);
135	if (!try_module_get(p->me))
136		p = &nf_conntrack_l4proto_generic;
137	rcu_read_unlock();
138
139	return p;
140}
141EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
142
/* Drop the module reference of L4 tracker @p by calling module_put(). */
void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
{
	module_put(p->me);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
148
149static int kill_l3proto(struct nf_conn *i, void *data)
150{
151	return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
152}
153
154static int kill_l4proto(struct nf_conn *i, void *data)
155{
156	struct nf_conntrack_l4proto *l4proto;
157	l4proto = (struct nf_conntrack_l4proto *)data;
158	return nf_ct_protonum(i) == l4proto->l4proto &&
159	       nf_ct_l3num(i) == l4proto->l3proto;
160}
161
162static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
163					   struct nf_conntrack_l3proto *l3proto)
164{
165	if (l3proto->l3proto == PF_INET)
166		return &net->ct.nf_ct_proto;
167	else
168		return NULL;
169}
170
/*
 * Register the per-net sysctl table of @l3proto, if it has one.
 *
 * Returns 0 when there is nothing to register or registration succeeded.
 * On failure the table is freed, its pointer is cleared and the negative
 * error from nf_ct_register_sysctl() is returned.
 */
static int nf_ct_l3proto_register_sysctl(struct net *net,
					 struct nf_conntrack_l3proto *l3proto)
{
	struct nf_ip_net *ipn = nf_ct_l3proto_net(net, l3proto);
	int rc = 0;

	/* No per-net state (nf_conntrack_l3proto_ipv6 has no sysctl support). */
	if (!ipn)
		return 0;

#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	if (!ipn->ctl_table)
		return 0;

	rc = nf_ct_register_sysctl(net,
				   &ipn->ctl_table_header,
				   l3proto->ctl_table_path,
				   ipn->ctl_table);
	if (rc < 0) {
		kfree(ipn->ctl_table);
		ipn->ctl_table = NULL;
	}
#endif
	return rc;
}
194
/*
 * Undo nf_ct_l3proto_register_sysctl(): if a header is registered for
 * @l3proto in @net, unregister it and free the table unconditionally
 * (user count passed as 0).
 */
static void nf_ct_l3proto_unregister_sysctl(struct net *net,
					    struct nf_conntrack_l3proto *l3proto)
{
	struct nf_ip_net *ipn = nf_ct_l3proto_net(net, l3proto);

	if (!ipn)
		return;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	if (!ipn->ctl_table_header)
		return;

	nf_ct_unregister_sysctl(&ipn->ctl_table_header,
				&ipn->ctl_table,
				0);
#endif
}
209
210int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
211{
212	int ret = 0;
213	struct nf_conntrack_l3proto *old;
214
215	if (proto->l3proto >= AF_MAX)
216		return -EBUSY;
217
218	if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
219		return -EINVAL;
220
221	mutex_lock(&nf_ct_proto_mutex);
222	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
223					lockdep_is_held(&nf_ct_proto_mutex));
224	if (old != &nf_conntrack_l3proto_generic) {
225		ret = -EBUSY;
226		goto out_unlock;
227	}
228
229	if (proto->nlattr_tuple_size)
230		proto->nla_size = 3 * proto->nlattr_tuple_size();
231
232	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
233
234out_unlock:
235	mutex_unlock(&nf_ct_proto_mutex);
236	return ret;
237
238}
239EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
240
241int nf_ct_l3proto_pernet_register(struct net *net,
242				  struct nf_conntrack_l3proto *proto)
243{
244	int ret = 0;
245
246	if (proto->init_net) {
247		ret = proto->init_net(net);
248		if (ret < 0)
249			return ret;
250	}
251
252	return nf_ct_l3proto_register_sysctl(net, proto);
253}
254EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
255
256void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
257{
258	BUG_ON(proto->l3proto >= AF_MAX);
259
260	mutex_lock(&nf_ct_proto_mutex);
261	BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
262					 lockdep_is_held(&nf_ct_proto_mutex)
263					 ) != proto);
264	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
265			   &nf_conntrack_l3proto_generic);
266	mutex_unlock(&nf_ct_proto_mutex);
267
268	synchronize_rcu();
269}
270EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
271
/*
 * Per-net teardown for L3 tracker @proto: unregister its sysctl table and
 * then run nf_ct_iterate_cleanup() over @net with kill_l3proto as the
 * match callback.
 */
void nf_ct_l3proto_pernet_unregister(struct net *net,
				     struct nf_conntrack_l3proto *proto)
{
	nf_ct_l3proto_unregister_sysctl(net, proto);

	/* Remove all conntrack entries for this protocol */
	nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
281
282static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
283					      struct nf_conntrack_l4proto *l4proto)
284{
285	if (l4proto->get_net_proto) {
286		/* statically built-in protocols use static per-net */
287		return l4proto->get_net_proto(net);
288	} else if (l4proto->net_id) {
289		/* ... and loadable protocols use dynamic per-net */
290		return net_generic(net, *l4proto->net_id);
291	}
292	return NULL;
293}
294
/*
 * Register the sysctl tables of @l4proto for @net.
 *
 * The main table goes under "net/netfilter".  With
 * CONFIG_NF_CONNTRACK_PROC_COMPAT, non-IPv6 trackers that have a compat
 * table also get it registered under "net/ipv4/netfilter".
 *
 * On failure of the main registration the main table is freed only when
 * @pn has no users, and the compat table (if applicable) is freed.  On
 * failure of the compat registration the compat table is freed and the
 * main registration is dropped subject to pn->users.
 *
 * Returns 0 on success or a negative error from nf_ct_register_sysctl().
 */
static
int nf_ct_l4proto_register_sysctl(struct net *net,
				  struct nf_proto_net *pn,
				  struct nf_conntrack_l4proto *l4proto)
{
	int rc = 0;

#ifdef CONFIG_SYSCTL
	if (pn->ctl_table != NULL) {
		rc = nf_ct_register_sysctl(net,
					   &pn->ctl_table_header,
					   "net/netfilter",
					   pn->ctl_table);
		/* The table may still be in use by another registration. */
		if (rc < 0 && !pn->users) {
			kfree(pn->ctl_table);
			pn->ctl_table = NULL;
		}
	}
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
		if (rc < 0) {
			/* Main registration failed: just drop the compat table. */
			nf_ct_kfree_compat_sysctl_table(pn);
		} else {
			rc = nf_ct_register_sysctl(net,
						   &pn->ctl_compat_header,
						   "net/ipv4/netfilter",
						   pn->ctl_compat_table);
			if (rc != 0) {
				/* Roll back what was set up above. */
				nf_ct_kfree_compat_sysctl_table(pn);
				nf_ct_unregister_sysctl(&pn->ctl_table_header,
							&pn->ctl_table,
							pn->users);
			}
		}
	}
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
	return rc;
}
338
/*
 * Undo nf_ct_l4proto_register_sysctl() for @l4proto in @net.
 *
 * The main table is only torn down once pn->users has reached zero; the
 * compat table (non-IPv6 trackers only) is torn down unconditionally.
 */
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
				     struct nf_proto_net *pn,
				     struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
	if (pn->ctl_table_header)
		nf_ct_unregister_sysctl(&pn->ctl_table_header,
					&pn->ctl_table,
					pn->users);

#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	if (l4proto->l3proto == AF_INET6 || !pn->ctl_compat_header)
		return;

	nf_ct_unregister_sysctl(&pn->ctl_compat_header,
				&pn->ctl_compat_table,
				0);
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
}
358
359/* FIXME: Allow NULL functions and sub in pointers to generic for
360   them. --RR */
361int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
362{
363	int ret = 0;
364
365	if (l4proto->l3proto >= PF_MAX)
366		return -EBUSY;
367
368	if ((l4proto->to_nlattr && !l4proto->nlattr_size)
369		|| (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
370		return -EINVAL;
371
372	mutex_lock(&nf_ct_proto_mutex);
373	if (!nf_ct_protos[l4proto->l3proto]) {
374		/* l3proto may be loaded latter. */
375		struct nf_conntrack_l4proto __rcu **proto_array;
376		int i;
377
378		proto_array = kmalloc(MAX_NF_CT_PROTO *
379				      sizeof(struct nf_conntrack_l4proto *),
380				      GFP_KERNEL);
381		if (proto_array == NULL) {
382			ret = -ENOMEM;
383			goto out_unlock;
384		}
385
386		for (i = 0; i < MAX_NF_CT_PROTO; i++)
387			RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
388
389		/* Before making proto_array visible to lockless readers,
390		 * we must make sure its content is committed to memory.
391		 */
392		smp_wmb();
393
394		nf_ct_protos[l4proto->l3proto] = proto_array;
395	} else if (rcu_dereference_protected(
396			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
397			lockdep_is_held(&nf_ct_proto_mutex)
398			) != &nf_conntrack_l4proto_generic) {
399		ret = -EBUSY;
400		goto out_unlock;
401	}
402
403	l4proto->nla_size = 0;
404	if (l4proto->nlattr_size)
405		l4proto->nla_size += l4proto->nlattr_size();
406	if (l4proto->nlattr_tuple_size)
407		l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
408
409	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
410			   l4proto);
411out_unlock:
412	mutex_unlock(&nf_ct_proto_mutex);
413	return ret;
414}
415EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
416
417int nf_ct_l4proto_pernet_register(struct net *net,
418				  struct nf_conntrack_l4proto *l4proto)
419{
420	int ret = 0;
421	struct nf_proto_net *pn = NULL;
422
423	if (l4proto->init_net) {
424		ret = l4proto->init_net(net, l4proto->l3proto);
425		if (ret < 0)
426			goto out;
427	}
428
429	pn = nf_ct_l4proto_net(net, l4proto);
430	if (pn == NULL)
431		goto out;
432
433	ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
434	if (ret < 0)
435		goto out;
436
437	pn->users++;
438out:
439	return ret;
440}
441EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
442
443void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
444{
445	BUG_ON(l4proto->l3proto >= PF_MAX);
446
447	mutex_lock(&nf_ct_proto_mutex);
448	BUG_ON(rcu_dereference_protected(
449			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
450			lockdep_is_held(&nf_ct_proto_mutex)
451			) != l4proto);
452	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
453			   &nf_conntrack_l4proto_generic);
454	mutex_unlock(&nf_ct_proto_mutex);
455
456	synchronize_rcu();
457}
458EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
459
460void nf_ct_l4proto_pernet_unregister(struct net *net,
461				     struct nf_conntrack_l4proto *l4proto)
462{
463	struct nf_proto_net *pn = NULL;
464
465	pn = nf_ct_l4proto_net(net, l4proto);
466	if (pn == NULL)
467		return;
468
469	pn->users--;
470	nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
471
472	/* Remove all contrack entries for this protocol */
473	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
474}
475EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
476
477int nf_conntrack_proto_pernet_init(struct net *net)
478{
479	int err;
480	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
481					&nf_conntrack_l4proto_generic);
482
483	err = nf_conntrack_l4proto_generic.init_net(net,
484					nf_conntrack_l4proto_generic.l3proto);
485	if (err < 0)
486		return err;
487	err = nf_ct_l4proto_register_sysctl(net,
488					    pn,
489					    &nf_conntrack_l4proto_generic);
490	if (err < 0)
491		return err;
492
493	pn->users++;
494	return 0;
495}
496
497void nf_conntrack_proto_pernet_fini(struct net *net)
498{
499	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
500					&nf_conntrack_l4proto_generic);
501
502	pn->users--;
503	nf_ct_l4proto_unregister_sysctl(net,
504					pn,
505					&nf_conntrack_l4proto_generic);
506}
507
508int nf_conntrack_proto_init(void)
509{
510	unsigned int i;
511	for (i = 0; i < AF_MAX; i++)
512		rcu_assign_pointer(nf_ct_l3protos[i],
513				   &nf_conntrack_l3proto_generic);
514	return 0;
515}
516
517void nf_conntrack_proto_fini(void)
518{
519	unsigned int i;
520	/* free l3proto protocol tables */
521	for (i = 0; i < PF_MAX; i++)
522		kfree(nf_ct_protos[i]);
523}
524