/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

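/* Per-table multicast routing state: the controlling routing socket,
 * the expiry timer and queue for unresolved entries, the hash table of
 * resolved (S,G) entries and the virtual interface (mif) table.
 */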
struct mr6_table {
	struct list_head	list;
	possible_net_t		net;
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};

/* Big lock, protecting the vif table, the mrt cache and the mroute
   socket state. Note that all changes are serialized under rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan Cox's original scheme. The hash table of resolved
   entries is changed only in process context and protected by the weak
   read/write lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */

struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

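/* Handle an inbound PIM Register message: validate the register header
 * and checksum, then decapsulate the inner multicast packet onto the
 * pim6reg device so that it re-enters the stack as if received there.
 */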
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check that the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer handler for the queue of unresolved entries. */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

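/* Timer callback: if the unresolved-queue lock is contended, back off
 * and retry on the next jiffy rather than spinning in timer context.
 */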
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the list of oifs. Called under write-locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

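/* Register a new virtual interface (mif): either a real device looked
 * up by ifindex or, for MIFF_REGISTER, the special pim6reg device.
 * Called under rtnl_lock; mrt_lock is taken only to publish the entry.
 */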
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special-purpose VIF in PIM:
		 * all the packets will be sent to the daemon.
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev_get_iflink(dev);

	/* And finish the update by writing the critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}

/* Look for a (*,*,oif) entry */
static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
						      mifi_t mifi)
{
	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
		    (c->mfc_un.res.ttls[mifi] < 255))
			return c;

	return NULL;
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

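/* Unresolved entries are allocated from the packet path with
 * mfc_unres_lock held, hence GFP_ATOMIC rather than GFP_KERNEL.
 */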
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone from the queued to the resolved state
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IPv6 header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution, creating the unresolved cache
 *	entry under the lock if one does not exist yet.
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

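/* Netdevice notifier: when a device unregisters, drop every mif that
 * references it, in all tables, so no stale device pointers remain.
 */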
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, &list);
		}
	}
	unregister_netdevice_many(&list);

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IPv6 multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

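/* Add or update an (S,G) cache entry on behalf of user space. If a
 * matching unresolved entry exists, it is resolved and its queued
 * packets are replayed through the new route.
 */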
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check whether this resolves a queued entry. If so, we
	 *	need to send the pending frames on and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc.
 */

static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (!all && (c->mfc_flags & MFC_STATIC))
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

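/* Attach the multicast routing daemon's socket to the table; only one
 * socket may control a table at a time.
 */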
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	} else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

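/* Look up the routing daemon's socket for the table that matches this
 * skb, if any.
 */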
struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == mrt->mroute6_sk, parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (enabling PIM also enables assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION, which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IPv6 multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

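/* Final step of forwarding one copy: bump the forwarding MIB counters
 * and hand the packet to the dst output path.
 */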
static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output_sk(sk, skb);
}

/*
 *	Processing handlers for ip6_mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a multicast
	 * program, that program should receive packets regardless of the
	 * interface it joined on. If we did not do this, the program would
	 * have to join on all interfaces. On the other hand, a multihomed
	 * host (or router, but not mrouter) cannot join on more than one
	 * interface - that would result in receiving multiple copies of
	 * each packet.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
		       skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

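/* Map a net_device back to its mif index in this table, or -1 if the
 * device is not a configured mif.
 */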
2077static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2078{
2079	int ct;
2080
2081	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2082		if (mrt->vif6_table[ct].dev == dev)
2083			break;
2084	}
2085	return ct;
2086}
2087
2088static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2089			   struct sk_buff *skb, struct mfc6_cache *cache)
2090{
2091	int psend = -1;
2092	int vif, ct;
2093	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2094
2095	vif = cache->mf6c_parent;
2096	cache->mfc_un.res.pkt++;
2097	cache->mfc_un.res.bytes += skb->len;
2098
2099	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2100		struct mfc6_cache *cache_proxy;
2101
2102		/* For an (*,G) entry, we only check that the incoming
2103		 * interface is part of the static tree.
2104		 */
2105		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2106		if (cache_proxy &&
2107		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2108			goto forward;
2109	}
2110
	/*
	 * Wrong interface: drop the packet and (maybe) send a PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif. That is bad, but the alternative would be
		       moving a pretty large chunk of pimd into the kernel.
		       Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's a (*,*) entry and the packet is not coming
			 * from the upstream: forward the packet to the
			 * upstream only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For a (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}

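/* Illustrative userspace sketch: the ttls[] thresholds consulted above
 * are derived from the if_set the daemon passes with MRT6_ADD_MFC (a mif
 * in the set gets threshold 1, all others 255, i.e. "never forward";
 * a packet is sent on mif ct only if its hop limit exceeds ttls[ct]).
 * Assuming "fd" is the MRT6_INIT'ed socket, mifs 0 and 1 exist, and the
 * addresses below are just examples:
 *
 *	struct mf6cctl mfc;
 *
 *	memset(&mfc, 0, sizeof(mfc));
 *	inet_pton(AF_INET6, "2001:db8::1", &mfc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::4321", &mfc.mf6cc_mcastgrp.sin6_addr);
 *	mfc.mf6cc_parent = 0;			// iif: mif #0
 *	IF_SET(1, &mfc.mf6cc_ifset);		// oif: mif #1
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 */
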
/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry: queue the packet as unresolved and
	 *	report it to the multicast routing daemon.
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);

			read_unlock(&mrt_lock);
			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

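/* Illustrative userspace sketch: packets only reach ip6_mr_input() once
 * a daemon such as pim6sd has opened the mroute socket and configured
 * mifs. The usual bring-up, assuming "eth0" is the relevant interface:
 *
 *	#include <linux/mroute6.h>
 *	#include <net/if.h>
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif;
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	memset(&mif, 0, sizeof(mif));
 *	mif.mif6c_mifi = 0;				// index into vif6_table[]
 *	mif.mif6c_pifi = if_nametoindex("eth0");	// physical ifindex
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 */
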
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct nlattr *mp_attr;
	struct rta_mfc_stats mfcs;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}

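/* Illustrative userspace sketch: a consumer of the message built above
 * can walk the RTA_MULTIPATH attribute with the standard rtnetlink
 * helpers; rtnh_hops carries the mif threshold (ttl). "attr" is assumed
 * to point at the RTA_MULTIPATH attribute:
 *
 *	struct rtnexthop *nh = RTA_DATA(attr);
 *	int len = RTA_PAYLOAD(attr);
 *
 *	while (RTNH_OK(nh, len)) {
 *		printf("oif %d threshold %d\n",
 *		       nh->rtnh_ifindex, nh->rtnh_hops);
 *		len -= NLMSG_ALIGN(nh->rtnh_len);
 *		nh = RTNH_NEXT(nh);
 *	}
 */
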
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if the cache entry is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size(sizeof(struct rta_mfc_stats))
		;

	return len;
}

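/* Worked example: the UAPI struct sizes are fixed (struct rtmsg is 12
 * bytes, struct rtnexthop 8, struct rta_mfc_stats 24) and
 * nla_total_size(n) is NLA_ALIGN(4 + n), so for a resolved entry with
 * maxvif == 2 the computation above yields:
 *
 *	  12	NLMSG_ALIGN(sizeof(struct rtmsg))
 *	+  8	RTA_TABLE	nla_total_size(4)
 *	+ 20	RTA_SRC		nla_total_size(16)
 *	+ 20	RTA_DST		nla_total_size(16)
 *	+  8	RTA_IIF		nla_total_size(4)
 *	+  4	RTA_MULTIPATH	nla_total_size(0)
 *	+ 16	2 * NLA_ALIGN(sizeof(struct rtnexthop))
 *	+ 28	RTA_MFC_STATS	nla_total_size(24)
 *	= 116 bytes, excluding the 16-byte struct nlmsghdr.
 */
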
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

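/* Illustrative userspace sketch: a monitor can receive these cache
 * add/del notifications by joining the RTNLGRP_IPV6_MROUTE group on an
 * rtnetlink socket:
 *
 *	struct sockaddr_nl sa;
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *	memset(&sa, 0, sizeof(sa));
 *	sa.nl_family = AF_NETLINK;
 *	sa.nl_groups = 1 << (RTNLGRP_IPV6_MROUTE - 1);
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *	// recv() now yields RTM_NEWROUTE/RTM_DELROUTE messages with
 *	// rtm_family == RTNL_FAMILY_IP6MR
 */
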
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE,
						      NLM_F_MULTI) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE,
					      NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

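/* Illustrative userspace sketch: this dump callback is what answers
 * "ip -6 mroute show". The request is a standard rtnetlink dump keyed
 * on RTNL_FAMILY_IP6MR; "fd" is assumed to be a NETLINK_ROUTE socket:
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg rtm;
 *	} req;
 *
 *	memset(&req, 0, sizeof(req));
 *	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 *	req.nlh.nlmsg_type = RTM_GETROUTE;
 *	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
 *	req.rtm.rtm_family = RTNL_FAMILY_IP6MR;
 *	send(fd, &req, req.nlh.nlmsg_len, 0);
 */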