1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 
66 #include <asm/uaccess.h>
67 
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71 
/* Outcome of the neighbour check done by rt6_check_neigh().
 * rt6_score_route() hands any negative value straight back to its
 * caller; find_match() then skips the route on FAIL_HARD and turns
 * FAIL_DO_RR into the lowest valid score plus a round-robin request.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED = 1,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_HARD = -3
};
78 
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void		ip6_dst_destroy(struct dst_entry *);
85 static void		ip6_dst_ifdown(struct dst_entry *,
86 				       struct net_device *dev, int how);
87 static int		 ip6_dst_gc(struct dst_ops *ops);
88 
89 static int		ip6_pkt_discard(struct sk_buff *skb);
90 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int		ip6_pkt_prohibit(struct sk_buff *skb);
92 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void		ip6_link_failure(struct sk_buff *skb);
94 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95 					   struct sk_buff *skb, u32 mtu);
96 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97 					struct sk_buff *skb);
98 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100 
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct net *net,
103 					   const struct in6_addr *prefix, int prefixlen,
104 					   const struct in6_addr *gwaddr, int ifindex,
105 					   unsigned int pref);
106 static struct rt6_info *rt6_get_route_info(struct net *net,
107 					   const struct in6_addr *prefix, int prefixlen,
108 					   const struct in6_addr *gwaddr, int ifindex);
109 #endif
110 
111 struct uncached_list {
112 	spinlock_t		lock;
113 	struct list_head	head;
114 };
115 
116 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
117 
rt6_uncached_list_add(struct rt6_info * rt)118 static void rt6_uncached_list_add(struct rt6_info *rt)
119 {
120 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
121 
122 	rt->dst.flags |= DST_NOCACHE;
123 	rt->rt6i_uncached_list = ul;
124 
125 	spin_lock_bh(&ul->lock);
126 	list_add_tail(&rt->rt6i_uncached, &ul->head);
127 	spin_unlock_bh(&ul->lock);
128 }
129 
rt6_uncached_list_del(struct rt6_info * rt)130 static void rt6_uncached_list_del(struct rt6_info *rt)
131 {
132 	if (!list_empty(&rt->rt6i_uncached)) {
133 		struct uncached_list *ul = rt->rt6i_uncached_list;
134 
135 		spin_lock_bh(&ul->lock);
136 		list_del(&rt->rt6i_uncached);
137 		spin_unlock_bh(&ul->lock);
138 	}
139 }
140 
rt6_uncached_list_flush_dev(struct net * net,struct net_device * dev)141 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
142 {
143 	struct net_device *loopback_dev = net->loopback_dev;
144 	int cpu;
145 
146 	if (dev == loopback_dev)
147 		return;
148 
149 	for_each_possible_cpu(cpu) {
150 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
151 		struct rt6_info *rt;
152 
153 		spin_lock_bh(&ul->lock);
154 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
155 			struct inet6_dev *rt_idev = rt->rt6i_idev;
156 			struct net_device *rt_dev = rt->dst.dev;
157 
158 			if (rt_idev->dev == dev) {
159 				rt->rt6i_idev = in6_dev_get(loopback_dev);
160 				in6_dev_put(rt_idev);
161 			}
162 
163 			if (rt_dev == dev) {
164 				rt->dst.dev = loopback_dev;
165 				dev_hold(rt->dst.dev);
166 				dev_put(rt_dev);
167 			}
168 		}
169 		spin_unlock_bh(&ul->lock);
170 	}
171 }
172 
rt6_pcpu_cow_metrics(struct rt6_info * rt)173 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
174 {
175 	return dst_metrics_write_ptr(rt->dst.from);
176 }
177 
ipv6_cow_metrics(struct dst_entry * dst,unsigned long old)178 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
179 {
180 	struct rt6_info *rt = (struct rt6_info *)dst;
181 
182 	if (rt->rt6i_flags & RTF_PCPU)
183 		return rt6_pcpu_cow_metrics(rt);
184 	else if (rt->rt6i_flags & RTF_CACHE)
185 		return NULL;
186 	else
187 		return dst_cow_metrics_generic(dst, old);
188 }
189 
choose_neigh_daddr(struct rt6_info * rt,struct sk_buff * skb,const void * daddr)190 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
191 					     struct sk_buff *skb,
192 					     const void *daddr)
193 {
194 	struct in6_addr *p = &rt->rt6i_gateway;
195 
196 	if (!ipv6_addr_any(p))
197 		return (const void *) p;
198 	else if (skb)
199 		return &ipv6_hdr(skb)->daddr;
200 	return daddr;
201 }
202 
ip6_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)203 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
204 					  struct sk_buff *skb,
205 					  const void *daddr)
206 {
207 	struct rt6_info *rt = (struct rt6_info *) dst;
208 	struct neighbour *n;
209 
210 	daddr = choose_neigh_daddr(rt, skb, daddr);
211 	n = __ipv6_neigh_lookup(dst->dev, daddr);
212 	if (n)
213 		return n;
214 	return neigh_create(&nd_tbl, daddr, dst->dev);
215 }
216 
217 static struct dst_ops ip6_dst_ops_template = {
218 	.family			=	AF_INET6,
219 	.gc			=	ip6_dst_gc,
220 	.gc_thresh		=	1024,
221 	.check			=	ip6_dst_check,
222 	.default_advmss		=	ip6_default_advmss,
223 	.mtu			=	ip6_mtu,
224 	.cow_metrics		=	ipv6_cow_metrics,
225 	.destroy		=	ip6_dst_destroy,
226 	.ifdown			=	ip6_dst_ifdown,
227 	.negative_advice	=	ip6_negative_advice,
228 	.link_failure		=	ip6_link_failure,
229 	.update_pmtu		=	ip6_rt_update_pmtu,
230 	.redirect		=	rt6_do_redirect,
231 	.local_out		=	__ip6_local_out,
232 	.neigh_lookup		=	ip6_neigh_lookup,
233 };
234 
ip6_blackhole_mtu(const struct dst_entry * dst)235 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
236 {
237 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
238 
239 	return mtu ? : dst->dev->mtu;
240 }
241 
/* dst_ops->update_pmtu for blackhole dsts: PMTU updates are ignored. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
	/* intentionally empty */
}
246 
/* dst_ops->redirect for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
251 
252 static struct dst_ops ip6_dst_blackhole_ops = {
253 	.family			=	AF_INET6,
254 	.destroy		=	ip6_dst_destroy,
255 	.check			=	ip6_dst_check,
256 	.mtu			=	ip6_blackhole_mtu,
257 	.default_advmss		=	ip6_default_advmss,
258 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
259 	.redirect		=	ip6_rt_blackhole_redirect,
260 	.cow_metrics		=	dst_cow_metrics_generic,
261 	.neigh_lookup		=	ip6_neigh_lookup,
262 };
263 
264 static const u32 ip6_template_metrics[RTAX_MAX] = {
265 	[RTAX_HOPLIMIT - 1] = 0,
266 };
267 
268 static const struct rt6_info ip6_null_entry_template = {
269 	.dst = {
270 		.__refcnt	= ATOMIC_INIT(1),
271 		.__use		= 1,
272 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
273 		.error		= -ENETUNREACH,
274 		.input		= ip6_pkt_discard,
275 		.output		= ip6_pkt_discard_out,
276 	},
277 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
278 	.rt6i_protocol  = RTPROT_KERNEL,
279 	.rt6i_metric	= ~(u32) 0,
280 	.rt6i_ref	= ATOMIC_INIT(1),
281 };
282 
283 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
284 
285 static const struct rt6_info ip6_prohibit_entry_template = {
286 	.dst = {
287 		.__refcnt	= ATOMIC_INIT(1),
288 		.__use		= 1,
289 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
290 		.error		= -EACCES,
291 		.input		= ip6_pkt_prohibit,
292 		.output		= ip6_pkt_prohibit_out,
293 	},
294 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
295 	.rt6i_protocol  = RTPROT_KERNEL,
296 	.rt6i_metric	= ~(u32) 0,
297 	.rt6i_ref	= ATOMIC_INIT(1),
298 };
299 
300 static const struct rt6_info ip6_blk_hole_entry_template = {
301 	.dst = {
302 		.__refcnt	= ATOMIC_INIT(1),
303 		.__use		= 1,
304 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
305 		.error		= -EINVAL,
306 		.input		= dst_discard,
307 		.output		= dst_discard_out,
308 	},
309 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
310 	.rt6i_protocol  = RTPROT_KERNEL,
311 	.rt6i_metric	= ~(u32) 0,
312 	.rt6i_ref	= ATOMIC_INIT(1),
313 };
314 
315 #endif
316 
rt6_info_init(struct rt6_info * rt)317 static void rt6_info_init(struct rt6_info *rt)
318 {
319 	struct dst_entry *dst = &rt->dst;
320 
321 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
322 	INIT_LIST_HEAD(&rt->rt6i_siblings);
323 	INIT_LIST_HEAD(&rt->rt6i_uncached);
324 }
325 
326 /* allocate dst with ip6_dst_ops */
__ip6_dst_alloc(struct net * net,struct net_device * dev,int flags)327 static struct rt6_info *__ip6_dst_alloc(struct net *net,
328 					struct net_device *dev,
329 					int flags)
330 {
331 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
332 					0, DST_OBSOLETE_FORCE_CHK, flags);
333 
334 	if (rt)
335 		rt6_info_init(rt);
336 
337 	return rt;
338 }
339 
ip6_dst_alloc(struct net * net,struct net_device * dev,int flags)340 static struct rt6_info *ip6_dst_alloc(struct net *net,
341 				      struct net_device *dev,
342 				      int flags)
343 {
344 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
345 
346 	if (rt) {
347 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
348 		if (rt->rt6i_pcpu) {
349 			int cpu;
350 
351 			for_each_possible_cpu(cpu) {
352 				struct rt6_info **p;
353 
354 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
355 				/* no one shares rt */
356 				*p =  NULL;
357 			}
358 		} else {
359 			dst_destroy((struct dst_entry *)rt);
360 			return NULL;
361 		}
362 	}
363 
364 	return rt;
365 }
366 
ip6_dst_destroy(struct dst_entry * dst)367 static void ip6_dst_destroy(struct dst_entry *dst)
368 {
369 	struct rt6_info *rt = (struct rt6_info *)dst;
370 	struct dst_entry *from = dst->from;
371 	struct inet6_dev *idev;
372 
373 	dst_destroy_metrics_generic(dst);
374 	free_percpu(rt->rt6i_pcpu);
375 	rt6_uncached_list_del(rt);
376 
377 	idev = rt->rt6i_idev;
378 	if (idev) {
379 		rt->rt6i_idev = NULL;
380 		in6_dev_put(idev);
381 	}
382 
383 	dst->from = NULL;
384 	dst_release(from);
385 }
386 
ip6_dst_ifdown(struct dst_entry * dst,struct net_device * dev,int how)387 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 			   int how)
389 {
390 	struct rt6_info *rt = (struct rt6_info *)dst;
391 	struct inet6_dev *idev = rt->rt6i_idev;
392 	struct net_device *loopback_dev =
393 		dev_net(dev)->loopback_dev;
394 
395 	if (dev != loopback_dev) {
396 		if (idev && idev->dev == dev) {
397 			struct inet6_dev *loopback_idev =
398 				in6_dev_get(loopback_dev);
399 			if (loopback_idev) {
400 				rt->rt6i_idev = loopback_idev;
401 				in6_dev_put(idev);
402 			}
403 		}
404 	}
405 }
406 
__rt6_check_expired(const struct rt6_info * rt)407 static bool __rt6_check_expired(const struct rt6_info *rt)
408 {
409 	if (rt->rt6i_flags & RTF_EXPIRES)
410 		return time_after(jiffies, rt->dst.expires);
411 	else
412 		return false;
413 }
414 
rt6_check_expired(const struct rt6_info * rt)415 static bool rt6_check_expired(const struct rt6_info *rt)
416 {
417 	if (rt->rt6i_flags & RTF_EXPIRES) {
418 		if (time_after(jiffies, rt->dst.expires))
419 			return true;
420 	} else if (rt->dst.from) {
421 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
422 	}
423 	return false;
424 }
425 
426 /* Multipath route selection:
427  *   Hash based function using packet header and flowlabel.
428  * Adapted from fib_info_hashfn()
429  */
rt6_info_hash_nhsfn(unsigned int candidate_count,const struct flowi6 * fl6)430 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
431 			       const struct flowi6 *fl6)
432 {
433 	return get_hash_from_flowi6(fl6) % candidate_count;
434 }
435 
rt6_multipath_select(struct rt6_info * match,struct flowi6 * fl6,int oif,int strict)436 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
437 					     struct flowi6 *fl6, int oif,
438 					     int strict)
439 {
440 	struct rt6_info *sibling, *next_sibling;
441 	int route_choosen;
442 
443 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
444 	/* Don't change the route, if route_choosen == 0
445 	 * (siblings does not include ourself)
446 	 */
447 	if (route_choosen)
448 		list_for_each_entry_safe(sibling, next_sibling,
449 				&match->rt6i_siblings, rt6i_siblings) {
450 			route_choosen--;
451 			if (route_choosen == 0) {
452 				if (rt6_score_route(sibling, oif, strict) < 0)
453 					break;
454 				match = sibling;
455 				break;
456 			}
457 		}
458 	return match;
459 }
460 
461 /*
462  *	Route lookup. Any table->tb6_lock is implied.
463  */
464 
rt6_device_match(struct net * net,struct rt6_info * rt,const struct in6_addr * saddr,int oif,int flags)465 static inline struct rt6_info *rt6_device_match(struct net *net,
466 						    struct rt6_info *rt,
467 						    const struct in6_addr *saddr,
468 						    int oif,
469 						    int flags)
470 {
471 	struct rt6_info *local = NULL;
472 	struct rt6_info *sprt;
473 
474 	if (!oif && ipv6_addr_any(saddr))
475 		goto out;
476 
477 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
478 		struct net_device *dev = sprt->dst.dev;
479 
480 		if (oif) {
481 			if (dev->ifindex == oif)
482 				return sprt;
483 			if (dev->flags & IFF_LOOPBACK) {
484 				if (!sprt->rt6i_idev ||
485 				    sprt->rt6i_idev->dev->ifindex != oif) {
486 					if (flags & RT6_LOOKUP_F_IFACE)
487 						continue;
488 					if (local &&
489 					    local->rt6i_idev->dev->ifindex == oif)
490 						continue;
491 				}
492 				local = sprt;
493 			}
494 		} else {
495 			if (ipv6_chk_addr(net, saddr, dev,
496 					  flags & RT6_LOOKUP_F_IFACE))
497 				return sprt;
498 		}
499 	}
500 
501 	if (oif) {
502 		if (local)
503 			return local;
504 
505 		if (flags & RT6_LOOKUP_F_IFACE)
506 			return net->ipv6.ip6_null_entry;
507 	}
508 out:
509 	return rt;
510 }
511 
512 #ifdef CONFIG_IPV6_ROUTER_PREF
513 struct __rt6_probe_work {
514 	struct work_struct work;
515 	struct in6_addr target;
516 	struct net_device *dev;
517 };
518 
rt6_probe_deferred(struct work_struct * w)519 static void rt6_probe_deferred(struct work_struct *w)
520 {
521 	struct in6_addr mcaddr;
522 	struct __rt6_probe_work *work =
523 		container_of(w, struct __rt6_probe_work, work);
524 
525 	addrconf_addr_solict_mult(&work->target, &mcaddr);
526 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
527 	dev_put(work->dev);
528 	kfree(work);
529 }
530 
rt6_probe(struct rt6_info * rt)531 static void rt6_probe(struct rt6_info *rt)
532 {
533 	struct __rt6_probe_work *work;
534 	struct neighbour *neigh;
535 	/*
536 	 * Okay, this does not seem to be appropriate
537 	 * for now, however, we need to check if it
538 	 * is really so; aka Router Reachability Probing.
539 	 *
540 	 * Router Reachability Probe MUST be rate-limited
541 	 * to no more than one per minute.
542 	 */
543 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
544 		return;
545 	rcu_read_lock_bh();
546 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
547 	if (neigh) {
548 		if (neigh->nud_state & NUD_VALID)
549 			goto out;
550 
551 		work = NULL;
552 		write_lock(&neigh->lock);
553 		if (!(neigh->nud_state & NUD_VALID) &&
554 		    time_after(jiffies,
555 			       neigh->updated +
556 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
557 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
558 			if (work)
559 				__neigh_set_probe_once(neigh);
560 		}
561 		write_unlock(&neigh->lock);
562 	} else {
563 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
564 	}
565 
566 	if (work) {
567 		INIT_WORK(&work->work, rt6_probe_deferred);
568 		work->target = rt->rt6i_gateway;
569 		dev_hold(rt->dst.dev);
570 		work->dev = rt->dst.dev;
571 		schedule_work(&work->work);
572 	}
573 
574 out:
575 	rcu_read_unlock_bh();
576 }
577 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
581 #endif
582 
583 /*
584  * Default Router Selection (RFC 2461 6.3.6)
585  */
rt6_check_dev(struct rt6_info * rt,int oif)586 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
587 {
588 	struct net_device *dev = rt->dst.dev;
589 	if (!oif || dev->ifindex == oif)
590 		return 2;
591 	if ((dev->flags & IFF_LOOPBACK) &&
592 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
593 		return 1;
594 	return 0;
595 }
596 
rt6_check_neigh(struct rt6_info * rt)597 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
598 {
599 	struct neighbour *neigh;
600 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
601 
602 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
603 	    !(rt->rt6i_flags & RTF_GATEWAY))
604 		return RT6_NUD_SUCCEED;
605 
606 	rcu_read_lock_bh();
607 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
608 	if (neigh) {
609 		read_lock(&neigh->lock);
610 		if (neigh->nud_state & NUD_VALID)
611 			ret = RT6_NUD_SUCCEED;
612 #ifdef CONFIG_IPV6_ROUTER_PREF
613 		else if (!(neigh->nud_state & NUD_FAILED))
614 			ret = RT6_NUD_SUCCEED;
615 		else
616 			ret = RT6_NUD_FAIL_PROBE;
617 #endif
618 		read_unlock(&neigh->lock);
619 	} else {
620 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
621 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
622 	}
623 	rcu_read_unlock_bh();
624 
625 	return ret;
626 }
627 
rt6_score_route(struct rt6_info * rt,int oif,int strict)628 static int rt6_score_route(struct rt6_info *rt, int oif,
629 			   int strict)
630 {
631 	int m;
632 
633 	m = rt6_check_dev(rt, oif);
634 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
635 		return RT6_NUD_FAIL_HARD;
636 #ifdef CONFIG_IPV6_ROUTER_PREF
637 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
638 #endif
639 	if (strict & RT6_LOOKUP_F_REACHABLE) {
640 		int n = rt6_check_neigh(rt);
641 		if (n < 0)
642 			return n;
643 	}
644 	return m;
645 }
646 
/* Compare @rt against the best candidate found so far.
 *
 * @mpri:  in/out, score of the current best candidate
 * @match: current best candidate (may be NULL)
 * @do_rr: out, set when a new best candidate is chosen: true if that
 *         candidate asked for round-robin (RT6_NUD_FAIL_DO_RR)
 *
 * Routes are skipped when their device has no carrier and the
 * interface is configured to ignore such routes, when they have
 * expired, or when scoring fails hard.  Returns @rt if it beats
 * *mpri, otherwise @match unchanged.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	struct net_device *dev = rt->dst.dev;
	struct inet6_dev *idev = rt->rt6i_idev;
	bool want_rr = false;
	int score;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		return match;

	if (rt6_check_expired(rt))
		return match;

	score = rt6_score_route(rt, oif, strict);
	if (score == RT6_NUD_FAIL_HARD)
		return match;

	if (score == RT6_NUD_FAIL_DO_RR) {
		want_rr = true;
		score = 0; /* lowest valid score */
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that score can be RT6_NUD_FAIL_PROBE at this point */
	if (score <= *mpri)
		return match;

	*do_rr = want_rr;
	*mpri = score;
	return rt;
}
683 
/* Find the best route on node @fn among the entries with @metric.
 *
 * The scan starts at @rr_head and runs to the end of the chain, then
 * wraps around from fn->leaf up to @rr_head.  Both passes stop at the
 * first entry with a different metric.  Only if no candidate was found
 * are the entries from the last such stopping point onwards
 * considered as well.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *best = NULL;
	struct rt6_info *rest = NULL;
	struct rt6_info *cur;
	int best_score = -1;

	/* pass 1: from the round-robin head onwards */
	cur = rr_head;
	while (cur && cur->rt6i_metric == metric) {
		best = find_match(cur, oif, strict, &best_score, best, do_rr);
		cur = cur->dst.rt6_next;
	}
	if (cur)
		rest = cur;

	/* pass 2: from the start of the leaf list up to the head */
	cur = fn->leaf;
	while (cur && cur != rr_head && cur->rt6i_metric == metric) {
		best = find_match(cur, oif, strict, &best_score, best, do_rr);
		cur = cur->dst.rt6_next;
	}
	if (cur && cur != rr_head)
		rest = cur;

	if (best || !rest)
		return best;

	/* pass 3: nothing found, try the other-metric entries */
	for (cur = rest; cur; cur = cur->dst.rt6_next)
		best = find_match(cur, oif, strict, &best_score, best, do_rr);

	return best;
}
720 
rt6_select(struct fib6_node * fn,int oif,int strict)721 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
722 {
723 	struct rt6_info *match, *rt0;
724 	struct net *net;
725 	bool do_rr = false;
726 
727 	rt0 = fn->rr_ptr;
728 	if (!rt0)
729 		fn->rr_ptr = rt0 = fn->leaf;
730 
731 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
732 			     &do_rr);
733 
734 	if (do_rr) {
735 		struct rt6_info *next = rt0->dst.rt6_next;
736 
737 		/* no entries matched; do round-robin */
738 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
739 			next = fn->leaf;
740 
741 		if (next != rt0)
742 			fn->rr_ptr = next;
743 	}
744 
745 	net = dev_net(rt0->dst.dev);
746 	return match ? match : net->ipv6.ip6_null_entry;
747 }
748 
rt6_is_gw_or_nonexthop(const struct rt6_info * rt)749 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
750 {
751 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
752 }
753 
754 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    length of the option in bytes
 * @gwaddr: address of the advertising router
 *
 * After validating the option, the matching route (default router for
 * a zero-length prefix, route-info route otherwise) is looked up.  A
 * zero lifetime deletes an existing route; a non-zero lifetime adds a
 * missing route or refreshes the preference of an existing one, and
 * then sets or clears its expiry.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 *
 * NOTE(review): @len is a signed int compared against sizeof(); the
 * check relies on callers never passing a negative length.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* a zero lifetime withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
828 #endif
829 
fib6_backtrack(struct fib6_node * fn,struct in6_addr * saddr)830 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
831 					struct in6_addr *saddr)
832 {
833 	struct fib6_node *pn;
834 	while (1) {
835 		if (fn->fn_flags & RTN_TL_ROOT)
836 			return NULL;
837 		pn = fn->parent;
838 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
839 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
840 		else
841 			fn = pn;
842 		if (fn->fn_flags & RTN_RTINFO)
843 			return fn;
844 	}
845 }
846 
ip6_pol_route_lookup(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)847 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
848 					     struct fib6_table *table,
849 					     struct flowi6 *fl6, int flags)
850 {
851 	struct fib6_node *fn;
852 	struct rt6_info *rt;
853 
854 	read_lock_bh(&table->tb6_lock);
855 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
856 restart:
857 	rt = fn->leaf;
858 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
859 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
860 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
861 	if (rt == net->ipv6.ip6_null_entry) {
862 		fn = fib6_backtrack(fn, &fl6->saddr);
863 		if (fn)
864 			goto restart;
865 	}
866 	dst_use(&rt->dst, jiffies);
867 	read_unlock_bh(&table->tb6_lock);
868 	return rt;
869 
870 }
871 
ip6_route_lookup(struct net * net,struct flowi6 * fl6,int flags)872 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
873 				    int flags)
874 {
875 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
876 }
877 EXPORT_SYMBOL_GPL(ip6_route_lookup);
878 
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,int strict)879 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
880 			    const struct in6_addr *saddr, int oif, int strict)
881 {
882 	struct flowi6 fl6 = {
883 		.flowi6_oif = oif,
884 		.daddr = *daddr,
885 	};
886 	struct dst_entry *dst;
887 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
888 
889 	if (saddr) {
890 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
891 		flags |= RT6_LOOKUP_F_HAS_SADDR;
892 	}
893 
894 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
895 	if (dst->error == 0)
896 		return (struct rt6_info *) dst;
897 
898 	dst_release(dst);
899 
900 	return NULL;
901 }
902 EXPORT_SYMBOL(rt6_lookup);
903 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, the route may be destroyed.
 */
909 
__ip6_ins_rt(struct rt6_info * rt,struct nl_info * info,struct mx6_config * mxc)910 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
911 			struct mx6_config *mxc)
912 {
913 	int err;
914 	struct fib6_table *table;
915 
916 	table = rt->rt6i_table;
917 	write_lock_bh(&table->tb6_lock);
918 	err = fib6_add(&table->tb6_root, rt, info, mxc);
919 	write_unlock_bh(&table->tb6_lock);
920 
921 	return err;
922 }
923 
ip6_ins_rt(struct rt6_info * rt)924 int ip6_ins_rt(struct rt6_info *rt)
925 {
926 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
927 	struct mx6_config mxc = { .mx = NULL, };
928 
929 	return __ip6_ins_rt(rt, &info, &mxc);
930 }
931 
ip6_rt_cache_alloc(struct rt6_info * ort,const struct in6_addr * daddr,const struct in6_addr * saddr)932 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
933 					   const struct in6_addr *daddr,
934 					   const struct in6_addr *saddr)
935 {
936 	struct rt6_info *rt;
937 
938 	/*
939 	 *	Clone the route.
940 	 */
941 
942 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
943 		ort = (struct rt6_info *)ort->dst.from;
944 
945 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
946 
947 	if (!rt)
948 		return NULL;
949 
950 	ip6_rt_copy_init(rt, ort);
951 	rt->rt6i_flags |= RTF_CACHE;
952 	rt->rt6i_metric = 0;
953 	rt->dst.flags |= DST_HOST;
954 	rt->rt6i_dst.addr = *daddr;
955 	rt->rt6i_dst.plen = 128;
956 
957 	if (!rt6_is_gw_or_nonexthop(ort)) {
958 		if (ort->rt6i_dst.plen != 128 &&
959 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
960 			rt->rt6i_flags |= RTF_ANYCAST;
961 #ifdef CONFIG_IPV6_SUBTREES
962 		if (rt->rt6i_src.plen && saddr) {
963 			rt->rt6i_src.addr = *saddr;
964 			rt->rt6i_src.plen = 128;
965 		}
966 #endif
967 	}
968 
969 	return rt;
970 }
971 
ip6_rt_pcpu_alloc(struct rt6_info * rt)972 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
973 {
974 	struct rt6_info *pcpu_rt;
975 
976 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
977 				  rt->dst.dev, rt->dst.flags);
978 
979 	if (!pcpu_rt)
980 		return NULL;
981 	ip6_rt_copy_init(pcpu_rt, rt);
982 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
983 	pcpu_rt->rt6i_flags |= RTF_PCPU;
984 	return pcpu_rt;
985 }
986 
987 /* It should be called with read_lock_bh(&tb6_lock) acquired */
rt6_get_pcpu_route(struct rt6_info * rt)988 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
989 {
990 	struct rt6_info *pcpu_rt, **p;
991 
992 	p = this_cpu_ptr(rt->rt6i_pcpu);
993 	pcpu_rt = *p;
994 
995 	if (pcpu_rt) {
996 		dst_hold(&pcpu_rt->dst);
997 		rt6_dst_from_metrics_check(pcpu_rt);
998 	}
999 	return pcpu_rt;
1000 }
1001 
rt6_make_pcpu_route(struct rt6_info * rt)1002 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1003 {
1004 	struct fib6_table *table = rt->rt6i_table;
1005 	struct rt6_info *pcpu_rt, *prev, **p;
1006 
1007 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1008 	if (!pcpu_rt) {
1009 		struct net *net = dev_net(rt->dst.dev);
1010 
1011 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1012 		return net->ipv6.ip6_null_entry;
1013 	}
1014 
1015 	read_lock_bh(&table->tb6_lock);
1016 	if (rt->rt6i_pcpu) {
1017 		p = this_cpu_ptr(rt->rt6i_pcpu);
1018 		prev = cmpxchg(p, NULL, pcpu_rt);
1019 		if (prev) {
1020 			/* If someone did it before us, return prev instead */
1021 			dst_destroy(&pcpu_rt->dst);
1022 			pcpu_rt = prev;
1023 		}
1024 	} else {
1025 		/* rt has been removed from the fib6 tree
1026 		 * before we have a chance to acquire the read_lock.
1027 		 * In this case, don't brother to create a pcpu rt
1028 		 * since rt is going away anyway.  The next
1029 		 * dst_check() will trigger a re-lookup.
1030 		 */
1031 		dst_destroy(&pcpu_rt->dst);
1032 		pcpu_rt = rt;
1033 	}
1034 	dst_hold(&pcpu_rt->dst);
1035 	rt6_dst_from_metrics_check(pcpu_rt);
1036 	read_unlock_bh(&table->tb6_lock);
1037 	return pcpu_rt;
1038 }
1039 
ip6_pol_route(struct net * net,struct fib6_table * table,int oif,struct flowi6 * fl6,int flags)1040 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1041 				      struct flowi6 *fl6, int flags)
1042 {
1043 	struct fib6_node *fn, *saved_fn;
1044 	struct rt6_info *rt;
1045 	int strict = 0;
1046 
1047 	strict |= flags & RT6_LOOKUP_F_IFACE;
1048 	if (net->ipv6.devconf_all->forwarding == 0)
1049 		strict |= RT6_LOOKUP_F_REACHABLE;
1050 
1051 	read_lock_bh(&table->tb6_lock);
1052 
1053 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1054 	saved_fn = fn;
1055 
1056 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1057 		oif = 0;
1058 
1059 redo_rt6_select:
1060 	rt = rt6_select(fn, oif, strict);
1061 	if (rt->rt6i_nsiblings)
1062 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1063 	if (rt == net->ipv6.ip6_null_entry) {
1064 		fn = fib6_backtrack(fn, &fl6->saddr);
1065 		if (fn)
1066 			goto redo_rt6_select;
1067 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1068 			/* also consider unreachable route */
1069 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1070 			fn = saved_fn;
1071 			goto redo_rt6_select;
1072 		}
1073 	}
1074 
1075 
1076 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1077 		dst_use(&rt->dst, jiffies);
1078 		read_unlock_bh(&table->tb6_lock);
1079 
1080 		rt6_dst_from_metrics_check(rt);
1081 		return rt;
1082 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1083 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1084 		/* Create a RTF_CACHE clone which will not be
1085 		 * owned by the fib6 tree.  It is for the special case where
1086 		 * the daddr in the skb during the neighbor look-up is different
1087 		 * from the fl6->daddr used to look-up route here.
1088 		 */
1089 
1090 		struct rt6_info *uncached_rt;
1091 
1092 		dst_use(&rt->dst, jiffies);
1093 		read_unlock_bh(&table->tb6_lock);
1094 
1095 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1096 		dst_release(&rt->dst);
1097 
1098 		if (uncached_rt)
1099 			rt6_uncached_list_add(uncached_rt);
1100 		else
1101 			uncached_rt = net->ipv6.ip6_null_entry;
1102 
1103 		dst_hold(&uncached_rt->dst);
1104 		return uncached_rt;
1105 
1106 	} else {
1107 		/* Get a percpu copy */
1108 
1109 		struct rt6_info *pcpu_rt;
1110 
1111 		rt->dst.lastuse = jiffies;
1112 		rt->dst.__use++;
1113 		pcpu_rt = rt6_get_pcpu_route(rt);
1114 
1115 		if (pcpu_rt) {
1116 			read_unlock_bh(&table->tb6_lock);
1117 		} else {
1118 			/* We have to do the read_unlock first
1119 			 * because rt6_make_pcpu_route() may trigger
1120 			 * ip6_dst_gc() which will take the write_lock.
1121 			 */
1122 			dst_hold(&rt->dst);
1123 			read_unlock_bh(&table->tb6_lock);
1124 			pcpu_rt = rt6_make_pcpu_route(rt);
1125 			dst_release(&rt->dst);
1126 		}
1127 
1128 		return pcpu_rt;
1129 
1130 	}
1131 }
1132 
ip6_pol_route_input(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1133 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1134 					    struct flowi6 *fl6, int flags)
1135 {
1136 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1137 }
1138 
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,int flags)1139 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1140 						struct net_device *dev,
1141 						struct flowi6 *fl6, int flags)
1142 {
1143 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1144 		flags |= RT6_LOOKUP_F_IFACE;
1145 
1146 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1147 }
1148 
ip6_route_input(struct sk_buff * skb)1149 void ip6_route_input(struct sk_buff *skb)
1150 {
1151 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1152 	struct net *net = dev_net(skb->dev);
1153 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1154 	struct ip_tunnel_info *tun_info;
1155 	struct flowi6 fl6 = {
1156 		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1157 		.daddr = iph->daddr,
1158 		.saddr = iph->saddr,
1159 		.flowlabel = ip6_flowinfo(iph),
1160 		.flowi6_mark = skb->mark,
1161 		.flowi6_proto = iph->nexthdr,
1162 	};
1163 
1164 	tun_info = skb_tunnel_info(skb);
1165 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1166 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1167 	skb_dst_drop(skb);
1168 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1169 }
1170 
ip6_pol_route_output(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1171 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1172 					     struct flowi6 *fl6, int flags)
1173 {
1174 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1175 }
1176 
ip6_route_output_flags(struct net * net,const struct sock * sk,struct flowi6 * fl6,int flags)1177 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1178 					 struct flowi6 *fl6, int flags)
1179 {
1180 	struct dst_entry *dst;
1181 	bool any_src;
1182 
1183 	dst = l3mdev_rt6_dst_by_oif(net, fl6);
1184 	if (dst)
1185 		return dst;
1186 
1187 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1188 
1189 	any_src = ipv6_addr_any(&fl6->saddr);
1190 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1191 	    (fl6->flowi6_oif && any_src))
1192 		flags |= RT6_LOOKUP_F_IFACE;
1193 
1194 	if (!any_src)
1195 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1196 	else if (sk)
1197 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1198 
1199 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1200 }
1201 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1202 
ip6_blackhole_route(struct net * net,struct dst_entry * dst_orig)1203 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1204 {
1205 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1206 	struct dst_entry *new = NULL;
1207 
1208 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1209 	if (rt) {
1210 		rt6_info_init(rt);
1211 
1212 		new = &rt->dst;
1213 		new->__use = 1;
1214 		new->input = dst_discard;
1215 		new->output = dst_discard_out;
1216 
1217 		dst_copy_metrics(new, &ort->dst);
1218 		rt->rt6i_idev = ort->rt6i_idev;
1219 		if (rt->rt6i_idev)
1220 			in6_dev_hold(rt->rt6i_idev);
1221 
1222 		rt->rt6i_gateway = ort->rt6i_gateway;
1223 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1224 		rt->rt6i_metric = 0;
1225 
1226 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1227 #ifdef CONFIG_IPV6_SUBTREES
1228 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1229 #endif
1230 
1231 		dst_free(new);
1232 	}
1233 
1234 	dst_release(dst_orig);
1235 	return new ? new : ERR_PTR(-ENOMEM);
1236 }
1237 
1238 /*
1239  *	Destination cache support functions
1240  */
1241 
rt6_dst_from_metrics_check(struct rt6_info * rt)1242 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1243 {
1244 	if (rt->dst.from &&
1245 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1246 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1247 }
1248 
/* Validate @rt against @cookie: it must still have a fib6 node whose
 * serial number equals the cookie, and it must not be expired.
 * Returns &rt->dst when valid, NULL otherwise.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	bool stale = !rt->rt6i_node || rt->rt6i_node->fn_sernum != cookie;

	if (stale || rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}
1259 
/* Validate a dst that was derived from another one: @rt itself must not
 * be expired and must still be marked DST_OBSOLETE_FORCE_CHK, and the
 * dst it came from must pass rt6_check() for @cookie.
 * Returns &rt->dst when valid, NULL otherwise.
 */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (__rt6_check_expired(rt))
		return NULL;

	if (rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK)
		return NULL;

	if (!rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return NULL;

	return &rt->dst;
}
1269 
/* dst validation hook.
 *
 * All IPV6 dsts are created with ->obsolete set to the value
 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down into this
 * function always.  Per-cpu copies and uncached clones that have a
 * parent are validated through that parent; everything else is checked
 * directly.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	bool via_parent;

	rt6_dst_from_metrics_check(rt);

	via_parent = rt->rt6i_flags & RTF_PCPU ||
		     (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from);

	if (via_parent)
		return rt6_dst_from_check(rt, cookie);

	return rt6_check(rt, cookie);
}
1289 
ip6_negative_advice(struct dst_entry * dst)1290 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1291 {
1292 	struct rt6_info *rt = (struct rt6_info *) dst;
1293 
1294 	if (rt) {
1295 		if (rt->rt6i_flags & RTF_CACHE) {
1296 			if (rt6_check_expired(rt)) {
1297 				ip6_del_rt(rt);
1298 				dst = NULL;
1299 			}
1300 		} else {
1301 			dst_release(dst);
1302 			dst = NULL;
1303 		}
1304 	}
1305 	return dst;
1306 }
1307 
ip6_link_failure(struct sk_buff * skb)1308 static void ip6_link_failure(struct sk_buff *skb)
1309 {
1310 	struct rt6_info *rt;
1311 
1312 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1313 
1314 	rt = (struct rt6_info *) skb_dst(skb);
1315 	if (rt) {
1316 		if (rt->rt6i_flags & RTF_CACHE) {
1317 			dst_hold(&rt->dst);
1318 			ip6_del_rt(rt);
1319 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1320 			rt->rt6i_node->fn_sernum = -1;
1321 		}
1322 	}
1323 }
1324 
/* Record @mtu as the path MTU of @rt, flag the route as modified and
 * (re)arm its expiry with the netns ip6_rt_mtu_expires sysctl value.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);
	int timeout = net->ipv6.sysctl.ip6_rt_mtu_expires;

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, timeout);
}
1333 
rt6_cache_allowed_for_pmtu(const struct rt6_info * rt)1334 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1335 {
1336 	return !(rt->rt6i_flags & RTF_CACHE) &&
1337 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1338 }
1339 
/* Apply a smaller path MTU to @dst.
 *
 * Local routes are ignored.  @mtu is raised to at least IPV6_MIN_MTU
 * and nothing happens unless it is below the current dst MTU.  The new
 * value is written either directly into the route or into a freshly
 * inserted RTF_CACHE clone keyed by the addresses taken from @iph or,
 * failing that, from @sk.  With neither available the update is dropped.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *clone;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
		return;
	}

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		return;
	}

	clone = ip6_rt_cache_alloc(rt6, daddr, saddr);
	if (!clone)
		return;

	rt6_do_update_pmtu(clone, mtu);

	/* ip6_ins_rt(clone) will bump the rt6->rt6i_node->fn_sernum
	 * which will fail the next rt6_check() and invalidate the
	 * sk->sk_dst_cache.
	 */
	ip6_ins_rt(clone);
}
1381 
/* PMTU update hook: forwards to __ip6_rt_update_pmtu() with the IPv6
 * header of @skb, or NULL when no skb was supplied.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	const struct ipv6hdr *iph = NULL;

	if (skb)
		iph = ipv6_hdr(skb);

	__ip6_rt_update_pmtu(dst, sk, iph, mtu);
}
1387 
/* Update the path MTU for the flow described by the IPv6 header found
 * at skb->data.  @mtu is in network byte order.  When @mark is zero the
 * reply mark derived from skb->mark is used instead.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	if (mark)
		fl6.flowi6_mark = mark;
	else
		fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	/* Only touch routes that resolved without error. */
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1408 
/* Socket flavour of ip6_update_pmtu(): the netns, bound device and mark
 * all come from @sk.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct net *net = sock_net(sk);

	ip6_update_pmtu(skb, net, mtu, sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1415 
1416 /* Handle redirects */
1417 struct ip6rd_flowi {
1418 	struct flowi6 fl6;
1419 	struct in6_addr gateway;
1420 };
1421 
__ip6_route_redirect(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1422 static struct rt6_info *__ip6_route_redirect(struct net *net,
1423 					     struct fib6_table *table,
1424 					     struct flowi6 *fl6,
1425 					     int flags)
1426 {
1427 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1428 	struct rt6_info *rt;
1429 	struct fib6_node *fn;
1430 
1431 	/* Get the "current" route for this destination and
1432 	 * check if the redirect has come from approriate router.
1433 	 *
1434 	 * RFC 4861 specifies that redirects should only be
1435 	 * accepted if they come from the nexthop to the target.
1436 	 * Due to the way the routes are chosen, this notion
1437 	 * is a bit fuzzy and one might need to check all possible
1438 	 * routes.
1439 	 */
1440 
1441 	read_lock_bh(&table->tb6_lock);
1442 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1443 restart:
1444 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1445 		if (rt6_check_expired(rt))
1446 			continue;
1447 		if (rt->dst.error)
1448 			break;
1449 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1450 			continue;
1451 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1452 			continue;
1453 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1454 			continue;
1455 		break;
1456 	}
1457 
1458 	if (!rt)
1459 		rt = net->ipv6.ip6_null_entry;
1460 	else if (rt->dst.error) {
1461 		rt = net->ipv6.ip6_null_entry;
1462 		goto out;
1463 	}
1464 
1465 	if (rt == net->ipv6.ip6_null_entry) {
1466 		fn = fib6_backtrack(fn, &fl6->saddr);
1467 		if (fn)
1468 			goto restart;
1469 	}
1470 
1471 out:
1472 	dst_hold(&rt->dst);
1473 
1474 	read_unlock_bh(&table->tb6_lock);
1475 
1476 	return rt;
1477 };
1478 
ip6_route_redirect(struct net * net,const struct flowi6 * fl6,const struct in6_addr * gateway)1479 static struct dst_entry *ip6_route_redirect(struct net *net,
1480 					const struct flowi6 *fl6,
1481 					const struct in6_addr *gateway)
1482 {
1483 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1484 	struct ip6rd_flowi rdfl;
1485 
1486 	rdfl.fl6 = *fl6;
1487 	rdfl.gateway = *gateway;
1488 
1489 	return fib6_rule_lookup(net, &rdfl.fl6,
1490 				flags, __ip6_route_redirect);
1491 }
1492 
/* Process a redirect for the flow described by the IPv6 header found at
 * skb->data.  The source address of the skb's network header is used as
 * the gateway for the route lookup.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *inner = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = inner->daddr;
	fl6.saddr = inner->saddr;
	fl6.flowlabel = ip6_flowinfo(inner);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1512 
/* Process a redirect using the redirect message itself for the flow:
 * the flow destination is the message's dest field and the flow source
 * is the destination address of the skb's IPv6 header.  The header's
 * source address is used as the gateway for the route lookup.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct rd_msg *rd = (struct rd_msg *)icmp6_hdr(skb);
	const struct ipv6hdr *outer = ipv6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = rd->dest;
	fl6.saddr = outer->daddr;

	dst = ip6_route_redirect(net, &fl6, &outer->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1532 
ip6_sk_redirect(struct sk_buff * skb,struct sock * sk)1533 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1534 {
1535 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1536 }
1537 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1538 
ip6_default_advmss(const struct dst_entry * dst)1539 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1540 {
1541 	struct net_device *dev = dst->dev;
1542 	unsigned int mtu = dst_mtu(dst);
1543 	struct net *net = dev_net(dev);
1544 
1545 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1546 
1547 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1548 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1549 
1550 	/*
1551 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1552 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1553 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1554 	 * rely only on pmtu discovery"
1555 	 */
1556 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1557 		mtu = IPV6_MAXPLEN;
1558 	return mtu;
1559 }
1560 
ip6_mtu(const struct dst_entry * dst)1561 static unsigned int ip6_mtu(const struct dst_entry *dst)
1562 {
1563 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1564 	unsigned int mtu = rt->rt6i_pmtu;
1565 	struct inet6_dev *idev;
1566 
1567 	if (mtu)
1568 		goto out;
1569 
1570 	mtu = dst_metric_raw(dst, RTAX_MTU);
1571 	if (mtu)
1572 		goto out;
1573 
1574 	mtu = IPV6_MIN_MTU;
1575 
1576 	rcu_read_lock();
1577 	idev = __in6_dev_get(dst->dev);
1578 	if (idev)
1579 		mtu = idev->cnf.mtu6;
1580 	rcu_read_unlock();
1581 
1582 out:
1583 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1584 }
1585 
/* Singly linked list (through dst->next) of the dsts handed out by
 * icmp6_dst_alloc(), and the lock protecting that list.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
1588 
icmp6_dst_alloc(struct net_device * dev,struct flowi6 * fl6)1589 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1590 				  struct flowi6 *fl6)
1591 {
1592 	struct dst_entry *dst;
1593 	struct rt6_info *rt;
1594 	struct inet6_dev *idev = in6_dev_get(dev);
1595 	struct net *net = dev_net(dev);
1596 
1597 	if (unlikely(!idev))
1598 		return ERR_PTR(-ENODEV);
1599 
1600 	rt = ip6_dst_alloc(net, dev, 0);
1601 	if (unlikely(!rt)) {
1602 		in6_dev_put(idev);
1603 		dst = ERR_PTR(-ENOMEM);
1604 		goto out;
1605 	}
1606 
1607 	rt->dst.flags |= DST_HOST;
1608 	rt->dst.output  = ip6_output;
1609 	atomic_set(&rt->dst.__refcnt, 1);
1610 	rt->rt6i_gateway  = fl6->daddr;
1611 	rt->rt6i_dst.addr = fl6->daddr;
1612 	rt->rt6i_dst.plen = 128;
1613 	rt->rt6i_idev     = idev;
1614 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1615 
1616 	spin_lock_bh(&icmp6_dst_lock);
1617 	rt->dst.next = icmp6_dst_gc_list;
1618 	icmp6_dst_gc_list = &rt->dst;
1619 	spin_unlock_bh(&icmp6_dst_lock);
1620 
1621 	fib6_force_start_gc(net);
1622 
1623 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1624 
1625 out:
1626 	return dst;
1627 }
1628 
icmp6_dst_gc(void)1629 int icmp6_dst_gc(void)
1630 {
1631 	struct dst_entry *dst, **pprev;
1632 	int more = 0;
1633 
1634 	spin_lock_bh(&icmp6_dst_lock);
1635 	pprev = &icmp6_dst_gc_list;
1636 
1637 	while ((dst = *pprev) != NULL) {
1638 		if (!atomic_read(&dst->__refcnt)) {
1639 			*pprev = dst->next;
1640 			dst_free(dst);
1641 		} else {
1642 			pprev = &dst->next;
1643 			++more;
1644 		}
1645 	}
1646 
1647 	spin_unlock_bh(&icmp6_dst_lock);
1648 
1649 	return more;
1650 }
1651 
icmp6_clean_all(int (* func)(struct rt6_info * rt,void * arg),void * arg)1652 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1653 			    void *arg)
1654 {
1655 	struct dst_entry *dst, **pprev;
1656 
1657 	spin_lock_bh(&icmp6_dst_lock);
1658 	pprev = &icmp6_dst_gc_list;
1659 	while ((dst = *pprev) != NULL) {
1660 		struct rt6_info *rt = (struct rt6_info *) dst;
1661 		if (func(rt, arg)) {
1662 			*pprev = dst->next;
1663 			dst_free(dst);
1664 		} else {
1665 			pprev = &dst->next;
1666 		}
1667 	}
1668 	spin_unlock_bh(&icmp6_dst_lock);
1669 }
1670 
ip6_dst_gc(struct dst_ops * ops)1671 static int ip6_dst_gc(struct dst_ops *ops)
1672 {
1673 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1674 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1675 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1676 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1677 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1678 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1679 	int entries;
1680 
1681 	entries = dst_entries_get_fast(ops);
1682 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1683 	    entries <= rt_max_size)
1684 		goto out;
1685 
1686 	net->ipv6.ip6_rt_gc_expire++;
1687 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1688 	entries = dst_entries_get_slow(ops);
1689 	if (entries < ops->gc_thresh)
1690 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1691 out:
1692 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1693 	return entries > rt_max_size;
1694 }
1695 
ip6_convert_metrics(struct mx6_config * mxc,const struct fib6_config * cfg)1696 static int ip6_convert_metrics(struct mx6_config *mxc,
1697 			       const struct fib6_config *cfg)
1698 {
1699 	bool ecn_ca = false;
1700 	struct nlattr *nla;
1701 	int remaining;
1702 	u32 *mp;
1703 
1704 	if (!cfg->fc_mx)
1705 		return 0;
1706 
1707 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1708 	if (unlikely(!mp))
1709 		return -ENOMEM;
1710 
1711 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1712 		int type = nla_type(nla);
1713 		u32 val;
1714 
1715 		if (!type)
1716 			continue;
1717 		if (unlikely(type > RTAX_MAX))
1718 			goto err;
1719 
1720 		if (type == RTAX_CC_ALGO) {
1721 			char tmp[TCP_CA_NAME_MAX];
1722 
1723 			nla_strlcpy(tmp, nla, sizeof(tmp));
1724 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1725 			if (val == TCP_CA_UNSPEC)
1726 				goto err;
1727 		} else {
1728 			val = nla_get_u32(nla);
1729 		}
1730 		if (type == RTAX_HOPLIMIT && val > 255)
1731 			val = 255;
1732 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1733 			goto err;
1734 
1735 		mp[type - 1] = val;
1736 		__set_bit(type - 1, mxc->mx_valid);
1737 	}
1738 
1739 	if (ecn_ca) {
1740 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1741 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1742 	}
1743 
1744 	mxc->mx = mp;
1745 	return 0;
1746  err:
1747 	kfree(mp);
1748 	return -EINVAL;
1749 }
1750 
ip6_route_info_create(struct fib6_config * cfg)1751 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1752 {
1753 	struct net *net = cfg->fc_nlinfo.nl_net;
1754 	struct rt6_info *rt = NULL;
1755 	struct net_device *dev = NULL;
1756 	struct inet6_dev *idev = NULL;
1757 	struct fib6_table *table;
1758 	int addr_type;
1759 	int err = -EINVAL;
1760 
1761 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1762 		goto out;
1763 #ifndef CONFIG_IPV6_SUBTREES
1764 	if (cfg->fc_src_len)
1765 		goto out;
1766 #endif
1767 	if (cfg->fc_ifindex) {
1768 		err = -ENODEV;
1769 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1770 		if (!dev)
1771 			goto out;
1772 		idev = in6_dev_get(dev);
1773 		if (!idev)
1774 			goto out;
1775 	}
1776 
1777 	if (cfg->fc_metric == 0)
1778 		cfg->fc_metric = IP6_RT_PRIO_USER;
1779 
1780 	err = -ENOBUFS;
1781 	if (cfg->fc_nlinfo.nlh &&
1782 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1783 		table = fib6_get_table(net, cfg->fc_table);
1784 		if (!table) {
1785 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1786 			table = fib6_new_table(net, cfg->fc_table);
1787 		}
1788 	} else {
1789 		table = fib6_new_table(net, cfg->fc_table);
1790 	}
1791 
1792 	if (!table)
1793 		goto out;
1794 
1795 	rt = ip6_dst_alloc(net, NULL,
1796 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1797 
1798 	if (!rt) {
1799 		err = -ENOMEM;
1800 		goto out;
1801 	}
1802 
1803 	if (cfg->fc_flags & RTF_EXPIRES)
1804 		rt6_set_expires(rt, jiffies +
1805 				clock_t_to_jiffies(cfg->fc_expires));
1806 	else
1807 		rt6_clean_expires(rt);
1808 
1809 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1810 		cfg->fc_protocol = RTPROT_BOOT;
1811 	rt->rt6i_protocol = cfg->fc_protocol;
1812 
1813 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1814 
1815 	if (addr_type & IPV6_ADDR_MULTICAST)
1816 		rt->dst.input = ip6_mc_input;
1817 	else if (cfg->fc_flags & RTF_LOCAL)
1818 		rt->dst.input = ip6_input;
1819 	else
1820 		rt->dst.input = ip6_forward;
1821 
1822 	rt->dst.output = ip6_output;
1823 
1824 	if (cfg->fc_encap) {
1825 		struct lwtunnel_state *lwtstate;
1826 
1827 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1828 					   cfg->fc_encap, AF_INET6, cfg,
1829 					   &lwtstate);
1830 		if (err)
1831 			goto out;
1832 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1833 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1834 			rt->dst.lwtstate->orig_output = rt->dst.output;
1835 			rt->dst.output = lwtunnel_output;
1836 		}
1837 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1838 			rt->dst.lwtstate->orig_input = rt->dst.input;
1839 			rt->dst.input = lwtunnel_input;
1840 		}
1841 	}
1842 
1843 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1844 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1845 	if (rt->rt6i_dst.plen == 128)
1846 		rt->dst.flags |= DST_HOST;
1847 
1848 #ifdef CONFIG_IPV6_SUBTREES
1849 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1850 	rt->rt6i_src.plen = cfg->fc_src_len;
1851 #endif
1852 
1853 	rt->rt6i_metric = cfg->fc_metric;
1854 
1855 	/* We cannot add true routes via loopback here,
1856 	   they would result in kernel looping; promote them to reject routes
1857 	 */
1858 	if ((cfg->fc_flags & RTF_REJECT) ||
1859 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1860 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1861 	     !(cfg->fc_flags & RTF_LOCAL))) {
1862 		/* hold loopback dev/idev if we haven't done so. */
1863 		if (dev != net->loopback_dev) {
1864 			if (dev) {
1865 				dev_put(dev);
1866 				in6_dev_put(idev);
1867 			}
1868 			dev = net->loopback_dev;
1869 			dev_hold(dev);
1870 			idev = in6_dev_get(dev);
1871 			if (!idev) {
1872 				err = -ENODEV;
1873 				goto out;
1874 			}
1875 		}
1876 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1877 		switch (cfg->fc_type) {
1878 		case RTN_BLACKHOLE:
1879 			rt->dst.error = -EINVAL;
1880 			rt->dst.output = dst_discard_out;
1881 			rt->dst.input = dst_discard;
1882 			break;
1883 		case RTN_PROHIBIT:
1884 			rt->dst.error = -EACCES;
1885 			rt->dst.output = ip6_pkt_prohibit_out;
1886 			rt->dst.input = ip6_pkt_prohibit;
1887 			break;
1888 		case RTN_THROW:
1889 		case RTN_UNREACHABLE:
1890 		default:
1891 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1892 					: (cfg->fc_type == RTN_UNREACHABLE)
1893 					? -EHOSTUNREACH : -ENETUNREACH;
1894 			rt->dst.output = ip6_pkt_discard_out;
1895 			rt->dst.input = ip6_pkt_discard;
1896 			break;
1897 		}
1898 		goto install_route;
1899 	}
1900 
1901 	if (cfg->fc_flags & RTF_GATEWAY) {
1902 		const struct in6_addr *gw_addr;
1903 		int gwa_type;
1904 
1905 		gw_addr = &cfg->fc_gateway;
1906 		gwa_type = ipv6_addr_type(gw_addr);
1907 
1908 		/* if gw_addr is local we will fail to detect this in case
1909 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1910 		 * will return already-added prefix route via interface that
1911 		 * prefix route was assigned to, which might be non-loopback.
1912 		 */
1913 		err = -EINVAL;
1914 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1915 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1916 					    dev : NULL, 0, 0))
1917 			goto out;
1918 
1919 		rt->rt6i_gateway = *gw_addr;
1920 
1921 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1922 			struct rt6_info *grt;
1923 
1924 			/* IPv6 strictly inhibits using not link-local
1925 			   addresses as nexthop address.
1926 			   Otherwise, router will not able to send redirects.
1927 			   It is very good, but in some (rare!) circumstances
1928 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1929 			   some exceptions. --ANK
1930 			 */
1931 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1932 				goto out;
1933 
1934 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1935 
1936 			err = -EHOSTUNREACH;
1937 			if (!grt)
1938 				goto out;
1939 			if (dev) {
1940 				if (dev != grt->dst.dev) {
1941 					ip6_rt_put(grt);
1942 					goto out;
1943 				}
1944 			} else {
1945 				dev = grt->dst.dev;
1946 				idev = grt->rt6i_idev;
1947 				dev_hold(dev);
1948 				in6_dev_hold(grt->rt6i_idev);
1949 			}
1950 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1951 				err = 0;
1952 			ip6_rt_put(grt);
1953 
1954 			if (err)
1955 				goto out;
1956 		}
1957 		err = -EINVAL;
1958 		if (!dev || (dev->flags & IFF_LOOPBACK))
1959 			goto out;
1960 	}
1961 
1962 	err = -ENODEV;
1963 	if (!dev)
1964 		goto out;
1965 
1966 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1967 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1968 			err = -EINVAL;
1969 			goto out;
1970 		}
1971 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1972 		rt->rt6i_prefsrc.plen = 128;
1973 	} else
1974 		rt->rt6i_prefsrc.plen = 0;
1975 
1976 	rt->rt6i_flags = cfg->fc_flags;
1977 
1978 install_route:
1979 	rt->dst.dev = dev;
1980 	rt->rt6i_idev = idev;
1981 	rt->rt6i_table = table;
1982 
1983 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1984 
1985 	return rt;
1986 out:
1987 	if (dev)
1988 		dev_put(dev);
1989 	if (idev)
1990 		in6_dev_put(idev);
1991 	if (rt)
1992 		dst_free(&rt->dst);
1993 
1994 	return ERR_PTR(err);
1995 }
1996 
ip6_route_add(struct fib6_config * cfg)1997 int ip6_route_add(struct fib6_config *cfg)
1998 {
1999 	struct mx6_config mxc = { .mx = NULL, };
2000 	struct rt6_info *rt;
2001 	int err;
2002 
2003 	rt = ip6_route_info_create(cfg);
2004 	if (IS_ERR(rt)) {
2005 		err = PTR_ERR(rt);
2006 		rt = NULL;
2007 		goto out;
2008 	}
2009 
2010 	err = ip6_convert_metrics(&mxc, cfg);
2011 	if (err)
2012 		goto out;
2013 
2014 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2015 
2016 	kfree(mxc.mx);
2017 
2018 	return err;
2019 out:
2020 	if (rt)
2021 		dst_free(&rt->dst);
2022 
2023 	return err;
2024 }
2025 
__ip6_del_rt(struct rt6_info * rt,struct nl_info * info)2026 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2027 {
2028 	int err;
2029 	struct fib6_table *table;
2030 	struct net *net = dev_net(rt->dst.dev);
2031 
2032 	if (rt == net->ipv6.ip6_null_entry ||
2033 	    rt->dst.flags & DST_NOCACHE) {
2034 		err = -ENOENT;
2035 		goto out;
2036 	}
2037 
2038 	table = rt->rt6i_table;
2039 	write_lock_bh(&table->tb6_lock);
2040 	err = fib6_del(rt, info);
2041 	write_unlock_bh(&table->tb6_lock);
2042 
2043 out:
2044 	ip6_rt_put(rt);
2045 	return err;
2046 }
2047 
ip6_del_rt(struct rt6_info * rt)2048 int ip6_del_rt(struct rt6_info *rt)
2049 {
2050 	struct nl_info info = {
2051 		.nl_net = dev_net(rt->dst.dev),
2052 	};
2053 	return __ip6_del_rt(rt, &info);
2054 }
2055 
ip6_route_del(struct fib6_config * cfg)2056 static int ip6_route_del(struct fib6_config *cfg)
2057 {
2058 	struct fib6_table *table;
2059 	struct fib6_node *fn;
2060 	struct rt6_info *rt;
2061 	int err = -ESRCH;
2062 
2063 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2064 	if (!table)
2065 		return err;
2066 
2067 	read_lock_bh(&table->tb6_lock);
2068 
2069 	fn = fib6_locate(&table->tb6_root,
2070 			 &cfg->fc_dst, cfg->fc_dst_len,
2071 			 &cfg->fc_src, cfg->fc_src_len);
2072 
2073 	if (fn) {
2074 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2075 			if ((rt->rt6i_flags & RTF_CACHE) &&
2076 			    !(cfg->fc_flags & RTF_CACHE))
2077 				continue;
2078 			if (cfg->fc_ifindex &&
2079 			    (!rt->dst.dev ||
2080 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2081 				continue;
2082 			if (cfg->fc_flags & RTF_GATEWAY &&
2083 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2084 				continue;
2085 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2086 				continue;
2087 			dst_hold(&rt->dst);
2088 			read_unlock_bh(&table->tb6_lock);
2089 
2090 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2091 		}
2092 	}
2093 	read_unlock_bh(&table->tb6_lock);
2094 
2095 	return err;
2096 }
2097 
/*
 * Process an ICMPv6 Redirect carried in @skb for the route @dst.
 *
 * The message is validated (length, non-multicast destination, target
 * either equal to the destination or link-local unicast, redirects
 * accepted on the receiving interface, well-formed ND options, @dst not
 * a reject route).  When accepted, the neighbour entry for the target is
 * updated, a cache route for the redirected destination is allocated
 * from @dst and inserted, NETEVENT_REDIRECT is raised, and - if @dst was
 * itself a cache entry - the old entry is deleted.
 *
 * @sk is not referenced by this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes left for ND options once the fixed rd_msg part is removed. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/*
	 * target == destination means the destination itself is on-link;
	 * otherwise the target must be a link-local unicast address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	/* Ignore redirects when forwarding or when they are administratively disabled. */
	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* The target link-layer address option is optional; lladdr stays NULL without it. */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/*
	 * NOTE(review): the trailing 1 presumably asks __neigh_lookup() to
	 * create the entry when missing - confirm against its definition.
	 * The reference obtained here is dropped at "out".
	 */
	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* The router flags are only forced when the target is a router (not on-link). */
	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Build the cache route for the redirected destination from the current route. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* The new next hop is the redirect target, i.e. the neighbour's key. */
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/*
	 * A cached entry is superseded by the new one and removed.  An extra
	 * reference is taken first; NOTE(review): presumably because the
	 * delete path releases one reference on the route - confirm.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2213 
2214 /*
2215  *	Misc support functions
2216  */
2217 
rt6_set_from(struct rt6_info * rt,struct rt6_info * from)2218 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2219 {
2220 	BUG_ON(from->dst.from);
2221 
2222 	rt->rt6i_flags &= ~RTF_EXPIRES;
2223 	dst_hold(&from->dst);
2224 	rt->dst.from = &from->dst;
2225 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2226 }
2227 
ip6_rt_copy_init(struct rt6_info * rt,struct rt6_info * ort)2228 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2229 {
2230 	rt->dst.input = ort->dst.input;
2231 	rt->dst.output = ort->dst.output;
2232 	rt->rt6i_dst = ort->rt6i_dst;
2233 	rt->dst.error = ort->dst.error;
2234 	rt->rt6i_idev = ort->rt6i_idev;
2235 	if (rt->rt6i_idev)
2236 		in6_dev_hold(rt->rt6i_idev);
2237 	rt->dst.lastuse = jiffies;
2238 	rt->rt6i_gateway = ort->rt6i_gateway;
2239 	rt->rt6i_flags = ort->rt6i_flags;
2240 	rt6_set_from(rt, ort);
2241 	rt->rt6i_metric = ort->rt6i_metric;
2242 #ifdef CONFIG_IPV6_SUBTREES
2243 	rt->rt6i_src = ort->rt6i_src;
2244 #endif
2245 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2246 	rt->rt6i_table = ort->rt6i_table;
2247 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2248 }
2249 
2250 #ifdef CONFIG_IPV6_ROUTE_INFO
rt6_get_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,int ifindex)2251 static struct rt6_info *rt6_get_route_info(struct net *net,
2252 					   const struct in6_addr *prefix, int prefixlen,
2253 					   const struct in6_addr *gwaddr, int ifindex)
2254 {
2255 	struct fib6_node *fn;
2256 	struct rt6_info *rt = NULL;
2257 	struct fib6_table *table;
2258 
2259 	table = fib6_get_table(net, RT6_TABLE_INFO);
2260 	if (!table)
2261 		return NULL;
2262 
2263 	read_lock_bh(&table->tb6_lock);
2264 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2265 	if (!fn)
2266 		goto out;
2267 
2268 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2269 		if (rt->dst.dev->ifindex != ifindex)
2270 			continue;
2271 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2272 			continue;
2273 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2274 			continue;
2275 		dst_hold(&rt->dst);
2276 		break;
2277 	}
2278 out:
2279 	read_unlock_bh(&table->tb6_lock);
2280 	return rt;
2281 }
2282 
rt6_add_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,int ifindex,unsigned int pref)2283 static struct rt6_info *rt6_add_route_info(struct net *net,
2284 					   const struct in6_addr *prefix, int prefixlen,
2285 					   const struct in6_addr *gwaddr, int ifindex,
2286 					   unsigned int pref)
2287 {
2288 	struct fib6_config cfg = {
2289 		.fc_metric	= IP6_RT_PRIO_USER,
2290 		.fc_ifindex	= ifindex,
2291 		.fc_dst_len	= prefixlen,
2292 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2293 				  RTF_UP | RTF_PREF(pref),
2294 		.fc_nlinfo.portid = 0,
2295 		.fc_nlinfo.nlh = NULL,
2296 		.fc_nlinfo.nl_net = net,
2297 	};
2298 
2299 	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2300 	cfg.fc_dst = *prefix;
2301 	cfg.fc_gateway = *gwaddr;
2302 
2303 	/* We should treat it as a default route if prefix length is 0. */
2304 	if (!prefixlen)
2305 		cfg.fc_flags |= RTF_DEFAULT;
2306 
2307 	ip6_route_add(&cfg);
2308 
2309 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2310 }
2311 #endif
2312 
rt6_get_dflt_router(const struct in6_addr * addr,struct net_device * dev)2313 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2314 {
2315 	struct rt6_info *rt;
2316 	struct fib6_table *table;
2317 
2318 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2319 	if (!table)
2320 		return NULL;
2321 
2322 	read_lock_bh(&table->tb6_lock);
2323 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2324 		if (dev == rt->dst.dev &&
2325 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2326 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2327 			break;
2328 	}
2329 	if (rt)
2330 		dst_hold(&rt->dst);
2331 	read_unlock_bh(&table->tb6_lock);
2332 	return rt;
2333 }
2334 
rt6_add_dflt_router(const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)2335 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2336 				     struct net_device *dev,
2337 				     unsigned int pref)
2338 {
2339 	struct fib6_config cfg = {
2340 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2341 		.fc_metric	= IP6_RT_PRIO_USER,
2342 		.fc_ifindex	= dev->ifindex,
2343 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2344 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2345 		.fc_nlinfo.portid = 0,
2346 		.fc_nlinfo.nlh = NULL,
2347 		.fc_nlinfo.nl_net = dev_net(dev),
2348 	};
2349 
2350 	cfg.fc_gateway = *gwaddr;
2351 
2352 	ip6_route_add(&cfg);
2353 
2354 	return rt6_get_dflt_router(gwaddr, dev);
2355 }
2356 
rt6_purge_dflt_routers(struct net * net)2357 void rt6_purge_dflt_routers(struct net *net)
2358 {
2359 	struct rt6_info *rt;
2360 	struct fib6_table *table;
2361 
2362 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2363 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2364 	if (!table)
2365 		return;
2366 
2367 restart:
2368 	read_lock_bh(&table->tb6_lock);
2369 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2370 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2371 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2372 			dst_hold(&rt->dst);
2373 			read_unlock_bh(&table->tb6_lock);
2374 			ip6_del_rt(rt);
2375 			goto restart;
2376 		}
2377 	}
2378 	read_unlock_bh(&table->tb6_lock);
2379 }
2380 
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)2381 static void rtmsg_to_fib6_config(struct net *net,
2382 				 struct in6_rtmsg *rtmsg,
2383 				 struct fib6_config *cfg)
2384 {
2385 	memset(cfg, 0, sizeof(*cfg));
2386 
2387 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2388 			 : RT6_TABLE_MAIN;
2389 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2390 	cfg->fc_metric = rtmsg->rtmsg_metric;
2391 	cfg->fc_expires = rtmsg->rtmsg_info;
2392 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2393 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2394 	cfg->fc_flags = rtmsg->rtmsg_flags;
2395 
2396 	cfg->fc_nlinfo.nl_net = net;
2397 
2398 	cfg->fc_dst = rtmsg->rtmsg_dst;
2399 	cfg->fc_src = rtmsg->rtmsg_src;
2400 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2401 }
2402 
/*
 * Handle the SIOCADDRT / SIOCDELRT route ioctls for @net.
 *
 * @arg points to a user-space struct in6_rtmsg.
 *
 * Returns -EINVAL for any other @cmd, -EPERM when the caller lacks
 * CAP_NET_ADMIN in the namespace's user namespace, -EFAULT when the
 * request cannot be copied in, otherwise the result of ip6_route_add()
 * or ip6_route_del(), which run under the RTNL lock.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	struct fib6_config cfg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);	/* Add a route */
	else
		err = ip6_route_del(&cfg);	/* Delete a route */
	rtnl_unlock();

	return err;
}
2439 
2440 /*
2441  *	Drop the packet on the floor
2442  */
2443 
/*
 * Account for an unroutable packet, answer it with an ICMPv6
 * Destination Unreachable carrying @code, and free it.
 *
 * @ipstats_mib_noroutes selects the counter: IPSTATS_MIB_INNOROUTES
 * (replaced by IPSTATS_MIB_INADDRERRORS when the destination address is
 * the unspecified address) or IPSTATS_MIB_OUTNOROUTES.  Any other value
 * bumps no counter.
 *
 * Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	struct dst_entry *dst = skb_dst(skb);
	int counter = -1;	/* -1: nothing to count */

	if (ipstats_mib_noroutes == IPSTATS_MIB_INNOROUTES) {
		int type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);

		if (type == IPV6_ADDR_ANY)
			counter = IPSTATS_MIB_INADDRERRORS;
		else
			counter = IPSTATS_MIB_INNOROUTES;
	} else if (ipstats_mib_noroutes == IPSTATS_MIB_OUTNOROUTES) {
		counter = IPSTATS_MIB_OUTNOROUTES;
	}

	if (counter >= 0)
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), counter);

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2466 
ip6_pkt_discard(struct sk_buff * skb)2467 static int ip6_pkt_discard(struct sk_buff *skb)
2468 {
2469 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2470 }
2471 
ip6_pkt_discard_out(struct net * net,struct sock * sk,struct sk_buff * skb)2472 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2473 {
2474 	skb->dev = skb_dst(skb)->dev;
2475 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2476 }
2477 
ip6_pkt_prohibit(struct sk_buff * skb)2478 static int ip6_pkt_prohibit(struct sk_buff *skb)
2479 {
2480 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2481 }
2482 
ip6_pkt_prohibit_out(struct net * net,struct sock * sk,struct sk_buff * skb)2483 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2484 {
2485 	skb->dev = skb_dst(skb)->dev;
2486 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2487 }
2488 
2489 /*
2490  *	Allocate a dst for local (unicast / anycast) address.
2491  */
2492 
/*
 * Build the /128 host route for the local address @addr owned by @idev.
 *
 * The dst is allocated against the namespace's loopback device, takes a
 * reference on @idev, and is flagged RTF_ANYCAST or RTF_LOCAL depending
 * on @anycast.  It is assigned to the l3mdev table of idev->dev when
 * there is one, RT6_TABLE_LOCAL otherwise.
 *
 * Returns the route with its reference count set to 1, or
 * ERR_PTR(-ENOMEM) when allocation fails.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt;
	u32 tb_id;

	rt = ip6_dst_alloc(net, net->loopback_dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);
	rt->rt6i_idev = idev;

	rt->dst.flags |= DST_HOST | DST_NOCACHE;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP |
			 (anycast ? RTF_ANYCAST : RTF_LOCAL);

	/* Host route: gateway and destination are the address itself. */
	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;

	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2528 
ip6_route_get_saddr(struct net * net,struct rt6_info * rt,const struct in6_addr * daddr,unsigned int prefs,struct in6_addr * saddr)2529 int ip6_route_get_saddr(struct net *net,
2530 			struct rt6_info *rt,
2531 			const struct in6_addr *daddr,
2532 			unsigned int prefs,
2533 			struct in6_addr *saddr)
2534 {
2535 	struct inet6_dev *idev =
2536 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2537 	int err = 0;
2538 	if (rt && rt->rt6i_prefsrc.plen)
2539 		*saddr = rt->rt6i_prefsrc.addr;
2540 	else
2541 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2542 					 daddr, prefs, saddr);
2543 	return err;
2544 }
2545 
2546 /* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared with each route's prefsrc */
};
2552 
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2553 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2554 {
2555 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2556 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2557 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2558 
2559 	if (((void *)rt->dst.dev == dev || !dev) &&
2560 	    rt != net->ipv6.ip6_null_entry &&
2561 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2562 		/* remove prefsrc entry */
2563 		rt->rt6i_prefsrc.plen = 0;
2564 	}
2565 	return 0;
2566 }
2567 
rt6_remove_prefsrc(struct inet6_ifaddr * ifp)2568 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2569 {
2570 	struct net *net = dev_net(ifp->idev->dev);
2571 	struct arg_dev_net_ip adni = {
2572 		.dev = ifp->idev->dev,
2573 		.net = net,
2574 		.addr = &ifp->addr,
2575 	};
2576 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2577 }
2578 
2579 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2580 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2581 
2582 /* Remove routers and update dst entries when gateway turn into host. */
fib6_clean_tohost(struct rt6_info * rt,void * arg)2583 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2584 {
2585 	struct in6_addr *gateway = (struct in6_addr *)arg;
2586 
2587 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2588 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2589 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2590 		return -1;
2591 	}
2592 	return 0;
2593 }
2594 
rt6_clean_tohost(struct net * net,struct in6_addr * gateway)2595 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2596 {
2597 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2598 }
2599 
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is never selected */
};
2604 
fib6_ifdown(struct rt6_info * rt,void * arg)2605 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2606 {
2607 	const struct arg_dev_net *adn = arg;
2608 	const struct net_device *dev = adn->dev;
2609 
2610 	if ((rt->dst.dev == dev || !dev) &&
2611 	    rt != adn->net->ipv6.ip6_null_entry)
2612 		return -1;
2613 
2614 	return 0;
2615 }
2616 
rt6_ifdown(struct net * net,struct net_device * dev)2617 void rt6_ifdown(struct net *net, struct net_device *dev)
2618 {
2619 	struct arg_dev_net adn = {
2620 		.dev = dev,
2621 		.net = net,
2622 	};
2623 
2624 	fib6_clean_all(net, fib6_ifdown, &adn);
2625 	icmp6_clean_all(fib6_ifdown, &adn);
2626 	if (dev)
2627 		rt6_uncached_list_flush_dev(net, dev);
2628 }
2629 
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2634 
rt6_mtu_change_route(struct rt6_info * rt,void * p_arg)2635 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2636 {
2637 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2638 	struct inet6_dev *idev;
2639 
2640 	/* In IPv6 pmtu discovery is not optional,
2641 	   so that RTAX_MTU lock cannot disable it.
2642 	   We still use this lock to block changes
2643 	   caused by addrconf/ndisc.
2644 	*/
2645 
2646 	idev = __in6_dev_get(arg->dev);
2647 	if (!idev)
2648 		return 0;
2649 
2650 	/* For administrative MTU increase, there is no way to discover
2651 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2652 	   Since RFC 1981 doesn't include administrative MTU increase
2653 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2654 	 */
2655 	/*
2656 	   If new MTU is less than route PMTU, this new MTU will be the
2657 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2658 	   decreases; if new MTU is greater than route PMTU, and the
2659 	   old MTU is the lowest MTU in the path, update the route PMTU
2660 	   to reflect the increase. In this case if the other nodes' MTU
2661 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2662 	   PMTU discouvery.
2663 	 */
2664 	if (rt->dst.dev == arg->dev &&
2665 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2666 		if (rt->rt6i_flags & RTF_CACHE) {
2667 			/* For RTF_CACHE with rt6i_pmtu == 0
2668 			 * (i.e. a redirected route),
2669 			 * the metrics of its rt->dst.from has already
2670 			 * been updated.
2671 			 */
2672 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2673 				rt->rt6i_pmtu = arg->mtu;
2674 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2675 			   (dst_mtu(&rt->dst) < arg->mtu &&
2676 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2677 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2678 		}
2679 	}
2680 	return 0;
2681 }
2682 
rt6_mtu_change(struct net_device * dev,unsigned int mtu)2683 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2684 {
2685 	struct rt6_mtu_change_arg arg = {
2686 		.dev = dev,
2687 		.mtu = mtu,
2688 	};
2689 
2690 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2691 }
2692 
2693 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2694 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2695 	[RTA_OIF]               = { .type = NLA_U32 },
2696 	[RTA_IIF]		= { .type = NLA_U32 },
2697 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2698 	[RTA_METRICS]           = { .type = NLA_NESTED },
2699 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2700 	[RTA_PREF]              = { .type = NLA_U8 },
2701 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2702 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2703 };
2704 
rtm_to_fib6_config(struct sk_buff * skb,struct nlmsghdr * nlh,struct fib6_config * cfg)2705 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2706 			      struct fib6_config *cfg)
2707 {
2708 	struct rtmsg *rtm;
2709 	struct nlattr *tb[RTA_MAX+1];
2710 	unsigned int pref;
2711 	int err;
2712 
2713 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2714 	if (err < 0)
2715 		goto errout;
2716 
2717 	err = -EINVAL;
2718 	rtm = nlmsg_data(nlh);
2719 	memset(cfg, 0, sizeof(*cfg));
2720 
2721 	cfg->fc_table = rtm->rtm_table;
2722 	cfg->fc_dst_len = rtm->rtm_dst_len;
2723 	cfg->fc_src_len = rtm->rtm_src_len;
2724 	cfg->fc_flags = RTF_UP;
2725 	cfg->fc_protocol = rtm->rtm_protocol;
2726 	cfg->fc_type = rtm->rtm_type;
2727 
2728 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2729 	    rtm->rtm_type == RTN_BLACKHOLE ||
2730 	    rtm->rtm_type == RTN_PROHIBIT ||
2731 	    rtm->rtm_type == RTN_THROW)
2732 		cfg->fc_flags |= RTF_REJECT;
2733 
2734 	if (rtm->rtm_type == RTN_LOCAL)
2735 		cfg->fc_flags |= RTF_LOCAL;
2736 
2737 	if (rtm->rtm_flags & RTM_F_CLONED)
2738 		cfg->fc_flags |= RTF_CACHE;
2739 
2740 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2741 	cfg->fc_nlinfo.nlh = nlh;
2742 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2743 
2744 	if (tb[RTA_GATEWAY]) {
2745 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2746 		cfg->fc_flags |= RTF_GATEWAY;
2747 	}
2748 
2749 	if (tb[RTA_DST]) {
2750 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2751 
2752 		if (nla_len(tb[RTA_DST]) < plen)
2753 			goto errout;
2754 
2755 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2756 	}
2757 
2758 	if (tb[RTA_SRC]) {
2759 		int plen = (rtm->rtm_src_len + 7) >> 3;
2760 
2761 		if (nla_len(tb[RTA_SRC]) < plen)
2762 			goto errout;
2763 
2764 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2765 	}
2766 
2767 	if (tb[RTA_PREFSRC])
2768 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2769 
2770 	if (tb[RTA_OIF])
2771 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2772 
2773 	if (tb[RTA_PRIORITY])
2774 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2775 
2776 	if (tb[RTA_METRICS]) {
2777 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2778 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2779 	}
2780 
2781 	if (tb[RTA_TABLE])
2782 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2783 
2784 	if (tb[RTA_MULTIPATH]) {
2785 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2786 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2787 	}
2788 
2789 	if (tb[RTA_PREF]) {
2790 		pref = nla_get_u8(tb[RTA_PREF]);
2791 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2792 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2793 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2794 		cfg->fc_flags |= RTF_PREF(pref);
2795 	}
2796 
2797 	if (tb[RTA_ENCAP])
2798 		cfg->fc_encap = tb[RTA_ENCAP];
2799 
2800 	if (tb[RTA_ENCAP_TYPE])
2801 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2802 
2803 	err = 0;
2804 errout:
2805 	return err;
2806 }
2807 
/* One nexthop of a multipath request while it is being installed. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this nexthop; reset to NULL once insertion was attempted */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request, reused to delete the route on rollback */
	struct mx6_config mxc;		/* metrics converted from r_cfg; mxc.mx is freed during cleanup */
	struct list_head next;		/* linkage in the on-stack nexthop list */
};
2814 
ip6_print_replace_route_err(struct list_head * rt6_nh_list)2815 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2816 {
2817 	struct rt6_nh *nh;
2818 
2819 	list_for_each_entry(nh, rt6_nh_list, next) {
2820 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2821 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2822 		        nh->r_cfg.fc_ifindex);
2823 	}
2824 }
2825 
ip6_route_info_append(struct list_head * rt6_nh_list,struct rt6_info * rt,struct fib6_config * r_cfg)2826 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2827 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2828 {
2829 	struct rt6_nh *nh;
2830 	struct rt6_info *rtnh;
2831 	int err = -EEXIST;
2832 
2833 	list_for_each_entry(nh, rt6_nh_list, next) {
2834 		/* check if rt6_info already exists */
2835 		rtnh = nh->rt6_info;
2836 
2837 		if (rtnh->dst.dev == rt->dst.dev &&
2838 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2839 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2840 				    &rt->rt6i_gateway))
2841 			return err;
2842 	}
2843 
2844 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2845 	if (!nh)
2846 		return -ENOMEM;
2847 	nh->rt6_info = rt;
2848 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2849 	if (err) {
2850 		kfree(nh);
2851 		return err;
2852 	}
2853 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2854 	list_add_tail(&nh->next, rt6_nh_list);
2855 
2856 	return 0;
2857 }
2858 
ip6_route_multipath_add(struct fib6_config * cfg)2859 static int ip6_route_multipath_add(struct fib6_config *cfg)
2860 {
2861 	struct fib6_config r_cfg;
2862 	struct rtnexthop *rtnh;
2863 	struct rt6_info *rt;
2864 	struct rt6_nh *err_nh;
2865 	struct rt6_nh *nh, *nh_safe;
2866 	int remaining;
2867 	int attrlen;
2868 	int err = 1;
2869 	int nhn = 0;
2870 	int replace = (cfg->fc_nlinfo.nlh &&
2871 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2872 	LIST_HEAD(rt6_nh_list);
2873 
2874 	remaining = cfg->fc_mp_len;
2875 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2876 
2877 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2878 	 * rt6_info structs per nexthop
2879 	 */
2880 	while (rtnh_ok(rtnh, remaining)) {
2881 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2882 		if (rtnh->rtnh_ifindex)
2883 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2884 
2885 		attrlen = rtnh_attrlen(rtnh);
2886 		if (attrlen > 0) {
2887 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2888 
2889 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2890 			if (nla) {
2891 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2892 				r_cfg.fc_flags |= RTF_GATEWAY;
2893 			}
2894 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2895 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2896 			if (nla)
2897 				r_cfg.fc_encap_type = nla_get_u16(nla);
2898 		}
2899 
2900 		rt = ip6_route_info_create(&r_cfg);
2901 		if (IS_ERR(rt)) {
2902 			err = PTR_ERR(rt);
2903 			rt = NULL;
2904 			goto cleanup;
2905 		}
2906 
2907 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2908 		if (err) {
2909 			dst_free(&rt->dst);
2910 			goto cleanup;
2911 		}
2912 
2913 		rtnh = rtnh_next(rtnh, &remaining);
2914 	}
2915 
2916 	err_nh = NULL;
2917 	list_for_each_entry(nh, &rt6_nh_list, next) {
2918 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2919 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2920 		nh->rt6_info = NULL;
2921 		if (err) {
2922 			if (replace && nhn)
2923 				ip6_print_replace_route_err(&rt6_nh_list);
2924 			err_nh = nh;
2925 			goto add_errout;
2926 		}
2927 
2928 		/* Because each route is added like a single route we remove
2929 		 * these flags after the first nexthop: if there is a collision,
2930 		 * we have already failed to add the first nexthop:
2931 		 * fib6_add_rt2node() has rejected it; when replacing, old
2932 		 * nexthops have been replaced by first new, the rest should
2933 		 * be added to it.
2934 		 */
2935 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2936 						     NLM_F_REPLACE);
2937 		nhn++;
2938 	}
2939 
2940 	goto cleanup;
2941 
2942 add_errout:
2943 	/* Delete routes that were already added */
2944 	list_for_each_entry(nh, &rt6_nh_list, next) {
2945 		if (err_nh == nh)
2946 			break;
2947 		ip6_route_del(&nh->r_cfg);
2948 	}
2949 
2950 cleanup:
2951 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2952 		if (nh->rt6_info)
2953 			dst_free(&nh->rt6_info->dst);
2954 		kfree(nh->mxc.mx);
2955 		list_del(&nh->next);
2956 		kfree(nh);
2957 	}
2958 
2959 	return err;
2960 }
2961 
ip6_route_multipath_del(struct fib6_config * cfg)2962 static int ip6_route_multipath_del(struct fib6_config *cfg)
2963 {
2964 	struct fib6_config r_cfg;
2965 	struct rtnexthop *rtnh;
2966 	int remaining;
2967 	int attrlen;
2968 	int err = 1, last_err = 0;
2969 
2970 	remaining = cfg->fc_mp_len;
2971 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2972 
2973 	/* Parse a Multipath Entry */
2974 	while (rtnh_ok(rtnh, remaining)) {
2975 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2976 		if (rtnh->rtnh_ifindex)
2977 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2978 
2979 		attrlen = rtnh_attrlen(rtnh);
2980 		if (attrlen > 0) {
2981 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2982 
2983 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2984 			if (nla) {
2985 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2986 				r_cfg.fc_flags |= RTF_GATEWAY;
2987 			}
2988 		}
2989 		err = ip6_route_del(&r_cfg);
2990 		if (err)
2991 			last_err = err;
2992 
2993 		rtnh = rtnh_next(rtnh, &remaining);
2994 	}
2995 
2996 	return last_err;
2997 }
2998 
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh)2999 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3000 {
3001 	struct fib6_config cfg;
3002 	int err;
3003 
3004 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3005 	if (err < 0)
3006 		return err;
3007 
3008 	if (cfg.fc_mp)
3009 		return ip6_route_multipath_del(&cfg);
3010 	else
3011 		return ip6_route_del(&cfg);
3012 }
3013 
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh)3014 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3015 {
3016 	struct fib6_config cfg;
3017 	int err;
3018 
3019 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3020 	if (err < 0)
3021 		return err;
3022 
3023 	if (cfg.fc_mp)
3024 		return ip6_route_multipath_add(&cfg);
3025 	else
3026 		return ip6_route_add(&cfg);
3027 }
3028 
rt6_nlmsg_size(struct rt6_info * rt)3029 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3030 {
3031 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3032 	       + nla_total_size(16) /* RTA_SRC */
3033 	       + nla_total_size(16) /* RTA_DST */
3034 	       + nla_total_size(16) /* RTA_GATEWAY */
3035 	       + nla_total_size(16) /* RTA_PREFSRC */
3036 	       + nla_total_size(4) /* RTA_TABLE */
3037 	       + nla_total_size(4) /* RTA_IIF */
3038 	       + nla_total_size(4) /* RTA_OIF */
3039 	       + nla_total_size(4) /* RTA_PRIORITY */
3040 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3041 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3042 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3043 	       + nla_total_size(1) /* RTA_PREF */
3044 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3045 }
3046 
/*
 * rt6_fill_node - serialise one IPv6 route into a netlink route message
 * @net:    network namespace, handed to the multicast and source-address helpers
 * @skb:    buffer the message is appended to
 * @rt:     route being described
 * @dst:    if non-NULL, emitted as RTA_DST with rtm_dst_len forced to 128;
 *          otherwise the route's own destination prefix is emitted when its
 *          length is non-zero
 * @src:    same as @dst but for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index; non-zero selects the "input route" flavour
 *          of the reply (RTA_IIF, or the multicast helper for multicast
 *          destinations)
 * @type:   netlink message type placed in the header
 * @portid: netlink port id placed in the header
 * @seq:    netlink sequence number placed in the header
 * @prefix: non-zero to emit only routes flagged RTF_PREFIX_RT
 * @nowait: forwarded to ip6mr_get_route()
 * @flags:  netlink header flags
 *
 * Returns 1 when @prefix is set and @rt is not a prefix route (nothing is
 * written), 0 on success, and -EMSGSIZE when the message could not be
 * started or any attribute could not be added (a partially built message
 * is cancelled first).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes both in the fixed header and in RTA_TABLE. */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	/* Derive the route type: reject routes map their dst error code
	 * to a specific RTN_* type, anything else is local or unicast.
	 */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;

	/* The stored protocol is overridden for redirect-learnt
	 * (RTF_DYNAMIC) and autoconfigured (RTF_ADDRCONF) routes.
	 */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					/* NOTE(review): returns without
					 * nlmsg_end(); presumably the
					 * multicast code has taken over the
					 * request - confirm.
					 */
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* With nowait set, only -EMSGSIZE
					 * aborts the message; other results
					 * fall through.
					 */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Report a local copy of the metrics so a per-route PMTU can
	 * replace the MTU slot without touching the dst's own metrics.
	 */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies, or 0 for routes that do not expire. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	/* A failed encap fill must abort the message like every other
	 * attribute; otherwise a route missing its encap data would be
	 * reported as complete.
	 */
	if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3206 
rt6_dump_route(struct rt6_info * rt,void * p_arg)3207 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3208 {
3209 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3210 	int prefix;
3211 
3212 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3213 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3214 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3215 	} else
3216 		prefix = 0;
3217 
3218 	return rt6_fill_node(arg->net,
3219 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3220 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3221 		     prefix, 0, NLM_F_MULTI);
3222 }
3223 
inet6_rtm_getroute(struct sk_buff * in_skb,struct nlmsghdr * nlh)3224 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3225 {
3226 	struct net *net = sock_net(in_skb->sk);
3227 	struct nlattr *tb[RTA_MAX+1];
3228 	struct rt6_info *rt;
3229 	struct sk_buff *skb;
3230 	struct rtmsg *rtm;
3231 	struct flowi6 fl6;
3232 	int err, iif = 0, oif = 0;
3233 
3234 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3235 	if (err < 0)
3236 		goto errout;
3237 
3238 	err = -EINVAL;
3239 	memset(&fl6, 0, sizeof(fl6));
3240 
3241 	if (tb[RTA_SRC]) {
3242 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3243 			goto errout;
3244 
3245 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3246 	}
3247 
3248 	if (tb[RTA_DST]) {
3249 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3250 			goto errout;
3251 
3252 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3253 	}
3254 
3255 	if (tb[RTA_IIF])
3256 		iif = nla_get_u32(tb[RTA_IIF]);
3257 
3258 	if (tb[RTA_OIF])
3259 		oif = nla_get_u32(tb[RTA_OIF]);
3260 
3261 	if (tb[RTA_MARK])
3262 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3263 
3264 	if (iif) {
3265 		struct net_device *dev;
3266 		int flags = 0;
3267 
3268 		dev = __dev_get_by_index(net, iif);
3269 		if (!dev) {
3270 			err = -ENODEV;
3271 			goto errout;
3272 		}
3273 
3274 		fl6.flowi6_iif = iif;
3275 
3276 		if (!ipv6_addr_any(&fl6.saddr))
3277 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3278 
3279 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3280 							       flags);
3281 	} else {
3282 		fl6.flowi6_oif = oif;
3283 
3284 		if (netif_index_is_l3_master(net, oif)) {
3285 			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3286 					   FLOWI_FLAG_SKIP_NH_OIF;
3287 		}
3288 
3289 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3290 	}
3291 
3292 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3293 	if (!skb) {
3294 		ip6_rt_put(rt);
3295 		err = -ENOBUFS;
3296 		goto errout;
3297 	}
3298 
3299 	/* Reserve room for dummy headers, this skb can pass
3300 	   through good chunk of routing engine.
3301 	 */
3302 	skb_reset_mac_header(skb);
3303 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3304 
3305 	skb_dst_set(skb, &rt->dst);
3306 
3307 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3308 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3309 			    nlh->nlmsg_seq, 0, 0, 0);
3310 	if (err < 0) {
3311 		kfree_skb(skb);
3312 		goto errout;
3313 	}
3314 
3315 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3316 errout:
3317 	return err;
3318 }
3319 
inet6_rt_notify(int event,struct rt6_info * rt,struct nl_info * info,unsigned int nlm_flags)3320 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3321 		     unsigned int nlm_flags)
3322 {
3323 	struct sk_buff *skb;
3324 	struct net *net = info->nl_net;
3325 	u32 seq;
3326 	int err;
3327 
3328 	err = -ENOBUFS;
3329 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3330 
3331 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3332 	if (!skb)
3333 		goto errout;
3334 
3335 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3336 				event, info->portid, seq, 0, 0, nlm_flags);
3337 	if (err < 0) {
3338 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3339 		WARN_ON(err == -EMSGSIZE);
3340 		kfree_skb(skb);
3341 		goto errout;
3342 	}
3343 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3344 		    info->nlh, gfp_any());
3345 	return;
3346 errout:
3347 	if (err < 0)
3348 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3349 }
3350 
ip6_route_dev_notify(struct notifier_block * this,unsigned long event,void * ptr)3351 static int ip6_route_dev_notify(struct notifier_block *this,
3352 				unsigned long event, void *ptr)
3353 {
3354 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3355 	struct net *net = dev_net(dev);
3356 
3357 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3358 		net->ipv6.ip6_null_entry->dst.dev = dev;
3359 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3360 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3361 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3362 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3363 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3364 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3365 #endif
3366 	}
3367 
3368 	return NOTIFY_OK;
3369 }
3370 
3371 /*
3372  *	/proc
3373  */
3374 
3375 #ifdef CONFIG_PROC_FS
3376 
/* File operations for the per-namespace "ipv6_route" proc entry
 * (created in ip6_route_net_init_late()); reading goes through the
 * seq_file helpers.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3384 
rt6_stats_seq_show(struct seq_file * seq,void * v)3385 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3386 {
3387 	struct net *net = (struct net *)seq->private;
3388 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3389 		   net->ipv6.rt6_stats->fib_nodes,
3390 		   net->ipv6.rt6_stats->fib_route_nodes,
3391 		   net->ipv6.rt6_stats->fib_rt_alloc,
3392 		   net->ipv6.rt6_stats->fib_rt_entries,
3393 		   net->ipv6.rt6_stats->fib_rt_cache,
3394 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3395 		   net->ipv6.rt6_stats->fib_discarded_routes);
3396 
3397 	return 0;
3398 }
3399 
/* open() for "rt6_stats": a single-record, namespace-aware seq_file
 * whose content is produced by rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3404 
/* File operations for the per-namespace "rt6_stats" proc entry
 * (created in ip6_route_net_init_late()).
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3412 #endif	/* CONFIG_PROC_FS */
3413 
3414 #ifdef CONFIG_SYSCTL
3415 
3416 static
ipv6_sysctl_rtcache_flush(struct ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)3417 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3418 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3419 {
3420 	struct net *net;
3421 	int delay;
3422 	if (!write)
3423 		return -EINVAL;
3424 
3425 	net = (struct net *)ctl->extra1;
3426 	delay = net->ipv6.sysctl.flush_delay;
3427 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3428 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3429 	return 0;
3430 }
3431 
/*
 * Template for the per-namespace IPv6 route sysctl table.  The .data
 * pointers here refer to init_net (or the dst_ops template);
 * ipv6_route_sysctl_init() duplicates the table and re-points each
 * entry at the target namespace BY INDEX, so the order of entries here
 * must stay in sync with that function.
 *
 * "gc_min_interval" and "gc_min_interval_ms" share the same variable
 * and differ only in the handler used to convert the value.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger, see ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as "gc_min_interval", different handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3505 
ipv6_route_sysctl_init(struct net * net)3506 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3507 {
3508 	struct ctl_table *table;
3509 
3510 	table = kmemdup(ipv6_route_table_template,
3511 			sizeof(ipv6_route_table_template),
3512 			GFP_KERNEL);
3513 
3514 	if (table) {
3515 		table[0].data = &net->ipv6.sysctl.flush_delay;
3516 		table[0].extra1 = net;
3517 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3518 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3519 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3520 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3521 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3522 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3523 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3524 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3525 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3526 
3527 		/* Don't export sysctls to unprivileged users */
3528 		if (net->user_ns != &init_user_ns)
3529 			table[0].procname = NULL;
3530 	}
3531 
3532 	return table;
3533 }
3534 #endif
3535 
ip6_route_net_init(struct net * net)3536 static int __net_init ip6_route_net_init(struct net *net)
3537 {
3538 	int ret = -ENOMEM;
3539 
3540 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3541 	       sizeof(net->ipv6.ip6_dst_ops));
3542 
3543 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3544 		goto out_ip6_dst_ops;
3545 
3546 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3547 					   sizeof(*net->ipv6.ip6_null_entry),
3548 					   GFP_KERNEL);
3549 	if (!net->ipv6.ip6_null_entry)
3550 		goto out_ip6_dst_entries;
3551 	net->ipv6.ip6_null_entry->dst.path =
3552 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3553 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3554 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3555 			 ip6_template_metrics, true);
3556 
3557 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3558 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3559 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3560 					       GFP_KERNEL);
3561 	if (!net->ipv6.ip6_prohibit_entry)
3562 		goto out_ip6_null_entry;
3563 	net->ipv6.ip6_prohibit_entry->dst.path =
3564 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3565 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3566 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3567 			 ip6_template_metrics, true);
3568 
3569 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3570 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3571 					       GFP_KERNEL);
3572 	if (!net->ipv6.ip6_blk_hole_entry)
3573 		goto out_ip6_prohibit_entry;
3574 	net->ipv6.ip6_blk_hole_entry->dst.path =
3575 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3576 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3577 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3578 			 ip6_template_metrics, true);
3579 #endif
3580 
3581 	net->ipv6.sysctl.flush_delay = 0;
3582 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3583 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3584 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3585 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3586 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3587 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3588 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3589 
3590 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3591 
3592 	ret = 0;
3593 out:
3594 	return ret;
3595 
3596 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3597 out_ip6_prohibit_entry:
3598 	kfree(net->ipv6.ip6_prohibit_entry);
3599 out_ip6_null_entry:
3600 	kfree(net->ipv6.ip6_null_entry);
3601 #endif
3602 out_ip6_dst_entries:
3603 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3604 out_ip6_dst_ops:
3605 	goto out;
3606 }
3607 
ip6_route_net_exit(struct net * net)3608 static void __net_exit ip6_route_net_exit(struct net *net)
3609 {
3610 	kfree(net->ipv6.ip6_null_entry);
3611 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3612 	kfree(net->ipv6.ip6_prohibit_entry);
3613 	kfree(net->ipv6.ip6_blk_hole_entry);
3614 #endif
3615 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3616 }
3617 
ip6_route_net_init_late(struct net * net)3618 static int __net_init ip6_route_net_init_late(struct net *net)
3619 {
3620 #ifdef CONFIG_PROC_FS
3621 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3622 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3623 #endif
3624 	return 0;
3625 }
3626 
ip6_route_net_exit_late(struct net * net)3627 static void __net_exit ip6_route_net_exit_late(struct net *net)
3628 {
3629 #ifdef CONFIG_PROC_FS
3630 	remove_proc_entry("ipv6_route", net->proc_net);
3631 	remove_proc_entry("rt6_stats", net->proc_net);
3632 #endif
3633 }
3634 
/* Per-namespace init/exit hooks for the routing core; registered from
 * ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3639 
ipv6_inetpeer_init(struct net * net)3640 static int __net_init ipv6_inetpeer_init(struct net *net)
3641 {
3642 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3643 
3644 	if (!bp)
3645 		return -ENOMEM;
3646 	inet_peer_base_init(bp);
3647 	net->ipv6.peers = bp;
3648 	return 0;
3649 }
3650 
ipv6_inetpeer_exit(struct net * net)3651 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3652 {
3653 	struct inet_peer_base *bp = net->ipv6.peers;
3654 
3655 	net->ipv6.peers = NULL;
3656 	inetpeer_invalidate_tree(bp);
3657 	kfree(bp);
3658 }
3659 
/* Per-namespace init/exit hooks for the IPv6 inet_peer base; registered
 * from ip6_route_init() before ip6_route_net_ops.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3664 
/* Late per-namespace hooks (proc entries); registered from
 * ip6_route_init() after fib6, xfrm6 and fib6 rules are initialised.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3669 
/* Netdevice notifier hooking ip6_route_dev_notify() into device events;
 * registered at the end of ip6_route_init().
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3674 
/*
 * One-time initialisation of the IPv6 routing subsystem.
 *
 * Sets up, in order: the rt6_info slab cache, the blackhole dst_ops
 * accounting, the inetpeer and routing per-namespace ops, the loopback
 * binding of init_net's special routes, fib6, xfrm6, fib6 rules, the
 * late per-namespace ops, the rtnetlink route handlers, the netdevice
 * notifier and finally the per-CPU uncached route lists.
 *
 * Returns 0 on success or a negative errno; on failure the steps that
 * already succeeded are undone in reverse order by the label ladder at
 * the end of the function.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* The blackhole ops share the slab cache created above. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Any failed handler registration is reported as -ENOBUFS. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	/* NOTE(review): if this fails, the rtnetlink handlers registered
	 * just above are not unregistered by the unwind path below -
	 * presumably the caller cleans them up; confirm.
	 */
	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* Error unwind: each label undoes the step named before it and
	 * falls through to undo all earlier steps.
	 */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3766 
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late per-namespace ops, fib6 rules, xfrm6, the fib6 garbage
 * collector, the inetpeer and routing per-namespace ops, the blackhole
 * dst accounting and the rt6_info slab cache.
 *
 * NOTE(review): the rtnetlink handlers registered in ip6_route_init()
 * are not unregistered here - presumably done by the caller; confirm.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3779