1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 enum rt6_nud_state {
69 RT6_NUD_FAIL_HARD = -3,
70 RT6_NUD_FAIL_PROBE = -2,
71 RT6_NUD_FAIL_DO_RR = -1,
72 RT6_NUD_SUCCEED = 1
73 };
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
85
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct net *net,
99 const struct in6_addr *prefix, int prefixlen,
100 const struct in6_addr *gwaddr, int ifindex,
101 unsigned int pref);
102 static struct rt6_info *rt6_get_route_info(struct net *net,
103 const struct in6_addr *prefix, int prefixlen,
104 const struct in6_addr *gwaddr, int ifindex);
105 #endif
106
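/* Lazily attach an inet_peer entry (state shared per destination address,
 * e.g. metrics) to this route.  If another CPU won the race to set the
 * peer pointer, drop the reference we just took.
 */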
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109 struct inet_peer_base *base;
110 struct inet_peer *peer;
111
112 base = inetpeer_base_ptr(rt->_rt6i_peer);
113 if (!base)
114 return;
115
116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 if (peer) {
118 if (!rt6_set_peer(rt, peer))
119 inet_putpeer(peer);
120 }
121 }
122
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125 if (rt6_has_peer(rt))
126 return rt6_peer_ptr(rt);
127
128 rt6_bind_peer(rt, create);
129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131
132 static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
133 {
134 return __rt6_get_peer(rt, 1);
135 }
136
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139 struct rt6_info *rt = (struct rt6_info *) dst;
140 struct inet_peer *peer;
141 u32 *p = NULL;
142
143 if (!(rt->dst.flags & DST_HOST))
144 return dst_cow_metrics_generic(dst, old);
145
146 peer = rt6_get_peer_create(rt);
147 if (peer) {
148 u32 *old_p = __DST_METRICS_PTR(old);
149 unsigned long prev, new;
150
151 p = peer->metrics;
152 if (inet_metrics_new(peer) ||
153 (old & DST_METRICS_FORCE_OVERWRITE))
154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156 new = (unsigned long) p;
157 prev = cmpxchg(&dst->_metrics, old, new);
158
159 if (prev != old) {
160 p = __DST_METRICS_PTR(prev);
161 if (prev & DST_METRICS_READ_ONLY)
162 p = NULL;
163 }
164 }
165 return p;
166 }
167
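/* Pick the address used for neighbour resolution: the configured gateway
 * if there is one, otherwise the destination of the packet being sent,
 * otherwise the daddr supplied by the caller.
 */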
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 struct sk_buff *skb,
170 const void *daddr)
171 {
172 struct in6_addr *p = &rt->rt6i_gateway;
173
174 if (!ipv6_addr_any(p))
175 return (const void *) p;
176 else if (skb)
177 return &ipv6_hdr(skb)->daddr;
178 return daddr;
179 }
180
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 struct sk_buff *skb,
183 const void *daddr)
184 {
185 struct rt6_info *rt = (struct rt6_info *) dst;
186 struct neighbour *n;
187
188 daddr = choose_neigh_daddr(rt, skb, daddr);
189 n = __ipv6_neigh_lookup(dst->dev, daddr);
190 if (n)
191 return n;
192 return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194
195 static struct dst_ops ip6_dst_ops_template = {
196 .family = AF_INET6,
197 .gc = ip6_dst_gc,
198 .gc_thresh = 1024,
199 .check = ip6_dst_check,
200 .default_advmss = ip6_default_advmss,
201 .mtu = ip6_mtu,
202 .cow_metrics = ipv6_cow_metrics,
203 .destroy = ip6_dst_destroy,
204 .ifdown = ip6_dst_ifdown,
205 .negative_advice = ip6_negative_advice,
206 .link_failure = ip6_link_failure,
207 .update_pmtu = ip6_rt_update_pmtu,
208 .redirect = rt6_do_redirect,
209 .local_out = __ip6_local_out,
210 .neigh_lookup = ip6_neigh_lookup,
211 };
212
213 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214 {
215 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217 return mtu ? : dst->dev->mtu;
218 }
219
220 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221 struct sk_buff *skb, u32 mtu)
222 {
223 }
224
225 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
226 struct sk_buff *skb)
227 {
228 }
229
230 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231 unsigned long old)
232 {
233 return NULL;
234 }
235
236 static struct dst_ops ip6_dst_blackhole_ops = {
237 .family = AF_INET6,
238 .destroy = ip6_dst_destroy,
239 .check = ip6_dst_check,
240 .mtu = ip6_blackhole_mtu,
241 .default_advmss = ip6_default_advmss,
242 .update_pmtu = ip6_rt_blackhole_update_pmtu,
243 .redirect = ip6_rt_blackhole_redirect,
244 .cow_metrics = ip6_rt_blackhole_cow_metrics,
245 .neigh_lookup = ip6_neigh_lookup,
246 };
247
248 static const u32 ip6_template_metrics[RTAX_MAX] = {
249 [RTAX_HOPLIMIT - 1] = 0,
250 };
251
252 static const struct rt6_info ip6_null_entry_template = {
253 .dst = {
254 .__refcnt = ATOMIC_INIT(1),
255 .__use = 1,
256 .obsolete = DST_OBSOLETE_FORCE_CHK,
257 .error = -ENETUNREACH,
258 .input = ip6_pkt_discard,
259 .output = ip6_pkt_discard_out,
260 },
261 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
262 .rt6i_protocol = RTPROT_KERNEL,
263 .rt6i_metric = ~(u32) 0,
264 .rt6i_ref = ATOMIC_INIT(1),
265 };
266
267 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
268
269 static const struct rt6_info ip6_prohibit_entry_template = {
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
273 .obsolete = DST_OBSOLETE_FORCE_CHK,
274 .error = -EACCES,
275 .input = ip6_pkt_prohibit,
276 .output = ip6_pkt_prohibit_out,
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
279 .rt6i_protocol = RTPROT_KERNEL,
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282 };
283
284 static const struct rt6_info ip6_blk_hole_entry_template = {
285 .dst = {
286 .__refcnt = ATOMIC_INIT(1),
287 .__use = 1,
288 .obsolete = DST_OBSOLETE_FORCE_CHK,
289 .error = -EINVAL,
290 .input = dst_discard,
291 .output = dst_discard_sk,
292 },
293 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .rt6i_protocol = RTPROT_KERNEL,
295 .rt6i_metric = ~(u32) 0,
296 .rt6i_ref = ATOMIC_INIT(1),
297 };
298
299 #endif
300
301 /* allocate dst with ip6_dst_ops */
302 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
303 struct net_device *dev,
304 int flags,
305 struct fib6_table *table)
306 {
307 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
308 0, DST_OBSOLETE_FORCE_CHK, flags);
309
310 if (rt) {
311 struct dst_entry *dst = &rt->dst;
312
313 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
314 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
315 INIT_LIST_HEAD(&rt->rt6i_siblings);
316 }
317 return rt;
318 }
319
320 static void ip6_dst_destroy(struct dst_entry *dst)
321 {
322 struct rt6_info *rt = (struct rt6_info *)dst;
323 struct inet6_dev *idev = rt->rt6i_idev;
324 struct dst_entry *from = dst->from;
325
326 if (!(rt->dst.flags & DST_HOST))
327 dst_destroy_metrics_generic(dst);
328
329 if (idev) {
330 rt->rt6i_idev = NULL;
331 in6_dev_put(idev);
332 }
333
334 dst->from = NULL;
335 dst_release(from);
336
337 if (rt6_has_peer(rt)) {
338 struct inet_peer *peer = rt6_peer_ptr(rt);
339 inet_putpeer(peer);
340 }
341 }
342
343 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
344 int how)
345 {
346 struct rt6_info *rt = (struct rt6_info *)dst;
347 struct inet6_dev *idev = rt->rt6i_idev;
348 struct net_device *loopback_dev =
349 dev_net(dev)->loopback_dev;
350
351 if (dev != loopback_dev) {
352 if (idev && idev->dev == dev) {
353 struct inet6_dev *loopback_idev =
354 in6_dev_get(loopback_dev);
355 if (loopback_idev) {
356 rt->rt6i_idev = loopback_idev;
357 in6_dev_put(idev);
358 }
359 }
360 }
361 }
362
363 static bool rt6_check_expired(const struct rt6_info *rt)
364 {
365 if (rt->rt6i_flags & RTF_EXPIRES) {
366 if (time_after(jiffies, rt->dst.expires))
367 return true;
368 } else if (rt->dst.from) {
369 return rt6_check_expired((struct rt6_info *) rt->dst.from);
370 }
371 return false;
372 }
373
374 /* Multipath route selection:
375 * Hash-based function using the packet header and flow label.
376 * Adapted from fib_info_hashfn()
377 */
378 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
379 const struct flowi6 *fl6)
380 {
381 unsigned int val = fl6->flowi6_proto;
382
383 val ^= ipv6_addr_hash(&fl6->daddr);
384 val ^= ipv6_addr_hash(&fl6->saddr);
385
386 /* Works only if this is not encapsulated */
387 switch (fl6->flowi6_proto) {
388 case IPPROTO_UDP:
389 case IPPROTO_TCP:
390 case IPPROTO_SCTP:
391 val ^= (__force u16)fl6->fl6_sport;
392 val ^= (__force u16)fl6->fl6_dport;
393 break;
394
395 case IPPROTO_ICMPV6:
396 val ^= (__force u16)fl6->fl6_icmp_type;
397 val ^= (__force u16)fl6->fl6_icmp_code;
398 break;
399 }
400 /* RFC 6438 recommends using the flow label */
401 val ^= (__force u32)fl6->flowlabel;
402
403 /* Perhaps we need to tune this function? */
404 val = val ^ (val >> 7) ^ (val >> 12);
405 return val % candidate_count;
406 }
407
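/* Pick one route among the ECMP siblings of 'match' using the flow hash
 * above.  Index 0 keeps 'match' itself; a sibling that scores negatively
 * (unreachable nexthop) is not used.
 */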
408 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
409 struct flowi6 *fl6, int oif,
410 int strict)
411 {
412 struct rt6_info *sibling, *next_sibling;
413 int route_choosen;
414
415 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
416 /* Don't change the route if route_choosen == 0
417 * (the siblings list does not include ourself)
418 */
419 if (route_choosen)
420 list_for_each_entry_safe(sibling, next_sibling,
421 &match->rt6i_siblings, rt6i_siblings) {
422 route_choosen--;
423 if (route_choosen == 0) {
424 if (rt6_score_route(sibling, oif, strict) < 0)
425 break;
426 match = sibling;
427 break;
428 }
429 }
430 return match;
431 }
432
433 /*
434 * Route lookup. Any table->tb6_lock is implied.
435 */
436
437 static inline struct rt6_info *rt6_device_match(struct net *net,
438 struct rt6_info *rt,
439 const struct in6_addr *saddr,
440 int oif,
441 int flags)
442 {
443 struct rt6_info *local = NULL;
444 struct rt6_info *sprt;
445
446 if (!oif && ipv6_addr_any(saddr))
447 goto out;
448
449 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
450 struct net_device *dev = sprt->dst.dev;
451
452 if (oif) {
453 if (dev->ifindex == oif)
454 return sprt;
455 if (dev->flags & IFF_LOOPBACK) {
456 if (!sprt->rt6i_idev ||
457 sprt->rt6i_idev->dev->ifindex != oif) {
458 if (flags & RT6_LOOKUP_F_IFACE && oif)
459 continue;
460 if (local && (!oif ||
461 local->rt6i_idev->dev->ifindex == oif))
462 continue;
463 }
464 local = sprt;
465 }
466 } else {
467 if (ipv6_chk_addr(net, saddr, dev,
468 flags & RT6_LOOKUP_F_IFACE))
469 return sprt;
470 }
471 }
472
473 if (oif) {
474 if (local)
475 return local;
476
477 if (flags & RT6_LOOKUP_F_IFACE)
478 return net->ipv6.ip6_null_entry;
479 }
480 out:
481 return rt;
482 }
483
484 #ifdef CONFIG_IPV6_ROUTER_PREF
485 struct __rt6_probe_work {
486 struct work_struct work;
487 struct in6_addr target;
488 struct net_device *dev;
489 };
490
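/* Deferred work: send a Neighbour Solicitation for the router being
 * probed to its solicited-node multicast address, then drop the device
 * reference taken when the work was scheduled.
 */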
491 static void rt6_probe_deferred(struct work_struct *w)
492 {
493 struct in6_addr mcaddr;
494 struct __rt6_probe_work *work =
495 container_of(w, struct __rt6_probe_work, work);
496
497 addrconf_addr_solict_mult(&work->target, &mcaddr);
498 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
499 dev_put(work->dev);
500 kfree(work);
501 }
502
503 static void rt6_probe(struct rt6_info *rt)
504 {
505 struct neighbour *neigh;
506 /*
507 * Okay, this does not seem to be appropriate
508 * for now; however, we need to check whether it
509 * really is, aka Router Reachability Probing.
510 *
511 * Router Reachability Probe MUST be rate-limited
512 * to no more than one per minute.
513 */
514 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
515 return;
516 rcu_read_lock_bh();
517 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
518 if (neigh) {
519 write_lock(&neigh->lock);
520 if (neigh->nud_state & NUD_VALID)
521 goto out;
522 }
523
524 if (!neigh ||
525 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
526 struct __rt6_probe_work *work;
527
528 work = kmalloc(sizeof(*work), GFP_ATOMIC);
529
530 if (neigh && work)
531 __neigh_set_probe_once(neigh);
532
533 if (neigh)
534 write_unlock(&neigh->lock);
535
536 if (work) {
537 INIT_WORK(&work->work, rt6_probe_deferred);
538 work->target = rt->rt6i_gateway;
539 dev_hold(rt->dst.dev);
540 work->dev = rt->dst.dev;
541 schedule_work(&work->work);
542 }
543 } else {
544 out:
545 write_unlock(&neigh->lock);
546 }
547 rcu_read_unlock_bh();
548 }
549 #else
550 static inline void rt6_probe(struct rt6_info *rt)
551 {
552 }
553 #endif
554
555 /*
556 * Default Router Selection (RFC 2461 6.3.6)
557 */
558 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
559 {
560 struct net_device *dev = rt->dst.dev;
561 if (!oif || dev->ifindex == oif)
562 return 2;
563 if ((dev->flags & IFF_LOOPBACK) &&
564 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
565 return 1;
566 return 0;
567 }
568
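/* Grade nexthop reachability for router selection.  Routes without a
 * gateway always succeed.  A neighbour in a NUD_VALID state succeeds;
 * with CONFIG_IPV6_ROUTER_PREF a not-yet-failed neighbour also succeeds,
 * while NUD_FAILED asks for a probe.  A missing neighbour entry succeeds
 * when router preference is enabled and requests round-robin otherwise.
 */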
569 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
570 {
571 struct neighbour *neigh;
572 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
573
574 if (rt->rt6i_flags & RTF_NONEXTHOP ||
575 !(rt->rt6i_flags & RTF_GATEWAY))
576 return RT6_NUD_SUCCEED;
577
578 rcu_read_lock_bh();
579 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
580 if (neigh) {
581 read_lock(&neigh->lock);
582 if (neigh->nud_state & NUD_VALID)
583 ret = RT6_NUD_SUCCEED;
584 #ifdef CONFIG_IPV6_ROUTER_PREF
585 else if (!(neigh->nud_state & NUD_FAILED))
586 ret = RT6_NUD_SUCCEED;
587 else
588 ret = RT6_NUD_FAIL_PROBE;
589 #endif
590 read_unlock(&neigh->lock);
591 } else {
592 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
593 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
594 }
595 rcu_read_unlock_bh();
596
597 return ret;
598 }
599
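/* Score a candidate route: 2 for a match on the requested interface,
 * 1 for a loopback route whose idev matches, plus the decoded router
 * preference bits.  Under RT6_LOOKUP_F_REACHABLE a negative NUD result
 * is returned as-is so the caller can distinguish the failure mode.
 */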
600 static int rt6_score_route(struct rt6_info *rt, int oif,
601 int strict)
602 {
603 int m;
604
605 m = rt6_check_dev(rt, oif);
606 if (!m && (strict & RT6_LOOKUP_F_IFACE))
607 return RT6_NUD_FAIL_HARD;
608 #ifdef CONFIG_IPV6_ROUTER_PREF
609 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
610 #endif
611 if (strict & RT6_LOOKUP_F_REACHABLE) {
612 int n = rt6_check_neigh(rt);
613 if (n < 0)
614 return n;
615 }
616 return m;
617 }
618
619 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
620 int *mpri, struct rt6_info *match,
621 bool *do_rr)
622 {
623 int m;
624 bool match_do_rr = false;
625
626 if (rt6_check_expired(rt))
627 goto out;
628
629 m = rt6_score_route(rt, oif, strict);
630 if (m == RT6_NUD_FAIL_DO_RR) {
631 match_do_rr = true;
632 m = 0; /* lowest valid score */
633 } else if (m == RT6_NUD_FAIL_HARD) {
634 goto out;
635 }
636
637 if (strict & RT6_LOOKUP_F_REACHABLE)
638 rt6_probe(rt);
639
640 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
641 if (m > *mpri) {
642 *do_rr = match_do_rr;
643 *mpri = m;
644 match = rt;
645 }
646 out:
647 return match;
648 }
649
650 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
651 struct rt6_info *rr_head,
652 u32 metric, int oif, int strict,
653 bool *do_rr)
654 {
655 struct rt6_info *rt, *match;
656 int mpri = -1;
657
658 match = NULL;
659 for (rt = rr_head; rt && rt->rt6i_metric == metric;
660 rt = rt->dst.rt6_next)
661 match = find_match(rt, oif, strict, &mpri, match, do_rr);
662 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
663 rt = rt->dst.rt6_next)
664 match = find_match(rt, oif, strict, &mpri, match, do_rr);
665
666 return match;
667 }
668
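/* Round-robin route selection within a fib6 node: scan from fn->rr_ptr,
 * and when find_rr_leaf requests it advance rr_ptr to the next route of
 * the same metric so equally good routers take turns.
 */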
669 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
670 {
671 struct rt6_info *match, *rt0;
672 struct net *net;
673 bool do_rr = false;
674
675 rt0 = fn->rr_ptr;
676 if (!rt0)
677 fn->rr_ptr = rt0 = fn->leaf;
678
679 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
680 &do_rr);
681
682 if (do_rr) {
683 struct rt6_info *next = rt0->dst.rt6_next;
684
685 /* no entries matched; do round-robin */
686 if (!next || next->rt6i_metric != rt0->rt6i_metric)
687 next = fn->leaf;
688
689 if (next != rt0)
690 fn->rr_ptr = next;
691 }
692
693 net = dev_net(rt0->dst.dev);
694 return match ? match : net->ipv6.ip6_null_entry;
695 }
696
697 #ifdef CONFIG_IPV6_ROUTE_INFO
698 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
699 const struct in6_addr *gwaddr)
700 {
701 struct net *net = dev_net(dev);
702 struct route_info *rinfo = (struct route_info *) opt;
703 struct in6_addr prefix_buf, *prefix;
704 unsigned int pref;
705 unsigned long lifetime;
706 struct rt6_info *rt;
707
708 if (len < sizeof(struct route_info)) {
709 return -EINVAL;
710 }
711
712 /* Sanity check for prefix_len and length */
713 if (rinfo->length > 3) {
714 return -EINVAL;
715 } else if (rinfo->prefix_len > 128) {
716 return -EINVAL;
717 } else if (rinfo->prefix_len > 64) {
718 if (rinfo->length < 2) {
719 return -EINVAL;
720 }
721 } else if (rinfo->prefix_len > 0) {
722 if (rinfo->length < 1) {
723 return -EINVAL;
724 }
725 }
726
727 pref = rinfo->route_pref;
728 if (pref == ICMPV6_ROUTER_PREF_INVALID)
729 return -EINVAL;
730
731 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
732
733 if (rinfo->length == 3)
734 prefix = (struct in6_addr *)rinfo->prefix;
735 else {
736 /* this function is safe */
737 ipv6_addr_prefix(&prefix_buf,
738 (struct in6_addr *)rinfo->prefix,
739 rinfo->prefix_len);
740 prefix = &prefix_buf;
741 }
742
743 if (rinfo->prefix_len == 0)
744 rt = rt6_get_dflt_router(gwaddr, dev);
745 else
746 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
747 gwaddr, dev->ifindex);
748
749 if (rt && !lifetime) {
750 ip6_del_rt(rt);
751 rt = NULL;
752 }
753
754 if (!rt && lifetime)
755 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
756 pref);
757 else if (rt)
758 rt->rt6i_flags = RTF_ROUTEINFO |
759 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
760
761 if (rt) {
762 if (!addrconf_finite_timeout(lifetime))
763 rt6_clean_expires(rt);
764 else
765 rt6_set_expires(rt, jiffies + HZ * lifetime);
766
767 ip6_rt_put(rt);
768 }
769 return 0;
770 }
771 #endif
772
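/* Walk back up the fib tree after a failed match, descending into a
 * parent's source-address subtree when one exists, until a node that
 * carries routes (RTN_RTINFO) is found or the tree root is reached.
 */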
773 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
774 struct in6_addr *saddr)
775 {
776 struct fib6_node *pn;
777 while (1) {
778 if (fn->fn_flags & RTN_TL_ROOT)
779 return NULL;
780 pn = fn->parent;
781 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
782 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
783 else
784 fn = pn;
785 if (fn->fn_flags & RTN_RTINFO)
786 return fn;
787 }
788 }
789
790 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
791 struct fib6_table *table,
792 struct flowi6 *fl6, int flags)
793 {
794 struct fib6_node *fn;
795 struct rt6_info *rt;
796
797 read_lock_bh(&table->tb6_lock);
798 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
799 restart:
800 rt = fn->leaf;
801 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
802 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
803 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
804 if (rt == net->ipv6.ip6_null_entry) {
805 fn = fib6_backtrack(fn, &fl6->saddr);
806 if (fn)
807 goto restart;
808 }
809 dst_use(&rt->dst, jiffies);
810 read_unlock_bh(&table->tb6_lock);
811 return rt;
812
813 }
814
815 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
816 int flags)
817 {
818 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
819 }
820 EXPORT_SYMBOL_GPL(ip6_route_lookup);
821
822 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
823 const struct in6_addr *saddr, int oif, int strict)
824 {
825 struct flowi6 fl6 = {
826 .flowi6_oif = oif,
827 .daddr = *daddr,
828 };
829 struct dst_entry *dst;
830 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
831
832 if (saddr) {
833 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
834 flags |= RT6_LOOKUP_F_HAS_SADDR;
835 }
836
837 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
838 if (dst->error == 0)
839 return (struct rt6_info *) dst;
840
841 dst_release(dst);
842
843 return NULL;
844 }
845 EXPORT_SYMBOL(rt6_lookup);
846
847 /* ip6_ins_rt is called with table->tb6_lock NOT held.
848 It takes a new route entry; if the insertion fails for any reason,
849 the route is freed. In any case, if the caller does not hold a
850 reference, it may be destroyed.
851 */
852
853 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
854 struct mx6_config *mxc)
855 {
856 int err;
857 struct fib6_table *table;
858
859 table = rt->rt6i_table;
860 write_lock_bh(&table->tb6_lock);
861 err = fib6_add(&table->tb6_root, rt, info, mxc);
862 write_unlock_bh(&table->tb6_lock);
863
864 return err;
865 }
866
867 int ip6_ins_rt(struct rt6_info *rt)
868 {
869 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
870 struct mx6_config mxc = { .mx = NULL, };
871
872 return __ip6_ins_rt(rt, &info, &mxc);
873 }
874
875 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
876 const struct in6_addr *daddr,
877 const struct in6_addr *saddr)
878 {
879 struct rt6_info *rt;
880
881 /*
882 * Clone the route.
883 */
884
885 rt = ip6_rt_copy(ort, daddr);
886
887 if (rt) {
888 if (ort->rt6i_dst.plen != 128 &&
889 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
890 rt->rt6i_flags |= RTF_ANYCAST;
891
892 rt->rt6i_flags |= RTF_CACHE;
893
894 #ifdef CONFIG_IPV6_SUBTREES
895 if (rt->rt6i_src.plen && saddr) {
896 rt->rt6i_src.addr = *saddr;
897 rt->rt6i_src.plen = 128;
898 }
899 #endif
900 }
901
902 return rt;
903 }
904
905 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
906 const struct in6_addr *daddr)
907 {
908 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
909
910 if (rt)
911 rt->rt6i_flags |= RTF_CACHE;
912 return rt;
913 }
914
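/* Core policy-routing lookup.  Select the best route (retrying without
 * the reachability requirement if nothing passes it), return it directly
 * if it is already an RTF_CACHE clone, otherwise create a per-destination
 * clone (copy-on-write for routes without a gateway) and insert it,
 * retrying a few times if another CPU raced us while the table lock was
 * dropped.
 */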
915 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
916 struct flowi6 *fl6, int flags)
917 {
918 struct fib6_node *fn, *saved_fn;
919 struct rt6_info *rt, *nrt;
920 int strict = 0;
921 int attempts = 3;
922 int err;
923
924 strict |= flags & RT6_LOOKUP_F_IFACE;
925 if (net->ipv6.devconf_all->forwarding == 0)
926 strict |= RT6_LOOKUP_F_REACHABLE;
927
928 redo_fib6_lookup_lock:
929 read_lock_bh(&table->tb6_lock);
930
931 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
932 saved_fn = fn;
933
934 redo_rt6_select:
935 rt = rt6_select(fn, oif, strict);
936 if (rt->rt6i_nsiblings)
937 rt = rt6_multipath_select(rt, fl6, oif, strict);
938 if (rt == net->ipv6.ip6_null_entry) {
939 fn = fib6_backtrack(fn, &fl6->saddr);
940 if (fn)
941 goto redo_rt6_select;
942 else if (strict & RT6_LOOKUP_F_REACHABLE) {
943 /* also consider unreachable route */
944 strict &= ~RT6_LOOKUP_F_REACHABLE;
945 fn = saved_fn;
946 goto redo_rt6_select;
947 } else {
948 dst_hold(&rt->dst);
949 read_unlock_bh(&table->tb6_lock);
950 goto out2;
951 }
952 }
953
954 dst_hold(&rt->dst);
955 read_unlock_bh(&table->tb6_lock);
956
957 if (rt->rt6i_flags & RTF_CACHE)
958 goto out2;
959
960 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
961 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
962 else if (!(rt->dst.flags & DST_HOST))
963 nrt = rt6_alloc_clone(rt, &fl6->daddr);
964 else
965 goto out2;
966
967 ip6_rt_put(rt);
968 rt = nrt ? : net->ipv6.ip6_null_entry;
969
970 dst_hold(&rt->dst);
971 if (nrt) {
972 err = ip6_ins_rt(nrt);
973 if (!err)
974 goto out2;
975 }
976
977 if (--attempts <= 0)
978 goto out2;
979
980 /*
981 * Race condition! In the gap, when table->tb6_lock was
982 * released someone could insert this route. Relookup.
983 */
984 ip6_rt_put(rt);
985 goto redo_fib6_lookup_lock;
986
987 out2:
988 rt->dst.lastuse = jiffies;
989 rt->dst.__use++;
990
991 return rt;
992 }
993
994 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
995 struct flowi6 *fl6, int flags)
996 {
997 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
998 }
999
1000 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1001 struct net_device *dev,
1002 struct flowi6 *fl6, int flags)
1003 {
1004 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1005 flags |= RT6_LOOKUP_F_IFACE;
1006
1007 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1008 }
1009
1010 void ip6_route_input(struct sk_buff *skb)
1011 {
1012 const struct ipv6hdr *iph = ipv6_hdr(skb);
1013 struct net *net = dev_net(skb->dev);
1014 int flags = RT6_LOOKUP_F_HAS_SADDR;
1015 struct flowi6 fl6 = {
1016 .flowi6_iif = skb->dev->ifindex,
1017 .daddr = iph->daddr,
1018 .saddr = iph->saddr,
1019 .flowlabel = ip6_flowinfo(iph),
1020 .flowi6_mark = skb->mark,
1021 .flowi6_proto = iph->nexthdr,
1022 };
1023
1024 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1025 }
1026
1027 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1028 struct flowi6 *fl6, int flags)
1029 {
1030 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1031 }
1032
1033 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1034 struct flowi6 *fl6, int flags)
1035 {
1036 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1037
1038 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1039 flags |= RT6_LOOKUP_F_IFACE;
1040
1041 if (!ipv6_addr_any(&fl6->saddr))
1042 flags |= RT6_LOOKUP_F_HAS_SADDR;
1043 else if (sk)
1044 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1045
1046 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1047 }
1048 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1049
1050 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1051 {
1052 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1053 struct dst_entry *new = NULL;
1054
1055 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1056 if (rt) {
1057 new = &rt->dst;
1058
1059 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1060 rt6_init_peer(rt, net->ipv6.peers);
1061
1062 new->__use = 1;
1063 new->input = dst_discard;
1064 new->output = dst_discard_sk;
1065
1066 if (dst_metrics_read_only(&ort->dst))
1067 new->_metrics = ort->dst._metrics;
1068 else
1069 dst_copy_metrics(new, &ort->dst);
1070 rt->rt6i_idev = ort->rt6i_idev;
1071 if (rt->rt6i_idev)
1072 in6_dev_hold(rt->rt6i_idev);
1073
1074 rt->rt6i_gateway = ort->rt6i_gateway;
1075 rt->rt6i_flags = ort->rt6i_flags;
1076 rt->rt6i_metric = 0;
1077
1078 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1079 #ifdef CONFIG_IPV6_SUBTREES
1080 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1081 #endif
1082
1083 dst_free(new);
1084 }
1085
1086 dst_release(dst_orig);
1087 return new ? new : ERR_PTR(-ENOMEM);
1088 }
1089
1090 /*
1091 * Destination cache support functions
1092 */
1093
1094 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1095 {
1096 struct rt6_info *rt;
1097
1098 rt = (struct rt6_info *) dst;
1099
1100 /* All IPV6 dsts are created with ->obsolete set to the value
1101 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1102 * into this function always.
1103 */
1104 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1105 return NULL;
1106
1107 if (rt6_check_expired(rt))
1108 return NULL;
1109
1110 return dst;
1111 }
1112
1113 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1114 {
1115 struct rt6_info *rt = (struct rt6_info *) dst;
1116
1117 if (rt) {
1118 if (rt->rt6i_flags & RTF_CACHE) {
1119 if (rt6_check_expired(rt)) {
1120 ip6_del_rt(rt);
1121 dst = NULL;
1122 }
1123 } else {
1124 dst_release(dst);
1125 dst = NULL;
1126 }
1127 }
1128 return dst;
1129 }
1130
1131 static void ip6_link_failure(struct sk_buff *skb)
1132 {
1133 struct rt6_info *rt;
1134
1135 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1136
1137 rt = (struct rt6_info *) skb_dst(skb);
1138 if (rt) {
1139 if (rt->rt6i_flags & RTF_CACHE) {
1140 dst_hold(&rt->dst);
1141 if (ip6_del_rt(rt))
1142 dst_free(&rt->dst);
1143 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1144 rt->rt6i_node->fn_sernum = -1;
1145 }
1146 }
1147 }
1148
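/* dst_ops PMTU handler: only host routes (rt6i_dst.plen == 128) are
 * touched, and only to shrink the MTU.  The new value is clamped to
 * IPV6_MIN_MTU, the route is marked RTF_MODIFIED and given the
 * configured ip6_rt_mtu_expires timeout.
 */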
1149 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1150 struct sk_buff *skb, u32 mtu)
1151 {
1152 struct rt6_info *rt6 = (struct rt6_info *)dst;
1153
1154 dst_confirm(dst);
1155 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1156 struct net *net = dev_net(dst->dev);
1157
1158 rt6->rt6i_flags |= RTF_MODIFIED;
1159 if (mtu < IPV6_MIN_MTU)
1160 mtu = IPV6_MIN_MTU;
1161
1162 dst_metric_set(dst, RTAX_MTU, mtu);
1163 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1164 }
1165 }
1166
1167 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1168 int oif, u32 mark)
1169 {
1170 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1171 struct dst_entry *dst;
1172 struct flowi6 fl6;
1173
1174 memset(&fl6, 0, sizeof(fl6));
1175 fl6.flowi6_oif = oif;
1176 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1177 fl6.daddr = iph->daddr;
1178 fl6.saddr = iph->saddr;
1179 fl6.flowlabel = ip6_flowinfo(iph);
1180
1181 dst = ip6_route_output(net, NULL, &fl6);
1182 if (!dst->error)
1183 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1184 dst_release(dst);
1185 }
1186 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1187
1188 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1189 {
1190 ip6_update_pmtu(skb, sock_net(sk), mtu,
1191 sk->sk_bound_dev_if, sk->sk_mark);
1192 }
1193 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1194
1195 /* Handle redirects */
1196 struct ip6rd_flowi {
1197 struct flowi6 fl6;
1198 struct in6_addr gateway;
1199 };
1200
1201 static struct rt6_info *__ip6_route_redirect(struct net *net,
1202 struct fib6_table *table,
1203 struct flowi6 *fl6,
1204 int flags)
1205 {
1206 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1207 struct rt6_info *rt;
1208 struct fib6_node *fn;
1209
1210 /* Get the "current" route for this destination and
1211 * check if the redirect has come from the appropriate router.
1212 *
1213 * RFC 4861 specifies that redirects should only be
1214 * accepted if they come from the nexthop to the target.
1215 * Due to the way the routes are chosen, this notion
1216 * is a bit fuzzy and one might need to check all possible
1217 * routes.
1218 */
1219
1220 read_lock_bh(&table->tb6_lock);
1221 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1222 restart:
1223 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1224 if (rt6_check_expired(rt))
1225 continue;
1226 if (rt->dst.error)
1227 break;
1228 if (!(rt->rt6i_flags & RTF_GATEWAY))
1229 continue;
1230 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1231 continue;
1232 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1233 continue;
1234 break;
1235 }
1236
1237 if (!rt)
1238 rt = net->ipv6.ip6_null_entry;
1239 else if (rt->dst.error) {
1240 rt = net->ipv6.ip6_null_entry;
1241 goto out;
1242 }
1243
1244 if (rt == net->ipv6.ip6_null_entry) {
1245 fn = fib6_backtrack(fn, &fl6->saddr);
1246 if (fn)
1247 goto restart;
1248 }
1249
1250 out:
1251 dst_hold(&rt->dst);
1252
1253 read_unlock_bh(&table->tb6_lock);
1254
1255 return rt;
1256 };
1257
1258 static struct dst_entry *ip6_route_redirect(struct net *net,
1259 const struct flowi6 *fl6,
1260 const struct in6_addr *gateway)
1261 {
1262 int flags = RT6_LOOKUP_F_HAS_SADDR;
1263 struct ip6rd_flowi rdfl;
1264
1265 rdfl.fl6 = *fl6;
1266 rdfl.gateway = *gateway;
1267
1268 return fib6_rule_lookup(net, &rdfl.fl6,
1269 flags, __ip6_route_redirect);
1270 }
1271
1272 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1273 {
1274 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1275 struct dst_entry *dst;
1276 struct flowi6 fl6;
1277
1278 memset(&fl6, 0, sizeof(fl6));
1279 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1280 fl6.flowi6_oif = oif;
1281 fl6.flowi6_mark = mark;
1282 fl6.daddr = iph->daddr;
1283 fl6.saddr = iph->saddr;
1284 fl6.flowlabel = ip6_flowinfo(iph);
1285
1286 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1287 rt6_do_redirect(dst, NULL, skb);
1288 dst_release(dst);
1289 }
1290 EXPORT_SYMBOL_GPL(ip6_redirect);
1291
1292 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1293 u32 mark)
1294 {
1295 const struct ipv6hdr *iph = ipv6_hdr(skb);
1296 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1297 struct dst_entry *dst;
1298 struct flowi6 fl6;
1299
1300 memset(&fl6, 0, sizeof(fl6));
1301 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1302 fl6.flowi6_oif = oif;
1303 fl6.flowi6_mark = mark;
1304 fl6.daddr = msg->dest;
1305 fl6.saddr = iph->daddr;
1306
1307 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1308 rt6_do_redirect(dst, NULL, skb);
1309 dst_release(dst);
1310 }
1311
1312 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1313 {
1314 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1315 }
1316 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1317
1318 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1319 {
1320 struct net_device *dev = dst->dev;
1321 unsigned int mtu = dst_mtu(dst);
1322 struct net *net = dev_net(dev);
1323
1324 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1325
1326 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1327 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1328
1329 /*
1330 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1331 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1332 * IPV6_MAXPLEN is also valid and means: "any MSS,
1333 * rely only on pmtu discovery"
1334 */
1335 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1336 mtu = IPV6_MAXPLEN;
1337 return mtu;
1338 }
1339
1340 static unsigned int ip6_mtu(const struct dst_entry *dst)
1341 {
1342 struct inet6_dev *idev;
1343 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1344
1345 if (mtu)
1346 goto out;
1347
1348 mtu = IPV6_MIN_MTU;
1349
1350 rcu_read_lock();
1351 idev = __in6_dev_get(dst->dev);
1352 if (idev)
1353 mtu = idev->cnf.mtu6;
1354 rcu_read_unlock();
1355
1356 out:
1357 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1358 }
1359
1360 static struct dst_entry *icmp6_dst_gc_list;
1361 static DEFINE_SPINLOCK(icmp6_dst_lock);
1362
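/* Allocate an uncached host dst for ndisc/ICMPv6 output.  The entry is
 * not inserted into the fib; instead it is chained on icmp6_dst_gc_list
 * and reclaimed by icmp6_dst_gc() once its refcount drops to zero.
 */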
1363 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1364 struct flowi6 *fl6)
1365 {
1366 struct dst_entry *dst;
1367 struct rt6_info *rt;
1368 struct inet6_dev *idev = in6_dev_get(dev);
1369 struct net *net = dev_net(dev);
1370
1371 if (unlikely(!idev))
1372 return ERR_PTR(-ENODEV);
1373
1374 rt = ip6_dst_alloc(net, dev, 0, NULL);
1375 if (unlikely(!rt)) {
1376 in6_dev_put(idev);
1377 dst = ERR_PTR(-ENOMEM);
1378 goto out;
1379 }
1380
1381 rt->dst.flags |= DST_HOST;
1382 rt->dst.output = ip6_output;
1383 atomic_set(&rt->dst.__refcnt, 1);
1384 rt->rt6i_gateway = fl6->daddr;
1385 rt->rt6i_dst.addr = fl6->daddr;
1386 rt->rt6i_dst.plen = 128;
1387 rt->rt6i_idev = idev;
1388 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1389
1390 spin_lock_bh(&icmp6_dst_lock);
1391 rt->dst.next = icmp6_dst_gc_list;
1392 icmp6_dst_gc_list = &rt->dst;
1393 spin_unlock_bh(&icmp6_dst_lock);
1394
1395 fib6_force_start_gc(net);
1396
1397 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1398
1399 out:
1400 return dst;
1401 }
1402
1403 int icmp6_dst_gc(void)
1404 {
1405 struct dst_entry *dst, **pprev;
1406 int more = 0;
1407
1408 spin_lock_bh(&icmp6_dst_lock);
1409 pprev = &icmp6_dst_gc_list;
1410
1411 while ((dst = *pprev) != NULL) {
1412 if (!atomic_read(&dst->__refcnt)) {
1413 *pprev = dst->next;
1414 dst_free(dst);
1415 } else {
1416 pprev = &dst->next;
1417 ++more;
1418 }
1419 }
1420
1421 spin_unlock_bh(&icmp6_dst_lock);
1422
1423 return more;
1424 }
1425
1426 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1427 void *arg)
1428 {
1429 struct dst_entry *dst, **pprev;
1430
1431 spin_lock_bh(&icmp6_dst_lock);
1432 pprev = &icmp6_dst_gc_list;
1433 while ((dst = *pprev) != NULL) {
1434 struct rt6_info *rt = (struct rt6_info *) dst;
1435 if (func(rt, arg)) {
1436 *pprev = dst->next;
1437 dst_free(dst);
1438 } else {
1439 pprev = &dst->next;
1440 }
1441 }
1442 spin_unlock_bh(&icmp6_dst_lock);
1443 }
1444
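/* dst_ops garbage collector.  Skip the run if the last GC was recent and
 * we are under ip6_rt_max_size; otherwise age the tree with a growing
 * ip6_rt_gc_expire timeout, which then decays by the configured
 * elasticity.  Returns true while the table is still over the limit.
 */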
1445 static int ip6_dst_gc(struct dst_ops *ops)
1446 {
1447 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1448 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1449 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1450 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1451 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1452 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1453 int entries;
1454
1455 entries = dst_entries_get_fast(ops);
1456 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1457 entries <= rt_max_size)
1458 goto out;
1459
1460 net->ipv6.ip6_rt_gc_expire++;
1461 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1462 entries = dst_entries_get_slow(ops);
1463 if (entries < ops->gc_thresh)
1464 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1465 out:
1466 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1467 return entries > rt_max_size;
1468 }
1469
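/* Translate the RTA_METRICS netlink attributes in cfg->fc_mx into the
 * metrics array of mxc.  RTAX_CC_ALGO is given by congestion-control
 * name and converted to its key; out-of-range types or unknown
 * algorithms fail with -EINVAL.
 */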
1470 static int ip6_convert_metrics(struct mx6_config *mxc,
1471 const struct fib6_config *cfg)
1472 {
1473 struct nlattr *nla;
1474 int remaining;
1475 u32 *mp;
1476
1477 if (!cfg->fc_mx)
1478 return 0;
1479
1480 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1481 if (unlikely(!mp))
1482 return -ENOMEM;
1483
1484 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1485 int type = nla_type(nla);
1486
1487 if (type) {
1488 u32 val;
1489
1490 if (unlikely(type > RTAX_MAX))
1491 goto err;
1492 if (type == RTAX_CC_ALGO) {
1493 char tmp[TCP_CA_NAME_MAX];
1494
1495 nla_strlcpy(tmp, nla, sizeof(tmp));
1496 val = tcp_ca_get_key_by_name(tmp);
1497 if (val == TCP_CA_UNSPEC)
1498 goto err;
1499 } else {
1500 val = nla_get_u32(nla);
1501 }
1502
1503 mp[type - 1] = val;
1504 __set_bit(type - 1, mxc->mx_valid);
1505 }
1506 }
1507
1508 mxc->mx = mp;
1509
1510 return 0;
1511 err:
1512 kfree(mp);
1513 return -EINVAL;
1514 }
1515
1516 int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1517 {
1518 int err;
1519 struct net *net = cfg->fc_nlinfo.nl_net;
1520 struct rt6_info *rt = NULL;
1521 struct net_device *dev = NULL;
1522 struct inet6_dev *idev = NULL;
1523 struct fib6_table *table;
1524 int addr_type;
1525
1526 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1527 return -EINVAL;
1528 #ifndef CONFIG_IPV6_SUBTREES
1529 if (cfg->fc_src_len)
1530 return -EINVAL;
1531 #endif
1532 if (cfg->fc_ifindex) {
1533 err = -ENODEV;
1534 dev = dev_get_by_index(net, cfg->fc_ifindex);
1535 if (!dev)
1536 goto out;
1537 idev = in6_dev_get(dev);
1538 if (!idev)
1539 goto out;
1540 }
1541
1542 if (cfg->fc_metric == 0)
1543 cfg->fc_metric = IP6_RT_PRIO_USER;
1544
1545 err = -ENOBUFS;
1546 if (cfg->fc_nlinfo.nlh &&
1547 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1548 table = fib6_get_table(net, cfg->fc_table);
1549 if (!table) {
1550 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1551 table = fib6_new_table(net, cfg->fc_table);
1552 }
1553 } else {
1554 table = fib6_new_table(net, cfg->fc_table);
1555 }
1556
1557 if (!table)
1558 goto out;
1559
1560 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1561
1562 if (!rt) {
1563 err = -ENOMEM;
1564 goto out;
1565 }
1566
1567 if (cfg->fc_flags & RTF_EXPIRES)
1568 rt6_set_expires(rt, jiffies +
1569 clock_t_to_jiffies(cfg->fc_expires));
1570 else
1571 rt6_clean_expires(rt);
1572
1573 if (cfg->fc_protocol == RTPROT_UNSPEC)
1574 cfg->fc_protocol = RTPROT_BOOT;
1575 rt->rt6i_protocol = cfg->fc_protocol;
1576
1577 addr_type = ipv6_addr_type(&cfg->fc_dst);
1578
1579 if (addr_type & IPV6_ADDR_MULTICAST)
1580 rt->dst.input = ip6_mc_input;
1581 else if (cfg->fc_flags & RTF_LOCAL)
1582 rt->dst.input = ip6_input;
1583 else
1584 rt->dst.input = ip6_forward;
1585
1586 rt->dst.output = ip6_output;
1587
1588 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1589 rt->rt6i_dst.plen = cfg->fc_dst_len;
1590 if (rt->rt6i_dst.plen == 128) {
1591 rt->dst.flags |= DST_HOST;
1592 dst_metrics_set_force_overwrite(&rt->dst);
1593 }
1594
1595 #ifdef CONFIG_IPV6_SUBTREES
1596 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1597 rt->rt6i_src.plen = cfg->fc_src_len;
1598 #endif
1599
1600 rt->rt6i_metric = cfg->fc_metric;
1601
1602 /* We cannot add true routes via loopback here;
1603 they would result in kernel looping, so promote them to reject routes.
1604 */
1605 if ((cfg->fc_flags & RTF_REJECT) ||
1606 (dev && (dev->flags & IFF_LOOPBACK) &&
1607 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1608 !(cfg->fc_flags & RTF_LOCAL))) {
1609 /* hold loopback dev/idev if we haven't done so. */
1610 if (dev != net->loopback_dev) {
1611 if (dev) {
1612 dev_put(dev);
1613 in6_dev_put(idev);
1614 }
1615 dev = net->loopback_dev;
1616 dev_hold(dev);
1617 idev = in6_dev_get(dev);
1618 if (!idev) {
1619 err = -ENODEV;
1620 goto out;
1621 }
1622 }
1623 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1624 switch (cfg->fc_type) {
1625 case RTN_BLACKHOLE:
1626 rt->dst.error = -EINVAL;
1627 rt->dst.output = dst_discard_sk;
1628 rt->dst.input = dst_discard;
1629 break;
1630 case RTN_PROHIBIT:
1631 rt->dst.error = -EACCES;
1632 rt->dst.output = ip6_pkt_prohibit_out;
1633 rt->dst.input = ip6_pkt_prohibit;
1634 break;
1635 case RTN_THROW:
1636 default:
1637 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1638 : -ENETUNREACH;
1639 rt->dst.output = ip6_pkt_discard_out;
1640 rt->dst.input = ip6_pkt_discard;
1641 break;
1642 }
1643 goto install_route;
1644 }
1645
1646 if (cfg->fc_flags & RTF_GATEWAY) {
1647 const struct in6_addr *gw_addr;
1648 int gwa_type;
1649
1650 gw_addr = &cfg->fc_gateway;
1651 rt->rt6i_gateway = *gw_addr;
1652 gwa_type = ipv6_addr_type(gw_addr);
1653
1654 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1655 struct rt6_info *grt;
1656
1657 /* IPv6 strictly inhibits using non-link-local
1658 addresses as a nexthop address.
1659 Otherwise, the router will not be able to send redirects.
1660 It is very good, but in some (rare!) circumstances
1661 (SIT, PtP, NBMA NOARP links) it is handy to allow
1662 some exceptions. --ANK
1663 */
1664 err = -EINVAL;
1665 if (!(gwa_type & IPV6_ADDR_UNICAST))
1666 goto out;
1667
1668 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1669
1670 err = -EHOSTUNREACH;
1671 if (!grt)
1672 goto out;
1673 if (dev) {
1674 if (dev != grt->dst.dev) {
1675 ip6_rt_put(grt);
1676 goto out;
1677 }
1678 } else {
1679 dev = grt->dst.dev;
1680 idev = grt->rt6i_idev;
1681 dev_hold(dev);
1682 in6_dev_hold(grt->rt6i_idev);
1683 }
1684 if (!(grt->rt6i_flags & RTF_GATEWAY))
1685 err = 0;
1686 ip6_rt_put(grt);
1687
1688 if (err)
1689 goto out;
1690 }
1691 err = -EINVAL;
1692 if (!dev || (dev->flags & IFF_LOOPBACK))
1693 goto out;
1694 }
1695
1696 err = -ENODEV;
1697 if (!dev)
1698 goto out;
1699
1700 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1701 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1702 err = -EINVAL;
1703 goto out;
1704 }
1705 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1706 rt->rt6i_prefsrc.plen = 128;
1707 } else
1708 rt->rt6i_prefsrc.plen = 0;
1709
1710 rt->rt6i_flags = cfg->fc_flags;
1711
1712 install_route:
1713 rt->dst.dev = dev;
1714 rt->rt6i_idev = idev;
1715 rt->rt6i_table = table;
1716
1717 cfg->fc_nlinfo.nl_net = dev_net(dev);
1718
1719 *rt_ret = rt;
1720
1721 return 0;
1722 out:
1723 if (dev)
1724 dev_put(dev);
1725 if (idev)
1726 in6_dev_put(idev);
1727 if (rt)
1728 dst_free(&rt->dst);
1729
1730 *rt_ret = NULL;
1731
1732 return err;
1733 }
1734
1735 int ip6_route_add(struct fib6_config *cfg)
1736 {
1737 struct mx6_config mxc = { .mx = NULL, };
1738 struct rt6_info *rt = NULL;
1739 int err;
1740
1741 err = ip6_route_info_create(cfg, &rt);
1742 if (err)
1743 goto out;
1744
1745 err = ip6_convert_metrics(&mxc, cfg);
1746 if (err)
1747 goto out;
1748
1749 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1750
1751 kfree(mxc.mx);
1752
1753 return err;
1754 out:
1755 if (rt)
1756 dst_free(&rt->dst);
1757
1758 return err;
1759 }
1760
1761 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1762 {
1763 int err;
1764 struct fib6_table *table;
1765 struct net *net = dev_net(rt->dst.dev);
1766
1767 if (rt == net->ipv6.ip6_null_entry) {
1768 err = -ENOENT;
1769 goto out;
1770 }
1771
1772 table = rt->rt6i_table;
1773 write_lock_bh(&table->tb6_lock);
1774 err = fib6_del(rt, info);
1775 write_unlock_bh(&table->tb6_lock);
1776
1777 out:
1778 ip6_rt_put(rt);
1779 return err;
1780 }
1781
1782 int ip6_del_rt(struct rt6_info *rt)
1783 {
1784 struct nl_info info = {
1785 .nl_net = dev_net(rt->dst.dev),
1786 };
1787 return __ip6_del_rt(rt, &info);
1788 }
1789
1790 static int ip6_route_del(struct fib6_config *cfg)
1791 {
1792 struct fib6_table *table;
1793 struct fib6_node *fn;
1794 struct rt6_info *rt;
1795 int err = -ESRCH;
1796
1797 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1798 if (!table)
1799 return err;
1800
1801 read_lock_bh(&table->tb6_lock);
1802
1803 fn = fib6_locate(&table->tb6_root,
1804 &cfg->fc_dst, cfg->fc_dst_len,
1805 &cfg->fc_src, cfg->fc_src_len);
1806
1807 if (fn) {
1808 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1809 if (cfg->fc_ifindex &&
1810 (!rt->dst.dev ||
1811 rt->dst.dev->ifindex != cfg->fc_ifindex))
1812 continue;
1813 if (cfg->fc_flags & RTF_GATEWAY &&
1814 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1815 continue;
1816 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1817 continue;
1818 dst_hold(&rt->dst);
1819 read_unlock_bh(&table->tb6_lock);
1820
1821 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1822 }
1823 }
1824 read_unlock_bh(&table->tb6_lock);
1825
1826 return err;
1827 }
1828
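/* dst_ops redirect handler: validate an ICMPv6 Redirect (RFC 4861),
 * update the neighbour cache for the new first hop, install an
 * RTF_CACHE clone of the route pointing at it, notify interested
 * subsystems via NETEVENT_REDIRECT and drop the old cached entry.
 */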
1829 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1830 {
1831 struct net *net = dev_net(skb->dev);
1832 struct netevent_redirect netevent;
1833 struct rt6_info *rt, *nrt = NULL;
1834 struct ndisc_options ndopts;
1835 struct inet6_dev *in6_dev;
1836 struct neighbour *neigh;
1837 struct rd_msg *msg;
1838 int optlen, on_link;
1839 u8 *lladdr;
1840
1841 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1842 optlen -= sizeof(*msg);
1843
1844 if (optlen < 0) {
1845 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1846 return;
1847 }
1848
1849 msg = (struct rd_msg *)icmp6_hdr(skb);
1850
1851 if (ipv6_addr_is_multicast(&msg->dest)) {
1852 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1853 return;
1854 }
1855
1856 on_link = 0;
1857 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1858 on_link = 1;
1859 } else if (ipv6_addr_type(&msg->target) !=
1860 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1861 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1862 return;
1863 }
1864
1865 in6_dev = __in6_dev_get(skb->dev);
1866 if (!in6_dev)
1867 return;
1868 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1869 return;
1870
1871 /* RFC2461 8.1:
1872 * The IP source address of the Redirect MUST be the same as the current
1873 * first-hop router for the specified ICMP Destination Address.
1874 */
1875
1876 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1877 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1878 return;
1879 }
1880
1881 lladdr = NULL;
1882 if (ndopts.nd_opts_tgt_lladdr) {
1883 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1884 skb->dev);
1885 if (!lladdr) {
1886 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1887 return;
1888 }
1889 }
1890
1891 rt = (struct rt6_info *) dst;
1892 if (rt == net->ipv6.ip6_null_entry) {
1893 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1894 return;
1895 }
1896
1897 /* Redirect received -> path was valid.
1898 * Look, redirects are sent only in response to data packets,
1899 * so that this nexthop apparently is reachable. --ANK
1900 */
1901 dst_confirm(&rt->dst);
1902
1903 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1904 if (!neigh)
1905 return;
1906
1907 /*
1908 * We have finally decided to accept it.
1909 */
1910
1911 neigh_update(neigh, lladdr, NUD_STALE,
1912 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1913 NEIGH_UPDATE_F_OVERRIDE|
1914 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1915 NEIGH_UPDATE_F_ISROUTER))
1916 );
1917
1918 nrt = ip6_rt_copy(rt, &msg->dest);
1919 if (!nrt)
1920 goto out;
1921
1922 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1923 if (on_link)
1924 nrt->rt6i_flags &= ~RTF_GATEWAY;
1925
1926 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1927
1928 if (ip6_ins_rt(nrt))
1929 goto out;
1930
1931 netevent.old = &rt->dst;
1932 netevent.new = &nrt->dst;
1933 netevent.daddr = &msg->dest;
1934 netevent.neigh = neigh;
1935 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1936
1937 if (rt->rt6i_flags & RTF_CACHE) {
1938 rt = (struct rt6_info *) dst_clone(&rt->dst);
1939 ip6_del_rt(rt);
1940 }
1941
1942 out:
1943 neigh_release(neigh);
1944 }
1945
1946 /*
1947 * Misc support functions
1948 */
1949
1950 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1951 const struct in6_addr *dest)
1952 {
1953 struct net *net = dev_net(ort->dst.dev);
1954 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1955 ort->rt6i_table);
1956
1957 if (rt) {
1958 rt->dst.input = ort->dst.input;
1959 rt->dst.output = ort->dst.output;
1960 rt->dst.flags |= DST_HOST;
1961
1962 rt->rt6i_dst.addr = *dest;
1963 rt->rt6i_dst.plen = 128;
1964 dst_copy_metrics(&rt->dst, &ort->dst);
1965 rt->dst.error = ort->dst.error;
1966 rt->rt6i_idev = ort->rt6i_idev;
1967 if (rt->rt6i_idev)
1968 in6_dev_hold(rt->rt6i_idev);
1969 rt->dst.lastuse = jiffies;
1970
1971 if (ort->rt6i_flags & RTF_GATEWAY)
1972 rt->rt6i_gateway = ort->rt6i_gateway;
1973 else
1974 rt->rt6i_gateway = *dest;
1975 rt->rt6i_flags = ort->rt6i_flags;
1976 rt6_set_from(rt, ort);
1977 rt->rt6i_metric = 0;
1978
1979 #ifdef CONFIG_IPV6_SUBTREES
1980 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1981 #endif
1982 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1983 rt->rt6i_table = ort->rt6i_table;
1984 }
1985 return rt;
1986 }
1987
1988 #ifdef CONFIG_IPV6_ROUTE_INFO
1989 static struct rt6_info *rt6_get_route_info(struct net *net,
1990 const struct in6_addr *prefix, int prefixlen,
1991 const struct in6_addr *gwaddr, int ifindex)
1992 {
1993 struct fib6_node *fn;
1994 struct rt6_info *rt = NULL;
1995 struct fib6_table *table;
1996
1997 table = fib6_get_table(net, RT6_TABLE_INFO);
1998 if (!table)
1999 return NULL;
2000
2001 read_lock_bh(&table->tb6_lock);
2002 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2003 if (!fn)
2004 goto out;
2005
2006 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2007 if (rt->dst.dev->ifindex != ifindex)
2008 continue;
2009 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2010 continue;
2011 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2012 continue;
2013 dst_hold(&rt->dst);
2014 break;
2015 }
2016 out:
2017 read_unlock_bh(&table->tb6_lock);
2018 return rt;
2019 }
2020
2021 static struct rt6_info *rt6_add_route_info(struct net *net,
2022 const struct in6_addr *prefix, int prefixlen,
2023 const struct in6_addr *gwaddr, int ifindex,
2024 unsigned int pref)
2025 {
2026 struct fib6_config cfg = {
2027 .fc_table = RT6_TABLE_INFO,
2028 .fc_metric = IP6_RT_PRIO_USER,
2029 .fc_ifindex = ifindex,
2030 .fc_dst_len = prefixlen,
2031 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2032 RTF_UP | RTF_PREF(pref),
2033 .fc_nlinfo.portid = 0,
2034 .fc_nlinfo.nlh = NULL,
2035 .fc_nlinfo.nl_net = net,
2036 };
2037
2038 cfg.fc_dst = *prefix;
2039 cfg.fc_gateway = *gwaddr;
2040
2041 /* We should treat it as a default route if prefix length is 0. */
2042 if (!prefixlen)
2043 cfg.fc_flags |= RTF_DEFAULT;
2044
2045 ip6_route_add(&cfg);
2046
2047 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2048 }
2049 #endif
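
/* Usage sketch, under the assumption that the caller is the RFC 4191 Route
 * Information option handler (rt6_route_rcv() earlier in this file):
 *
 *	rt = rt6_get_route_info(net, prefix, plen, gwaddr, dev->ifindex);
 *	if (!rt && lifetime)
 *		rt = rt6_add_route_info(net, prefix, plen, gwaddr,
 *					dev->ifindex, pref);
 *	if (rt)
 *		rt6_set_expires(rt, jiffies + HZ * lifetime);
 *
 * i.e. look the route up first, create it only while it is still being
 * advertised, then refresh its expiry from the option lifetime.
 */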
2050
2051 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2052 {
2053 struct rt6_info *rt;
2054 struct fib6_table *table;
2055
2056 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2057 if (!table)
2058 return NULL;
2059
2060 read_lock_bh(&table->tb6_lock);
2061 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2062 if (dev == rt->dst.dev &&
2063 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2064 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2065 break;
2066 }
2067 if (rt)
2068 dst_hold(&rt->dst);
2069 read_unlock_bh(&table->tb6_lock);
2070 return rt;
2071 }
2072
2073 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2074 struct net_device *dev,
2075 unsigned int pref)
2076 {
2077 struct fib6_config cfg = {
2078 .fc_table = RT6_TABLE_DFLT,
2079 .fc_metric = IP6_RT_PRIO_USER,
2080 .fc_ifindex = dev->ifindex,
2081 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2082 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2083 .fc_nlinfo.portid = 0,
2084 .fc_nlinfo.nlh = NULL,
2085 .fc_nlinfo.nl_net = dev_net(dev),
2086 };
2087
2088 cfg.fc_gateway = *gwaddr;
2089
2090 ip6_route_add(&cfg);
2091
2092 return rt6_get_dflt_router(gwaddr, dev);
2093 }
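
/* Call-pattern sketch (assumption: the caller is Router Advertisement
 * processing in ndisc, e.g. ndisc_router_discovery()):
 *
 *	rt = rt6_get_dflt_router(&ra_source, skb->dev);
 *	if (!rt && router_lifetime)
 *		rt = rt6_add_dflt_router(&ra_source, skb->dev, pref);
 *	if (rt)
 *		rt6_set_expires(rt, jiffies + HZ * router_lifetime);
 *
 * The default route lives in RT6_TABLE_DFLT and carries RTF_ADDRCONF, which
 * is what rt6_purge_dflt_routers() below keys on to remove it again.
 */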
2094
2095 void rt6_purge_dflt_routers(struct net *net)
2096 {
2097 struct rt6_info *rt;
2098 struct fib6_table *table;
2099
2100 /* NOTE: Keep consistent with rt6_get_dflt_router */
2101 table = fib6_get_table(net, RT6_TABLE_DFLT);
2102 if (!table)
2103 return;
2104
2105 restart:
2106 read_lock_bh(&table->tb6_lock);
2107 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2108 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2109 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2110 dst_hold(&rt->dst);
2111 read_unlock_bh(&table->tb6_lock);
2112 ip6_del_rt(rt);
2113 goto restart;
2114 }
2115 }
2116 read_unlock_bh(&table->tb6_lock);
2117 }
2118
2119 static void rtmsg_to_fib6_config(struct net *net,
2120 struct in6_rtmsg *rtmsg,
2121 struct fib6_config *cfg)
2122 {
2123 memset(cfg, 0, sizeof(*cfg));
2124
2125 cfg->fc_table = RT6_TABLE_MAIN;
2126 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2127 cfg->fc_metric = rtmsg->rtmsg_metric;
2128 cfg->fc_expires = rtmsg->rtmsg_info;
2129 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2130 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2131 cfg->fc_flags = rtmsg->rtmsg_flags;
2132
2133 cfg->fc_nlinfo.nl_net = net;
2134
2135 cfg->fc_dst = rtmsg->rtmsg_dst;
2136 cfg->fc_src = rtmsg->rtmsg_src;
2137 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2138 }
2139
2140 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2141 {
2142 struct fib6_config cfg;
2143 struct in6_rtmsg rtmsg;
2144 int err;
2145
2146 switch (cmd) {
2147 case SIOCADDRT: /* Add a route */
2148 case SIOCDELRT: /* Delete a route */
2149 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2150 return -EPERM;
2151 err = copy_from_user(&rtmsg, arg,
2152 sizeof(struct in6_rtmsg));
2153 if (err)
2154 return -EFAULT;
2155
2156 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2157
2158 rtnl_lock();
2159 switch (cmd) {
2160 case SIOCADDRT:
2161 err = ip6_route_add(&cfg);
2162 break;
2163 case SIOCDELRT:
2164 err = ip6_route_del(&cfg);
2165 break;
2166 default:
2167 err = -EINVAL;
2168 }
2169 rtnl_unlock();
2170
2171 return err;
2172 }
2173
2174 return -EINVAL;
2175 }
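
/* Userspace sketch of the legacy ioctl path handled above (illustrative
 * values only):
 *
 *	struct in6_rtmsg rtmsg = {
 *		.rtmsg_dst_len = 64,            // destination prefix length
 *		.rtmsg_ifindex = if_nametoindex("eth0"),
 *		.rtmsg_metric  = 1,
 *		.rtmsg_flags   = RTF_UP | RTF_GATEWAY,
 *		// .rtmsg_dst / .rtmsg_gateway filled in by the caller
 *	};
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	ioctl(fd, SIOCADDRT, &rtmsg);           // needs CAP_NET_ADMIN
 *
 * The request is converted by rtmsg_to_fib6_config() and reaches
 * ip6_route_add()/ip6_route_del() under the RTNL lock.
 */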
2176
2177 /*
2178 * Drop the packet on the floor
2179 */
2180
2181 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2182 {
2183 int type;
2184 struct dst_entry *dst = skb_dst(skb);
2185 switch (ipstats_mib_noroutes) {
2186 case IPSTATS_MIB_INNOROUTES:
2187 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2188 if (type == IPV6_ADDR_ANY) {
2189 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2190 IPSTATS_MIB_INADDRERRORS);
2191 break;
2192 }
2193 /* FALLTHROUGH */
2194 case IPSTATS_MIB_OUTNOROUTES:
2195 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2196 ipstats_mib_noroutes);
2197 break;
2198 }
2199 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2200 kfree_skb(skb);
2201 return 0;
2202 }
2203
2204 static int ip6_pkt_discard(struct sk_buff *skb)
2205 {
2206 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2207 }
2208
2209 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2210 {
2211 skb->dev = skb_dst(skb)->dev;
2212 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2213 }
2214
2215 static int ip6_pkt_prohibit(struct sk_buff *skb)
2216 {
2217 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2218 }
2219
2220 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2221 {
2222 skb->dev = skb_dst(skb)->dev;
2223 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2224 }
2225
2226 /*
2227  *	Allocate a dst for a local (unicast / anycast) address.
2228 */
2229
2230 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2231 const struct in6_addr *addr,
2232 bool anycast)
2233 {
2234 struct net *net = dev_net(idev->dev);
2235 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2236 DST_NOCOUNT, NULL);
2237 if (!rt)
2238 return ERR_PTR(-ENOMEM);
2239
2240 in6_dev_hold(idev);
2241
2242 rt->dst.flags |= DST_HOST;
2243 rt->dst.input = ip6_input;
2244 rt->dst.output = ip6_output;
2245 rt->rt6i_idev = idev;
2246
2247 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2248 if (anycast)
2249 rt->rt6i_flags |= RTF_ANYCAST;
2250 else
2251 rt->rt6i_flags |= RTF_LOCAL;
2252
2253 rt->rt6i_gateway = *addr;
2254 rt->rt6i_dst.addr = *addr;
2255 rt->rt6i_dst.plen = 128;
2256 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2257
2258 atomic_set(&rt->dst.__refcnt, 1);
2259
2260 return rt;
2261 }
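
/* Sketch of the intended use (assumption: IPv6 addrconf, when a local
 * unicast or anycast address is configured on an interface):
 *
 *	rt = addrconf_dst_alloc(idev, &ifp->addr, false);
 *	if (!IS_ERR(rt))
 *		ip6_ins_rt(rt);
 *
 * which installs a /128 route through the loopback device so that locally
 * terminated traffic is steered into ip6_input() via dst.input.
 */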
2262
2263 int ip6_route_get_saddr(struct net *net,
2264 struct rt6_info *rt,
2265 const struct in6_addr *daddr,
2266 unsigned int prefs,
2267 struct in6_addr *saddr)
2268 {
2269 struct inet6_dev *idev =
2270 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2271 int err = 0;
2272 if (rt && rt->rt6i_prefsrc.plen)
2273 *saddr = rt->rt6i_prefsrc.addr;
2274 else
2275 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2276 daddr, prefs, saddr);
2277 return err;
2278 }
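
/* Example: for a route installed with
 *	ip -6 route add 2001:db8::/64 dev eth0 src 2001:db8::1
 * rt6i_prefsrc is set and *saddr is taken from it directly; otherwise
 * ipv6_dev_get_saddr() performs ordinary source address selection
 * (RFC 6724) on the outgoing device.
 */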
2279
2280 /* Remove a deleted IP address from prefsrc entries */
2281 struct arg_dev_net_ip {
2282 struct net_device *dev;
2283 struct net *net;
2284 struct in6_addr *addr;
2285 };
2286
2287 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2288 {
2289 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2290 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2291 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2292
2293 if (((void *)rt->dst.dev == dev || !dev) &&
2294 rt != net->ipv6.ip6_null_entry &&
2295 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2296 /* remove prefsrc entry */
2297 rt->rt6i_prefsrc.plen = 0;
2298 }
2299 return 0;
2300 }
2301
2302 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2303 {
2304 struct net *net = dev_net(ifp->idev->dev);
2305 struct arg_dev_net_ip adni = {
2306 .dev = ifp->idev->dev,
2307 .net = net,
2308 .addr = &ifp->addr,
2309 };
2310 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2311 }
2312
2313 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2314 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2315
2316 /* Remove routers and update dst entries when a gateway turns into a host. */
2317 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2318 {
2319 struct in6_addr *gateway = (struct in6_addr *)arg;
2320
2321 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2322 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2323 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2324 return -1;
2325 }
2326 return 0;
2327 }
2328
2329 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2330 {
2331 fib6_clean_all(net, fib6_clean_tohost, gateway);
2332 }
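
/* Trigger sketch (assumption: called from ndisc when a Router Advertisement
 * announces that a node has stopped being a router, i.e. router lifetime 0):
 *
 *	rt6_clean_tohost(dev_net(skb->dev), &ipv6_hdr(skb)->saddr);
 *
 * removes every RA-learned default route and every cached gateway route
 * that still points at that address.
 */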
2333
2334 struct arg_dev_net {
2335 struct net_device *dev;
2336 struct net *net;
2337 };
2338
2339 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2340 {
2341 const struct arg_dev_net *adn = arg;
2342 const struct net_device *dev = adn->dev;
2343
2344 if ((rt->dst.dev == dev || !dev) &&
2345 rt != adn->net->ipv6.ip6_null_entry)
2346 return -1;
2347
2348 return 0;
2349 }
2350
2351 void rt6_ifdown(struct net *net, struct net_device *dev)
2352 {
2353 struct arg_dev_net adn = {
2354 .dev = dev,
2355 .net = net,
2356 };
2357
2358 fib6_clean_all(net, fib6_ifdown, &adn);
2359 icmp6_clean_all(fib6_ifdown, &adn);
2360 }
2361
2362 struct rt6_mtu_change_arg {
2363 struct net_device *dev;
2364 unsigned int mtu;
2365 };
2366
2367 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2368 {
2369 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2370 struct inet6_dev *idev;
2371
2372 	/* In IPv6, PMTU discovery is not optional,
2373 	   so the RTAX_MTU lock cannot disable it.
2374 	   We still use this lock to block changes
2375 	   caused by addrconf/ndisc.
2376 	*/
2377
2378 idev = __in6_dev_get(arg->dev);
2379 if (!idev)
2380 return 0;
2381
2382 	/* After an administrative MTU increase, there is no way for
2383 	   PMTU discovery to learn about the increase, so the route PMTU
2384 	   must be updated here.  RFC 1981 does not cover administrative
2385 	   MTU increases (e.g. enabling jumbo frames).
2386 	 */
2387 	/*
2388 	   If the new MTU is less than the route PMTU, the new MTU becomes
2389 	   the lowest MTU in the path; update the route PMTU to reflect the
2390 	   decrease.  If the new MTU is greater than the route PMTU and the
2391 	   old MTU was the lowest MTU in the path, update the route PMTU to
2392 	   reflect the increase.  If another node on the path still has the
2393 	   lowest MTU, its Packet Too Big message will trigger PMTU
2394 	   discovery again.
2395 	 */
2396 if (rt->dst.dev == arg->dev &&
2397 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2398 (dst_mtu(&rt->dst) >= arg->mtu ||
2399 (dst_mtu(&rt->dst) < arg->mtu &&
2400 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2401 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2402 }
2403 return 0;
2404 }
2405
2406 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2407 {
2408 struct rt6_mtu_change_arg arg = {
2409 .dev = dev,
2410 .mtu = mtu,
2411 };
2412
2413 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2414 }
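
/* Worked example for the condition in rt6_mtu_change_route() (numbers are
 * illustrative): if eth0 drops from MTU 9000 to 1500, every unlocked route
 * on eth0 with dst_mtu >= 1500 is clamped to 1500, since the link MTU is now
 * an upper bound on the path MTU.  If eth0 later goes back to 9000, only
 * routes whose PMTU still equals the old device MTU are raised; a smaller
 * PMTU learned from a Packet Too Big message elsewhere on the path is left
 * untouched.
 */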
2415
2416 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2417 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2418 [RTA_OIF] = { .type = NLA_U32 },
2419 [RTA_IIF] = { .type = NLA_U32 },
2420 [RTA_PRIORITY] = { .type = NLA_U32 },
2421 [RTA_METRICS] = { .type = NLA_NESTED },
2422 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2423 [RTA_PREF] = { .type = NLA_U8 },
2424 };
2425
2426 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2427 struct fib6_config *cfg)
2428 {
2429 struct rtmsg *rtm;
2430 struct nlattr *tb[RTA_MAX+1];
2431 unsigned int pref;
2432 int err;
2433
2434 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2435 if (err < 0)
2436 goto errout;
2437
2438 err = -EINVAL;
2439 rtm = nlmsg_data(nlh);
2440 memset(cfg, 0, sizeof(*cfg));
2441
2442 cfg->fc_table = rtm->rtm_table;
2443 cfg->fc_dst_len = rtm->rtm_dst_len;
2444 cfg->fc_src_len = rtm->rtm_src_len;
2445 cfg->fc_flags = RTF_UP;
2446 cfg->fc_protocol = rtm->rtm_protocol;
2447 cfg->fc_type = rtm->rtm_type;
2448
2449 if (rtm->rtm_type == RTN_UNREACHABLE ||
2450 rtm->rtm_type == RTN_BLACKHOLE ||
2451 rtm->rtm_type == RTN_PROHIBIT ||
2452 rtm->rtm_type == RTN_THROW)
2453 cfg->fc_flags |= RTF_REJECT;
2454
2455 if (rtm->rtm_type == RTN_LOCAL)
2456 cfg->fc_flags |= RTF_LOCAL;
2457
2458 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2459 cfg->fc_nlinfo.nlh = nlh;
2460 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2461
2462 if (tb[RTA_GATEWAY]) {
2463 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2464 cfg->fc_flags |= RTF_GATEWAY;
2465 }
2466
2467 if (tb[RTA_DST]) {
2468 int plen = (rtm->rtm_dst_len + 7) >> 3;
2469
2470 if (nla_len(tb[RTA_DST]) < plen)
2471 goto errout;
2472
2473 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2474 }
2475
2476 if (tb[RTA_SRC]) {
2477 int plen = (rtm->rtm_src_len + 7) >> 3;
2478
2479 if (nla_len(tb[RTA_SRC]) < plen)
2480 goto errout;
2481
2482 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2483 }
2484
2485 if (tb[RTA_PREFSRC])
2486 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2487
2488 if (tb[RTA_OIF])
2489 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2490
2491 if (tb[RTA_PRIORITY])
2492 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2493
2494 if (tb[RTA_METRICS]) {
2495 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2496 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2497 }
2498
2499 if (tb[RTA_TABLE])
2500 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2501
2502 if (tb[RTA_MULTIPATH]) {
2503 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2504 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2505 }
2506
2507 if (tb[RTA_PREF]) {
2508 pref = nla_get_u8(tb[RTA_PREF]);
2509 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2510 pref != ICMPV6_ROUTER_PREF_HIGH)
2511 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2512 cfg->fc_flags |= RTF_PREF(pref);
2513 }
2514
2515 err = 0;
2516 errout:
2517 return err;
2518 }
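
/* Mapping sketch: an RTM_NEWROUTE request such as the one generated by
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024
 * arrives with RTA_DST, RTA_GATEWAY, RTA_OIF and RTA_PRIORITY attributes
 * (the exact set depends on the userspace tool) and is flattened into a
 * single struct fib6_config:
 *
 *	fc_dst/fc_dst_len <- RTA_DST + rtm_dst_len
 *	fc_gateway        <- RTA_GATEWAY (also sets RTF_GATEWAY)
 *	fc_ifindex        <- RTA_OIF
 *	fc_metric         <- RTA_PRIORITY
 *	fc_table          <- RTA_TABLE, falling back to rtm_table
 */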
2519
2520 struct rt6_nh {
2521 struct rt6_info *rt6_info;
2522 struct fib6_config r_cfg;
2523 struct mx6_config mxc;
2524 struct list_head next;
2525 };
2526
2527 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2528 {
2529 struct rt6_nh *nh;
2530
2531 list_for_each_entry(nh, rt6_nh_list, next) {
2532 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2533 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2534 nh->r_cfg.fc_ifindex);
2535 }
2536 }
2537
2538 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2539 struct rt6_info *rt, struct fib6_config *r_cfg)
2540 {
2541 struct rt6_nh *nh;
2542 struct rt6_info *rtnh;
2543 int err = -EEXIST;
2544
2545 list_for_each_entry(nh, rt6_nh_list, next) {
2546 /* check if rt6_info already exists */
2547 rtnh = nh->rt6_info;
2548
2549 if (rtnh->dst.dev == rt->dst.dev &&
2550 rtnh->rt6i_idev == rt->rt6i_idev &&
2551 ipv6_addr_equal(&rtnh->rt6i_gateway,
2552 &rt->rt6i_gateway))
2553 return err;
2554 }
2555
2556 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2557 if (!nh)
2558 return -ENOMEM;
2559 nh->rt6_info = rt;
2560 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2561 if (err) {
2562 kfree(nh);
2563 return err;
2564 }
2565 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2566 list_add_tail(&nh->next, rt6_nh_list);
2567
2568 return 0;
2569 }
2570
2571 static int ip6_route_multipath_add(struct fib6_config *cfg)
2572 {
2573 struct fib6_config r_cfg;
2574 struct rtnexthop *rtnh;
2575 struct rt6_info *rt;
2576 struct rt6_nh *err_nh;
2577 struct rt6_nh *nh, *nh_safe;
2578 int remaining;
2579 int attrlen;
2580 int err = 1;
2581 int nhn = 0;
2582 int replace = (cfg->fc_nlinfo.nlh &&
2583 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2584 LIST_HEAD(rt6_nh_list);
2585
2586 remaining = cfg->fc_mp_len;
2587 rtnh = (struct rtnexthop *)cfg->fc_mp;
2588
2589 	/* Parse the multipath attribute and build a list (rt6_nh_list)
2590 	 * with one rt6_info struct per nexthop.
2591 	 */
2592 while (rtnh_ok(rtnh, remaining)) {
2593 memcpy(&r_cfg, cfg, sizeof(*cfg));
2594 if (rtnh->rtnh_ifindex)
2595 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2596
2597 attrlen = rtnh_attrlen(rtnh);
2598 if (attrlen > 0) {
2599 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2600
2601 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2602 if (nla) {
2603 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2604 r_cfg.fc_flags |= RTF_GATEWAY;
2605 }
2606 }
2607
2608 err = ip6_route_info_create(&r_cfg, &rt);
2609 if (err)
2610 goto cleanup;
2611
2612 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2613 if (err) {
2614 dst_free(&rt->dst);
2615 goto cleanup;
2616 }
2617
2618 rtnh = rtnh_next(rtnh, &remaining);
2619 }
2620
2621 err_nh = NULL;
2622 list_for_each_entry(nh, &rt6_nh_list, next) {
2623 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2624 		/* nh->rt6_info is used or freed at this point, reset to NULL */
2625 nh->rt6_info = NULL;
2626 if (err) {
2627 if (replace && nhn)
2628 ip6_print_replace_route_err(&rt6_nh_list);
2629 err_nh = nh;
2630 goto add_errout;
2631 }
2632
2633 		/* Because each route is added like a single route, we remove
2634 		 * these flags after the first nexthop: if there is a collision,
2635 		 * we have already failed to add the first nexthop, because
2636 		 * fib6_add_rt2node() has rejected it; when replacing, the old
2637 		 * nexthops have already been replaced by the first new one, and
2638 		 * the remaining nexthops should be appended to it.
2639 		 */
2640 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2641 NLM_F_REPLACE);
2642 nhn++;
2643 }
2644
2645 goto cleanup;
2646
2647 add_errout:
2648 /* Delete routes that were already added */
2649 list_for_each_entry(nh, &rt6_nh_list, next) {
2650 if (err_nh == nh)
2651 break;
2652 ip6_route_del(&nh->r_cfg);
2653 }
2654
2655 cleanup:
2656 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2657 if (nh->rt6_info)
2658 dst_free(&nh->rt6_info->dst);
2659 if (nh->mxc.mx)
2660 kfree(nh->mxc.mx);
2661 list_del(&nh->next);
2662 kfree(nh);
2663 }
2664
2665 return err;
2666 }
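
/* Example (illustrative): a request like
 *	ip -6 route add 2001:db8::/64 nexthop via fe80::1 dev eth0 \
 *				      nexthop via fe80::2 dev eth1
 * is delivered as one RTM_NEWROUTE carrying RTA_MULTIPATH.  The code above
 * builds one rt6_info per rtnexthop, inserts them one by one with
 * __ip6_ins_rt(), and if a later nexthop fails it deletes the ones already
 * added so the whole operation stays all-or-nothing.
 */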
2667
2668 static int ip6_route_multipath_del(struct fib6_config *cfg)
2669 {
2670 struct fib6_config r_cfg;
2671 struct rtnexthop *rtnh;
2672 int remaining;
2673 int attrlen;
2674 int err = 1, last_err = 0;
2675
2676 remaining = cfg->fc_mp_len;
2677 rtnh = (struct rtnexthop *)cfg->fc_mp;
2678
2679 /* Parse a Multipath Entry */
2680 while (rtnh_ok(rtnh, remaining)) {
2681 memcpy(&r_cfg, cfg, sizeof(*cfg));
2682 if (rtnh->rtnh_ifindex)
2683 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2684
2685 attrlen = rtnh_attrlen(rtnh);
2686 if (attrlen > 0) {
2687 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2688
2689 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2690 if (nla) {
2691 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2692 r_cfg.fc_flags |= RTF_GATEWAY;
2693 }
2694 }
2695 err = ip6_route_del(&r_cfg);
2696 if (err)
2697 last_err = err;
2698
2699 rtnh = rtnh_next(rtnh, &remaining);
2700 }
2701
2702 return last_err;
2703 }
2704
2705 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2706 {
2707 struct fib6_config cfg;
2708 int err;
2709
2710 err = rtm_to_fib6_config(skb, nlh, &cfg);
2711 if (err < 0)
2712 return err;
2713
2714 if (cfg.fc_mp)
2715 return ip6_route_multipath_del(&cfg);
2716 else
2717 return ip6_route_del(&cfg);
2718 }
2719
2720 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2721 {
2722 struct fib6_config cfg;
2723 int err;
2724
2725 err = rtm_to_fib6_config(skb, nlh, &cfg);
2726 if (err < 0)
2727 return err;
2728
2729 if (cfg.fc_mp)
2730 return ip6_route_multipath_add(&cfg);
2731 else
2732 return ip6_route_add(&cfg);
2733 }
2734
2735 static inline size_t rt6_nlmsg_size(void)
2736 {
2737 return NLMSG_ALIGN(sizeof(struct rtmsg))
2738 + nla_total_size(16) /* RTA_SRC */
2739 + nla_total_size(16) /* RTA_DST */
2740 + nla_total_size(16) /* RTA_GATEWAY */
2741 + nla_total_size(16) /* RTA_PREFSRC */
2742 + nla_total_size(4) /* RTA_TABLE */
2743 + nla_total_size(4) /* RTA_IIF */
2744 + nla_total_size(4) /* RTA_OIF */
2745 + nla_total_size(4) /* RTA_PRIORITY */
2746 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2747 + nla_total_size(sizeof(struct rta_cacheinfo))
2748 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2749 + nla_total_size(1); /* RTA_PREF */
2750 }
2751
2752 static int rt6_fill_node(struct net *net,
2753 struct sk_buff *skb, struct rt6_info *rt,
2754 struct in6_addr *dst, struct in6_addr *src,
2755 int iif, int type, u32 portid, u32 seq,
2756 int prefix, int nowait, unsigned int flags)
2757 {
2758 struct rtmsg *rtm;
2759 struct nlmsghdr *nlh;
2760 long expires;
2761 u32 table;
2762
2763 if (prefix) { /* user wants prefix routes only */
2764 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2765 /* success since this is not a prefix route */
2766 return 1;
2767 }
2768 }
2769
2770 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2771 if (!nlh)
2772 return -EMSGSIZE;
2773
2774 rtm = nlmsg_data(nlh);
2775 rtm->rtm_family = AF_INET6;
2776 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2777 rtm->rtm_src_len = rt->rt6i_src.plen;
2778 rtm->rtm_tos = 0;
2779 if (rt->rt6i_table)
2780 table = rt->rt6i_table->tb6_id;
2781 else
2782 table = RT6_TABLE_UNSPEC;
2783 rtm->rtm_table = table;
2784 if (nla_put_u32(skb, RTA_TABLE, table))
2785 goto nla_put_failure;
2786 if (rt->rt6i_flags & RTF_REJECT) {
2787 switch (rt->dst.error) {
2788 case -EINVAL:
2789 rtm->rtm_type = RTN_BLACKHOLE;
2790 break;
2791 case -EACCES:
2792 rtm->rtm_type = RTN_PROHIBIT;
2793 break;
2794 case -EAGAIN:
2795 rtm->rtm_type = RTN_THROW;
2796 break;
2797 default:
2798 rtm->rtm_type = RTN_UNREACHABLE;
2799 break;
2800 }
2801 }
2802 else if (rt->rt6i_flags & RTF_LOCAL)
2803 rtm->rtm_type = RTN_LOCAL;
2804 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2805 rtm->rtm_type = RTN_LOCAL;
2806 else
2807 rtm->rtm_type = RTN_UNICAST;
2808 rtm->rtm_flags = 0;
2809 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2810 rtm->rtm_protocol = rt->rt6i_protocol;
2811 if (rt->rt6i_flags & RTF_DYNAMIC)
2812 rtm->rtm_protocol = RTPROT_REDIRECT;
2813 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2814 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2815 rtm->rtm_protocol = RTPROT_RA;
2816 else
2817 rtm->rtm_protocol = RTPROT_KERNEL;
2818 }
2819
2820 if (rt->rt6i_flags & RTF_CACHE)
2821 rtm->rtm_flags |= RTM_F_CLONED;
2822
2823 if (dst) {
2824 if (nla_put_in6_addr(skb, RTA_DST, dst))
2825 goto nla_put_failure;
2826 rtm->rtm_dst_len = 128;
2827 } else if (rtm->rtm_dst_len)
2828 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2829 goto nla_put_failure;
2830 #ifdef CONFIG_IPV6_SUBTREES
2831 if (src) {
2832 if (nla_put_in6_addr(skb, RTA_SRC, src))
2833 goto nla_put_failure;
2834 rtm->rtm_src_len = 128;
2835 } else if (rtm->rtm_src_len &&
2836 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2837 goto nla_put_failure;
2838 #endif
2839 if (iif) {
2840 #ifdef CONFIG_IPV6_MROUTE
2841 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2842 int err = ip6mr_get_route(net, skb, rtm, nowait);
2843 if (err <= 0) {
2844 if (!nowait) {
2845 if (err == 0)
2846 return 0;
2847 goto nla_put_failure;
2848 } else {
2849 if (err == -EMSGSIZE)
2850 goto nla_put_failure;
2851 }
2852 }
2853 } else
2854 #endif
2855 if (nla_put_u32(skb, RTA_IIF, iif))
2856 goto nla_put_failure;
2857 } else if (dst) {
2858 struct in6_addr saddr_buf;
2859 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2860 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2861 goto nla_put_failure;
2862 }
2863
2864 if (rt->rt6i_prefsrc.plen) {
2865 struct in6_addr saddr_buf;
2866 saddr_buf = rt->rt6i_prefsrc.addr;
2867 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2868 goto nla_put_failure;
2869 }
2870
2871 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2872 goto nla_put_failure;
2873
2874 if (rt->rt6i_flags & RTF_GATEWAY) {
2875 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2876 goto nla_put_failure;
2877 }
2878
2879 if (rt->dst.dev &&
2880 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2881 goto nla_put_failure;
2882 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2883 goto nla_put_failure;
2884
2885 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2886
2887 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2888 goto nla_put_failure;
2889
2890 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2891 goto nla_put_failure;
2892
2893 nlmsg_end(skb, nlh);
2894 return 0;
2895
2896 nla_put_failure:
2897 nlmsg_cancel(skb, nlh);
2898 return -EMSGSIZE;
2899 }
2900
2901 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2902 {
2903 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2904 int prefix;
2905
2906 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2907 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2908 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2909 } else
2910 prefix = 0;
2911
2912 return rt6_fill_node(arg->net,
2913 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2914 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2915 prefix, 0, NLM_F_MULTI);
2916 }
2917
2918 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2919 {
2920 struct net *net = sock_net(in_skb->sk);
2921 struct nlattr *tb[RTA_MAX+1];
2922 struct rt6_info *rt;
2923 struct sk_buff *skb;
2924 struct rtmsg *rtm;
2925 struct flowi6 fl6;
2926 int err, iif = 0, oif = 0;
2927
2928 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2929 if (err < 0)
2930 goto errout;
2931
2932 err = -EINVAL;
2933 memset(&fl6, 0, sizeof(fl6));
2934
2935 if (tb[RTA_SRC]) {
2936 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2937 goto errout;
2938
2939 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2940 }
2941
2942 if (tb[RTA_DST]) {
2943 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2944 goto errout;
2945
2946 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2947 }
2948
2949 if (tb[RTA_IIF])
2950 iif = nla_get_u32(tb[RTA_IIF]);
2951
2952 if (tb[RTA_OIF])
2953 oif = nla_get_u32(tb[RTA_OIF]);
2954
2955 if (tb[RTA_MARK])
2956 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2957
2958 if (iif) {
2959 struct net_device *dev;
2960 int flags = 0;
2961
2962 dev = __dev_get_by_index(net, iif);
2963 if (!dev) {
2964 err = -ENODEV;
2965 goto errout;
2966 }
2967
2968 fl6.flowi6_iif = iif;
2969
2970 if (!ipv6_addr_any(&fl6.saddr))
2971 flags |= RT6_LOOKUP_F_HAS_SADDR;
2972
2973 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2974 flags);
2975 } else {
2976 fl6.flowi6_oif = oif;
2977
2978 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2979 }
2980
2981 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2982 if (!skb) {
2983 ip6_rt_put(rt);
2984 err = -ENOBUFS;
2985 goto errout;
2986 }
2987
2988 	/* Reserve room for dummy headers; this skb can pass
2989 	   through a good chunk of the routing engine.
2990 	 */
2991 skb_reset_mac_header(skb);
2992 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2993
2994 skb_dst_set(skb, &rt->dst);
2995
2996 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2997 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2998 nlh->nlmsg_seq, 0, 0, 0);
2999 if (err < 0) {
3000 kfree_skb(skb);
3001 goto errout;
3002 }
3003
3004 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3005 errout:
3006 return err;
3007 }
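
/* Example (illustrative): "ip -6 route get 2001:db8::1" sends RTM_GETROUTE
 * with RTA_DST only; the code above resolves it via ip6_route_output(),
 * attaches the resulting dst to a fresh skb and replies with a single
 * RTM_NEWROUTE built by rt6_fill_node().  Supplying RTA_IIF instead makes
 * it behave like an input-path lookup through ip6_route_input_lookup().
 */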
3008
3009 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3010 {
3011 struct sk_buff *skb;
3012 struct net *net = info->nl_net;
3013 u32 seq;
3014 int err;
3015
3016 err = -ENOBUFS;
3017 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3018
3019 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
3020 if (!skb)
3021 goto errout;
3022
3023 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3024 event, info->portid, seq, 0, 0, 0);
3025 if (err < 0) {
3026 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3027 WARN_ON(err == -EMSGSIZE);
3028 kfree_skb(skb);
3029 goto errout;
3030 }
3031 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3032 info->nlh, gfp_any());
3033 return;
3034 errout:
3035 if (err < 0)
3036 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3037 }
3038
3039 static int ip6_route_dev_notify(struct notifier_block *this,
3040 unsigned long event, void *ptr)
3041 {
3042 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3043 struct net *net = dev_net(dev);
3044
3045 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3046 net->ipv6.ip6_null_entry->dst.dev = dev;
3047 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3048 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3049 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3050 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3051 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3052 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3053 #endif
3054 }
3055
3056 return NOTIFY_OK;
3057 }
3058
3059 /*
3060 * /proc
3061 */
3062
3063 #ifdef CONFIG_PROC_FS
3064
3065 static const struct file_operations ipv6_route_proc_fops = {
3066 .owner = THIS_MODULE,
3067 .open = ipv6_route_open,
3068 .read = seq_read,
3069 .llseek = seq_lseek,
3070 .release = seq_release_net,
3071 };
3072
3073 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3074 {
3075 struct net *net = (struct net *)seq->private;
3076 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3077 net->ipv6.rt6_stats->fib_nodes,
3078 net->ipv6.rt6_stats->fib_route_nodes,
3079 net->ipv6.rt6_stats->fib_rt_alloc,
3080 net->ipv6.rt6_stats->fib_rt_entries,
3081 net->ipv6.rt6_stats->fib_rt_cache,
3082 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3083 net->ipv6.rt6_stats->fib_discarded_routes);
3084
3085 return 0;
3086 }
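
/* /proc/net/rt6_stats prints, in this order and in hex: fib_nodes,
 * fib_route_nodes, fib_rt_alloc, fib_rt_entries, fib_rt_cache, the current
 * dst entry count and fib_discarded_routes, e.g. a line of the form
 * "0031 0042 0000 003e 0000 0015 0000" (values illustrative).
 */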
3087
3088 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3089 {
3090 return single_open_net(inode, file, rt6_stats_seq_show);
3091 }
3092
3093 static const struct file_operations rt6_stats_seq_fops = {
3094 .owner = THIS_MODULE,
3095 .open = rt6_stats_seq_open,
3096 .read = seq_read,
3097 .llseek = seq_lseek,
3098 .release = single_release_net,
3099 };
3100 #endif /* CONFIG_PROC_FS */
3101
3102 #ifdef CONFIG_SYSCTL
3103
3104 static
3105 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3106 void __user *buffer, size_t *lenp, loff_t *ppos)
3107 {
3108 struct net *net;
3109 int delay;
3110 if (!write)
3111 return -EINVAL;
3112
3113 net = (struct net *)ctl->extra1;
3114 delay = net->ipv6.sysctl.flush_delay;
3115 proc_dointvec(ctl, write, buffer, lenp, ppos);
3116 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3117 return 0;
3118 }
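
/* Usage sketch: "echo 1 > /proc/sys/net/ipv6/route/flush" (the entry is
 * write-only, mode 0200) stores the value via proc_dointvec() and kicks a
 * garbage-collection pass of this namespace's routing tables through
 * fib6_run_gc(); reads fail with -EINVAL.
 */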
3119
3120 struct ctl_table ipv6_route_table_template[] = {
3121 {
3122 .procname = "flush",
3123 .data = &init_net.ipv6.sysctl.flush_delay,
3124 .maxlen = sizeof(int),
3125 .mode = 0200,
3126 .proc_handler = ipv6_sysctl_rtcache_flush
3127 },
3128 {
3129 .procname = "gc_thresh",
3130 .data = &ip6_dst_ops_template.gc_thresh,
3131 .maxlen = sizeof(int),
3132 .mode = 0644,
3133 .proc_handler = proc_dointvec,
3134 },
3135 {
3136 .procname = "max_size",
3137 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
3138 .maxlen = sizeof(int),
3139 .mode = 0644,
3140 .proc_handler = proc_dointvec,
3141 },
3142 {
3143 .procname = "gc_min_interval",
3144 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3145 .maxlen = sizeof(int),
3146 .mode = 0644,
3147 .proc_handler = proc_dointvec_jiffies,
3148 },
3149 {
3150 .procname = "gc_timeout",
3151 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3152 .maxlen = sizeof(int),
3153 .mode = 0644,
3154 .proc_handler = proc_dointvec_jiffies,
3155 },
3156 {
3157 .procname = "gc_interval",
3158 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3159 .maxlen = sizeof(int),
3160 .mode = 0644,
3161 .proc_handler = proc_dointvec_jiffies,
3162 },
3163 {
3164 .procname = "gc_elasticity",
3165 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3166 .maxlen = sizeof(int),
3167 .mode = 0644,
3168 .proc_handler = proc_dointvec,
3169 },
3170 {
3171 .procname = "mtu_expires",
3172 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3173 .maxlen = sizeof(int),
3174 .mode = 0644,
3175 .proc_handler = proc_dointvec_jiffies,
3176 },
3177 {
3178 .procname = "min_adv_mss",
3179 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3180 .maxlen = sizeof(int),
3181 .mode = 0644,
3182 .proc_handler = proc_dointvec,
3183 },
3184 {
3185 .procname = "gc_min_interval_ms",
3186 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3187 .maxlen = sizeof(int),
3188 .mode = 0644,
3189 .proc_handler = proc_dointvec_ms_jiffies,
3190 },
3191 { }
3192 };
3193
3194 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3195 {
3196 struct ctl_table *table;
3197
3198 table = kmemdup(ipv6_route_table_template,
3199 sizeof(ipv6_route_table_template),
3200 GFP_KERNEL);
3201
3202 if (table) {
3203 table[0].data = &net->ipv6.sysctl.flush_delay;
3204 table[0].extra1 = net;
3205 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3206 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3207 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3208 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3209 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3210 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3211 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3212 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3213 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3214
3215 /* Don't export sysctls to unprivileged users */
3216 if (net->user_ns != &init_user_ns)
3217 table[0].procname = NULL;
3218 }
3219
3220 return table;
3221 }
3222 #endif
3223
3224 static int __net_init ip6_route_net_init(struct net *net)
3225 {
3226 int ret = -ENOMEM;
3227
3228 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3229 sizeof(net->ipv6.ip6_dst_ops));
3230
3231 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3232 goto out_ip6_dst_ops;
3233
3234 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3235 sizeof(*net->ipv6.ip6_null_entry),
3236 GFP_KERNEL);
3237 if (!net->ipv6.ip6_null_entry)
3238 goto out_ip6_dst_entries;
3239 net->ipv6.ip6_null_entry->dst.path =
3240 (struct dst_entry *)net->ipv6.ip6_null_entry;
3241 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3242 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3243 ip6_template_metrics, true);
3244
3245 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3246 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3247 sizeof(*net->ipv6.ip6_prohibit_entry),
3248 GFP_KERNEL);
3249 if (!net->ipv6.ip6_prohibit_entry)
3250 goto out_ip6_null_entry;
3251 net->ipv6.ip6_prohibit_entry->dst.path =
3252 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3253 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3254 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3255 ip6_template_metrics, true);
3256
3257 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3258 sizeof(*net->ipv6.ip6_blk_hole_entry),
3259 GFP_KERNEL);
3260 if (!net->ipv6.ip6_blk_hole_entry)
3261 goto out_ip6_prohibit_entry;
3262 net->ipv6.ip6_blk_hole_entry->dst.path =
3263 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3264 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3265 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3266 ip6_template_metrics, true);
3267 #endif
3268
3269 net->ipv6.sysctl.flush_delay = 0;
3270 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3271 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3272 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3273 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3274 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3275 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3276 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3277
3278 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3279
3280 ret = 0;
3281 out:
3282 return ret;
3283
3284 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3285 out_ip6_prohibit_entry:
3286 kfree(net->ipv6.ip6_prohibit_entry);
3287 out_ip6_null_entry:
3288 kfree(net->ipv6.ip6_null_entry);
3289 #endif
3290 out_ip6_dst_entries:
3291 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3292 out_ip6_dst_ops:
3293 goto out;
3294 }
3295
3296 static void __net_exit ip6_route_net_exit(struct net *net)
3297 {
3298 kfree(net->ipv6.ip6_null_entry);
3299 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3300 kfree(net->ipv6.ip6_prohibit_entry);
3301 kfree(net->ipv6.ip6_blk_hole_entry);
3302 #endif
3303 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3304 }
3305
3306 static int __net_init ip6_route_net_init_late(struct net *net)
3307 {
3308 #ifdef CONFIG_PROC_FS
3309 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3310 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3311 #endif
3312 return 0;
3313 }
3314
3315 static void __net_exit ip6_route_net_exit_late(struct net *net)
3316 {
3317 #ifdef CONFIG_PROC_FS
3318 remove_proc_entry("ipv6_route", net->proc_net);
3319 remove_proc_entry("rt6_stats", net->proc_net);
3320 #endif
3321 }
3322
3323 static struct pernet_operations ip6_route_net_ops = {
3324 .init = ip6_route_net_init,
3325 .exit = ip6_route_net_exit,
3326 };
3327
3328 static int __net_init ipv6_inetpeer_init(struct net *net)
3329 {
3330 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3331
3332 if (!bp)
3333 return -ENOMEM;
3334 inet_peer_base_init(bp);
3335 net->ipv6.peers = bp;
3336 return 0;
3337 }
3338
3339 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3340 {
3341 struct inet_peer_base *bp = net->ipv6.peers;
3342
3343 net->ipv6.peers = NULL;
3344 inetpeer_invalidate_tree(bp);
3345 kfree(bp);
3346 }
3347
3348 static struct pernet_operations ipv6_inetpeer_ops = {
3349 .init = ipv6_inetpeer_init,
3350 .exit = ipv6_inetpeer_exit,
3351 };
3352
3353 static struct pernet_operations ip6_route_net_late_ops = {
3354 .init = ip6_route_net_init_late,
3355 .exit = ip6_route_net_exit_late,
3356 };
3357
3358 static struct notifier_block ip6_route_dev_notifier = {
3359 .notifier_call = ip6_route_dev_notify,
3360 .priority = 0,
3361 };
3362
3363 int __init ip6_route_init(void)
3364 {
3365 int ret;
3366
3367 ret = -ENOMEM;
3368 ip6_dst_ops_template.kmem_cachep =
3369 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3370 SLAB_HWCACHE_ALIGN, NULL);
3371 if (!ip6_dst_ops_template.kmem_cachep)
3372 goto out;
3373
3374 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3375 if (ret)
3376 goto out_kmem_cache;
3377
3378 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3379 if (ret)
3380 goto out_dst_entries;
3381
3382 ret = register_pernet_subsys(&ip6_route_net_ops);
3383 if (ret)
3384 goto out_register_inetpeer;
3385
3386 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3387
3388 	/* The loopback device is registered before this code runs, so the
3389 	 * loopback reference in rt6_info is not taken automatically; do it
3390 	 * manually for init_net */
3391 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3392 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3393 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3395 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3396 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3397 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3398 #endif
3399 ret = fib6_init();
3400 if (ret)
3401 goto out_register_subsys;
3402
3403 ret = xfrm6_init();
3404 if (ret)
3405 goto out_fib6_init;
3406
3407 ret = fib6_rules_init();
3408 if (ret)
3409 goto xfrm6_init;
3410
3411 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3412 if (ret)
3413 goto fib6_rules_init;
3414
3415 ret = -ENOBUFS;
3416 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3417 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3418 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3419 goto out_register_late_subsys;
3420
3421 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3422 if (ret)
3423 goto out_register_late_subsys;
3424
3425 out:
3426 return ret;
3427
3428 out_register_late_subsys:
3429 unregister_pernet_subsys(&ip6_route_net_late_ops);
3430 fib6_rules_init:
3431 fib6_rules_cleanup();
3432 xfrm6_init:
3433 xfrm6_fini();
3434 out_fib6_init:
3435 fib6_gc_cleanup();
3436 out_register_subsys:
3437 unregister_pernet_subsys(&ip6_route_net_ops);
3438 out_register_inetpeer:
3439 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3440 out_dst_entries:
3441 dst_entries_destroy(&ip6_dst_blackhole_ops);
3442 out_kmem_cache:
3443 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3444 goto out;
3445 }
3446
3447 void ip6_route_cleanup(void)
3448 {
3449 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3450 unregister_pernet_subsys(&ip6_route_net_late_ops);
3451 fib6_rules_cleanup();
3452 xfrm6_fini();
3453 fib6_gc_cleanup();
3454 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3455 unregister_pernet_subsys(&ip6_route_net_ops);
3456 dst_entries_destroy(&ip6_dst_blackhole_ops);
3457 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3458 }
3459