1#include <linux/types.h> 2#include <linux/skbuff.h> 3#include <linux/socket.h> 4#include <linux/sysctl.h> 5#include <linux/net.h> 6#include <linux/module.h> 7#include <linux/if_arp.h> 8#include <linux/ipv6.h> 9#include <linux/mpls.h> 10#include <linux/vmalloc.h> 11#include <net/ip.h> 12#include <net/dst.h> 13#include <net/sock.h> 14#include <net/arp.h> 15#include <net/ip_fib.h> 16#include <net/netevent.h> 17#include <net/netns/generic.h> 18#if IS_ENABLED(CONFIG_IPV6) 19#include <net/ipv6.h> 20#include <net/addrconf.h> 21#endif 22#include <net/nexthop.h> 23#include "internal.h" 24 25/* Maximum number of labels to look ahead at when selecting a path of 26 * a multipath route 27 */ 28#define MAX_MP_SELECT_LABELS 4 29 30#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) 31 32static int zero = 0; 33static int label_limit = (1 << 20) - 1; 34 35static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 36 struct nlmsghdr *nlh, struct net *net, u32 portid, 37 unsigned int nlm_flags); 38 39static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) 40{ 41 struct mpls_route *rt = NULL; 42 43 if (index < net->mpls.platform_labels) { 44 struct mpls_route __rcu **platform_label = 45 rcu_dereference(net->mpls.platform_label); 46 rt = rcu_dereference(platform_label[index]); 47 } 48 return rt; 49} 50 51static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) 52{ 53 return rcu_dereference_rtnl(dev->mpls_ptr); 54} 55 56bool mpls_output_possible(const struct net_device *dev) 57{ 58 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); 59} 60EXPORT_SYMBOL_GPL(mpls_output_possible); 61 62static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) 63{ 64 u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); 65 int nh_index = nh - rt->rt_nh; 66 67 return nh0_via + rt->rt_max_alen * nh_index; 68} 69 70static const u8 *mpls_nh_via(const struct mpls_route *rt, 71 const struct mpls_nh *nh) 72{ 73 return 
__mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh); 74} 75 76static unsigned int mpls_nh_header_size(const struct mpls_nh *nh) 77{ 78 /* The size of the layer 2.5 labels to be added for this route */ 79 return nh->nh_labels * sizeof(struct mpls_shim_hdr); 80} 81 82unsigned int mpls_dev_mtu(const struct net_device *dev) 83{ 84 /* The amount of data the layer 2 frame can hold */ 85 return dev->mtu; 86} 87EXPORT_SYMBOL_GPL(mpls_dev_mtu); 88 89bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 90{ 91 if (skb->len <= mtu) 92 return false; 93 94 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 95 return false; 96 97 return true; 98} 99EXPORT_SYMBOL_GPL(mpls_pkt_too_big); 100 101static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, 102 struct sk_buff *skb, bool bos) 103{ 104 struct mpls_entry_decoded dec; 105 struct mpls_shim_hdr *hdr; 106 bool eli_seen = false; 107 int label_index; 108 int nh_index = 0; 109 u32 hash = 0; 110 111 /* No need to look further into packet if there's only 112 * one path 113 */ 114 if (rt->rt_nhn == 1) 115 goto out; 116 117 for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; 118 label_index++) { 119 if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) 120 break; 121 122 /* Read and decode the current label */ 123 hdr = mpls_hdr(skb) + label_index; 124 dec = mpls_entry_decode(hdr); 125 126 /* RFC6790 - reserved labels MUST NOT be used as keys 127 * for the load-balancing function 128 */ 129 if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) { 130 hash = jhash_1word(dec.label, hash); 131 132 /* The entropy label follows the entropy label 133 * indicator, so this means that the entropy 134 * label was just added to the hash - no need to 135 * go any deeper either in the label stack or in the 136 * payload 137 */ 138 if (eli_seen) 139 break; 140 } else if (dec.label == MPLS_LABEL_ENTROPY) { 141 eli_seen = true; 142 } 143 144 bos = dec.bos; 145 if (bos && pskb_may_pull(skb, 
sizeof(*hdr) * label_index + 146 sizeof(struct iphdr))) { 147 const struct iphdr *v4hdr; 148 149 v4hdr = (const struct iphdr *)(mpls_hdr(skb) + 150 label_index); 151 if (v4hdr->version == 4) { 152 hash = jhash_3words(ntohl(v4hdr->saddr), 153 ntohl(v4hdr->daddr), 154 v4hdr->protocol, hash); 155 } else if (v4hdr->version == 6 && 156 pskb_may_pull(skb, sizeof(*hdr) * label_index + 157 sizeof(struct ipv6hdr))) { 158 const struct ipv6hdr *v6hdr; 159 160 v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + 161 label_index); 162 163 hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); 164 hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); 165 hash = jhash_1word(v6hdr->nexthdr, hash); 166 } 167 } 168 } 169 170 nh_index = hash % rt->rt_nhn; 171out: 172 return &rt->rt_nh[nh_index]; 173} 174 175static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, 176 struct mpls_entry_decoded dec) 177{ 178 enum mpls_payload_type payload_type; 179 bool success = false; 180 181 /* The IPv4 code below accesses through the IPv4 header 182 * checksum, which is 12 bytes into the packet. 183 * The IPv6 code below accesses through the IPv6 hop limit 184 * which is 8 bytes into the packet. 185 * 186 * For all supported cases there should always be at least 12 187 * bytes of packet data present. The IPv4 header is 20 bytes 188 * without options and the IPv6 header is always 40 bytes 189 * long. 
*/
	if (!pskb_may_pull(skb, 12))
		return false;

	/* MPT_UNSPEC routes infer the payload family from the first
	 * nibble of the payload (IP version field); MPT_IPV4/MPT_IPV6
	 * presumably share the numeric values 4/6 for this to work —
	 * NOTE(review): confirm against the enum in internal.h.
	 */
	payload_type = rt->rt_payload_type;
	if (payload_type == MPT_UNSPEC)
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		skb->protocol = htons(ETH_P_IP);
		/* Incrementally patch the IPv4 checksum for the TTL
		 * rewrite instead of recomputing it.
		 */
		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(dec.ttl << 8));
		hdr4->ttl = dec.ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
		skb->protocol = htons(ETH_P_IPV6);
		/* IPv6 has no header checksum; just set the hop limit. */
		hdr6->hop_limit = dec.ttl;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		break;
	}

	return success;
}

/* Receive handler for ETH_P_MPLS_UC packets (registered via
 * mpls_packet_type): decode the outermost label, look up the route,
 * select a nexthop, and either swap/push labels and retransmit or
 * (for penultimate hop popping at bottom of stack) hand the payload
 * back as plain IPv4/IPv6 via mpls_egress().  Returns 0 on success,
 * NET_RX_DROP otherwise.
 */
static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mpls_shim_hdr *hdr;
	struct mpls_route *rt;
	struct mpls_nh *nh;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	/* Drop unless MPLS input has been enabled on this device via the
	 * per-device "input" sysctl.
	 */
	mdev = mpls_dev_get(dev);
	if (!mdev || !mdev->input_enabled)
		goto drop;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto drop;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto drop;

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	/* Look up the popped label in the per-netns platform label table */
	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt)
		goto drop;

	nh = mpls_select_multipath(rt, skb, dec.bos);
	if (!nh)
		goto drop;

	/* Find the output device */
	out_dev = rcu_dereference(nh->nh_dev);
	if
(!mpls_output_possible(out_dev)) 274 goto drop; 275 276 if (skb_warn_if_lro(skb)) 277 goto drop; 278 279 skb_forward_csum(skb); 280 281 /* Verify ttl is valid */ 282 if (dec.ttl <= 1) 283 goto drop; 284 dec.ttl -= 1; 285 286 /* Verify the destination can hold the packet */ 287 new_header_size = mpls_nh_header_size(nh); 288 mtu = mpls_dev_mtu(out_dev); 289 if (mpls_pkt_too_big(skb, mtu - new_header_size)) 290 goto drop; 291 292 hh_len = LL_RESERVED_SPACE(out_dev); 293 if (!out_dev->header_ops) 294 hh_len = 0; 295 296 /* Ensure there is enough space for the headers in the skb */ 297 if (skb_cow(skb, hh_len + new_header_size)) 298 goto drop; 299 300 skb->dev = out_dev; 301 skb->protocol = htons(ETH_P_MPLS_UC); 302 303 if (unlikely(!new_header_size && dec.bos)) { 304 /* Penultimate hop popping */ 305 if (!mpls_egress(rt, skb, dec)) 306 goto drop; 307 } else { 308 bool bos; 309 int i; 310 skb_push(skb, new_header_size); 311 skb_reset_network_header(skb); 312 /* Push the new labels */ 313 hdr = mpls_hdr(skb); 314 bos = dec.bos; 315 for (i = nh->nh_labels - 1; i >= 0; i--) { 316 hdr[i] = mpls_entry_encode(nh->nh_label[i], 317 dec.ttl, 0, bos); 318 bos = false; 319 } 320 } 321 322 /* If via wasn't specified then send out using device address */ 323 if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC) 324 err = neigh_xmit(NEIGH_LINK_TABLE, out_dev, 325 out_dev->dev_addr, skb); 326 else 327 err = neigh_xmit(nh->nh_via_table, out_dev, 328 mpls_nh_via(rt, nh), skb); 329 if (err) 330 net_dbg_ratelimited("%s: packet transmission failed: %d\n", 331 __func__, err); 332 return 0; 333 334drop: 335 kfree_skb(skb); 336 return NET_RX_DROP; 337} 338 339static struct packet_type mpls_packet_type __read_mostly = { 340 .type = cpu_to_be16(ETH_P_MPLS_UC), 341 .func = mpls_forward, 342}; 343 344static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { 345 [RTA_DST] = { .type = NLA_U32 }, 346 [RTA_OIF] = { .type = NLA_U32 }, 347}; 348 349struct mpls_route_config { 350 u32 rc_protocol; 351 u32 
rc_ifindex;		/* output interface (RTA_OIF), 0 = resolve via address */
	u8			rc_via_table;	/* NEIGH_*_TABLE or MPLS_NEIGH_TABLE_UNSPEC */
	u8			rc_via_alen;	/* length of rc_via in bytes */
	u8			rc_via[MAX_VIA_ALEN];	/* nexthop address (RTA_VIA) */
	u32			rc_label;	/* incoming label (RTA_DST) */
	u8			rc_output_labels;	/* number of labels to push */
	u32			rc_output_label[MAX_NEW_LABELS];	/* labels to push (RTA_NEWDST) */
	u32			rc_nlflags;	/* netlink request flags (NLM_F_*) */
	enum mpls_payload_type	rc_payload_type;
	struct nl_info		rc_nlinfo;
	struct rtnexthop	*rc_mp;		/* RTA_MULTIPATH payload, NULL if single path */
	int			rc_mp_len;	/* byte length of rc_mp */
};

/* Allocate a route with num_nh nexthops in one block: the mpls_nh array
 * is followed (after alignment) by num_nh via-address slots of
 * rt_max_alen bytes each, addressed through __mpls_nh_via().
 */
static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
{
	u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
	struct mpls_route *rt;

	rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
			   VIA_ALEN_ALIGN) +
		     num_nh * max_alen_aligned,
		     GFP_KERNEL);
	if (rt) {
		rt->rt_nhn = num_nh;
		rt->rt_max_alen = max_alen_aligned;
	}

	return rt;
}

/* Free a route after an RCU grace period; safe to call with NULL. */
static void mpls_rt_free(struct mpls_route *rt)
{
	if (rt)
		kfree_rcu(rt, rt_rcu);
}

/* Send an RTM_NEWROUTE/RTM_DELROUTE notification for a label-table
 * change; reserved labels (below MPLS_LABEL_FIRST_UNRESERVED) are not
 * announced.
 */
static void mpls_notify_route(struct net *net, unsigned index,
			      struct mpls_route *old, struct mpls_route *new,
			      const struct nl_info *info)
{
	struct nlmsghdr *nlh = info ? info->nlh : NULL;
	unsigned portid = info ? info->portid : 0;
	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
	struct mpls_route *rt = new ? new : old;
	unsigned nlm_flags = (old && new) ?
NLM_F_REPLACE : 0; 397 /* Ignore reserved labels for now */ 398 if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED)) 399 rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); 400} 401 402static void mpls_route_update(struct net *net, unsigned index, 403 struct mpls_route *new, 404 const struct nl_info *info) 405{ 406 struct mpls_route __rcu **platform_label; 407 struct mpls_route *rt; 408 409 ASSERT_RTNL(); 410 411 platform_label = rtnl_dereference(net->mpls.platform_label); 412 rt = rtnl_dereference(platform_label[index]); 413 rcu_assign_pointer(platform_label[index], new); 414 415 mpls_notify_route(net, index, rt, new, info); 416 417 /* If we removed a route free it now */ 418 mpls_rt_free(rt); 419} 420 421static unsigned find_free_label(struct net *net) 422{ 423 struct mpls_route __rcu **platform_label; 424 size_t platform_labels; 425 unsigned index; 426 427 platform_label = rtnl_dereference(net->mpls.platform_label); 428 platform_labels = net->mpls.platform_labels; 429 for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels; 430 index++) { 431 if (!rtnl_dereference(platform_label[index])) 432 return index; 433 } 434 return LABEL_NOT_SPECIFIED; 435} 436 437#if IS_ENABLED(CONFIG_INET) 438static struct net_device *inet_fib_lookup_dev(struct net *net, 439 const void *addr) 440{ 441 struct net_device *dev; 442 struct rtable *rt; 443 struct in_addr daddr; 444 445 memcpy(&daddr, addr, sizeof(struct in_addr)); 446 rt = ip_route_output(net, daddr.s_addr, 0, 0, 0); 447 if (IS_ERR(rt)) 448 return ERR_CAST(rt); 449 450 dev = rt->dst.dev; 451 dev_hold(dev); 452 453 ip_rt_put(rt); 454 455 return dev; 456} 457#else 458static struct net_device *inet_fib_lookup_dev(struct net *net, 459 const void *addr) 460{ 461 return ERR_PTR(-EAFNOSUPPORT); 462} 463#endif 464 465#if IS_ENABLED(CONFIG_IPV6) 466static struct net_device *inet6_fib_lookup_dev(struct net *net, 467 const void *addr) 468{ 469 struct net_device *dev; 470 struct dst_entry *dst; 471 struct flowi6 fl6; 472 
int err; 473 474 if (!ipv6_stub) 475 return ERR_PTR(-EAFNOSUPPORT); 476 477 memset(&fl6, 0, sizeof(fl6)); 478 memcpy(&fl6.daddr, addr, sizeof(struct in6_addr)); 479 err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6); 480 if (err) 481 return ERR_PTR(err); 482 483 dev = dst->dev; 484 dev_hold(dev); 485 dst_release(dst); 486 487 return dev; 488} 489#else 490static struct net_device *inet6_fib_lookup_dev(struct net *net, 491 const void *addr) 492{ 493 return ERR_PTR(-EAFNOSUPPORT); 494} 495#endif 496 497static struct net_device *find_outdev(struct net *net, 498 struct mpls_route *rt, 499 struct mpls_nh *nh, int oif) 500{ 501 struct net_device *dev = NULL; 502 503 if (!oif) { 504 switch (nh->nh_via_table) { 505 case NEIGH_ARP_TABLE: 506 dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh)); 507 break; 508 case NEIGH_ND_TABLE: 509 dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh)); 510 break; 511 case NEIGH_LINK_TABLE: 512 break; 513 } 514 } else { 515 dev = dev_get_by_index(net, oif); 516 } 517 518 if (!dev) 519 return ERR_PTR(-ENODEV); 520 521 if (IS_ERR(dev)) 522 return dev; 523 524 /* The caller is holding rtnl anyways, so release the dev reference */ 525 dev_put(dev); 526 527 return dev; 528} 529 530static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, 531 struct mpls_nh *nh, int oif) 532{ 533 struct net_device *dev = NULL; 534 int err = -ENODEV; 535 536 dev = find_outdev(net, rt, nh, oif); 537 if (IS_ERR(dev)) { 538 err = PTR_ERR(dev); 539 dev = NULL; 540 goto errout; 541 } 542 543 /* Ensure this is a supported device */ 544 err = -EINVAL; 545 if (!mpls_dev_get(dev)) 546 goto errout; 547 548 if ((nh->nh_via_table == NEIGH_LINK_TABLE) && 549 (dev->addr_len != nh->nh_via_alen)) 550 goto errout; 551 552 RCU_INIT_POINTER(nh->nh_dev, dev); 553 554 return 0; 555 556errout: 557 return err; 558} 559 560static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, 561 struct mpls_route *rt) 562{ 563 struct net *net = cfg->rc_nlinfo.nl_net; 564 struct 
mpls_nh *nh = rt->rt_nh; 565 int err; 566 int i; 567 568 if (!nh) 569 return -ENOMEM; 570 571 err = -EINVAL; 572 /* Ensure only a supported number of labels are present */ 573 if (cfg->rc_output_labels > MAX_NEW_LABELS) 574 goto errout; 575 576 nh->nh_labels = cfg->rc_output_labels; 577 for (i = 0; i < nh->nh_labels; i++) 578 nh->nh_label[i] = cfg->rc_output_label[i]; 579 580 nh->nh_via_table = cfg->rc_via_table; 581 memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen); 582 nh->nh_via_alen = cfg->rc_via_alen; 583 584 err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex); 585 if (err) 586 goto errout; 587 588 return 0; 589 590errout: 591 return err; 592} 593 594static int mpls_nh_build(struct net *net, struct mpls_route *rt, 595 struct mpls_nh *nh, int oif, 596 struct nlattr *via, struct nlattr *newdst) 597{ 598 int err = -ENOMEM; 599 600 if (!nh) 601 goto errout; 602 603 if (newdst) { 604 err = nla_get_labels(newdst, MAX_NEW_LABELS, 605 &nh->nh_labels, nh->nh_label); 606 if (err) 607 goto errout; 608 } 609 610 if (via) { 611 err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, 612 __mpls_nh_via(rt, nh)); 613 if (err) 614 goto errout; 615 } else { 616 nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC; 617 } 618 619 err = mpls_nh_assign_dev(net, rt, nh, oif); 620 if (err) 621 goto errout; 622 623 return 0; 624 625errout: 626 return err; 627} 628 629static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, 630 u8 cfg_via_alen, u8 *max_via_alen) 631{ 632 int nhs = 0; 633 int remaining = len; 634 635 if (!rtnh) { 636 *max_via_alen = cfg_via_alen; 637 return 1; 638 } 639 640 *max_via_alen = 0; 641 642 while (rtnh_ok(rtnh, remaining)) { 643 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 644 int attrlen; 645 646 attrlen = rtnh_attrlen(rtnh); 647 nla = nla_find(attrs, attrlen, RTA_VIA); 648 if (nla && nla_len(nla) >= 649 offsetof(struct rtvia, rtvia_addr)) { 650 int via_alen = nla_len(nla) - 651 offsetof(struct rtvia, rtvia_addr); 652 653 if (via_alen <= 
MAX_VIA_ALEN) 654 *max_via_alen = max_t(u16, *max_via_alen, 655 via_alen); 656 } 657 658 nhs++; 659 rtnh = rtnh_next(rtnh, &remaining); 660 } 661 662 /* leftover implies invalid nexthop configuration, discard it */ 663 return remaining > 0 ? 0 : nhs; 664} 665 666static int mpls_nh_build_multi(struct mpls_route_config *cfg, 667 struct mpls_route *rt) 668{ 669 struct rtnexthop *rtnh = cfg->rc_mp; 670 struct nlattr *nla_via, *nla_newdst; 671 int remaining = cfg->rc_mp_len; 672 int nhs = 0; 673 int err = 0; 674 675 change_nexthops(rt) { 676 int attrlen; 677 678 nla_via = NULL; 679 nla_newdst = NULL; 680 681 err = -EINVAL; 682 if (!rtnh_ok(rtnh, remaining)) 683 goto errout; 684 685 /* neither weighted multipath nor any flags 686 * are supported 687 */ 688 if (rtnh->rtnh_hops || rtnh->rtnh_flags) 689 goto errout; 690 691 attrlen = rtnh_attrlen(rtnh); 692 if (attrlen > 0) { 693 struct nlattr *attrs = rtnh_attrs(rtnh); 694 695 nla_via = nla_find(attrs, attrlen, RTA_VIA); 696 nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST); 697 } 698 699 err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, 700 rtnh->rtnh_ifindex, nla_via, 701 nla_newdst); 702 if (err) 703 goto errout; 704 705 rtnh = rtnh_next(rtnh, &remaining); 706 nhs++; 707 } endfor_nexthops(rt); 708 709 rt->rt_nhn = nhs; 710 711 return 0; 712 713errout: 714 return err; 715} 716 717static int mpls_route_add(struct mpls_route_config *cfg) 718{ 719 struct mpls_route __rcu **platform_label; 720 struct net *net = cfg->rc_nlinfo.nl_net; 721 struct mpls_route *rt, *old; 722 int err = -EINVAL; 723 u8 max_via_alen; 724 unsigned index; 725 int nhs; 726 727 index = cfg->rc_label; 728 729 /* If a label was not specified during insert pick one */ 730 if ((index == LABEL_NOT_SPECIFIED) && 731 (cfg->rc_nlflags & NLM_F_CREATE)) { 732 index = find_free_label(net); 733 } 734 735 /* Reserved labels may not be set */ 736 if (index < MPLS_LABEL_FIRST_UNRESERVED) 737 goto errout; 738 739 /* The full 20 bit range may not be supported. 
*/ 740 if (index >= net->mpls.platform_labels) 741 goto errout; 742 743 /* Append makes no sense with mpls */ 744 err = -EOPNOTSUPP; 745 if (cfg->rc_nlflags & NLM_F_APPEND) 746 goto errout; 747 748 err = -EEXIST; 749 platform_label = rtnl_dereference(net->mpls.platform_label); 750 old = rtnl_dereference(platform_label[index]); 751 if ((cfg->rc_nlflags & NLM_F_EXCL) && old) 752 goto errout; 753 754 err = -EEXIST; 755 if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old) 756 goto errout; 757 758 err = -ENOENT; 759 if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) 760 goto errout; 761 762 err = -EINVAL; 763 nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, 764 cfg->rc_via_alen, &max_via_alen); 765 if (nhs == 0) 766 goto errout; 767 768 err = -ENOMEM; 769 rt = mpls_rt_alloc(nhs, max_via_alen); 770 if (!rt) 771 goto errout; 772 773 rt->rt_protocol = cfg->rc_protocol; 774 rt->rt_payload_type = cfg->rc_payload_type; 775 776 if (cfg->rc_mp) 777 err = mpls_nh_build_multi(cfg, rt); 778 else 779 err = mpls_nh_build_from_cfg(cfg, rt); 780 if (err) 781 goto freert; 782 783 mpls_route_update(net, index, rt, &cfg->rc_nlinfo); 784 785 return 0; 786 787freert: 788 mpls_rt_free(rt); 789errout: 790 return err; 791} 792 793static int mpls_route_del(struct mpls_route_config *cfg) 794{ 795 struct net *net = cfg->rc_nlinfo.nl_net; 796 unsigned index; 797 int err = -EINVAL; 798 799 index = cfg->rc_label; 800 801 /* Reserved labels may not be removed */ 802 if (index < MPLS_LABEL_FIRST_UNRESERVED) 803 goto errout; 804 805 /* The full 20 bit range may not be supported */ 806 if (index >= net->mpls.platform_labels) 807 goto errout; 808 809 mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); 810 811 err = 0; 812errout: 813 return err; 814} 815 816#define MPLS_PERDEV_SYSCTL_OFFSET(field) \ 817 (&((struct mpls_dev *)0)->field) 818 819static const struct ctl_table mpls_dev_table[] = { 820 { 821 .procname = "input", 822 .maxlen = sizeof(int), 823 .mode = 0644, 824 .proc_handler = proc_dointvec, 825 
.data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), 826 }, 827 { } 828}; 829 830static int mpls_dev_sysctl_register(struct net_device *dev, 831 struct mpls_dev *mdev) 832{ 833 char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; 834 struct ctl_table *table; 835 int i; 836 837 table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); 838 if (!table) 839 goto out; 840 841 /* Table data contains only offsets relative to the base of 842 * the mdev at this point, so make them absolute. 843 */ 844 for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) 845 table[i].data = (char *)mdev + (uintptr_t)table[i].data; 846 847 snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); 848 849 mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); 850 if (!mdev->sysctl) 851 goto free; 852 853 return 0; 854 855free: 856 kfree(table); 857out: 858 return -ENOBUFS; 859} 860 861static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) 862{ 863 struct ctl_table *table; 864 865 table = mdev->sysctl->ctl_table_arg; 866 unregister_net_sysctl_table(mdev->sysctl); 867 kfree(table); 868} 869 870static struct mpls_dev *mpls_add_dev(struct net_device *dev) 871{ 872 struct mpls_dev *mdev; 873 int err = -ENOMEM; 874 875 ASSERT_RTNL(); 876 877 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); 878 if (!mdev) 879 return ERR_PTR(err); 880 881 err = mpls_dev_sysctl_register(dev, mdev); 882 if (err) 883 goto free; 884 885 rcu_assign_pointer(dev->mpls_ptr, mdev); 886 887 return mdev; 888 889free: 890 kfree(mdev); 891 return ERR_PTR(err); 892} 893 894static void mpls_ifdown(struct net_device *dev) 895{ 896 struct mpls_route __rcu **platform_label; 897 struct net *net = dev_net(dev); 898 struct mpls_dev *mdev; 899 unsigned index; 900 901 platform_label = rtnl_dereference(net->mpls.platform_label); 902 for (index = 0; index < net->mpls.platform_labels; index++) { 903 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 904 if (!rt) 905 continue; 906 for_nexthops(rt) { 907 if 
(rtnl_dereference(nh->nh_dev) != dev) 908 continue; 909 nh->nh_dev = NULL; 910 } endfor_nexthops(rt); 911 } 912 913 mdev = mpls_dev_get(dev); 914 if (!mdev) 915 return; 916 917 mpls_dev_sysctl_unregister(mdev); 918 919 RCU_INIT_POINTER(dev->mpls_ptr, NULL); 920 921 kfree_rcu(mdev, rcu); 922} 923 924static int mpls_dev_notify(struct notifier_block *this, unsigned long event, 925 void *ptr) 926{ 927 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 928 struct mpls_dev *mdev; 929 930 switch(event) { 931 case NETDEV_REGISTER: 932 /* For now just support ethernet devices */ 933 if ((dev->type == ARPHRD_ETHER) || 934 (dev->type == ARPHRD_LOOPBACK)) { 935 mdev = mpls_add_dev(dev); 936 if (IS_ERR(mdev)) 937 return notifier_from_errno(PTR_ERR(mdev)); 938 } 939 break; 940 941 case NETDEV_UNREGISTER: 942 mpls_ifdown(dev); 943 break; 944 case NETDEV_CHANGENAME: 945 mdev = mpls_dev_get(dev); 946 if (mdev) { 947 int err; 948 949 mpls_dev_sysctl_unregister(mdev); 950 err = mpls_dev_sysctl_register(dev, mdev); 951 if (err) 952 return notifier_from_errno(err); 953 } 954 break; 955 } 956 return NOTIFY_OK; 957} 958 959static struct notifier_block mpls_dev_notifier = { 960 .notifier_call = mpls_dev_notify, 961}; 962 963static int nla_put_via(struct sk_buff *skb, 964 u8 table, const void *addr, int alen) 965{ 966 static const int table_to_family[NEIGH_NR_TABLES + 1] = { 967 AF_INET, AF_INET6, AF_DECnet, AF_PACKET, 968 }; 969 struct nlattr *nla; 970 struct rtvia *via; 971 int family = AF_UNSPEC; 972 973 nla = nla_reserve(skb, RTA_VIA, alen + 2); 974 if (!nla) 975 return -EMSGSIZE; 976 977 if (table <= NEIGH_NR_TABLES) 978 family = table_to_family[table]; 979 980 via = nla_data(nla); 981 via->rtvia_family = family; 982 memcpy(via->rtvia_addr, addr, alen); 983 return 0; 984} 985 986int nla_put_labels(struct sk_buff *skb, int attrtype, 987 u8 labels, const u32 label[]) 988{ 989 struct nlattr *nla; 990 struct mpls_shim_hdr *nla_label; 991 bool bos; 992 int i; 993 nla = 
nla_reserve(skb, attrtype, labels*4);
	if (!nla)
		return -EMSGSIZE;

	/* Encode in wire order: walk backwards so only the last (deepest)
	 * label carries the bottom-of-stack bit.
	 */
	nla_label = nla_data(nla);
	bos = true;
	for (i = labels - 1; i >= 0; i--) {
		nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
		bos = false;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nla_put_labels);

/* Parse a label-stack attribute into label[]: validates that the payload
 * is a whole number of 4-byte shim headers, at most max_labels, that
 * TTL/TC are clear, that BOS is set only on the last entry, and that no
 * forbidden reserved label appears.  Returns 0 or -EINVAL.
 */
int nla_get_labels(const struct nlattr *nla,
		   u32 max_labels, u8 *labels, u32 label[])
{
	unsigned len = nla_len(nla);
	unsigned nla_labels;
	struct mpls_shim_hdr *nla_label;
	bool bos;
	int i;

	/* len needs to be an even multiple of 4 (the label size) */
	if (len & 3)
		return -EINVAL;

	/* Limit the number of new labels allowed */
	nla_labels = len/4;
	if (nla_labels > max_labels)
		return -EINVAL;

	nla_label = nla_data(nla);
	bos = true;
	for (i = nla_labels - 1; i >= 0; i--, bos = false) {
		struct mpls_entry_decoded dec;
		dec = mpls_entry_decode(nla_label + i);

		/* Ensure the bottom of stack flag is properly set
		 * and ttl and tc are both clear.
		 */
		if ((dec.bos != bos) || dec.ttl || dec.tc)
			return -EINVAL;

		switch (dec.label) {
		case MPLS_LABEL_IMPLNULL:
			/* RFC3032: This is a label that an LSR may
			 * assign and distribute, but which never
			 * actually appears in the encapsulation.
1043 */ 1044 return -EINVAL; 1045 } 1046 1047 label[i] = dec.label; 1048 } 1049 *labels = nla_labels; 1050 return 0; 1051} 1052EXPORT_SYMBOL_GPL(nla_get_labels); 1053 1054int nla_get_via(const struct nlattr *nla, u8 *via_alen, 1055 u8 *via_table, u8 via_addr[]) 1056{ 1057 struct rtvia *via = nla_data(nla); 1058 int err = -EINVAL; 1059 int alen; 1060 1061 if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) 1062 goto errout; 1063 alen = nla_len(nla) - 1064 offsetof(struct rtvia, rtvia_addr); 1065 if (alen > MAX_VIA_ALEN) 1066 goto errout; 1067 1068 /* Validate the address family */ 1069 switch (via->rtvia_family) { 1070 case AF_PACKET: 1071 *via_table = NEIGH_LINK_TABLE; 1072 break; 1073 case AF_INET: 1074 *via_table = NEIGH_ARP_TABLE; 1075 if (alen != 4) 1076 goto errout; 1077 break; 1078 case AF_INET6: 1079 *via_table = NEIGH_ND_TABLE; 1080 if (alen != 16) 1081 goto errout; 1082 break; 1083 default: 1084 /* Unsupported address family */ 1085 goto errout; 1086 } 1087 1088 memcpy(via_addr, via->rtvia_addr, alen); 1089 *via_alen = alen; 1090 err = 0; 1091 1092errout: 1093 return err; 1094} 1095 1096static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, 1097 struct mpls_route_config *cfg) 1098{ 1099 struct rtmsg *rtm; 1100 struct nlattr *tb[RTA_MAX+1]; 1101 int index; 1102 int err; 1103 1104 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); 1105 if (err < 0) 1106 goto errout; 1107 1108 err = -EINVAL; 1109 rtm = nlmsg_data(nlh); 1110 memset(cfg, 0, sizeof(*cfg)); 1111 1112 if (rtm->rtm_family != AF_MPLS) 1113 goto errout; 1114 if (rtm->rtm_dst_len != 20) 1115 goto errout; 1116 if (rtm->rtm_src_len != 0) 1117 goto errout; 1118 if (rtm->rtm_tos != 0) 1119 goto errout; 1120 if (rtm->rtm_table != RT_TABLE_MAIN) 1121 goto errout; 1122 /* Any value is acceptable for rtm_protocol */ 1123 1124 /* As mpls uses destination specific addresses 1125 * (or source specific address in the case of multicast) 1126 * all addresses have universal 
scope. 1127 */ 1128 if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) 1129 goto errout; 1130 if (rtm->rtm_type != RTN_UNICAST) 1131 goto errout; 1132 if (rtm->rtm_flags != 0) 1133 goto errout; 1134 1135 cfg->rc_label = LABEL_NOT_SPECIFIED; 1136 cfg->rc_protocol = rtm->rtm_protocol; 1137 cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; 1138 cfg->rc_nlflags = nlh->nlmsg_flags; 1139 cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; 1140 cfg->rc_nlinfo.nlh = nlh; 1141 cfg->rc_nlinfo.nl_net = sock_net(skb->sk); 1142 1143 for (index = 0; index <= RTA_MAX; index++) { 1144 struct nlattr *nla = tb[index]; 1145 if (!nla) 1146 continue; 1147 1148 switch(index) { 1149 case RTA_OIF: 1150 cfg->rc_ifindex = nla_get_u32(nla); 1151 break; 1152 case RTA_NEWDST: 1153 if (nla_get_labels(nla, MAX_NEW_LABELS, 1154 &cfg->rc_output_labels, 1155 cfg->rc_output_label)) 1156 goto errout; 1157 break; 1158 case RTA_DST: 1159 { 1160 u8 label_count; 1161 if (nla_get_labels(nla, 1, &label_count, 1162 &cfg->rc_label)) 1163 goto errout; 1164 1165 /* Reserved labels may not be set */ 1166 if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED) 1167 goto errout; 1168 1169 break; 1170 } 1171 case RTA_VIA: 1172 { 1173 if (nla_get_via(nla, &cfg->rc_via_alen, 1174 &cfg->rc_via_table, cfg->rc_via)) 1175 goto errout; 1176 break; 1177 } 1178 case RTA_MULTIPATH: 1179 { 1180 cfg->rc_mp = nla_data(nla); 1181 cfg->rc_mp_len = nla_len(nla); 1182 break; 1183 } 1184 default: 1185 /* Unsupported attribute */ 1186 goto errout; 1187 } 1188 } 1189 1190 err = 0; 1191errout: 1192 return err; 1193} 1194 1195static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) 1196{ 1197 struct mpls_route_config cfg; 1198 int err; 1199 1200 err = rtm_to_route_config(skb, nlh, &cfg); 1201 if (err < 0) 1202 return err; 1203 1204 return mpls_route_del(&cfg); 1205} 1206 1207 1208static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 1209{ 1210 struct mpls_route_config cfg; 1211 int err; 1212 1213 err = rtm_to_route_config(skb, 
nlh, &cfg);
	if (err < 0)
		return err;

	return mpls_route_add(&cfg);
}

/* Build one RTM_NEWROUTE/RTM_DELROUTE message describing the route bound
 * to @label.  Emits RTA_DST always, then either the single nexthop's
 * attributes inline or an RTA_MULTIPATH nest with one rtnexthop per path.
 * Returns 0 on success or -EMSGSIZE if @skb ran out of room, in which
 * case the partially built message is cancelled.
 * Caller must hold RTNL (nh_dev is read with rtnl_dereference).
 */
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
			   u32 label, struct mpls_route *rt, int flags)
{
	struct net_device *dev;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_MPLS;
	rtm->rtm_dst_len = 20;	/* an MPLS label is 20 bits wide */
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = rt->rt_protocol;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;

	if (nla_put_labels(skb, RTA_DST, 1, &label))
		goto nla_put_failure;
	if (rt->rt_nhn == 1) {
		/* Single path: labels, via and oif go directly in the
		 * message, no RTA_MULTIPATH nest.
		 */
		const struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_labels &&
		    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
				   nh->nh_label))
			goto nla_put_failure;
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
				nh->nh_via_alen))
			goto nla_put_failure;
		dev = rtnl_dereference(nh->nh_dev);
		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
			goto nla_put_failure;
	} else {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(rt) {
			/* Reserve the raw rtnexthop header; its length is
			 * only known once the per-hop attributes follow it.
			 */
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			dev = rtnl_dereference(nh->nh_dev);
			if (dev)
				rtnh->rtnh_ifindex = dev->ifindex;
			if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
							    nh->nh_labels,
							    nh->nh_label))
				goto nla_put_failure;
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
			    nla_put_via(skb, nh->nh_via_table,
					mpls_nh_via(rt, nh),
					nh->nh_via_alen))
				goto nla_put_failure;

			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		} endfor_nexthops(rt);

		nla_nest_end(skb, mp);
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Netlink dump callback for RTM_GETROUTE: walk the platform label table
 * starting at the label saved in cb->args[0] (skipping the reserved label
 * range below MPLS_LABEL_FIRST_UNRESERVED) and emit one route message per
 * populated slot.  The current index is stored back in cb->args[0] so the
 * next invocation resumes where this one stopped.
 */
static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	unsigned int index;

	ASSERT_RTNL();

	index = cb->args[0];
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		index = MPLS_LABEL_FIRST_UNRESERVED;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;
	for (; index < platform_labels; index++) {
		struct mpls_route *rt;
		rt = rtnl_dereference(platform_label[index]);
		if (!rt)
			continue;

		/* mpls_dump_route returns < 0 when the skb is full;
		 * stop and let the next dump call continue from here.
		 */
		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
				    index, rt, NLM_F_MULTI) < 0)
			break;
	}
	cb->args[0] = index;

	return skb->len;
}

/* Upper bound on the notification message size for @rt.  Must account for
 * every attribute mpls_dump_route() can emit; rtmsg_lfib() WARNs if the
 * two ever disagree (i.e. if the dump overflows this allocation).
 */
static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
	size_t payload =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4);			/* RTA_DST */

	if (rt->rt_nhn == 1) {
		struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_dev)
			payload += nla_total_size(4); /* RTA_OIF */
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
			payload += nla_total_size(2 + nh->nh_via_alen);
		if (nh->nh_labels) /* RTA_NEWDST */
			payload += nla_total_size(nh->nh_labels * 4);
	} else {
		/* each nexthop is packed in an attribute */
		size_t nhsize = 0;

		for_nexthops(rt) {
			nhsize += nla_total_size(sizeof(struct rtnexthop));
			/* RTA_VIA */
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
				nhsize += nla_total_size(2 + nh->nh_via_alen);
			if (nh->nh_labels)
				nhsize += nla_total_size(nh->nh_labels * 4);
		} endfor_nexthops(rt);
		/* nested attribute */
		payload += nla_total_size(nhsize);
	}

	return payload;
}

/* Notify RTNLGRP_MPLS_ROUTE listeners that the route for @label changed.
 * @nlh is the request that triggered the change (or NULL for kernel
 * originated changes, in which case seq is 0).  On allocation or fill
 * failure the error is reported via rtnl_set_sk_err() instead.
 */
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);

	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}

/* Resize the per-namespace platform label table to @limit entries.
 *
 * A new (possibly vmalloc'ed) array is allocated and the routes for the
 * IPv4/IPv6 explicit-NULL labels are preallocated outside the RTNL lock;
 * under RTNL the surviving entries are copied over, entries beyond the
 * new limit are torn down via mpls_route_update(), and the new table is
 * published with rcu_assign_pointer().  The old table is freed only after
 * synchronize_rcu(), so concurrent RCU readers never see a stale pointer.
 * Returns 0 on success or -ENOMEM.
 */
static int resize_platform_label_table(struct net *net, size_t limit)
{
	size_t size = sizeof(struct mpls_route *) * limit;
	size_t old_limit;
	size_t cp_size;
	struct mpls_route __rcu **labels = NULL, **old;
	struct mpls_route *rt0 = NULL, *rt2 = NULL;
	unsigned index;

	if (size) {
		/* Try kmalloc first (without warning or retrying on
		 * failure), fall back to vmalloc for large tables.
		 */
		labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
		if (!labels)
			labels = vzalloc(size);

		if (!labels)
			goto nolabels;
	}

	/* In case the predefined labels need to be populated */
	if (limit > MPLS_LABEL_IPV4NULL) {
		/* IPv4 explicit NULL: pop the label and deliver the
		 * payload locally via the loopback device.
		 */
		struct net_device *lo = net->loopback_dev;
		rt0 = mpls_rt_alloc(1, lo->addr_len);
		if (!rt0)
			goto nort0;
		RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
		rt0->rt_protocol = RTPROT_KERNEL;
		rt0->rt_payload_type = MPT_IPV4;
		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt0->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}
	if (limit > MPLS_LABEL_IPV6NULL) {
		/* Same for the IPv6 explicit NULL label */
		struct net_device *lo = net->loopback_dev;
		rt2 = mpls_rt_alloc(1, lo->addr_len);
		if (!rt2)
			goto nort2;
		RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
		rt2->rt_protocol = RTPROT_KERNEL;
		rt2->rt_payload_type = MPT_IPV6;
		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt2->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}

	rtnl_lock();
	/* Remember the original table */
	old = rtnl_dereference(net->mpls.platform_label);
	old_limit = net->mpls.platform_labels;

	/* Free any labels beyond the new table */
	for (index = limit; index < old_limit; index++)
		mpls_route_update(net, index, NULL, NULL);

	/* Copy over the old labels */
	cp_size = size;
	if (old_limit < limit)
		cp_size = old_limit * sizeof(struct mpls_route *);

	memcpy(labels, old, cp_size);

	/* If needed set the predefined labels */
	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
	    (limit > MPLS_LABEL_IPV6NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
		rt2 = NULL;	/* ownership moved into the table */
	}

	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
	    (limit > MPLS_LABEL_IPV4NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
		rt0 = NULL;	/* ownership moved into the table */
	}

	/* Update the global pointers */
	net->mpls.platform_labels = limit;
	rcu_assign_pointer(net->mpls.platform_label, labels);

	rtnl_unlock();

	/* Free whichever preallocated routes were not installed */
	mpls_rt_free(rt2);
	mpls_rt_free(rt0);

	if (old) {
		/* Wait for RCU readers of the old table before freeing */
		synchronize_rcu();
		kvfree(old);
	}
	return 0;

nort2:
	mpls_rt_free(rt0);
nort0:
	kvfree(labels);
nolabels:
	return -ENOMEM;
}

/* Sysctl handler for net/mpls/platform_labels.  Reads report the current
 * table size; writes are range-checked via a temporary ctl_table
 * (0 .. label_limit, i.e. 2^20 - 1) and then applied by resizing the
 * platform label table.  table->data holds the struct net pointer
 * installed by mpls_net_init().
 */
static int mpls_platform_labels(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = table->data;
	int platform_labels = net->mpls.platform_labels;
	int ret;
	struct ctl_table tmp = {
		.procname	= table->procname,
		.data		= &platform_labels,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= &zero,
		.extra2		= &label_limit,
	};

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0)
		ret = resize_platform_label_table(net, platform_labels);

	return ret;
}

/* Template for the per-namespace sysctl table; .data is filled in with
 * the namespace's struct net by mpls_net_init().
 */
static const struct ctl_table mpls_table[] = {
	{
		.procname	= "platform_labels",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_platform_labels,
	},
	{ }
};

/* Per-namespace init: start with an empty label table and register the
 * net/mpls sysctl directory.  The table template is duplicated so each
 * namespace gets its own .data pointer.
 */
static int mpls_net_init(struct net *net)
{
	struct ctl_table *table;

	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Stash the namespace for mpls_platform_labels() to retrieve */
	table[0].data = net;
	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}

/* Per-namespace teardown: unregister the sysctls, then free every route
 * still in the platform label table and the table itself.
 */
static void mpls_net_exit(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	struct ctl_table *table;
	unsigned int index;

	table = net->mpls.ctl->ctl_table_arg;
	unregister_net_sysctl_table(net->mpls.ctl);
	kfree(table);

	/* An rcu grace period has passed since there was a device in
	 * the network namespace (and thus the last in flight packet)
	 * left this network namespace.  This is because
	 * unregister_netdevice_many and netdev_run_todo has completed
	 * for each network device that was in this network namespace.
	 *
	 * As such no additional rcu synchronization is necessary when
	 * freeing the platform_label table.
	 */
	rtnl_lock();
	platform_label = rtnl_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;
	for (index = 0; index < platform_labels; index++) {
		struct mpls_route *rt = rtnl_dereference(platform_label[index]);
		RCU_INIT_POINTER(platform_label[index], NULL);
		mpls_rt_free(rt);
	}
	rtnl_unlock();

	kvfree(platform_label);
}

static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};

/* Module entry point: register the per-namespace ops, the netdevice
 * notifier, the MPLS packet handler and the rtnetlink route operations.
 * On failure, everything registered so far is unwound.
 */
static int __init mpls_init(void)
{
	int err;

	/* The forwarding code assumes a 4 byte MPLS shim header */
	BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);

	err = register_pernet_subsys(&mpls_net_ops);
	if (err)
		goto out;

	err = register_netdevice_notifier(&mpls_dev_notifier);
	if (err)
		goto out_unregister_pernet;

	dev_add_pack(&mpls_packet_type);

	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
	rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
	err = 0;
out:
	return err;

out_unregister_pernet:
	unregister_pernet_subsys(&mpls_net_ops);
	goto out;
}
module_init(mpls_init);

/* Module exit: unregister everything in the reverse order of mpls_init() */
static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
}
module_exit(mpls_exit);

MODULE_DESCRIPTION("MultiProtocol Label Switching");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_NETPROTO(PF_MPLS);