1/* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7/* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12#include <linux/kernel.h> 13#include <linux/list.h> 14#include <linux/rcupdate.h> 15#include <linux/in6.h> 16#include <linux/slab.h> 17#include <net/addrconf.h> 18#include <linux/if_addrlabel.h> 19#include <linux/netlink.h> 20#include <linux/rtnetlink.h> 21 22#if 0 23#define ADDRLABEL(x...) printk(x) 24#else 25#define ADDRLABEL(x...) do { ; } while (0) 26#endif 27 28/* 29 * Policy Table 30 */ 31struct ip6addrlbl_entry { 32 possible_net_t lbl_net; 33 struct in6_addr prefix; 34 int prefixlen; 35 int ifindex; 36 int addrtype; 37 u32 label; 38 struct hlist_node list; 39 atomic_t refcnt; 40 struct rcu_head rcu; 41}; 42 43static struct ip6addrlbl_table 44{ 45 struct hlist_head head; 46 spinlock_t lock; 47 u32 seq; 48} ip6addrlbl_table; 49 50static inline 51struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 52{ 53 return read_pnet(&lbl->lbl_net); 54} 55 56/* 57 * Default policy table (RFC6724 + extensions) 58 * 59 * prefix addr_type label 60 * ------------------------------------------------------------------------- 61 * ::1/128 LOOPBACK 0 62 * ::/0 N/A 1 63 * 2002::/16 N/A 2 64 * ::/96 COMPATv4 3 65 * ::ffff:0:0/96 V4MAPPED 4 66 * fc00::/7 N/A 5 ULA (RFC 4193) 67 * 2001::/32 N/A 6 Teredo (RFC 4380) 68 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 69 * fec0::/10 N/A 11 Site-local 70 * (deprecated by RFC3879) 71 * 3ffe::/16 N/A 12 6bone 72 * 73 * Note: 0xffffffff is used if we do not have any policies. 74 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 75 */ 76 77#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 78 79static const __net_initconst struct ip6addrlbl_init_table 80{ 81 const struct in6_addr *prefix; 82 int prefixlen; 83 u32 label; 84} ip6addrlbl_init_table[] = { 85 { /* ::/0 */ 86 .prefix = &in6addr_any, 87 .label = 1, 88 }, { /* fc00::/7 */ 89 .prefix = &(struct in6_addr){ { { 0xfc } } } , 90 .prefixlen = 7, 91 .label = 5, 92 }, { /* fec0::/10 */ 93 .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, 94 .prefixlen = 10, 95 .label = 11, 96 }, { /* 2002::/16 */ 97 .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, 98 .prefixlen = 16, 99 .label = 2, 100 }, { /* 3ffe::/16 */ 101 .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, 102 .prefixlen = 16, 103 .label = 12, 104 }, { /* 2001::/32 */ 105 .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, 106 .prefixlen = 32, 107 .label = 6, 108 }, { /* 2001:10::/28 */ 109 .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, 110 .prefixlen = 28, 111 .label = 7, 112 }, { /* ::ffff:0:0 */ 113 .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, 114 .prefixlen = 96, 115 .label = 4, 116 }, { /* ::/96 */ 117 .prefix = &in6addr_any, 118 .prefixlen = 96, 119 .label = 3, 120 }, { /* ::1/128 */ 121 .prefix = &in6addr_loopback, 122 .prefixlen = 128, 123 .label = 0, 124 } 125}; 126 127/* Object management */ 128static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 129{ 130 kfree(p); 131} 132 133static void ip6addrlbl_free_rcu(struct rcu_head *h) 134{ 135 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 136} 137 138static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 139{ 140 return atomic_inc_not_zero(&p->refcnt); 141} 142 143static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 144{ 145 if (atomic_dec_and_test(&p->refcnt)) 146 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 147} 148 149/* Find label */ 150static bool __ip6addrlbl_match(struct net *net, 151 const struct ip6addrlbl_entry *p, 152 const struct in6_addr *addr, 153 int addrtype, int ifindex) 154{ 155 if (!net_eq(ip6addrlbl_net(p), net)) 156 return false; 157 if (p->ifindex && p->ifindex != ifindex) 158 return false; 159 if (p->addrtype && p->addrtype != addrtype) 160 return false; 161 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 162 return false; 163 return true; 164} 165 166static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 167 const struct in6_addr *addr, 168 int type, int ifindex) 169{ 170 struct ip6addrlbl_entry *p; 171 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 172 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 173 return p; 174 } 175 return NULL; 176} 177 178u32 ipv6_addr_label(struct net *net, 179 const struct in6_addr *addr, int type, int ifindex) 180{ 181 u32 label; 182 struct ip6addrlbl_entry *p; 183 184 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 185 186 rcu_read_lock(); 187 p = __ipv6_addr_label(net, addr, type, ifindex); 188 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 189 rcu_read_unlock(); 190 191 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 192 __func__, addr, type, ifindex, label); 193 194 return label; 195} 196 197/* allocate one entry */ 198static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 199 const struct in6_addr *prefix, 200 int prefixlen, int ifindex, 201 u32 label) 202{ 203 struct ip6addrlbl_entry *newp; 204 int addrtype; 205 206 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 207 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 208 209 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 210 211 switch (addrtype) { 212 case IPV6_ADDR_MAPPED: 213 if (prefixlen > 96) 214 return ERR_PTR(-EINVAL); 215 if (prefixlen < 96) 216 addrtype = 0; 217 break; 218 case IPV6_ADDR_COMPATv4: 219 if (prefixlen != 96) 220 addrtype = 0; 221 break; 222 case IPV6_ADDR_LOOPBACK: 223 if (prefixlen != 128) 224 addrtype = 0; 225 break; 226 } 227 228 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 229 if (!newp) 230 return ERR_PTR(-ENOMEM); 231 232 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 233 newp->prefixlen = prefixlen; 234 newp->ifindex = ifindex; 235 newp->addrtype = addrtype; 236 newp->label = label; 237 INIT_HLIST_NODE(&newp->list); 238 write_pnet(&newp->lbl_net, net); 239 atomic_set(&newp->refcnt, 1); 240 return newp; 241} 242 243/* add a label */ 244static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 245{ 246 struct hlist_node *n; 247 struct ip6addrlbl_entry *last = NULL, *p = NULL; 248 int ret = 0; 249 250 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 251 replace); 252 253 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 254 if (p->prefixlen == newp->prefixlen && 255 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 256 p->ifindex == newp->ifindex && 257 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 258 if (!replace) { 259 ret = -EEXIST; 260 goto out; 261 } 262 hlist_replace_rcu(&p->list, &newp->list); 263 ip6addrlbl_put(p); 264 goto out; 265 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 266 (p->prefixlen < newp->prefixlen)) { 267 hlist_add_before_rcu(&newp->list, &p->list); 268 goto out; 269 } 270 last = p; 271 } 272 if (last) 273 hlist_add_behind_rcu(&newp->list, &last->list); 274 else 275 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 276out: 277 if (!ret) 278 ip6addrlbl_table.seq++; 279 return ret; 280} 281 282/* add a label */ 283static int ip6addrlbl_add(struct net *net, 284 const struct in6_addr *prefix, int prefixlen, 285 int ifindex, u32 label, int replace) 286{ 287 struct ip6addrlbl_entry *newp; 288 int ret = 0; 289 290 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 291 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 292 replace); 293 294 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 295 if (IS_ERR(newp)) 296 return PTR_ERR(newp); 297 spin_lock(&ip6addrlbl_table.lock); 298 ret = __ip6addrlbl_add(newp, replace); 299 spin_unlock(&ip6addrlbl_table.lock); 300 if (ret) 301 ip6addrlbl_free(newp); 302 return ret; 303} 304 305/* remove a label */ 306static int __ip6addrlbl_del(struct net *net, 307 const struct in6_addr *prefix, int prefixlen, 308 int ifindex) 309{ 310 struct ip6addrlbl_entry *p = NULL; 311 struct hlist_node *n; 312 int ret = -ESRCH; 313 314 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 315 __func__, prefix, prefixlen, ifindex); 316 317 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 318 if (p->prefixlen == prefixlen && 319 net_eq(ip6addrlbl_net(p), net) && 320 p->ifindex == ifindex && 321 ipv6_addr_equal(&p->prefix, prefix)) { 322 hlist_del_rcu(&p->list); 323 ip6addrlbl_put(p); 324 ret = 0; 325 break; 326 } 327 } 328 return ret; 329} 330 331static int ip6addrlbl_del(struct net *net, 332 const struct in6_addr *prefix, int prefixlen, 333 int ifindex) 334{ 335 struct in6_addr prefix_buf; 336 int ret; 337 338 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 339 __func__, prefix, prefixlen, ifindex); 340 341 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 342 spin_lock(&ip6addrlbl_table.lock); 343 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 344 spin_unlock(&ip6addrlbl_table.lock); 345 return ret; 346} 347 348/* add default label */ 349static int __net_init ip6addrlbl_net_init(struct net *net) 350{ 351 int err = 0; 352 int i; 353 354 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 355 356 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 357 int ret = ip6addrlbl_add(net, 358 ip6addrlbl_init_table[i].prefix, 359 ip6addrlbl_init_table[i].prefixlen, 360 0, 361 ip6addrlbl_init_table[i].label, 0); 362 /* XXX: should we free all rules when we catch an error? */ 363 if (ret && (!err || err != -ENOMEM)) 364 err = ret; 365 } 366 return err; 367} 368 369static void __net_exit ip6addrlbl_net_exit(struct net *net) 370{ 371 struct ip6addrlbl_entry *p = NULL; 372 struct hlist_node *n; 373 374 /* Remove all labels belonging to the exiting net */ 375 spin_lock(&ip6addrlbl_table.lock); 376 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 377 if (net_eq(ip6addrlbl_net(p), net)) { 378 hlist_del_rcu(&p->list); 379 ip6addrlbl_put(p); 380 } 381 } 382 spin_unlock(&ip6addrlbl_table.lock); 383} 384 385static struct pernet_operations ipv6_addr_label_ops = { 386 .init = ip6addrlbl_net_init, 387 .exit = ip6addrlbl_net_exit, 388}; 389 390int __init ipv6_addr_label_init(void) 391{ 392 spin_lock_init(&ip6addrlbl_table.lock); 393 394 return register_pernet_subsys(&ipv6_addr_label_ops); 395} 396 397void ipv6_addr_label_cleanup(void) 398{ 399 unregister_pernet_subsys(&ipv6_addr_label_ops); 400} 401 402static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 403 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 404 [IFAL_LABEL] = { .len = sizeof(u32), }, 405}; 406 407static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) 408{ 409 struct net *net = sock_net(skb->sk); 410 struct ifaddrlblmsg *ifal; 411 struct nlattr *tb[IFAL_MAX+1]; 412 struct in6_addr *pfx; 413 u32 label; 414 int err = 0; 415 416 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 417 if (err < 0) 418 return err; 419 420 ifal = nlmsg_data(nlh); 421 422 if (ifal->ifal_family != AF_INET6 || 423 ifal->ifal_prefixlen > 128) 424 return -EINVAL; 425 426 if (!tb[IFAL_ADDRESS]) 427 return -EINVAL; 428 pfx = nla_data(tb[IFAL_ADDRESS]); 429 430 if (!tb[IFAL_LABEL]) 431 return -EINVAL; 432 label = nla_get_u32(tb[IFAL_LABEL]); 433 if (label == IPV6_ADDR_LABEL_DEFAULT) 434 return -EINVAL; 435 436 switch (nlh->nlmsg_type) { 437 case RTM_NEWADDRLABEL: 438 if (ifal->ifal_index && 439 !__dev_get_by_index(net, ifal->ifal_index)) 440 return -EINVAL; 441 442 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 443 ifal->ifal_index, label, 444 nlh->nlmsg_flags & NLM_F_REPLACE); 445 break; 446 case RTM_DELADDRLABEL: 447 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 448 ifal->ifal_index); 449 break; 450 default: 451 err = -EOPNOTSUPP; 452 } 453 return err; 454} 455 456static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 457 int prefixlen, int ifindex, u32 lseq) 458{ 459 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 460 ifal->ifal_family = AF_INET6; 461 ifal->ifal_prefixlen = prefixlen; 462 ifal->ifal_flags = 0; 463 ifal->ifal_index = ifindex; 464 ifal->ifal_seq = lseq; 465}; 466 467static int ip6addrlbl_fill(struct sk_buff *skb, 468 struct ip6addrlbl_entry *p, 469 u32 lseq, 470 u32 portid, u32 seq, int event, 471 unsigned int flags) 472{ 473 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 474 sizeof(struct ifaddrlblmsg), flags); 475 if (!nlh) 476 return -EMSGSIZE; 477 478 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 479 480 if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || 481 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 482 nlmsg_cancel(skb, nlh); 483 return -EMSGSIZE; 484 } 485 486 nlmsg_end(skb, nlh); 487 return 0; 488} 489 490static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 491{ 492 struct net *net = sock_net(skb->sk); 493 struct ip6addrlbl_entry *p; 494 int idx = 0, s_idx = cb->args[0]; 495 int err; 496 497 rcu_read_lock(); 498 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 499 if (idx >= s_idx && 500 net_eq(ip6addrlbl_net(p), net)) { 501 err = ip6addrlbl_fill(skb, p, 502 ip6addrlbl_table.seq, 503 NETLINK_CB(cb->skb).portid, 504 cb->nlh->nlmsg_seq, 505 RTM_NEWADDRLABEL, 506 NLM_F_MULTI); 507 if (err < 0) 508 break; 509 } 510 idx++; 511 } 512 rcu_read_unlock(); 513 cb->args[0] = idx; 514 return skb->len; 515} 516 517static inline int ip6addrlbl_msgsize(void) 518{ 519 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 520 + nla_total_size(16) /* IFAL_ADDRESS */ 521 + nla_total_size(4); /* IFAL_LABEL */ 522} 523 524static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) 525{ 526 struct net *net = sock_net(in_skb->sk); 527 struct ifaddrlblmsg *ifal; 528 struct nlattr *tb[IFAL_MAX+1]; 529 struct in6_addr *addr; 530 u32 lseq; 531 int err = 0; 532 struct ip6addrlbl_entry *p; 533 struct sk_buff *skb; 534 535 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 536 if (err < 0) 537 return err; 538 539 ifal = nlmsg_data(nlh); 540 541 if (ifal->ifal_family != AF_INET6 || 542 ifal->ifal_prefixlen != 128) 543 return -EINVAL; 544 545 if (ifal->ifal_index && 546 !__dev_get_by_index(net, ifal->ifal_index)) 547 return -EINVAL; 548 549 if (!tb[IFAL_ADDRESS]) 550 return -EINVAL; 551 addr = nla_data(tb[IFAL_ADDRESS]); 552 553 rcu_read_lock(); 554 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 555 if (p && !ip6addrlbl_hold(p)) 556 p = NULL; 557 lseq = ip6addrlbl_table.seq; 558 rcu_read_unlock(); 559 560 if (!p) { 561 err = -ESRCH; 562 goto out; 563 } 564 565 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 566 if (!skb) { 567 ip6addrlbl_put(p); 568 return -ENOBUFS; 569 } 570 571 err = ip6addrlbl_fill(skb, p, lseq, 572 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 573 RTM_NEWADDRLABEL, 0); 574 575 ip6addrlbl_put(p); 576 577 if (err < 0) { 578 WARN_ON(err == -EMSGSIZE); 579 kfree_skb(skb); 580 goto out; 581 } 582 583 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 584out: 585 return err; 586} 587 588void __init ipv6_addr_label_rtnl_register(void) 589{ 590 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 591 NULL, NULL); 592 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 593 NULL, NULL); 594 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 595 ip6addrlbl_dump, NULL); 596} 597 598