1/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of the GNU General Public License as 5 * published by the Free Software Foundation; either version 2 of 6 * the License, or (at your option) any later version. 7 * 8 */ 9 10#include "ipvlan.h" 11 12static u32 ipvlan_jhash_secret __read_mostly; 13 14void ipvlan_init_secret(void) 15{ 16 net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); 17} 18 19static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, 20 unsigned int len, bool success, bool mcast) 21{ 22 if (!ipvlan) 23 return; 24 25 if (likely(success)) { 26 struct ipvl_pcpu_stats *pcptr; 27 28 pcptr = this_cpu_ptr(ipvlan->pcpu_stats); 29 u64_stats_update_begin(&pcptr->syncp); 30 pcptr->rx_pkts++; 31 pcptr->rx_bytes += len; 32 if (mcast) 33 pcptr->rx_mcast++; 34 u64_stats_update_end(&pcptr->syncp); 35 } else { 36 this_cpu_inc(ipvlan->pcpu_stats->rx_errs); 37 } 38} 39 40static u8 ipvlan_get_v6_hash(const void *iaddr) 41{ 42 const struct in6_addr *ip6_addr = iaddr; 43 44 return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & 45 IPVLAN_HASH_MASK; 46} 47 48static u8 ipvlan_get_v4_hash(const void *iaddr) 49{ 50 const struct in_addr *ip4_addr = iaddr; 51 52 return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & 53 IPVLAN_HASH_MASK; 54} 55 56struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, 57 const void *iaddr, bool is_v6) 58{ 59 struct ipvl_addr *addr; 60 u8 hash; 61 62 hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : 63 ipvlan_get_v4_hash(iaddr); 64 hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) { 65 if (is_v6 && addr->atype == IPVL_IPV6 && 66 ipv6_addr_equal(&addr->ip6addr, iaddr)) 67 return addr; 68 else if (!is_v6 && addr->atype == IPVL_IPV4 && 69 addr->ip4addr.s_addr == 70 ((struct in_addr *)iaddr)->s_addr) 71 return addr; 72 } 73 return NULL; 74} 75 76void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) 77{ 78 struct ipvl_port *port = ipvlan->port; 79 u8 hash; 80 81 hash = (addr->atype == IPVL_IPV6) ? 82 ipvlan_get_v6_hash(&addr->ip6addr) : 83 ipvlan_get_v4_hash(&addr->ip4addr); 84 if (hlist_unhashed(&addr->hlnode)) 85 hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); 86} 87 88void ipvlan_ht_addr_del(struct ipvl_addr *addr) 89{ 90 hlist_del_init_rcu(&addr->hlnode); 91} 92 93struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, 94 const void *iaddr, bool is_v6) 95{ 96 struct ipvl_addr *addr; 97 98 list_for_each_entry(addr, &ipvlan->addrs, anode) { 99 if ((is_v6 && addr->atype == IPVL_IPV6 && 100 ipv6_addr_equal(&addr->ip6addr, iaddr)) || 101 (!is_v6 && addr->atype == IPVL_IPV4 && 102 addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr)) 103 return addr; 104 } 105 return NULL; 106} 107 108bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) 109{ 110 struct ipvl_dev *ipvlan; 111 112 ASSERT_RTNL(); 113 114 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 115 if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) 116 return true; 117 } 118 return false; 119} 120 121static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type) 122{ 123 void *lyr3h = NULL; 124 125 switch (skb->protocol) { 126 case htons(ETH_P_ARP): { 127 struct arphdr *arph; 128 129 if (unlikely(!pskb_may_pull(skb, sizeof(*arph)))) 130 return NULL; 131 132 arph = arp_hdr(skb); 133 *type = IPVL_ARP; 134 lyr3h = arph; 135 break; 136 } 137 case htons(ETH_P_IP): { 138 u32 pktlen; 139 struct iphdr *ip4h; 140 141 if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) 142 return NULL; 143 144 ip4h = ip_hdr(skb); 145 pktlen = ntohs(ip4h->tot_len); 146 if (ip4h->ihl < 5 || ip4h->version != 4) 147 return NULL; 148 if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) 149 return NULL; 150 151 *type = IPVL_IPV4; 152 lyr3h = ip4h; 153 break; 154 } 155 case htons(ETH_P_IPV6): { 156 struct ipv6hdr *ip6h; 157 158 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) 159 return NULL; 160 161 ip6h = ipv6_hdr(skb); 162 if (ip6h->version != 6) 163 return NULL; 164 165 *type = IPVL_IPV6; 166 lyr3h = ip6h; 167 /* Only Neighbour Solicitation pkts need different treatment */ 168 if (ipv6_addr_any(&ip6h->saddr) && 169 ip6h->nexthdr == NEXTHDR_ICMP) { 170 *type = IPVL_ICMPV6; 171 lyr3h = ip6h + 1; 172 } 173 break; 174 } 175 default: 176 return NULL; 177 } 178 179 return lyr3h; 180} 181 182unsigned int ipvlan_mac_hash(const unsigned char *addr) 183{ 184 u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2), 185 ipvlan_jhash_secret); 186 187 return hash & IPVLAN_MAC_FILTER_MASK; 188} 189 190void ipvlan_process_multicast(struct work_struct *work) 191{ 192 struct ipvl_port *port = container_of(work, struct ipvl_port, wq); 193 struct ethhdr *ethh; 194 struct ipvl_dev *ipvlan; 195 struct sk_buff *skb, *nskb; 196 struct sk_buff_head list; 197 unsigned int len; 198 unsigned int mac_hash; 199 int ret; 200 u8 pkt_type; 201 bool hlocal, dlocal; 202 203 __skb_queue_head_init(&list); 204 205 spin_lock_bh(&port->backlog.lock); 206 skb_queue_splice_tail_init(&port->backlog, &list); 207 spin_unlock_bh(&port->backlog.lock); 208 209 while ((skb = __skb_dequeue(&list)) != NULL) { 210 ethh = eth_hdr(skb); 211 hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); 212 mac_hash = ipvlan_mac_hash(ethh->h_dest); 213 214 if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) 215 pkt_type = PACKET_BROADCAST; 216 else 217 pkt_type = PACKET_MULTICAST; 218 219 dlocal = false; 220 rcu_read_lock(); 221 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { 222 if (hlocal && (ipvlan->dev == skb->dev)) { 223 dlocal = true; 224 continue; 225 } 226 if (!test_bit(mac_hash, ipvlan->mac_filters)) 227 continue; 228 229 ret = NET_RX_DROP; 230 len = skb->len + ETH_HLEN; 231 nskb = skb_clone(skb, GFP_ATOMIC); 232 if (!nskb) 233 goto acct; 234 235 nskb->pkt_type = pkt_type; 236 nskb->dev = ipvlan->dev; 237 if (hlocal) 238 ret = dev_forward_skb(ipvlan->dev, nskb); 239 else 240 ret = netif_rx(nskb); 241acct: 242 ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); 243 } 244 rcu_read_unlock(); 245 246 if (dlocal) { 247 /* If the packet originated here, send it out. */ 248 skb->dev = port->dev; 249 skb->pkt_type = pkt_type; 250 dev_queue_xmit(skb); 251 } else { 252 kfree_skb(skb); 253 } 254 } 255} 256 257static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, 258 bool local) 259{ 260 struct ipvl_dev *ipvlan = addr->master; 261 struct net_device *dev = ipvlan->dev; 262 unsigned int len; 263 rx_handler_result_t ret = RX_HANDLER_CONSUMED; 264 bool success = false; 265 struct sk_buff *skb = *pskb; 266 267 len = skb->len + ETH_HLEN; 268 if (unlikely(!(dev->flags & IFF_UP))) { 269 kfree_skb(skb); 270 goto out; 271 } 272 273 skb = skb_share_check(skb, GFP_ATOMIC); 274 if (!skb) 275 goto out; 276 277 *pskb = skb; 278 skb->dev = dev; 279 skb->pkt_type = PACKET_HOST; 280 281 if (local) { 282 if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) 283 success = true; 284 } else { 285 ret = RX_HANDLER_ANOTHER; 286 success = true; 287 } 288 289out: 290 ipvlan_count_rx(ipvlan, len, success, false); 291 return ret; 292} 293 294static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, 295 void *lyr3h, int addr_type, 296 bool use_dest) 297{ 298 struct ipvl_addr *addr = NULL; 299 300 if (addr_type == IPVL_IPV6) { 301 struct ipv6hdr *ip6h; 302 struct in6_addr *i6addr; 303 304 ip6h = (struct ipv6hdr *)lyr3h; 305 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; 306 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 307 } else if (addr_type == IPVL_ICMPV6) { 308 struct nd_msg *ndmh; 309 struct in6_addr *i6addr; 310 311 /* Make sure that the NeighborSolicitation ICMPv6 packets 312 * are handled to avoid DAD issue. 313 */ 314 ndmh = (struct nd_msg *)lyr3h; 315 if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { 316 i6addr = &ndmh->target; 317 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 318 } 319 } else if (addr_type == IPVL_IPV4) { 320 struct iphdr *ip4h; 321 __be32 *i4addr; 322 323 ip4h = (struct iphdr *)lyr3h; 324 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; 325 addr = ipvlan_ht_addr_lookup(port, i4addr, false); 326 } else if (addr_type == IPVL_ARP) { 327 struct arphdr *arph; 328 unsigned char *arp_ptr; 329 __be32 dip; 330 331 arph = (struct arphdr *)lyr3h; 332 arp_ptr = (unsigned char *)(arph + 1); 333 if (use_dest) 334 arp_ptr += (2 * port->dev->addr_len) + 4; 335 else 336 arp_ptr += port->dev->addr_len; 337 338 memcpy(&dip, arp_ptr, 4); 339 addr = ipvlan_ht_addr_lookup(port, &dip, false); 340 } 341 342 return addr; 343} 344 345static int ipvlan_process_v4_outbound(struct sk_buff *skb) 346{ 347 const struct iphdr *ip4h = ip_hdr(skb); 348 struct net_device *dev = skb->dev; 349 struct net *net = dev_net(dev); 350 struct rtable *rt; 351 int err, ret = NET_XMIT_DROP; 352 struct flowi4 fl4 = { 353 .flowi4_oif = dev->ifindex, 354 .flowi4_tos = RT_TOS(ip4h->tos), 355 .flowi4_flags = FLOWI_FLAG_ANYSRC, 356 .daddr = ip4h->daddr, 357 .saddr = ip4h->saddr, 358 }; 359 360 rt = ip_route_output_flow(net, &fl4, NULL); 361 if (IS_ERR(rt)) 362 goto err; 363 364 if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { 365 ip_rt_put(rt); 366 goto err; 367 } 368 skb_dst_drop(skb); 369 skb_dst_set(skb, &rt->dst); 370 err = ip_local_out(net, skb->sk, skb); 371 if (unlikely(net_xmit_eval(err))) 372 dev->stats.tx_errors++; 373 else 374 ret = NET_XMIT_SUCCESS; 375 goto out; 376err: 377 dev->stats.tx_errors++; 378 kfree_skb(skb); 379out: 380 return ret; 381} 382 383static int ipvlan_process_v6_outbound(struct sk_buff *skb) 384{ 385 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 386 struct net_device *dev = skb->dev; 387 struct net *net = dev_net(dev); 388 struct dst_entry *dst; 389 int err, ret = NET_XMIT_DROP; 390 struct flowi6 fl6 = { 391 .flowi6_iif = dev->ifindex, 392 .daddr = ip6h->daddr, 393 .saddr = ip6h->saddr, 394 .flowi6_flags = FLOWI_FLAG_ANYSRC, 395 .flowlabel = ip6_flowinfo(ip6h), 396 .flowi6_mark = skb->mark, 397 .flowi6_proto = ip6h->nexthdr, 398 }; 399 400 dst = ip6_route_output(net, NULL, &fl6); 401 if (dst->error) { 402 ret = dst->error; 403 dst_release(dst); 404 goto err; 405 } 406 skb_dst_drop(skb); 407 skb_dst_set(skb, dst); 408 err = ip6_local_out(net, skb->sk, skb); 409 if (unlikely(net_xmit_eval(err))) 410 dev->stats.tx_errors++; 411 else 412 ret = NET_XMIT_SUCCESS; 413 goto out; 414err: 415 dev->stats.tx_errors++; 416 kfree_skb(skb); 417out: 418 return ret; 419} 420 421static int ipvlan_process_outbound(struct sk_buff *skb, 422 const struct ipvl_dev *ipvlan) 423{ 424 struct ethhdr *ethh = eth_hdr(skb); 425 int ret = NET_XMIT_DROP; 426 427 /* In this mode we dont care about multicast and broadcast traffic */ 428 if (is_multicast_ether_addr(ethh->h_dest)) { 429 pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", 430 ntohs(skb->protocol)); 431 kfree_skb(skb); 432 goto out; 433 } 434 435 /* The ipvlan is a pseudo-L2 device, so the packets that we receive 436 * will have L2; which need to discarded and processed further 437 * in the net-ns of the main-device. 438 */ 439 if (skb_mac_header_was_set(skb)) { 440 skb_pull(skb, sizeof(*ethh)); 441 skb->mac_header = (typeof(skb->mac_header))~0U; 442 skb_reset_network_header(skb); 443 } 444 445 if (skb->protocol == htons(ETH_P_IPV6)) 446 ret = ipvlan_process_v6_outbound(skb); 447 else if (skb->protocol == htons(ETH_P_IP)) 448 ret = ipvlan_process_v4_outbound(skb); 449 else { 450 pr_warn_ratelimited("Dropped outbound packet type=%x\n", 451 ntohs(skb->protocol)); 452 kfree_skb(skb); 453 } 454out: 455 return ret; 456} 457 458static void ipvlan_multicast_enqueue(struct ipvl_port *port, 459 struct sk_buff *skb) 460{ 461 if (skb->protocol == htons(ETH_P_PAUSE)) { 462 kfree_skb(skb); 463 return; 464 } 465 466 spin_lock(&port->backlog.lock); 467 if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { 468 __skb_queue_tail(&port->backlog, skb); 469 spin_unlock(&port->backlog.lock); 470 schedule_work(&port->wq); 471 } else { 472 spin_unlock(&port->backlog.lock); 473 atomic_long_inc(&skb->dev->rx_dropped); 474 kfree_skb(skb); 475 } 476} 477 478static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) 479{ 480 const struct ipvl_dev *ipvlan = netdev_priv(dev); 481 void *lyr3h; 482 struct ipvl_addr *addr; 483 int addr_type; 484 485 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); 486 if (!lyr3h) 487 goto out; 488 489 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); 490 if (addr) 491 return ipvlan_rcv_frame(addr, &skb, true); 492 493out: 494 skb->dev = ipvlan->phy_dev; 495 return ipvlan_process_outbound(skb, ipvlan); 496} 497 498static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) 499{ 500 const struct ipvl_dev *ipvlan = netdev_priv(dev); 501 struct ethhdr *eth = eth_hdr(skb); 502 struct ipvl_addr *addr; 503 void *lyr3h; 504 int addr_type; 505 506 if (ether_addr_equal(eth->h_dest, eth->h_source)) { 507 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); 508 if (lyr3h) { 509 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); 510 if (addr) 511 return ipvlan_rcv_frame(addr, &skb, true); 512 } 513 skb = skb_share_check(skb, GFP_ATOMIC); 514 if (!skb) 515 return NET_XMIT_DROP; 516 517 /* Packet definitely does not belong to any of the 518 * virtual devices, but the dest is local. So forward 519 * the skb for the main-dev. At the RX side we just return 520 * RX_PASS for it to be processed further on the stack. 521 */ 522 return dev_forward_skb(ipvlan->phy_dev, skb); 523 524 } else if (is_multicast_ether_addr(eth->h_dest)) { 525 ipvlan_multicast_enqueue(ipvlan->port, skb); 526 return NET_XMIT_SUCCESS; 527 } 528 529 skb->dev = ipvlan->phy_dev; 530 return dev_queue_xmit(skb); 531} 532 533int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) 534{ 535 struct ipvl_dev *ipvlan = netdev_priv(dev); 536 struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); 537 538 if (!port) 539 goto out; 540 541 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) 542 goto out; 543 544 switch(port->mode) { 545 case IPVLAN_MODE_L2: 546 return ipvlan_xmit_mode_l2(skb, dev); 547 case IPVLAN_MODE_L3: 548 return ipvlan_xmit_mode_l3(skb, dev); 549 } 550 551 /* Should not reach here */ 552 WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", 553 port->mode); 554out: 555 kfree_skb(skb); 556 return NET_XMIT_DROP; 557} 558 559static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) 560{ 561 struct ethhdr *eth = eth_hdr(skb); 562 struct ipvl_addr *addr; 563 void *lyr3h; 564 int addr_type; 565 566 if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { 567 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); 568 if (!lyr3h) 569 return true; 570 571 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); 572 if (addr) 573 return false; 574 } 575 576 return true; 577} 578 579static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, 580 struct ipvl_port *port) 581{ 582 void *lyr3h; 583 int addr_type; 584 struct ipvl_addr *addr; 585 struct sk_buff *skb = *pskb; 586 rx_handler_result_t ret = RX_HANDLER_PASS; 587 588 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); 589 if (!lyr3h) 590 goto out; 591 592 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 593 if (addr) 594 ret = ipvlan_rcv_frame(addr, pskb, false); 595 596out: 597 return ret; 598} 599 600static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, 601 struct ipvl_port *port) 602{ 603 struct sk_buff *skb = *pskb; 604 struct ethhdr *eth = eth_hdr(skb); 605 rx_handler_result_t ret = RX_HANDLER_PASS; 606 void *lyr3h; 607 int addr_type; 608 609 if (is_multicast_ether_addr(eth->h_dest)) { 610 if (ipvlan_external_frame(skb, port)) { 611 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 612 613 /* External frames are queued for device local 614 * distribution, but a copy is given to master 615 * straight away to avoid sending duplicates later 616 * when work-queue processes this frame. This is 617 * achieved by returning RX_HANDLER_PASS. 618 */ 619 if (nskb) 620 ipvlan_multicast_enqueue(port, nskb); 621 } 622 } else { 623 struct ipvl_addr *addr; 624 625 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); 626 if (!lyr3h) 627 return ret; 628 629 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 630 if (addr) 631 ret = ipvlan_rcv_frame(addr, pskb, false); 632 } 633 634 return ret; 635} 636 637rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) 638{ 639 struct sk_buff *skb = *pskb; 640 struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); 641 642 if (!port) 643 return RX_HANDLER_PASS; 644 645 switch (port->mode) { 646 case IPVLAN_MODE_L2: 647 return ipvlan_handle_mode_l2(pskb, port); 648 case IPVLAN_MODE_L3: 649 return ipvlan_handle_mode_l3(pskb, port); 650 } 651 652 /* Should not reach here */ 653 WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", 654 port->mode); 655 kfree_skb(skb); 656 return RX_HANDLER_CONSUMED; 657} 658