root/tools/testing/selftests/bpf/progs/test_xdp_noinline.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rol32
  2. jhash
  3. __jhash_nwords
  4. jhash_2words
  5. calc_offset
  6. parse_udp
  7. parse_tcp
  8. encap_v6
  9. encap_v4
  10. decap_v6
  11. decap_v4
  12. swap_mac_and_send
  13. send_icmp_reply
  14. send_icmp6_reply
  15. parse_icmpv6
  16. parse_icmp
  17. get_packet_hash
  18. get_packet_dst
  19. connection_table_lookup
  20. process_l3_headers_v6
  21. process_l3_headers_v4
  22. process_packet
  23. balancer_ingress

   1 // SPDX-License-Identifier: GPL-2.0
   2 // Copyright (c) 2017 Facebook
   3 #include <stddef.h>
   4 #include <stdbool.h>
   5 #include <string.h>
   6 #include <linux/pkt_cls.h>
   7 #include <linux/bpf.h>
   8 #include <linux/in.h>
   9 #include <linux/if_ether.h>
  10 #include <linux/ip.h>
  11 #include <linux/ipv6.h>
  12 #include <linux/icmp.h>
  13 #include <linux/icmpv6.h>
  14 #include <linux/tcp.h>
  15 #include <linux/udp.h>
  16 #include "bpf_helpers.h"
  17 #include "bpf_endian.h"
  18 
  /* Rotate the 32-bit value @word left by @shift bits.
   * The right-shift count is reduced modulo 32, so a @shift of 0 yields
   * @word unchanged rather than a shift by the full register width.
   */
  static __u32 rol32(__u32 word, unsigned int shift)
  {
          __u32 upper = word << shift;
          __u32 lower = word >> ((-shift) & 31);

          return upper | lower;
  }
  23 
  /* Copy-paste of jhash from the kernel sources, to make sure LLVM
   * can compile it into a valid sequence of BPF instructions.
   *
   * __jhash_mix - mix the three 32-bit lanes a, b and c in place.
   * All three arguments must be modifiable lvalues; each one is evaluated
   * several times, so do not pass expressions with side effects.
   *
   * The body is wrapped in do { } while (0) so that
   * "__jhash_mix(a, b, c);" expands to exactly one statement and stays
   * valid as the unbraced body of an if/else.
   */
  #define __jhash_mix(a, b, c)                    \
  do {                                            \
          a -= c;  a ^= rol32(c, 4);  c += b;     \
          b -= a;  b ^= rol32(a, 6);  a += c;     \
          c -= b;  c ^= rol32(b, 8);  b += a;     \
          a -= c;  a ^= rol32(c, 16); c += b;     \
          b -= a;  b ^= rol32(a, 19); a += c;     \
          c -= b;  c ^= rol32(b, 4);  b += a;     \
  } while (0)
  36 
  /* __jhash_final - final mixing of the three 32-bit lanes a, b and c.
   * The callers in this file use c as the resulting hash value.
   * All three arguments must be modifiable lvalues; each one is evaluated
   * several times, so do not pass expressions with side effects.
   *
   * The body is wrapped in do { } while (0) so that
   * "__jhash_final(a, b, c);" expands to exactly one statement and stays
   * valid as the unbraced body of an if/else.
   */
  #define __jhash_final(a, b, c)                  \
  do {                                            \
          c ^= b; c -= rol32(b, 14);              \
          a ^= c; a -= rol32(c, 11);              \
          b ^= a; b -= rol32(a, 25);              \
          c ^= b; c -= rol32(b, 16);              \
          a ^= c; a -= rol32(c, 4);               \
          b ^= a; b -= rol32(a, 14);              \
          c ^= b; c -= rol32(b, 24);              \
  } while (0)
  47 
  /* Constant folded into the starting state by jhash() and jhash_2words(). */
  48 #define JHASH_INITVAL           0xdeadbeef
  49 
  /* Short spelling of a 32-bit unsigned integer used by the jhash helpers. */
  50 typedef unsigned int u32;
  51 
  52 static __attribute__ ((noinline))
  53 u32 jhash(const void *key, u32 length, u32 initval)
  54 {
  55         u32 a, b, c;
  56         const unsigned char *k = key;
  57 
  58         a = b = c = JHASH_INITVAL + length + initval;
  59 
  60         while (length > 12) {
  61                 a += *(u32 *)(k);
  62                 b += *(u32 *)(k + 4);
  63                 c += *(u32 *)(k + 8);
  64                 __jhash_mix(a, b, c);
  65                 length -= 12;
  66                 k += 12;
  67         }
  68         switch (length) {
  69         case 12: c += (u32)k[11]<<24;
  70         case 11: c += (u32)k[10]<<16;
  71         case 10: c += (u32)k[9]<<8;
  72         case 9:  c += k[8];
  73         case 8:  b += (u32)k[7]<<24;
  74         case 7:  b += (u32)k[6]<<16;
  75         case 6:  b += (u32)k[5]<<8;
  76         case 5:  b += k[4];
  77         case 4:  a += (u32)k[3]<<24;
  78         case 3:  a += (u32)k[2]<<16;
  79         case 2:  a += (u32)k[1]<<8;
  80         case 1:  a += k[0];
  81                  __jhash_final(a, b, c);
  82         case 0: /* Nothing left to add */
  83                 break;
  84         }
  85 
  86         return c;
  87 }
  88 
  89 static __attribute__ ((noinline))
  90 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
  91 {
  92         a += initval;
  93         b += initval;
  94         c += initval;
  95         __jhash_final(a, b, c);
  96         return c;
  97 }
  98 
  99 static __attribute__ ((noinline))
 100 u32 jhash_2words(u32 a, u32 b, u32 initval)
 101 {
 102         return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
 103 }
 104 
 /* Flow tuple extracted from one packet.  It is the key type of lru_cache
  * and the input of get_packet_hash().
  */
 105 struct flow_key {
 106         union {
 107                 __be32 src;             /* source side of the flow, IPv4 */
 108                 __be32 srcv6[4];        /* source side of the flow, IPv6 */
 109         };
 110         union {
 111                 __be32 dst;             /* destination side of the flow, IPv4 */
 112                 __be32 dstv6[4];        /* destination side of the flow, IPv6 */
 113         };
 114         union {
 115                 __u32 ports;            /* both ports as one word (hash input) */
 116                 __u16 port16[2];        /* filled by parse_tcp()/parse_udp() */
 117         };
 118         __u8 proto;                     /* protocol field copied from the IP header */
 119 };
 120 
 /* Per-packet parsing state shared between the helpers of process_packet(). */
 121 struct packet_description {
 122         struct flow_key flow;
         /* bit 0: flow was taken from a header embedded in an ICMP message
          *        (set by parse_icmp()/parse_icmpv6());
          * bit 1: TCP SYN seen (set by parse_tcp()).
          */
 123         __u8 flags;
 124 };
 125 
 /* Value type of ctl_array.  Only the mac member is read in this file:
  * encap_v4()/encap_v6() copy it into the destination MAC of the new
  * Ethernet header.
  */
 126 struct ctl_value {
 127         union {
 128                 __u64 value;
 129                 __u32 ifindex;
 130                 __u8 mac[6];
 131         };
 132 };
 133 
 /* Key type of vip_map.  process_packet() fills the address, port and proto
  * from the parsed flow; it never writes family, which therefore keeps the
  * zero from the "{ }" initializer.
  */
 134 struct vip_definition {
 135         union {
 136                 __be32 vip;             /* copied from flow.dst */
 137                 __be32 vipv6[4];        /* copied from flow.dstv6 */
 138         };
 139         __u16 port;                     /* flow.port16[1], or 0 for the second lookup */
 140         __u16 family;
 141         __u8 proto;                     /* copied from flow.proto */
 142 };
 143 
 /* Value type of vip_map. */
 144 struct vip_meta {
         /* Bits tested in this file:
          *  bit 0: process_packet() zeroes flow.port16[0];
          *  bit 1: skip connection_table_lookup() and the LRU update;
          *  bit 2: get_packet_dst() always uses the 16-byte hash;
          *  bit 3: get_packet_dst() overwrites port16[0] with port16[1]
          *         and zeroes the source address before hashing;
          *  bit 4: keep flow.port16[1] when the VIP matched with port 0.
          */
 145         __u32 flags;
         /* Used to build the ch_rings key and as an index into stats. */
 146         __u32 vip_num;
 147 };
 148 
 /* Value type of lru_cache: remembers which backend a flow was sent to. */
 149 struct real_pos_lru {
 150         __u32 pos;      /* key into the reals map */
 151         __u64 atime;    /* bpf_ktime_get_ns() stamp; only maintained for UDP flows */
 152 };
 153 
 /* Value type of the reals map: one backend ("real") address. */
 154 struct real_definition {
 155         union {
 156                 __be32 dst;
 157                 __be32 dstv6[4];
 158         };
 159         __u8 flags;     /* bit 0 set: process_packet() uses encap_v6(), else encap_v4() */
 160 };
 161 
 /* Value type of the stats map: two counters whose meaning depends on the
  * slot (for slot 514, get_packet_dst() stores a timestamp in v2).
  */
 162 struct lb_stats {
 163         __u64 v2;
 164         __u64 v1;
 165 };
 166 
 /* Hash map from a VIP (address/port/proto) to its metadata; looked up by
  * process_packet().
  */
 167 struct {
 168         __uint(type, BPF_MAP_TYPE_HASH);
 169         __uint(max_entries, 512);
 170         __type(key, struct vip_definition);
 171         __type(value, struct vip_meta);
 172 } vip_map SEC(".maps");
 173 
 /* LRU connection table: flow -> previously chosen backend.  Read by
  * connection_table_lookup() and written by get_packet_dst().
  * NOTE(review): map_flags 1U << 1 is presumably BPF_F_NO_COMMON_LRU -
  * confirm against linux/bpf.h.
  */
 174 struct {
 175         __uint(type, BPF_MAP_TYPE_LRU_HASH);
 176         __uint(max_entries, 300);
 177         __uint(map_flags, 1U << 1);
 178         __type(key, struct flow_key);
 179         __type(value, struct real_pos_lru);
 180 } lru_cache SEC(".maps");
 181 
 /* Array consulted by get_packet_dst() with key 2 * vip_num + hash % 2;
  * the stored value is then used as the key into the reals map.
  */
 182 struct {
 183         __uint(type, BPF_MAP_TYPE_ARRAY);
 184         __uint(max_entries, 12 * 655);
 185         __type(key, __u32);
 186         __type(value, __u32);
 187 } ch_rings SEC(".maps");
 188 
 /* Array of backend definitions, indexed by the value read from ch_rings
  * or by real_pos_lru.pos.
  */
 189 struct {
 190         __uint(type, BPF_MAP_TYPE_ARRAY);
 191         __uint(max_entries, 40);
 192         __type(key, __u32);
 193         __type(value, struct real_definition);
 194 } reals SEC(".maps");
 195 
 /* Per-CPU counters.  Slots used in this file:
  *  512     - process_packet(): v1 per packet that matched a VIP,
  *            v2 per packet for which get_packet_dst() chose a backend;
  *  513     - process_packet(): TCP packets not served from the LRU,
  *            v1 with SYN, v2 without;
  *  514     - get_packet_dst(): v1 counter, v2 timestamp;
  *  vip_num - process_packet(): v1 packets, v2 bytes.
  */
 196 struct {
 197         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 198         __uint(max_entries, 515);
 199         __type(key, __u32);
 200         __type(value, struct lb_stats);
 201 } stats SEC(".maps");
 202 
 /* Control values.  Only slot 0 is read here: process_packet() passes it to
  * encap_v4()/encap_v6(), which use its mac member.
  */
 203 struct {
 204         __uint(type, BPF_MAP_TYPE_ARRAY);
 205         __uint(max_entries, 16);
 206         __type(key, __u32);
 207         __type(value, struct ctl_value);
 208 } ctl_array SEC(".maps");
 209 
 /* Ethernet header as laid out at the start of the packet. */
 210 struct eth_hdr {
 211         unsigned char eth_dest[6];
 212         unsigned char eth_source[6];
 213         unsigned short eth_proto;       /* as on the wire; balancer_ingress() converts with bpf_ntohs() */
 214 };
 215 
 216 static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
 217 {
 218         __u64 off = sizeof(struct eth_hdr);
 219         if (is_ipv6) {
 220                 off += sizeof(struct ipv6hdr);
 221                 if (is_icmp)
 222                         off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
 223         } else {
 224                 off += sizeof(struct iphdr);
 225                 if (is_icmp)
 226                         off += sizeof(struct icmphdr) + sizeof(struct iphdr);
 227         }
 228         return off;
 229 }
 230 
 231 static __attribute__ ((noinline))
 232 bool parse_udp(void *data, void *data_end,
 233                bool is_ipv6, struct packet_description *pckt)
 234 {
 235 
 236         bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
 237         __u64 off = calc_offset(is_ipv6, is_icmp);
 238         struct udphdr *udp;
 239         udp = data + off;
 240 
 241         if (udp + 1 > data_end)
 242                 return 0;
 243         if (!is_icmp) {
 244                 pckt->flow.port16[0] = udp->source;
 245                 pckt->flow.port16[1] = udp->dest;
 246         } else {
 247                 pckt->flow.port16[0] = udp->dest;
 248                 pckt->flow.port16[1] = udp->source;
 249         }
 250         return 1;
 251 }
 252 
 253 static __attribute__ ((noinline))
 254 bool parse_tcp(void *data, void *data_end,
 255                bool is_ipv6, struct packet_description *pckt)
 256 {
 257 
 258         bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
 259         __u64 off = calc_offset(is_ipv6, is_icmp);
 260         struct tcphdr *tcp;
 261 
 262         tcp = data + off;
 263         if (tcp + 1 > data_end)
 264                 return 0;
 265         if (tcp->syn)
 266                 pckt->flags |= (1 << 1);
 267         if (!is_icmp) {
 268                 pckt->flow.port16[0] = tcp->source;
 269                 pckt->flow.port16[1] = tcp->dest;
 270         } else {
 271                 pckt->flow.port16[0] = tcp->dest;
 272                 pckt->flow.port16[1] = tcp->source;
 273         }
 274         return 1;
 275 }
 276 
 277 static __attribute__ ((noinline))
 278 bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
 279               struct packet_description *pckt,
 280               struct real_definition *dst, __u32 pkt_bytes)
 281 {
 282         struct eth_hdr *new_eth;
 283         struct eth_hdr *old_eth;
 284         struct ipv6hdr *ip6h;
 285         __u32 ip_suffix;
 286         void *data_end;
 287         void *data;
 288 
 289         if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
 290                 return 0;
 291         data = (void *)(long)xdp->data;
 292         data_end = (void *)(long)xdp->data_end;
 293         new_eth = data;
 294         ip6h = data + sizeof(struct eth_hdr);
 295         old_eth = data + sizeof(struct ipv6hdr);
 296         if (new_eth + 1 > data_end ||
 297             old_eth + 1 > data_end || ip6h + 1 > data_end)
 298                 return 0;
 299         memcpy(new_eth->eth_dest, cval->mac, 6);
 300         memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
 301         new_eth->eth_proto = 56710;
 302         ip6h->version = 6;
 303         ip6h->priority = 0;
 304         memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
 305 
 306         ip6h->nexthdr = IPPROTO_IPV6;
 307         ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
 308         ip6h->payload_len =
 309             bpf_htons(pkt_bytes + sizeof(struct ipv6hdr));
 310         ip6h->hop_limit = 4;
 311 
 312         ip6h->saddr.in6_u.u6_addr32[0] = 1;
 313         ip6h->saddr.in6_u.u6_addr32[1] = 2;
 314         ip6h->saddr.in6_u.u6_addr32[2] = 3;
 315         ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
 316         memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
 317         return 1;
 318 }
 319 
 /* Build an outer IPv4 (IPIP) header in front of the packet, then move the
  * head back to where it was.
  *
  * Returns 0 if either head adjustment fails or the headers do not fit
  * before data_end, 1 otherwise.
  */
 320 static __attribute__ ((noinline))
 321 bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
 322               struct packet_description *pckt,
 323               struct real_definition *dst, __u32 pkt_bytes)
 324 {
 325 
 326         __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]);
 327         struct eth_hdr *new_eth;
 328         struct eth_hdr *old_eth;
 329         __u16 *next_iph_u16;
 330         struct iphdr *iph;
 331         __u32 csum = 0;
 332         void *data_end;
 333         void *data;
 334 
         /* Mix the first port and the flow source into the outer source. */
 335         ip_suffix <<= 15;
 336         ip_suffix ^= pckt->flow.src;
         /* A negative delta grows the packet at the front. */
 337         if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
 338                 return 0;
         /* Packet pointers must be re-read after the head moved. */
 339         data = (void *)(long)xdp->data;
 340         data_end = (void *)(long)xdp->data_end;
 341         new_eth = data;
 342         iph = data + sizeof(struct eth_hdr);
 343         old_eth = data + sizeof(struct iphdr);
 344         if (new_eth + 1 > data_end ||
 345             old_eth + 1 > data_end || iph + 1 > data_end)
 346                 return 0;
 347         memcpy(new_eth->eth_dest, cval->mac, 6);
 348         memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
         /* 8 == 0x0008, i.e. 0x0800 with its two bytes swapped. */
 349         new_eth->eth_proto = 8;
 350         iph->version = 4;
 351         iph->ihl = 5;
 352         iph->frag_off = 0;
 353         iph->protocol = IPPROTO_IPIP;
 354         iph->check = 0;
 355         iph->tos = 1;
 356         iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr));
 357         /* don't update iph->daddr, since it will overwrite old eth_proto
 358          * and multiple iterations of bpf_prog_run() will fail
 359          */
 360 
 361         iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
 362         iph->ttl = 4;
 363 
         /* Header checksum: sum the 16-bit words of the header, fold the
          * carry once and store the complement.
          */
 364         next_iph_u16 = (__u16 *) iph;
 365 #pragma clang loop unroll(full)
 366         for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
 367                 csum += *next_iph_u16++;
 368         iph->check = ~((csum & 0xffff) + (csum >> 16));
         /* Positive delta: drop the bytes added above again.
          * NOTE(review): presumably so that repeated bpf_prog_run()
          * iterations see the original packet start - confirm.
          */
 369         if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
 370                 return 0;
 371         return 1;
 372 }
 373 
 374 static __attribute__ ((noinline))
 375 bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
 376 {
 377         struct eth_hdr *new_eth;
 378         struct eth_hdr *old_eth;
 379 
 380         old_eth = *data;
 381         new_eth = *data + sizeof(struct ipv6hdr);
 382         memcpy(new_eth->eth_source, old_eth->eth_source, 6);
 383         memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
 384         if (inner_v4)
 385                 new_eth->eth_proto = 8;
 386         else
 387                 new_eth->eth_proto = 56710;
 388         if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
 389                 return 0;
 390         *data = (void *)(long)xdp->data;
 391         *data_end = (void *)(long)xdp->data_end;
 392         return 1;
 393 }
 394 
 395 static __attribute__ ((noinline))
 396 bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
 397 {
 398         struct eth_hdr *new_eth;
 399         struct eth_hdr *old_eth;
 400 
 401         old_eth = *data;
 402         new_eth = *data + sizeof(struct iphdr);
 403         memcpy(new_eth->eth_source, old_eth->eth_source, 6);
 404         memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
 405         new_eth->eth_proto = 8;
 406         if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
 407                 return 0;
 408         *data = (void *)(long)xdp->data;
 409         *data_end = (void *)(long)xdp->data_end;
 410         return 1;
 411 }
 412 
 413 static __attribute__ ((noinline))
 414 int swap_mac_and_send(void *data, void *data_end)
 415 {
 416         unsigned char tmp_mac[6];
 417         struct eth_hdr *eth;
 418 
 419         eth = data;
 420         memcpy(tmp_mac, eth->eth_source, 6);
 421         memcpy(eth->eth_source, eth->eth_dest, 6);
 422         memcpy(eth->eth_dest, tmp_mac, 6);
 423         return XDP_TX;
 424 }
 425 
 426 static __attribute__ ((noinline))
 427 int send_icmp_reply(void *data, void *data_end)
 428 {
 429         struct icmphdr *icmp_hdr;
 430         __u16 *next_iph_u16;
 431         __u32 tmp_addr = 0;
 432         struct iphdr *iph;
 433         __u32 csum1 = 0;
 434         __u32 csum = 0;
 435         __u64 off = 0;
 436 
 437         if (data + sizeof(struct eth_hdr)
 438              + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
 439                 return XDP_DROP;
 440         off += sizeof(struct eth_hdr);
 441         iph = data + off;
 442         off += sizeof(struct iphdr);
 443         icmp_hdr = data + off;
 444         icmp_hdr->type = 0;
 445         icmp_hdr->checksum += 0x0007;
 446         iph->ttl = 4;
 447         tmp_addr = iph->daddr;
 448         iph->daddr = iph->saddr;
 449         iph->saddr = tmp_addr;
 450         iph->check = 0;
 451         next_iph_u16 = (__u16 *) iph;
 452 #pragma clang loop unroll(full)
 453         for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
 454                 csum += *next_iph_u16++;
 455         iph->check = ~((csum & 0xffff) + (csum >> 16));
 456         return swap_mac_and_send(data, data_end);
 457 }
 458 
 459 static __attribute__ ((noinline))
 460 int send_icmp6_reply(void *data, void *data_end)
 461 {
 462         struct icmp6hdr *icmp_hdr;
 463         struct ipv6hdr *ip6h;
 464         __be32 tmp_addr[4];
 465         __u64 off = 0;
 466 
 467         if (data + sizeof(struct eth_hdr)
 468              + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
 469                 return XDP_DROP;
 470         off += sizeof(struct eth_hdr);
 471         ip6h = data + off;
 472         off += sizeof(struct ipv6hdr);
 473         icmp_hdr = data + off;
 474         icmp_hdr->icmp6_type = 129;
 475         icmp_hdr->icmp6_cksum -= 0x0001;
 476         ip6h->hop_limit = 4;
 477         memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
 478         memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
 479         memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
 480         return swap_mac_and_send(data, data_end);
 481 }
 482 
 483 static __attribute__ ((noinline))
 484 int parse_icmpv6(void *data, void *data_end, __u64 off,
 485                  struct packet_description *pckt)
 486 {
 487         struct icmp6hdr *icmp_hdr;
 488         struct ipv6hdr *ip6h;
 489 
 490         icmp_hdr = data + off;
 491         if (icmp_hdr + 1 > data_end)
 492                 return XDP_DROP;
 493         if (icmp_hdr->icmp6_type == 128)
 494                 return send_icmp6_reply(data, data_end);
 495         if (icmp_hdr->icmp6_type != 3)
 496                 return XDP_PASS;
 497         off += sizeof(struct icmp6hdr);
 498         ip6h = data + off;
 499         if (ip6h + 1 > data_end)
 500                 return XDP_DROP;
 501         pckt->flow.proto = ip6h->nexthdr;
 502         pckt->flags |= (1 << 0);
 503         memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
 504         memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
 505         return -1;
 506 }
 507 
 508 static __attribute__ ((noinline))
 509 int parse_icmp(void *data, void *data_end, __u64 off,
 510                struct packet_description *pckt)
 511 {
 512         struct icmphdr *icmp_hdr;
 513         struct iphdr *iph;
 514 
 515         icmp_hdr = data + off;
 516         if (icmp_hdr + 1 > data_end)
 517                 return XDP_DROP;
 518         if (icmp_hdr->type == 8)
 519                 return send_icmp_reply(data, data_end);
 520         if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
 521                 return XDP_PASS;
 522         off += sizeof(struct icmphdr);
 523         iph = data + off;
 524         if (iph + 1 > data_end)
 525                 return XDP_DROP;
 526         if (iph->ihl != 5)
 527                 return XDP_DROP;
 528         pckt->flow.proto = iph->protocol;
 529         pckt->flags |= (1 << 0);
 530         pckt->flow.src = iph->daddr;
 531         pckt->flow.dst = iph->saddr;
 532         return -1;
 533 }
 534 
 535 static __attribute__ ((noinline))
 536 __u32 get_packet_hash(struct packet_description *pckt,
 537                       bool hash_16bytes)
 538 {
 539         if (hash_16bytes)
 540                 return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
 541                                     pckt->flow.ports, 24);
 542         else
 543                 return jhash_2words(pckt->flow.src, pckt->flow.ports,
 544                                     24);
 545 }
 546 
 /* Choose a backend for the flow in pckt and store it in *real.
  *
  * The flow hash selects a ch_rings slot, whose value indexes the reals
  * map.  Unless bit 1 of vip_info->flags is set, the choice may also be
  * recorded in lru_map.
  *
  * Returns 1 when *real was set, 0 when the hash is not one of the two
  * expected values or a map lookup failed.
  */
 547 __attribute__ ((noinline))
 548 static bool get_packet_dst(struct real_definition **real,
 549                            struct packet_description *pckt,
 550                            struct vip_meta *vip_info,
 551                            bool is_ipv6, void *lru_map)
 552 {
 553         struct real_pos_lru new_dst_lru = { };
 554         bool hash_16bytes = is_ipv6;
 555         __u32 *real_pos, hash, key;
 556         __u64 cur_time;
 557 
         /* VIP flag bit 2 forces the 16-byte hash even for IPv4. */
 558         if (vip_info->flags & (1 << 2))
 559                 hash_16bytes = 1;
         /* VIP flag bit 3: hash only on the second port - the first port is
          * overwritten with it and the source address is zeroed.
          */
 560         if (vip_info->flags & (1 << 3)) {
 561                 pckt->flow.port16[0] = pckt->flow.port16[1];
 562                 memset(pckt->flow.srcv6, 0, 16);
 563         }
 564         hash = get_packet_hash(pckt, hash_16bytes);
         /* Only the two hash values below are accepted. */
 565         if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
 566             hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
 567                 return 0;
 568         key = 2 * vip_info->vip_num + hash % 2;
 569         real_pos = bpf_map_lookup_elem(&ch_rings, &key);
 570         if (!real_pos)
 571                 return 0;
         /* The ring entry is the index of the backend in the reals map. */
 572         key = *real_pos;
 573         *real = bpf_map_lookup_elem(&reals, &key);
 574         if (!(*real))
 575                 return 0;
 576         if (!(vip_info->flags & (1 << 1))) {
 577                 __u32 conn_rate_key = 512 + 2;
 578                 struct lb_stats *conn_rate_stats =
 579                     bpf_map_lookup_elem(&stats, &conn_rate_key);
 580 
                 /* A backend is already chosen; only the LRU update is skipped. */
 581                 if (!conn_rate_stats)
 582                         return 1;
 583                 cur_time = bpf_ktime_get_ns();
                 /* v2 holds the time of the last reset, v1 a counter. */
 584                 if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
 585                         conn_rate_stats->v1 = 1;
 586                         conn_rate_stats->v2 = cur_time;
 587                 } else {
 588                         conn_rate_stats->v1 += 1;
                         /* NOTE(review): after the increment this is true
                          * unless v1 wrapped to 0, so the LRU update below
                          * appears to run only right after a reset - confirm
                          * this is intended for the test.
                          */
 589                         if (conn_rate_stats->v1 >= 1)
 590                                 return 1;
 591                 }
                 /* atime is only recorded for UDP; it stays 0 otherwise. */
 592                 if (pckt->flow.proto == IPPROTO_UDP)
 593                         new_dst_lru.atime = cur_time;
 594                 new_dst_lru.pos = key;
 595                 bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
 596         }
 597         return 1;
 598 }
 599 
 600 __attribute__ ((noinline))
 601 static void connection_table_lookup(struct real_definition **real,
 602                                     struct packet_description *pckt,
 603                                     void *lru_map)
 604 {
 605 
 606         struct real_pos_lru *dst_lru;
 607         __u64 cur_time;
 608         __u32 key;
 609 
 610         dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
 611         if (!dst_lru)
 612                 return;
 613         if (pckt->flow.proto == IPPROTO_UDP) {
 614                 cur_time = bpf_ktime_get_ns();
 615                 if (cur_time - dst_lru->atime > 300000)
 616                         return;
 617                 dst_lru->atime = cur_time;
 618         }
 619         key = dst_lru->pos;
 620         *real = bpf_map_lookup_elem(&reals, &key);
 621 }
 622 
 623 /* don't believe your eyes!
 624  * below function has 6 arguments whereas bpf and llvm allow maximum of 5
 625  * but since it's _static_ llvm can optimize one argument away
 626  */
 627 __attribute__ ((noinline))
 628 static int process_l3_headers_v6(struct packet_description *pckt,
 629                                  __u8 *protocol, __u64 off,
 630                                  __u16 *pkt_bytes, void *data,
 631                                  void *data_end)
 632 {
 633         struct ipv6hdr *ip6h;
 634         __u64 iph_len;
 635         int action;
 636 
 637         ip6h = data + off;
 638         if (ip6h + 1 > data_end)
 639                 return XDP_DROP;
 640         iph_len = sizeof(struct ipv6hdr);
 641         *protocol = ip6h->nexthdr;
 642         pckt->flow.proto = *protocol;
 643         *pkt_bytes = bpf_ntohs(ip6h->payload_len);
 644         off += iph_len;
 645         if (*protocol == 45) {
 646                 return XDP_DROP;
 647         } else if (*protocol == 59) {
 648                 action = parse_icmpv6(data, data_end, off, pckt);
 649                 if (action >= 0)
 650                         return action;
 651         } else {
 652                 memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
 653                 memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
 654         }
 655         return -1;
 656 }
 657 
 658 __attribute__ ((noinline))
 659 static int process_l3_headers_v4(struct packet_description *pckt,
 660                                  __u8 *protocol, __u64 off,
 661                                  __u16 *pkt_bytes, void *data,
 662                                  void *data_end)
 663 {
 664         struct iphdr *iph;
 665         __u64 iph_len;
 666         int action;
 667 
 668         iph = data + off;
 669         if (iph + 1 > data_end)
 670                 return XDP_DROP;
 671         if (iph->ihl != 5)
 672                 return XDP_DROP;
 673         *protocol = iph->protocol;
 674         pckt->flow.proto = *protocol;
 675         *pkt_bytes = bpf_ntohs(iph->tot_len);
 676         off += 20;
 677         if (iph->frag_off & 65343)
 678                 return XDP_DROP;
 679         if (*protocol == IPPROTO_ICMP) {
 680                 action = parse_icmp(data, data_end, off, pckt);
 681                 if (action >= 0)
 682                         return action;
 683         } else {
 684                 pckt->flow.src = iph->saddr;
 685                 pckt->flow.dst = iph->daddr;
 686         }
 687         return -1;
 688 }
 689 
 690 __attribute__ ((noinline))
 691 static int process_packet(void *data, __u64 off, void *data_end,
 692                           bool is_ipv6, struct xdp_md *xdp)
 693 {
 694 
 695         struct real_definition *dst = NULL;
 696         struct packet_description pckt = { };
 697         struct vip_definition vip = { };
 698         struct lb_stats *data_stats;
 699         struct eth_hdr *eth = data;
 700         void *lru_map = &lru_cache;
 701         struct vip_meta *vip_info;
 702         __u32 lru_stats_key = 513;
 703         __u32 mac_addr_pos = 0;
 704         __u32 stats_key = 512;
 705         struct ctl_value *cval;
 706         __u16 pkt_bytes;
 707         __u64 iph_len;
 708         __u8 protocol;
 709         __u32 vip_num;
 710         int action;
 711 
 712         if (is_ipv6)
 713                 action = process_l3_headers_v6(&pckt, &protocol, off,
 714                                                &pkt_bytes, data, data_end);
 715         else
 716                 action = process_l3_headers_v4(&pckt, &protocol, off,
 717                                                &pkt_bytes, data, data_end);
 718         if (action >= 0)
 719                 return action;
 720         protocol = pckt.flow.proto;
 721         if (protocol == IPPROTO_TCP) {
 722                 if (!parse_tcp(data, data_end, is_ipv6, &pckt))
 723                         return XDP_DROP;
 724         } else if (protocol == IPPROTO_UDP) {
 725                 if (!parse_udp(data, data_end, is_ipv6, &pckt))
 726                         return XDP_DROP;
 727         } else {
 728                 return XDP_TX;
 729         }
 730 
 731         if (is_ipv6)
 732                 memcpy(vip.vipv6, pckt.flow.dstv6, 16);
 733         else
 734                 vip.vip = pckt.flow.dst;
 735         vip.port = pckt.flow.port16[1];
 736         vip.proto = pckt.flow.proto;
 737         vip_info = bpf_map_lookup_elem(&vip_map, &vip);
 738         if (!vip_info) {
 739                 vip.port = 0;
 740                 vip_info = bpf_map_lookup_elem(&vip_map, &vip);
 741                 if (!vip_info)
 742                         return XDP_PASS;
 743                 if (!(vip_info->flags & (1 << 4)))
 744                         pckt.flow.port16[1] = 0;
 745         }
 746         if (data_end - data > 1400)
 747                 return XDP_DROP;
 748         data_stats = bpf_map_lookup_elem(&stats, &stats_key);
 749         if (!data_stats)
 750                 return XDP_DROP;
 751         data_stats->v1 += 1;
 752         if (!dst) {
 753                 if (vip_info->flags & (1 << 0))
 754                         pckt.flow.port16[0] = 0;
 755                 if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
 756                         connection_table_lookup(&dst, &pckt, lru_map);
 757                 if (dst)
 758                         goto out;
 759                 if (pckt.flow.proto == IPPROTO_TCP) {
 760                         struct lb_stats *lru_stats =
 761                             bpf_map_lookup_elem(&stats, &lru_stats_key);
 762 
 763                         if (!lru_stats)
 764                                 return XDP_DROP;
 765                         if (pckt.flags & (1 << 1))
 766                                 lru_stats->v1 += 1;
 767                         else
 768                                 lru_stats->v2 += 1;
 769                 }
 770                 if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
 771                         return XDP_DROP;
 772                 data_stats->v2 += 1;
 773         }
 774 out:
 775         cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
 776         if (!cval)
 777                 return XDP_DROP;
 778         if (dst->flags & (1 << 0)) {
 779                 if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
 780                         return XDP_DROP;
 781         } else {
 782                 if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
 783                         return XDP_DROP;
 784         }
 785         vip_num = vip_info->vip_num;
 786         data_stats = bpf_map_lookup_elem(&stats, &vip_num);
 787         if (!data_stats)
 788                 return XDP_DROP;
 789         data_stats->v1 += 1;
 790         data_stats->v2 += pkt_bytes;
 791 
 792         data = (void *)(long)xdp->data;
 793         data_end = (void *)(long)xdp->data_end;
 794         if (data + 4 > data_end)
 795                 return XDP_DROP;
 796         *(u32 *)data = dst->dst;
 797         return XDP_DROP;
 798 }
 799 
 800 __attribute__ ((section("xdp-test"), used))
 801 int balancer_ingress(struct xdp_md *ctx)
 802 {
 803         void *data = (void *)(long)ctx->data;
 804         void *data_end = (void *)(long)ctx->data_end;
 805         struct eth_hdr *eth = data;
 806         __u32 eth_proto;
 807         __u32 nh_off;
 808 
 809         nh_off = sizeof(struct eth_hdr);
 810         if (data + nh_off > data_end)
 811                 return XDP_DROP;
 812         eth_proto = bpf_ntohs(eth->eth_proto);
 813         if (eth_proto == ETH_P_IP)
 814                 return process_packet(data, nh_off, data_end, 0, ctx);
 815         else if (eth_proto == ETH_P_IPV6)
 816                 return process_packet(data, nh_off, data_end, 1, ctx);
 817         else
 818                 return XDP_DROP;
 819 }
 820 
 /* License string and version number, each placed in its own ELF section
  * ("license" and "version").
  */
 821 char _license[] __attribute__ ((section("license"), used)) = "GPL";
 822 int _version __attribute__ ((section("version"), used)) = 1;

/* [<][>][^][v][top][bottom][index][help] */