net/netfilter/nf_flow_table_core.c

DEFINITIONS

This source file includes the following definitions:
  1. flow_offload_fill_dir
  2. flow_offload_alloc
  3. flow_offload_fixup_tcp
  4. nf_flow_timeout_delta
  5. flow_offload_fixup_ct_timeout
  6. flow_offload_fixup_ct_state
  7. flow_offload_fixup_ct
  8. flow_offload_free
  9. flow_offload_hash
  10. flow_offload_hash_obj
  11. flow_offload_hash_cmp
  12. flow_offload_add
  13. nf_flow_has_expired
  14. flow_offload_del
  15. flow_offload_teardown
  16. flow_offload_lookup
  17. nf_flow_table_iterate
  18. nf_flow_offload_gc_step
  19. nf_flow_offload_work_gc
  20. nf_flow_nat_port_tcp
  21. nf_flow_nat_port_udp
  22. nf_flow_nat_port
  23. nf_flow_snat_port
  24. nf_flow_dnat_port
  25. nf_flow_table_init
  26. nf_flow_table_do_cleanup
  27. nf_flow_table_iterate_cleanup
  28. nf_flow_table_cleanup
  29. nf_flow_table_free

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 #include <linux/kernel.h>
   3 #include <linux/init.h>
   4 #include <linux/module.h>
   5 #include <linux/netfilter.h>
   6 #include <linux/rhashtable.h>
   7 #include <linux/netdevice.h>
   8 #include <net/ip.h>
   9 #include <net/ip6_route.h>
  10 #include <net/netfilter/nf_tables.h>
  11 #include <net/netfilter/nf_flow_table.h>
  12 #include <net/netfilter/nf_conntrack.h>
  13 #include <net/netfilter/nf_conntrack_core.h>
  14 #include <net/netfilter/nf_conntrack_l4proto.h>
  15 #include <net/netfilter/nf_conntrack_tuple.h>
  16 
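      /*
       * Internal container for a flow table entry: the public struct
       * flow_offload plus the conntrack entry it was created from and an
       * rcu_head used by flow_offload_free() to defer the actual kfree().
       */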
  17 struct flow_offload_entry {
  18         struct flow_offload     flow;
  19         struct nf_conn          *ct;
  20         struct rcu_head         rcu_head;
  21 };
  22 
  23 static DEFINE_MUTEX(flowtable_lock);
  24 static LIST_HEAD(flowtables);
  25 
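      /*
       * Populate one direction of the flow tuple from the conntrack tuple
       * and the caller-provided route: addresses, ports, L3/L4 protocol
       * numbers, the MTU of this direction's dst and the input interface
       * taken from the opposite direction's dst.
       */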
  26 static void
  27 flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
  28                       struct nf_flow_route *route,
  29                       enum flow_offload_tuple_dir dir)
  30 {
  31         struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
  32         struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
  33         struct dst_entry *other_dst = route->tuple[!dir].dst;
  34         struct dst_entry *dst = route->tuple[dir].dst;
  35 
  36         ft->dir = dir;
  37 
  38         switch (ctt->src.l3num) {
  39         case NFPROTO_IPV4:
  40                 ft->src_v4 = ctt->src.u3.in;
  41                 ft->dst_v4 = ctt->dst.u3.in;
  42                 ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
  43                 break;
  44         case NFPROTO_IPV6:
  45                 ft->src_v6 = ctt->src.u3.in6;
  46                 ft->dst_v6 = ctt->dst.u3.in6;
  47                 ft->mtu = ip6_dst_mtu_forward(dst);
  48                 break;
  49         }
  50 
  51         ft->l3proto = ctt->src.l3num;
  52         ft->l4proto = ctt->dst.protonum;
  53         ft->src_port = ctt->src.u.tcp.port;
  54         ft->dst_port = ctt->dst.u.tcp.port;
  55 
  56         ft->iifidx = other_dst->dev->ifindex;
  57         ft->dst_cache = dst;
  58 }
  59 
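      /*
       * Allocate a flow entry from a conntrack entry and its cached routes.
       * Takes a reference on the conntrack entry and on both dst entries;
       * the references taken so far are dropped again on each error path.
       * The SNAT/DNAT flags are derived from the conntrack status bits.
       */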
  60 struct flow_offload *
  61 flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
  62 {
  63         struct flow_offload_entry *entry;
  64         struct flow_offload *flow;
  65 
  66         if (unlikely(nf_ct_is_dying(ct) ||
  67             !atomic_inc_not_zero(&ct->ct_general.use)))
  68                 return NULL;
  69 
  70         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
  71         if (!entry)
  72                 goto err_ct_refcnt;
  73 
  74         flow = &entry->flow;
  75 
  76         if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
  77                 goto err_dst_cache_original;
  78 
  79         if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
  80                 goto err_dst_cache_reply;
  81 
  82         entry->ct = ct;
  83 
  84         flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
  85         flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
  86 
  87         if (ct->status & IPS_SRC_NAT)
  88                 flow->flags |= FLOW_OFFLOAD_SNAT;
  89         if (ct->status & IPS_DST_NAT)
  90                 flow->flags |= FLOW_OFFLOAD_DNAT;
  91 
  92         return flow;
  93 
  94 err_dst_cache_reply:
  95         dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
  96 err_dst_cache_original:
  97         kfree(entry);
  98 err_ct_refcnt:
  99         nf_ct_put(ct);
 100 
 101         return NULL;
 102 }
 103 EXPORT_SYMBOL_GPL(flow_offload_alloc);
 104 
 105 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 106 {
 107         tcp->state = TCP_CONNTRACK_ESTABLISHED;
 108         tcp->seen[0].td_maxwin = 0;
 109         tcp->seen[1].td_maxwin = 0;
 110 }
 111 
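      /*
       * When a flow is handed back to conntrack (on expiry or teardown), the
       * conntrack timeout is clamped down to a short "pickup" value so that
       * classic conntrack tracking can take over promptly.
       * nf_flow_timeout_delta() returns the signed number of jiffies until a
       * timeout expires (negative if it has already expired).
       */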
 112 #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
 113 #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
 114 
 115 static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 116 {
 117         return (__s32)(timeout - (u32)jiffies);
 118 }
 119 
 120 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 121 {
 122         const struct nf_conntrack_l4proto *l4proto;
 123         int l4num = nf_ct_protonum(ct);
 124         unsigned int timeout;
 125 
 126         l4proto = nf_ct_l4proto_find(l4num);
 127         if (!l4proto)
 128                 return;
 129 
 130         if (l4num == IPPROTO_TCP)
 131                 timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
 132         else if (l4num == IPPROTO_UDP)
 133                 timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
 134         else
 135                 return;
 136 
 137         if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
 138                 ct->timeout = nfct_time_stamp + timeout;
 139 }
 140 
 141 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
 142 {
 143         if (nf_ct_protonum(ct) == IPPROTO_TCP)
 144                 flow_offload_fixup_tcp(&ct->proto.tcp);
 145 }
 146 
 147 static void flow_offload_fixup_ct(struct nf_conn *ct)
 148 {
 149         flow_offload_fixup_ct_state(ct);
 150         flow_offload_fixup_ct_timeout(ct);
 151 }
 152 
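      /*
       * Release a flow entry: drop both route references, delete the
       * conntrack entry if the flow was marked dying, drop the conntrack
       * reference and free the container after an RCU grace period.
       */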
 153 void flow_offload_free(struct flow_offload *flow)
 154 {
 155         struct flow_offload_entry *e;
 156 
 157         dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
 158         dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
 159         e = container_of(flow, struct flow_offload_entry, flow);
 160         if (flow->flags & FLOW_OFFLOAD_DYING)
 161                 nf_ct_delete(e->ct, 0, 0);
 162         nf_ct_put(e->ct);
 163         kfree_rcu(e, rcu_head);
 164 }
 165 EXPORT_SYMBOL_GPL(flow_offload_free);
 166 
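      /*
       * Flow tuples are hashed and compared over the lookup key only, i.e.
       * the fields that precede 'dir' in struct flow_offload_tuple. Both
       * directions of a flow are inserted as separate rhashtable nodes, so a
       * lookup with either tuple finds the same flow_offload.
       */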
 167 static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
 168 {
 169         const struct flow_offload_tuple *tuple = data;
 170 
 171         return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
 172 }
 173 
 174 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
 175 {
 176         const struct flow_offload_tuple_rhash *tuplehash = data;
 177 
 178         return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
 179 }
 180 
 181 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
 182                                         const void *ptr)
 183 {
 184         const struct flow_offload_tuple *tuple = arg->key;
 185         const struct flow_offload_tuple_rhash *x = ptr;
 186 
 187         if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
 188                 return 1;
 189 
 190         return 0;
 191 }
 192 
 193 static const struct rhashtable_params nf_flow_offload_rhash_params = {
 194         .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
 195         .hashfn                 = flow_offload_hash,
 196         .obj_hashfn             = flow_offload_hash_obj,
 197         .obj_cmpfn              = flow_offload_hash_cmp,
 198         .automatic_shrinking    = true,
 199 };
 200 
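      /*
       * Insert both directions of a flow into the table and arm the initial
       * timeout. If the second insertion fails, the first one is rolled back
       * so the table is left unchanged.
       */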
 201 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 202 {
 203         int err;
 204 
 205         flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
 206 
 207         err = rhashtable_insert_fast(&flow_table->rhashtable,
 208                                      &flow->tuplehash[0].node,
 209                                      nf_flow_offload_rhash_params);
 210         if (err < 0)
 211                 return err;
 212 
 213         err = rhashtable_insert_fast(&flow_table->rhashtable,
 214                                      &flow->tuplehash[1].node,
 215                                      nf_flow_offload_rhash_params);
 216         if (err < 0) {
 217                 rhashtable_remove_fast(&flow_table->rhashtable,
 218                                        &flow->tuplehash[0].node,
 219                                        nf_flow_offload_rhash_params);
 220                 return err;
 221         }
 222 
 223         return 0;
 224 }
 225 EXPORT_SYMBOL_GPL(flow_offload_add);
 226 
 227 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
 228 {
 229         return nf_flow_timeout_delta(flow->timeout) <= 0;
 230 }
 231 
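      /*
       * Remove both directions of a flow from the table, clear IPS_OFFLOAD
       * on the conntrack entry and, depending on why the flow is going away,
       * fix up the conntrack state and/or timeout before freeing the flow.
       */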
 232 static void flow_offload_del(struct nf_flowtable *flow_table,
 233                              struct flow_offload *flow)
 234 {
 235         struct flow_offload_entry *e;
 236 
 237         rhashtable_remove_fast(&flow_table->rhashtable,
 238                                &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
 239                                nf_flow_offload_rhash_params);
 240         rhashtable_remove_fast(&flow_table->rhashtable,
 241                                &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 242                                nf_flow_offload_rhash_params);
 243 
 244         e = container_of(flow, struct flow_offload_entry, flow);
 245         clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
 246 
 247         if (nf_flow_has_expired(flow))
 248                 flow_offload_fixup_ct(e->ct);
 249         else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
 250                 flow_offload_fixup_ct_timeout(e->ct);
 251 
 252         flow_offload_free(flow);
 253 }
 254 
 255 void flow_offload_teardown(struct flow_offload *flow)
 256 {
 257         struct flow_offload_entry *e;
 258 
 259         flow->flags |= FLOW_OFFLOAD_TEARDOWN;
 260 
 261         e = container_of(flow, struct flow_offload_entry, flow);
 262         flow_offload_fixup_ct_state(e->ct);
 263 }
 264 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 265 
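      /*
       * Look up a flow by tuple. Entries that are flagged dying or in
       * teardown, or whose conntrack entry is dying, are treated as misses
       * so the packet falls back to the classic forwarding path.
       */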
 266 struct flow_offload_tuple_rhash *
 267 flow_offload_lookup(struct nf_flowtable *flow_table,
 268                     struct flow_offload_tuple *tuple)
 269 {
 270         struct flow_offload_tuple_rhash *tuplehash;
 271         struct flow_offload *flow;
 272         struct flow_offload_entry *e;
 273         int dir;
 274 
 275         tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
 276                                       nf_flow_offload_rhash_params);
 277         if (!tuplehash)
 278                 return NULL;
 279 
 280         dir = tuplehash->tuple.dir;
 281         flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 282         if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
 283                 return NULL;
 284 
 285         e = container_of(flow, struct flow_offload_entry, flow);
 286         if (unlikely(nf_ct_is_dying(e->ct)))
 287                 return NULL;
 288 
 289         return tuplehash;
 290 }
 291 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 292 
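      /*
       * Walk the flow table with the rhashtable walker, calling @iter once
       * per flow. Only the original-direction node is visited (reply nodes
       * are skipped) so each flow is seen exactly once; -EAGAIN from the
       * walker is not treated as an error.
       */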
 293 static int
 294 nf_flow_table_iterate(struct nf_flowtable *flow_table,
 295                       void (*iter)(struct flow_offload *flow, void *data),
 296                       void *data)
 297 {
 298         struct flow_offload_tuple_rhash *tuplehash;
 299         struct rhashtable_iter hti;
 300         struct flow_offload *flow;
 301         int err = 0;
 302 
 303         rhashtable_walk_enter(&flow_table->rhashtable, &hti);
 304         rhashtable_walk_start(&hti);
 305 
 306         while ((tuplehash = rhashtable_walk_next(&hti))) {
 307                 if (IS_ERR(tuplehash)) {
 308                         if (PTR_ERR(tuplehash) != -EAGAIN) {
 309                                 err = PTR_ERR(tuplehash);
 310                                 break;
 311                         }
 312                         continue;
 313                 }
 314                 if (tuplehash->tuple.dir)
 315                         continue;
 316 
 317                 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
 318 
 319                 iter(flow, data);
 320         }
 321         rhashtable_walk_stop(&hti);
 322         rhashtable_walk_exit(&hti);
 323 
 324         return err;
 325 }
 326 
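      /*
       * Garbage collection: flows that have expired, whose conntrack entry
       * is dying, or that are flagged dying/teardown are removed from the
       * table. The work item re-queues itself roughly once per second.
       */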
 327 static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
 328 {
 329         struct nf_flowtable *flow_table = data;
 330         struct flow_offload_entry *e;
 331 
 332         e = container_of(flow, struct flow_offload_entry, flow);
 333         if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
 334             (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
 335                 flow_offload_del(flow_table, flow);
 336 }
 337 
 338 static void nf_flow_offload_work_gc(struct work_struct *work)
 339 {
 340         struct nf_flowtable *flow_table;
 341 
 342         flow_table = container_of(work, struct nf_flowtable, gc_work.work);
 343         nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
 344         queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 345 }
 346 
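      /*
       * Checksum fixups for port rewrites. The port fields themselves are
       * written by nf_flow_snat_port()/nf_flow_dnat_port() below; these
       * helpers only adjust the L4 checksum. A zero UDP checksum means
       * "no checksum" and is left untouched unless the packet uses
       * CHECKSUM_PARTIAL.
       */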
 347 static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
 348                                 __be16 port, __be16 new_port)
 349 {
 350         struct tcphdr *tcph;
 351 
 352         if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
 353             skb_try_make_writable(skb, thoff + sizeof(*tcph)))
 354                 return -1;
 355 
 356         tcph = (void *)(skb_network_header(skb) + thoff);
 357         inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
 358 
 359         return 0;
 360 }
 361 
 362 static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
 363                                 __be16 port, __be16 new_port)
 364 {
 365         struct udphdr *udph;
 366 
 367         if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
 368             skb_try_make_writable(skb, thoff + sizeof(*udph)))
 369                 return -1;
 370 
 371         udph = (void *)(skb_network_header(skb) + thoff);
 372         if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 373                 inet_proto_csum_replace2(&udph->check, skb, port,
 374                                          new_port, true);
 375                 if (!udph->check)
 376                         udph->check = CSUM_MANGLED_0;
 377         }
 378 
 379         return 0;
 380 }
 381 
 382 static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
 383                             u8 protocol, __be16 port, __be16 new_port)
 384 {
 385         switch (protocol) {
 386         case IPPROTO_TCP:
 387                 if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
 388                         return NF_DROP;
 389                 break;
 390         case IPPROTO_UDP:
 391                 if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
 392                         return NF_DROP;
 393                 break;
 394         }
 395 
 396         return 0;
 397 }
 398 
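      /*
       * Apply source port NAT: in the original direction the source port
       * becomes the reply tuple's destination port, in the reply direction
       * the destination port becomes the original tuple's source port.
       * nf_flow_dnat_port() below does the symmetric rewrite for
       * destination NAT.
       */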
 399 int nf_flow_snat_port(const struct flow_offload *flow,
 400                       struct sk_buff *skb, unsigned int thoff,
 401                       u8 protocol, enum flow_offload_tuple_dir dir)
 402 {
 403         struct flow_ports *hdr;
 404         __be16 port, new_port;
 405 
 406         if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
 407             skb_try_make_writable(skb, thoff + sizeof(*hdr)))
 408                 return -1;
 409 
 410         hdr = (void *)(skb_network_header(skb) + thoff);
 411 
 412         switch (dir) {
 413         case FLOW_OFFLOAD_DIR_ORIGINAL:
 414                 port = hdr->source;
 415                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
 416                 hdr->source = new_port;
 417                 break;
 418         case FLOW_OFFLOAD_DIR_REPLY:
 419                 port = hdr->dest;
 420                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
 421                 hdr->dest = new_port;
 422                 break;
 423         default:
 424                 return -1;
 425         }
 426 
 427         return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 428 }
 429 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
 430 
 431 int nf_flow_dnat_port(const struct flow_offload *flow,
 432                       struct sk_buff *skb, unsigned int thoff,
 433                       u8 protocol, enum flow_offload_tuple_dir dir)
 434 {
 435         struct flow_ports *hdr;
 436         __be16 port, new_port;
 437 
 438         if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
 439             skb_try_make_writable(skb, thoff + sizeof(*hdr)))
 440                 return -1;
 441 
 442         hdr = (void *)(skb_network_header(skb) + thoff);
 443 
 444         switch (dir) {
 445         case FLOW_OFFLOAD_DIR_ORIGINAL:
 446                 port = hdr->dest;
 447                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
 448                 hdr->dest = new_port;
 449                 break;
 450         case FLOW_OFFLOAD_DIR_REPLY:
 451                 port = hdr->source;
 452                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
 453                 hdr->source = new_port;
 454                 break;
 455         default:
 456                 return -1;
 457         }
 458 
 459         return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 460 }
 461 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
 462 
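      /*
       * Initialise a flow table: set up the rhashtable, start the periodic
       * GC work and add the table to the global list used by
       * nf_flow_table_cleanup().
       */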
 463 int nf_flow_table_init(struct nf_flowtable *flowtable)
 464 {
 465         int err;
 466 
 467         INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
 468 
 469         err = rhashtable_init(&flowtable->rhashtable,
 470                               &nf_flow_offload_rhash_params);
 471         if (err < 0)
 472                 return err;
 473 
 474         queue_delayed_work(system_power_efficient_wq,
 475                            &flowtable->gc_work, HZ);
 476 
 477         mutex_lock(&flowtable_lock);
 478         list_add(&flowtable->list, &flowtables);
 479         mutex_unlock(&flowtable_lock);
 480 
 481         return 0;
 482 }
 483 EXPORT_SYMBOL_GPL(nf_flow_table_init);
 484 
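      /*
       * Per-flow cleanup callback. With no device given, every flow is torn
       * down; otherwise only flows in the device's netns that use the device
       * as input interface in either direction are marked dead.
       */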
 485 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 486 {
 487         struct net_device *dev = data;
 488         struct flow_offload_entry *e;
 489 
 490         e = container_of(flow, struct flow_offload_entry, flow);
 491 
 492         if (!dev) {
 493                 flow_offload_teardown(flow);
 494                 return;
 495         }
 496         if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
 497             (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
 498              flow->tuplehash[1].tuple.iifidx == dev->ifindex))
 499                 flow_offload_dead(flow);
 500 }
 501 
 502 static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
 503                                           struct net_device *dev)
 504 {
 505         nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
 506         flush_delayed_work(&flowtable->gc_work);
 507 }
 508 
 509 void nf_flow_table_cleanup(struct net_device *dev)
 510 {
 511         struct nf_flowtable *flowtable;
 512 
 513         mutex_lock(&flowtable_lock);
 514         list_for_each_entry(flowtable, &flowtables, list)
 515                 nf_flow_table_iterate_cleanup(flowtable, dev);
 516         mutex_unlock(&flowtable_lock);
 517 }
 518 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 519 
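      /*
       * Tear down a flow table: unlink it from the global list, stop the GC
       * work, tear down all remaining flows, run one final GC pass to
       * release them and destroy the rhashtable.
       */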
 520 void nf_flow_table_free(struct nf_flowtable *flow_table)
 521 {
 522         mutex_lock(&flowtable_lock);
 523         list_del(&flow_table->list);
 524         mutex_unlock(&flowtable_lock);
 525         cancel_delayed_work_sync(&flow_table->gc_work);
 526         nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
 527         nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
 528         rhashtable_destroy(&flow_table->rhashtable);
 529 }
 530 EXPORT_SYMBOL_GPL(nf_flow_table_free);
 531 
 532 MODULE_LICENSE("GPL");
 533 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
