root/net/netfilter/nft_set_rbtree.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. nft_rbtree_interval_end
  2. nft_rbtree_interval_start
  3. nft_rbtree_equal
  4. __nft_rbtree_lookup
  5. nft_rbtree_lookup
  6. __nft_rbtree_get
  7. nft_rbtree_get
  8. __nft_rbtree_insert
  9. nft_rbtree_insert
  10. nft_rbtree_remove
  11. nft_rbtree_activate
  12. nft_rbtree_flush
  13. nft_rbtree_deactivate
  14. nft_rbtree_walk
  15. nft_rbtree_gc
  16. nft_rbtree_privsize
  17. nft_rbtree_init
  18. nft_rbtree_destroy
  19. nft_rbtree_estimate

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
   4  *
   5  * Development of this code funded by Astaro AG (http://www.astaro.com/)
   6  */
   7 
   8 #include <linux/kernel.h>
   9 #include <linux/init.h>
  10 #include <linux/module.h>
  11 #include <linux/list.h>
  12 #include <linux/rbtree.h>
  13 #include <linux/netlink.h>
  14 #include <linux/netfilter.h>
  15 #include <linux/netfilter/nf_tables.h>
  16 #include <net/netfilter/nf_tables_core.h>
  17 
  18 struct nft_rbtree {
  19         struct rb_root          root;
  20         rwlock_t                lock;
  21         seqcount_t              count;
  22         struct delayed_work     gc_work;
  23 };
  24 
  25 struct nft_rbtree_elem {
  26         struct rb_node          node;
  27         struct nft_set_ext      ext;
  28 };
  29 
  30 static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
  31 {
  32         return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
  33                (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
  34 }
  35 
  36 static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
  37 {
  38         return !nft_rbtree_interval_end(rbe);
  39 }
  40 
  41 static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
  42                              const struct nft_rbtree_elem *interval)
  43 {
  44         return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
  45 }
  46 
  47 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
  48                                 const u32 *key, const struct nft_set_ext **ext,
  49                                 unsigned int seq)
  50 {
  51         struct nft_rbtree *priv = nft_set_priv(set);
  52         const struct nft_rbtree_elem *rbe, *interval = NULL;
  53         u8 genmask = nft_genmask_cur(net);
  54         const struct rb_node *parent;
  55         const void *this;
  56         int d;
  57 
  58         parent = rcu_dereference_raw(priv->root.rb_node);
  59         while (parent != NULL) {
  60                 if (read_seqcount_retry(&priv->count, seq))
  61                         return false;
  62 
  63                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
  64 
  65                 this = nft_set_ext_key(&rbe->ext);
  66                 d = memcmp(this, key, set->klen);
  67                 if (d < 0) {
  68                         parent = rcu_dereference_raw(parent->rb_left);
  69                         if (interval &&
  70                             nft_rbtree_equal(set, this, interval) &&
  71                             nft_rbtree_interval_end(rbe) &&
  72                             nft_rbtree_interval_start(interval))
  73                                 continue;
  74                         interval = rbe;
  75                 } else if (d > 0)
  76                         parent = rcu_dereference_raw(parent->rb_right);
  77                 else {
  78                         if (!nft_set_elem_active(&rbe->ext, genmask)) {
  79                                 parent = rcu_dereference_raw(parent->rb_left);
  80                                 continue;
  81                         }
  82 
  83                         if (nft_set_elem_expired(&rbe->ext))
  84                                 return false;
  85 
  86                         if (nft_rbtree_interval_end(rbe)) {
  87                                 if (nft_set_is_anonymous(set))
  88                                         return false;
  89                                 parent = rcu_dereference_raw(parent->rb_left);
  90                                 interval = NULL;
  91                                 continue;
  92                         }
  93 
  94                         *ext = &rbe->ext;
  95                         return true;
  96                 }
  97         }
  98 
  99         if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
 100             nft_set_elem_active(&interval->ext, genmask) &&
 101             !nft_set_elem_expired(&interval->ext) &&
 102             nft_rbtree_interval_start(interval)) {
 103                 *ext = &interval->ext;
 104                 return true;
 105         }
 106 
 107         return false;
 108 }
 109 
 110 static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
 111                               const u32 *key, const struct nft_set_ext **ext)
 112 {
 113         struct nft_rbtree *priv = nft_set_priv(set);
 114         unsigned int seq = read_seqcount_begin(&priv->count);
 115         bool ret;
 116 
 117         ret = __nft_rbtree_lookup(net, set, key, ext, seq);
 118         if (ret || !read_seqcount_retry(&priv->count, seq))
 119                 return ret;
 120 
 121         read_lock_bh(&priv->lock);
 122         seq = read_seqcount_begin(&priv->count);
 123         ret = __nft_rbtree_lookup(net, set, key, ext, seq);
 124         read_unlock_bh(&priv->lock);
 125 
 126         return ret;
 127 }
 128 
 129 static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
 130                              const u32 *key, struct nft_rbtree_elem **elem,
 131                              unsigned int seq, unsigned int flags, u8 genmask)
 132 {
 133         struct nft_rbtree_elem *rbe, *interval = NULL;
 134         struct nft_rbtree *priv = nft_set_priv(set);
 135         const struct rb_node *parent;
 136         const void *this;
 137         int d;
 138 
 139         parent = rcu_dereference_raw(priv->root.rb_node);
 140         while (parent != NULL) {
 141                 if (read_seqcount_retry(&priv->count, seq))
 142                         return false;
 143 
 144                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 145 
 146                 this = nft_set_ext_key(&rbe->ext);
 147                 d = memcmp(this, key, set->klen);
 148                 if (d < 0) {
 149                         parent = rcu_dereference_raw(parent->rb_left);
 150                         if (!(flags & NFT_SET_ELEM_INTERVAL_END))
 151                                 interval = rbe;
 152                 } else if (d > 0) {
 153                         parent = rcu_dereference_raw(parent->rb_right);
 154                         if (flags & NFT_SET_ELEM_INTERVAL_END)
 155                                 interval = rbe;
 156                 } else {
 157                         if (!nft_set_elem_active(&rbe->ext, genmask)) {
 158                                 parent = rcu_dereference_raw(parent->rb_left);
 159                                 continue;
 160                         }
 161 
 162                         if (nft_set_elem_expired(&rbe->ext))
 163                                 return false;
 164 
 165                         if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
 166                             (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
 167                             (flags & NFT_SET_ELEM_INTERVAL_END)) {
 168                                 *elem = rbe;
 169                                 return true;
 170                         }
 171 
 172                         if (nft_rbtree_interval_end(rbe))
 173                                 interval = NULL;
 174 
 175                         parent = rcu_dereference_raw(parent->rb_left);
 176                 }
 177         }
 178 
 179         if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
 180             nft_set_elem_active(&interval->ext, genmask) &&
 181             !nft_set_elem_expired(&interval->ext) &&
 182             ((!nft_rbtree_interval_end(interval) &&
 183               !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
 184              (nft_rbtree_interval_end(interval) &&
 185               (flags & NFT_SET_ELEM_INTERVAL_END)))) {
 186                 *elem = interval;
 187                 return true;
 188         }
 189 
 190         return false;
 191 }
 192 
 193 static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
 194                             const struct nft_set_elem *elem, unsigned int flags)
 195 {
 196         struct nft_rbtree *priv = nft_set_priv(set);
 197         unsigned int seq = read_seqcount_begin(&priv->count);
 198         struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
 199         const u32 *key = (const u32 *)&elem->key.val;
 200         u8 genmask = nft_genmask_cur(net);
 201         bool ret;
 202 
 203         ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
 204         if (ret || !read_seqcount_retry(&priv->count, seq))
 205                 return rbe;
 206 
 207         read_lock_bh(&priv->lock);
 208         seq = read_seqcount_begin(&priv->count);
 209         ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
 210         if (!ret)
 211                 rbe = ERR_PTR(-ENOENT);
 212         read_unlock_bh(&priv->lock);
 213 
 214         return rbe;
 215 }
 216 
 217 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 218                                struct nft_rbtree_elem *new,
 219                                struct nft_set_ext **ext)
 220 {
 221         struct nft_rbtree *priv = nft_set_priv(set);
 222         u8 genmask = nft_genmask_next(net);
 223         struct nft_rbtree_elem *rbe;
 224         struct rb_node *parent, **p;
 225         int d;
 226 
 227         parent = NULL;
 228         p = &priv->root.rb_node;
 229         while (*p != NULL) {
 230                 parent = *p;
 231                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 232                 d = memcmp(nft_set_ext_key(&rbe->ext),
 233                            nft_set_ext_key(&new->ext),
 234                            set->klen);
 235                 if (d < 0)
 236                         p = &parent->rb_left;
 237                 else if (d > 0)
 238                         p = &parent->rb_right;
 239                 else {
 240                         if (nft_rbtree_interval_end(rbe) &&
 241                             nft_rbtree_interval_start(new)) {
 242                                 p = &parent->rb_left;
 243                         } else if (nft_rbtree_interval_start(rbe) &&
 244                                    nft_rbtree_interval_end(new)) {
 245                                 p = &parent->rb_right;
 246                         } else if (nft_set_elem_active(&rbe->ext, genmask)) {
 247                                 *ext = &rbe->ext;
 248                                 return -EEXIST;
 249                         } else {
 250                                 p = &parent->rb_left;
 251                         }
 252                 }
 253         }
 254         rb_link_node_rcu(&new->node, parent, p);
 255         rb_insert_color(&new->node, &priv->root);
 256         return 0;
 257 }
 258 
 259 static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 260                              const struct nft_set_elem *elem,
 261                              struct nft_set_ext **ext)
 262 {
 263         struct nft_rbtree *priv = nft_set_priv(set);
 264         struct nft_rbtree_elem *rbe = elem->priv;
 265         int err;
 266 
 267         write_lock_bh(&priv->lock);
 268         write_seqcount_begin(&priv->count);
 269         err = __nft_rbtree_insert(net, set, rbe, ext);
 270         write_seqcount_end(&priv->count);
 271         write_unlock_bh(&priv->lock);
 272 
 273         return err;
 274 }
 275 
 276 static void nft_rbtree_remove(const struct net *net,
 277                               const struct nft_set *set,
 278                               const struct nft_set_elem *elem)
 279 {
 280         struct nft_rbtree *priv = nft_set_priv(set);
 281         struct nft_rbtree_elem *rbe = elem->priv;
 282 
 283         write_lock_bh(&priv->lock);
 284         write_seqcount_begin(&priv->count);
 285         rb_erase(&rbe->node, &priv->root);
 286         write_seqcount_end(&priv->count);
 287         write_unlock_bh(&priv->lock);
 288 }
 289 
 290 static void nft_rbtree_activate(const struct net *net,
 291                                 const struct nft_set *set,
 292                                 const struct nft_set_elem *elem)
 293 {
 294         struct nft_rbtree_elem *rbe = elem->priv;
 295 
 296         nft_set_elem_change_active(net, set, &rbe->ext);
 297         nft_set_elem_clear_busy(&rbe->ext);
 298 }
 299 
 300 static bool nft_rbtree_flush(const struct net *net,
 301                              const struct nft_set *set, void *priv)
 302 {
 303         struct nft_rbtree_elem *rbe = priv;
 304 
 305         if (!nft_set_elem_mark_busy(&rbe->ext) ||
 306             !nft_is_active(net, &rbe->ext)) {
 307                 nft_set_elem_change_active(net, set, &rbe->ext);
 308                 return true;
 309         }
 310         return false;
 311 }
 312 
 313 static void *nft_rbtree_deactivate(const struct net *net,
 314                                    const struct nft_set *set,
 315                                    const struct nft_set_elem *elem)
 316 {
 317         const struct nft_rbtree *priv = nft_set_priv(set);
 318         const struct rb_node *parent = priv->root.rb_node;
 319         struct nft_rbtree_elem *rbe, *this = elem->priv;
 320         u8 genmask = nft_genmask_next(net);
 321         int d;
 322 
 323         while (parent != NULL) {
 324                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 325 
 326                 d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
 327                                            set->klen);
 328                 if (d < 0)
 329                         parent = parent->rb_left;
 330                 else if (d > 0)
 331                         parent = parent->rb_right;
 332                 else {
 333                         if (nft_rbtree_interval_end(rbe) &&
 334                             nft_rbtree_interval_start(this)) {
 335                                 parent = parent->rb_left;
 336                                 continue;
 337                         } else if (nft_rbtree_interval_start(rbe) &&
 338                                    nft_rbtree_interval_end(this)) {
 339                                 parent = parent->rb_right;
 340                                 continue;
 341                         } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
 342                                 parent = parent->rb_left;
 343                                 continue;
 344                         }
 345                         nft_rbtree_flush(net, set, rbe);
 346                         return rbe;
 347                 }
 348         }
 349         return NULL;
 350 }
 351 
 352 static void nft_rbtree_walk(const struct nft_ctx *ctx,
 353                             struct nft_set *set,
 354                             struct nft_set_iter *iter)
 355 {
 356         struct nft_rbtree *priv = nft_set_priv(set);
 357         struct nft_rbtree_elem *rbe;
 358         struct nft_set_elem elem;
 359         struct rb_node *node;
 360 
 361         read_lock_bh(&priv->lock);
 362         for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
 363                 rbe = rb_entry(node, struct nft_rbtree_elem, node);
 364 
 365                 if (iter->count < iter->skip)
 366                         goto cont;
 367                 if (nft_set_elem_expired(&rbe->ext))
 368                         goto cont;
 369                 if (!nft_set_elem_active(&rbe->ext, iter->genmask))
 370                         goto cont;
 371 
 372                 elem.priv = rbe;
 373 
 374                 iter->err = iter->fn(ctx, set, iter, &elem);
 375                 if (iter->err < 0) {
 376                         read_unlock_bh(&priv->lock);
 377                         return;
 378                 }
 379 cont:
 380                 iter->count++;
 381         }
 382         read_unlock_bh(&priv->lock);
 383 }
 384 
 385 static void nft_rbtree_gc(struct work_struct *work)
 386 {
 387         struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
 388         struct nft_set_gc_batch *gcb = NULL;
 389         struct nft_rbtree *priv;
 390         struct rb_node *node;
 391         struct nft_set *set;
 392 
 393         priv = container_of(work, struct nft_rbtree, gc_work.work);
 394         set  = nft_set_container_of(priv);
 395 
 396         write_lock_bh(&priv->lock);
 397         write_seqcount_begin(&priv->count);
 398         for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
 399                 rbe = rb_entry(node, struct nft_rbtree_elem, node);
 400 
 401                 if (nft_rbtree_interval_end(rbe)) {
 402                         rbe_end = rbe;
 403                         continue;
 404                 }
 405                 if (!nft_set_elem_expired(&rbe->ext))
 406                         continue;
 407                 if (nft_set_elem_mark_busy(&rbe->ext))
 408                         continue;
 409 
 410                 if (rbe_prev) {
 411                         rb_erase(&rbe_prev->node, &priv->root);
 412                         rbe_prev = NULL;
 413                 }
 414                 gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
 415                 if (!gcb)
 416                         break;
 417 
 418                 atomic_dec(&set->nelems);
 419                 nft_set_gc_batch_add(gcb, rbe);
 420                 rbe_prev = rbe;
 421 
 422                 if (rbe_end) {
 423                         atomic_dec(&set->nelems);
 424                         nft_set_gc_batch_add(gcb, rbe_end);
 425                         rb_erase(&rbe_end->node, &priv->root);
 426                         rbe_end = NULL;
 427                 }
 428                 node = rb_next(node);
 429                 if (!node)
 430                         break;
 431         }
 432         if (rbe_prev)
 433                 rb_erase(&rbe_prev->node, &priv->root);
 434         write_seqcount_end(&priv->count);
 435         write_unlock_bh(&priv->lock);
 436 
 437         nft_set_gc_batch_complete(gcb);
 438 
 439         queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 440                            nft_set_gc_interval(set));
 441 }
 442 
 443 static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
 444                                const struct nft_set_desc *desc)
 445 {
 446         return sizeof(struct nft_rbtree);
 447 }
 448 
 449 static int nft_rbtree_init(const struct nft_set *set,
 450                            const struct nft_set_desc *desc,
 451                            const struct nlattr * const nla[])
 452 {
 453         struct nft_rbtree *priv = nft_set_priv(set);
 454 
 455         rwlock_init(&priv->lock);
 456         seqcount_init(&priv->count);
 457         priv->root = RB_ROOT;
 458 
 459         INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
 460         if (set->flags & NFT_SET_TIMEOUT)
 461                 queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 462                                    nft_set_gc_interval(set));
 463 
 464         return 0;
 465 }
 466 
 467 static void nft_rbtree_destroy(const struct nft_set *set)
 468 {
 469         struct nft_rbtree *priv = nft_set_priv(set);
 470         struct nft_rbtree_elem *rbe;
 471         struct rb_node *node;
 472 
 473         cancel_delayed_work_sync(&priv->gc_work);
 474         rcu_barrier();
 475         while ((node = priv->root.rb_node) != NULL) {
 476                 rb_erase(node, &priv->root);
 477                 rbe = rb_entry(node, struct nft_rbtree_elem, node);
 478                 nft_set_elem_destroy(set, rbe, true);
 479         }
 480 }
 481 
 482 static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 483                                 struct nft_set_estimate *est)
 484 {
 485         if (desc->size)
 486                 est->size = sizeof(struct nft_rbtree) +
 487                             desc->size * sizeof(struct nft_rbtree_elem);
 488         else
 489                 est->size = ~0;
 490 
 491         est->lookup = NFT_SET_CLASS_O_LOG_N;
 492         est->space  = NFT_SET_CLASS_O_N;
 493 
 494         return true;
 495 }
 496 
 497 struct nft_set_type nft_set_rbtree_type __read_mostly = {
 498         .owner          = THIS_MODULE,
 499         .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
 500         .ops            = {
 501                 .privsize       = nft_rbtree_privsize,
 502                 .elemsize       = offsetof(struct nft_rbtree_elem, ext),
 503                 .estimate       = nft_rbtree_estimate,
 504                 .init           = nft_rbtree_init,
 505                 .destroy        = nft_rbtree_destroy,
 506                 .insert         = nft_rbtree_insert,
 507                 .remove         = nft_rbtree_remove,
 508                 .deactivate     = nft_rbtree_deactivate,
 509                 .flush          = nft_rbtree_flush,
 510                 .activate       = nft_rbtree_activate,
 511                 .lookup         = nft_rbtree_lookup,
 512                 .walk           = nft_rbtree_walk,
 513                 .get            = nft_rbtree_get,
 514         },
 515 };

/* [<][>][^][v][top][bottom][index][help] */