root/net/netfilter/nf_conntrack_ecache.c

DEFINITIONS

This source file includes the following definitions.
  1. ecache_work_evict_list
  2. ecache_work
  3. nf_conntrack_eventmask_report
  4. nf_ct_deliver_cached_events
  5. nf_ct_expect_event_report
  6. nf_conntrack_register_notifier
  7. nf_conntrack_unregister_notifier
  8. nf_ct_expect_register_notifier
  9. nf_ct_expect_unregister_notifier
  10. nf_conntrack_ecache_pernet_init
  11. nf_conntrack_ecache_pernet_fini
  12. nf_conntrack_ecache_init
  13. nf_conntrack_ecache_fini

// SPDX-License-Identifier: GPL-2.0-only
/* Event cache for netfilter. */

/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

#define ECACHE_RETRY_WAIT (HZ/10)

enum retry_state {
        STATE_CONGESTED,
        STATE_RESTART,
        STATE_DONE,
};

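/* Walk a per-cpu dying list and re-attempt delivery of IPCT_DESTROY
 * events that previously failed (state NFCT_ECACHE_DESTROY_FAIL).
 * Entries that are resent are marked NFCT_ECACHE_DESTROY_SENT and
 * their references dropped after the lock is released.  At most
 * ARRAY_SIZE(refs) entries are handled per invocation: STATE_RESTART
 * asks the caller to come back, STATE_CONGESTED signals that an event
 * could not be delivered.
 */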
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
        struct nf_conn *refs[16];
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        unsigned int evicted = 0;
        enum retry_state ret = STATE_DONE;

        spin_lock(&pcpu->lock);

        hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
                struct nf_conntrack_ecache *e;

                if (!nf_ct_is_confirmed(ct))
                        continue;

                e = nf_ct_ecache_find(ct);
                if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
                        continue;

                if (nf_conntrack_event(IPCT_DESTROY, ct)) {
                        ret = STATE_CONGESTED;
                        break;
                }

                e->state = NFCT_ECACHE_DESTROY_SENT;
                refs[evicted] = ct;

                if (++evicted >= ARRAY_SIZE(refs)) {
                        ret = STATE_RESTART;
                        break;
                }
        }

        spin_unlock(&pcpu->lock);

        /* can't _put while holding lock */
        while (evicted)
                nf_ct_put(refs[--evicted]);

        return ret;
}

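/* Delayed work handler: drain the dying lists of all possible CPUs.
 * Reschedules itself immediately on STATE_RESTART (entries left over)
 * and after ECACHE_RETRY_WAIT on STATE_CONGESTED (delivery failed);
 * with STATE_DONE on every list, the work is not rearmed.
 */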
static void ecache_work(struct work_struct *work)
{
        struct netns_ct *ctnet =
                container_of(work, struct netns_ct, ecache_dwork.work);
        int cpu, delay = -1;
        struct ct_pcpu *pcpu;

        local_bh_disable();

        for_each_possible_cpu(cpu) {
                enum retry_state ret;

                pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);

                ret = ecache_work_evict_list(pcpu);

                switch (ret) {
                case STATE_CONGESTED:
                        delay = ECACHE_RETRY_WAIT;
                        goto out;
                case STATE_RESTART:
                        delay = 0;
                        break;
                case STATE_DONE:
                        break;
                }
        }

 out:
        local_bh_enable();

        ctnet->ecache_dwork_pending = delay > 0;
        if (delay >= 0)
                schedule_delayed_work(&ctnet->ecache_dwork, delay);
}

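/* Deliver the events in @eventmask, plus any previously missed events,
 * through the registered notifier.  If delivery of a destroy event
 * fails, the conntrack is flagged NFCT_ECACHE_DESTROY_FAIL so the
 * ecache worker can retransmit it later; other undelivered events are
 * accumulated in e->missed for a later retry.
 */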
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
                                  u32 portid, int report)
{
        int ret = 0;
        struct net *net = nf_ct_net(ct);
        struct nf_ct_event_notifier *notify;
        struct nf_conntrack_ecache *e;

        rcu_read_lock();
        notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
        if (!notify)
                goto out_unlock;

        e = nf_ct_ecache_find(ct);
        if (!e)
                goto out_unlock;

        if (nf_ct_is_confirmed(ct)) {
                struct nf_ct_event item = {
                        .ct     = ct,
                        .portid = e->portid ? e->portid : portid,
                        .report = report
                };
                /* Is this a resend of a destroy event? If so, skip missed. */
                unsigned long missed = e->portid ? 0 : e->missed;

                if (!((eventmask | missed) & e->ctmask))
                        goto out_unlock;

                ret = notify->fcn(eventmask | missed, &item);
                if (unlikely(ret < 0 || missed)) {
                        spin_lock_bh(&ct->lock);
                        if (ret < 0) {
                                /* This is a destroy event that was
                                 * triggered by a process; store the
                                 * PORTID so it can be included in the
                                 * retransmission.
                                 */
                                if (eventmask & (1 << IPCT_DESTROY)) {
                                        if (e->portid == 0 && portid != 0)
                                                e->portid = portid;
                                        e->state = NFCT_ECACHE_DESTROY_FAIL;
                                } else {
                                        e->missed |= eventmask;
                                }
                        } else {
                                e->missed &= ~missed;
                        }
                        spin_unlock_bh(&ct->lock);
                }
        }
out_unlock:
        rcu_read_unlock();
        return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);

/* Deliver cached events and clear the cache entry; must be called
 * with softirqs disabled.
 */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
        struct net *net = nf_ct_net(ct);
        unsigned long events, missed;
        struct nf_ct_event_notifier *notify;
        struct nf_conntrack_ecache *e;
        struct nf_ct_event item;
        int ret;

        rcu_read_lock();
        notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
        if (notify == NULL)
                goto out_unlock;

        e = nf_ct_ecache_find(ct);
        if (e == NULL)
                goto out_unlock;

        events = xchg(&e->cache, 0);

        if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
                goto out_unlock;

        /* We make a copy of the missed event cache without taking
         * the lock, so we may send missed events twice. However,
         * this does no harm and happens very rarely.
         */
        missed = e->missed;

        if (!((events | missed) & e->ctmask))
                goto out_unlock;

        item.ct = ct;
        item.portid = 0;
        item.report = 0;

        ret = notify->fcn(events | missed, &item);

        if (likely(ret == 0 && !missed))
                goto out_unlock;

        spin_lock_bh(&ct->lock);
        if (ret < 0)
                e->missed |= events;
        else
                e->missed &= ~missed;
        spin_unlock_bh(&ct->lock);

out_unlock:
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

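/* Deliver an expectation event through the registered notifier,
 * provided the master conntrack carries an event cache extension and
 * the event bit is set in its expmask.  Delivery failures are not
 * retried here.
 */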
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                               struct nf_conntrack_expect *exp,
                               u32 portid, int report)
{
        struct net *net = nf_ct_exp_net(exp);
        struct nf_exp_event_notifier *notify;
        struct nf_conntrack_ecache *e;

        rcu_read_lock();
        notify = rcu_dereference(net->ct.nf_expect_event_cb);
        if (!notify)
                goto out_unlock;

        e = nf_ct_ecache_find(exp->master);
        if (!e)
                goto out_unlock;

        if (e->expmask & (1 << event)) {
                struct nf_exp_event item = {
                        .exp    = exp,
                        .portid = portid,
                        .report = report
                };
                notify->fcn(1 << event, &item);
        }
out_unlock:
        rcu_read_unlock();
}

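/* Notifier (un)registration, serialized by nf_ct_ecache_mutex.  Only
 * one conntrack and one expectation notifier may be registered per
 * netns at a time; a second registration fails with -EBUSY.
 */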
int nf_conntrack_register_notifier(struct net *net,
                                   struct nf_ct_event_notifier *new)
{
        int ret;
        struct nf_ct_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        if (notify != NULL) {
                ret = -EBUSY;
                goto out_unlock;
        }
        rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
        ret = 0;

out_unlock:
        mutex_unlock(&nf_ct_ecache_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net,
                                      struct nf_ct_event_notifier *new)
{
        struct nf_ct_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        BUG_ON(notify != new);
        RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
        /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);

int nf_ct_expect_register_notifier(struct net *net,
                                   struct nf_exp_event_notifier *new)
{
        int ret;
        struct nf_exp_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        if (notify != NULL) {
                ret = -EBUSY;
                goto out_unlock;
        }
        rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
        ret = 0;

out_unlock:
        mutex_unlock(&nf_ct_ecache_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);

void nf_ct_expect_unregister_notifier(struct net *net,
                                      struct nf_exp_event_notifier *new)
{
        struct nf_exp_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        BUG_ON(notify != new);
        RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
        /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);

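/* The event cache is attached to conntracks as the NF_CT_EXT_ECACHE
 * extension; events default to enabled (NF_CT_EVENTS_DEFAULT).
 */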
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

static const struct nf_ct_ext_type event_extend = {
        .len    = sizeof(struct nf_conntrack_ecache),
        .align  = __alignof__(struct nf_conntrack_ecache),
        .id     = NF_CT_EXT_ECACHE,
};

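/* Per-netns setup: seed the events sysctl from the module default and
 * initialize the delayed work that flushes the dying lists.
 */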
void nf_conntrack_ecache_pernet_init(struct net *net)
{
        net->ct.sysctl_events = nf_ct_events;
        INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
        cancel_delayed_work_sync(&net->ct.ecache_dwork);
}

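/* Module init: register the event cache extension.  ctmask and missed
 * are 16 bits wide, hence the build-time check that all event bits fit
 * into a u16.
 */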
int nf_conntrack_ecache_init(void)
{
        int ret = nf_ct_extend_register(&event_extend);
        if (ret < 0)
                pr_err("Unable to register event extension\n");

        BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */

        return ret;
}

void nf_conntrack_ecache_fini(void)
{
        nf_ct_extend_unregister(&event_extend);
}
