net/sched/sch_choke.c


DEFINITIONS

This source file includes the following definitions:
  1. choke_len
  2. use_ecn
  3. use_harddrop
  4. choke_zap_head_holes
  5. choke_zap_tail_holes
  6. choke_drop_by_idx
  7. choke_set_classid
  8. choke_match_flow
  9. choke_peek_random
  10. choke_match_random
  11. choke_enqueue
  12. choke_dequeue
  13. choke_reset
  14. choke_free
  15. choke_change
  16. choke_init
  17. choke_dump
  18. choke_dump_stats
  19. choke_destroy
  20. choke_peek_head
  21. choke_module_init
  22. choke_module_exit

// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no flow state. The difference from RED is an additional step
   during the enqueuing process: if the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue at random. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004.

 */
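
/* Example configuration (illustrative values only), with option names
 * as documented in tc-choke(8):
 *
 *   tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit \
 *           min 100 max 300 avpkt 1000 burst 200 probability 0.02 ecn
 *
 * The kernel only sees the resulting tc_red_qopt, STAB and max_P netlink
 * attributes, which are parsed in choke_change() below.
 */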

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

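/*
 * Queue layout: tab[] is a circular buffer of sk_buff pointers whose size
 * (tab_mask + 1) is a power of two, so indices wrap with a cheap AND.
 * Randomly dropped packets leave NULL "holes" that are skipped or reclaimed
 * by choke_zap_head_holes()/choke_zap_tail_holes().  Worked example: with
 * tab_mask = 7, head = 6 and tail = 2, choke_len() returns (2 - 6) & 7 = 4,
 * i.e. slots 6, 7, 0 and 1 are in use (holes included).
 */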
struct choke_sched_data {
/* Parameters */
        u32              limit;
        unsigned char    flags;

        struct red_parms parms;

/* Variables */
        struct red_vars  vars;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
                u32     forced_drop;    /* Forced drops, qavg > max_thresh */
                u32     forced_mark;    /* Forced marks, qavg > max_thresh */
                u32     pdrop;          /* Drops due to queue limits */
                u32     other;          /* Drops due to drop() calls */
                u32     matched;        /* Drops due to flow match */
        } stats;

        unsigned int     head;
        unsigned int     tail;

        unsigned int     tab_mask; /* size - 1 */

        struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                              struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
        qdisc_drop(skb, sch, to_free);
        --sch->q.qlen;
}

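/* Per-packet state kept in the qdisc control block: a classid stored by
 * choke_set_classid() and a lazily computed flow keys digest used by
 * choke_match_flow(); keys_valid records whether the digest is filled in.
 */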
struct choke_skb_cb {
        u16                     classid;
        u8                      keys_valid;
        struct                  flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
        choke_skb_cb(skb)->classid = classid;
}

/*
 * Compare flow of two packets
 *  Returns true only if source and destination address and port match.
 *          false for special cases
 */
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb1, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb2, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       sizeof(choke_skb_cb(skb1)->keys));
}

/*
 * Select a packet at random from queue
 * HACK: since queue can have holes from previous deletion; retry several
 *   times to find a random skb but then just give up and return the head
 * Will return NULL if queue is empty (q->head == q->tail)
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retrys = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retrys > 0);

        return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
        return choke_match_flow(oskb, nskb);
}

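/*
 * Enqueue: update the RED average queue length; once it exceeds qth_min,
 * compare the arriving packet against one drawn at random from the queue
 * and drop both on a flow match (the CHOKe step).  Otherwise apply the
 * usual RED probabilistic/forced mark-or-drop logic, and finally append
 * the packet at the tail while qlen is below the configured limit.
 */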
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from queue and compare flow */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx, to_free);
                        goto congestion_drop;
                }

                /* Queue is large, always mark/drop */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        qdisc_qstats_overlimit(sch);
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                qdisc_qstats_overlimit(sch);
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit new packet */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                qdisc_qstats_backlog_inc(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch, to_free);

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

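/* Dequeue from the head of the ring, skipping any holes left by earlier
 * drops; when the queue is empty, start the RED idle period so the average
 * queue length can decay.
 */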
static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

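/* Drop every queued packet, clear the table and queue counters, and
 * restart the RED averaging state.
 */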
static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                        continue;
                rtnl_qdisc_drop(skb, sch);
        }

        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        if (q->tab)
                memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
        kvfree(addr);
}

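/*
 * Parse and validate a new configuration.  The packet table is sized to
 * the smallest power of two greater than the requested limit; when that
 * changes, a new table is allocated and the queued packets are migrated
 * under the qdisc tree lock (excess packets are dropped).  Finally the
 * RED parameters and averaging state are (re)initialized.
 */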
static int choke_change(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
                                          choke_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);

        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *),
                                      GFP_KERNEL | __GFP_ZERO);
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;
                        unsigned int dropped = 0;

                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                dropped += qdisc_pkt_len(skb);
                                qdisc_qstats_backlog_dec(sch, skb);
                                --sch->q.qlen;
                                rtnl_qdisc_drop(skb, sch);
                        }
                        qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
                        q->head = 0;
                        q->tail = tail;
                }

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_CHOKE_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        return choke_change(sch, opt, extack);
}

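/* Report the current configuration as TCA_CHOKE_PARMS and TCA_CHOKE_MAX_P;
 * the thresholds are scaled back from their internal Wlog-shifted
 * fixed-point representation.
 */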
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

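/* Export the CHOKe-specific counters (early/forced drops and marks,
 * queue-limit drops and flow-match drops) as struct tc_choke_xstats.
 */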
static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

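/* Qdisc operations table: CHOKe registers only the basic datapath and
 * configuration hooks; no class operations are provided here.
 */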
static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id             =       "choke",
        .priv_size      =       sizeof(struct choke_sched_data),

        .enqueue        =       choke_enqueue,
        .dequeue        =       choke_dequeue,
        .peek           =       choke_peek_head,
        .init           =       choke_init,
        .destroy        =       choke_destroy,
        .reset          =       choke_reset,
        .change         =       choke_change,
        .dump           =       choke_dump,
        .dump_stats     =       choke_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");
