net/sched/sch_sfq.c


DEFINITIONS

This source file includes the following definitions.
  1. sfq_dep_head
  2. sfq_hash
  3. sfq_classify
  4. sfq_link
  5. sfq_dec
  6. sfq_inc
  7. slot_dequeue_tail
  8. slot_dequeue_head
  9. slot_queue_init
  10. slot_queue_add
  11. sfq_drop
  12. sfq_prob_mark
  13. sfq_hard_mark
  14. sfq_headdrop
  15. sfq_enqueue
  16. sfq_dequeue
  17. sfq_reset
  18. sfq_rehash
  19. sfq_perturbation
  20. sfq_change
  21. sfq_alloc
  22. sfq_free
  23. sfq_destroy
  24. sfq_init
  25. sfq_dump
  26. sfq_leaf
  27. sfq_find
  28. sfq_bind
  29. sfq_unbind
  30. sfq_tcf_block
  31. sfq_dump_class
  32. sfq_dump_class_stats
  33. sfq_walk
  34. sfq_module_init
  35. sfq_module_exit

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/siphash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/red.h>


/*      Stochastic Fairness Queuing algorithm.
        =======================================

        Source:
        Paul E. McKenney "Stochastic Fairness Queuing",
        IEEE INFOCOM'90 Proceedings, San Francisco, 1990.

        Paul E. McKenney "Stochastic Fairness Queuing",
        "Interworking: Research and Experience", v.2, 1991, p.113-131.


        See also:
        M. Shreedhar and George Varghese "Efficient Fair
        Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


        This is not the thing that is usually called (W)FQ nowadays.
        It does not use any timestamp mechanism, but instead
        processes queues in round-robin order.

        ADVANTAGE:

        - It is very cheap. Both CPU and memory requirements are minimal.

        DRAWBACKS:

        - "Stochastic" -> It is not 100% fair.
        When hash collisions occur, several flows are considered as one.

        - "Round-robin" -> It introduces larger delays than virtual clock
        based schemes, and should not be used for isolating interactive
        traffic from non-interactive traffic. This means that this
        scheduler should be used as a leaf of CBQ or P3, which place
        interactive traffic into a higher priority band.

        We still need true WFQ for the top level CSZ, but using WFQ
        for best-effort traffic is absolutely pointless:
        SFQ is superior for this purpose.

        IMPLEMENTATION:
        This implementation limits:
        - the maximal queue length per flow to 127 packets;
        - the max mtu to 2^18-1;
        - the number of flows to 65408;
        - the number of hash buckets to 65536.

        It is easy to increase these values, but not at runtime.  */

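/* For illustration only: with the iproute2 'tc' tool (device name
 * assumed), a typical way to attach this qdisc is
 *
 *      tc qdisc add dev eth0 root sfq perturb 10
 *
 * which installs SFQ as the root qdisc and re-keys the flow hash every
 * 10 seconds (see sfq_perturbation() below).
 */
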
#define SFQ_MAX_DEPTH           127 /* max number of packets per flow */
#define SFQ_DEFAULT_FLOWS       128
#define SFQ_MAX_FLOWS           (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
#define SFQ_EMPTY_SLOT          0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024

/* We use 16 bits to store allot, and want to handle packets up to 64K.
 * Scale allot by 8 (1 << 3) so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT         3
#define SFQ_ALLOT_SIZE(X)       DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)

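/* Worked example (hypothetical values): for an Ethernet-sized quantum of
 * 1514 bytes, SFQ_ALLOT_SIZE(1514) = DIV_ROUND_UP(1514, 8) = 190 scaled
 * credit units; even a 64K packet costs only 65535 / 8 = 8192 units,
 * comfortably within the 16-bit 'allot' field.
 */
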
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;

/*
 * We don't use pointers, to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to the slots[] array,
 * while the following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 * are 'pointers' to the dep[] array.
 */
struct sfq_head {
        sfq_index       next;
        sfq_index       prev;
};
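
/* For example (with the constants above), index 100 denotes
 * q->slots[100], while index SFQ_MAX_FLOWS + 3 denotes q->dep[3], the
 * head of the list of flows currently holding 3 packets; sfq_dep_head()
 * below performs exactly this decoding.
 */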

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq RR chain */
        struct sfq_head dep; /* anchor in dep[] chains */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */

        unsigned int    backlog;
        struct red_vars vars;
};

struct sfq_sched_data {
/* frequently used fields */
        int             limit;          /* limit of total number of packets in this qdisc */
        unsigned int    divisor;        /* number of slots in hash table */
        u8              headdrop;
        u8              maxdepth;       /* limit of packets per flow */

        siphash_key_t   perturbation;
        u8              cur_depth;      /* depth of longest slot */
        u8              flags;
        unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
        struct tcf_proto __rcu *filter_list;
        struct tcf_block *block;
        sfq_index       *ht;            /* Hash table ('divisor' slots) */
        struct sfq_slot *slots;         /* Flows table ('maxflows' entries) */

        struct red_parms *red_parms;
        struct tc_sfqred_stats stats;
        struct sfq_slot *tail;          /* current slot in round */

        struct sfq_head dep[SFQ_MAX_DEPTH + 1];
                                        /* Linked lists of slots, indexed by depth
                                         * dep[0] : list of unused flows
                                         * dep[1] : list of flows with 1 packet
                                         * dep[X] : list of flows with X packets
                                         */

        unsigned int    maxflows;       /* number of flows in flows array */
        int             perturb_period;
        unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
        struct timer_list perturb_timer;
        struct Qdisc    *sch;
};

/*
 * An sfq_head is either embedded in an sfq_slot or lives in the dep[] array.
 */
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
        if (val < SFQ_MAX_FLOWS)
                return &q->slots[val].dep;
        return &q->dep[val - SFQ_MAX_FLOWS];
}

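/* Hash a packet into one of 'divisor' buckets. The divisor is kept a
 * power of two (enforced in sfq_change()), so the mask below yields a
 * bucket index in [0, divisor - 1].
 */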
static unsigned int sfq_hash(const struct sfq_sched_data *q,
                             const struct sk_buff *skb)
{
        return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
}

static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                                 int *qerr)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tcf_result res;
        struct tcf_proto *fl;
        int result;

        if (TC_H_MAJ(skb->priority) == sch->handle &&
            TC_H_MIN(skb->priority) > 0 &&
            TC_H_MIN(skb->priority) <= q->divisor)
                return TC_H_MIN(skb->priority);

        fl = rcu_dereference_bh(q->filter_list);
        if (!fl)
                return sfq_hash(q, skb) + 1;

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        result = tcf_classify(skb, fl, &res, false);
        if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_STOLEN:
                case TC_ACT_QUEUED:
                case TC_ACT_TRAP:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                        /* fall through */
                case TC_ACT_SHOT:
                        return 0;
                }
#endif
                if (TC_H_MIN(res.classid) <= q->divisor)
                        return TC_H_MIN(res.classid);
        }
        return 0;
}

/*
 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        struct sfq_slot *slot = &q->slots[x];
        int qlen = slot->qlen;

        p = qlen + SFQ_MAX_FLOWS;
        n = q->dep[qlen].next;

        slot->dep.next = n;
        slot->dep.prev = p;

        q->dep[qlen].next = x;          /* sfq_dep_head(q, p)->next = x */
        sfq_dep_head(q, n)->prev = x;
}

#define sfq_unlink(q, x, n, p)                  \
        do {                                    \
                n = q->slots[x].dep.next;       \
                p = q->slots[x].dep.prev;       \
                sfq_dep_head(q, p)->next = n;   \
                sfq_dep_head(q, n)->prev = p;   \
        } while (0)


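/* A packet was removed from slot x: relink the slot into the list for
 * its new (smaller) depth, and lower cur_depth if x was the only slot
 * at the previous maximal depth.
 */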
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        sfq_unlink(q, x, n, p);

        d = q->slots[x].qlen--;
        if (n == p && q->cur_depth == d)
                q->cur_depth--;
        sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        sfq_unlink(q, x, n, p);

        d = ++q->slots[x].qlen;
        if (q->cur_depth < d)
                q->cur_depth = d;
        sfq_link(q, x);
}

/* helper functions : might be changed when/if skbs use a standard list_head */

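/* Implementation note: skblist_next/skblist_prev are the first two
 * fields of struct sfq_slot so that they line up with the next/prev
 * pointers at the start of struct sk_buff. The slot itself can thus be
 * cast to a struct sk_buff * and act as the head of a circular
 * doubly-linked skb list; an empty list is one whose head points back
 * at (struct sk_buff *)slot.
 */
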
/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
        struct sk_buff *skb = slot->skblist_prev;

        slot->skblist_prev = skb->prev;
        skb->prev->next = (struct sk_buff *)slot;
        skb->next = skb->prev = NULL;
        return skb;
}

/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
        struct sk_buff *skb = slot->skblist_next;

        slot->skblist_next = skb->next;
        skb->next->prev = (struct sk_buff *)slot;
        skb->next = skb->prev = NULL;
        return skb;
}

static inline void slot_queue_init(struct sfq_slot *slot)
{
        memset(slot, 0, sizeof(*slot));
        slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}

/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
        skb->prev = slot->skblist_prev;
        skb->next = (struct sk_buff *)slot;
        slot->skblist_prev->next = skb;
        slot->skblist_prev = skb;
}

static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index x, d = q->cur_depth;
        struct sk_buff *skb;
        unsigned int len;
        struct sfq_slot *slot;

        /* Queue is full! Find the longest slot and drop the tail packet from it. */
        if (d > 1) {
                x = q->dep[d].next;
                slot = &q->slots[x];
drop:
                skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
                len = qdisc_pkt_len(skb);
                slot->backlog -= len;
                sfq_dec(q, x);
                sch->q.qlen--;
                qdisc_qstats_backlog_dec(sch, skb);
                qdisc_drop(skb, sch, to_free);
                return len;
        }

        if (d == 1) {
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                x = q->tail->next;
                slot = &q->slots[x];
                q->tail->next = slot->next;
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
                goto drop;
        }

        return 0;
}

/* Is the ECN parameter configured? */
static int sfq_prob_mark(const struct sfq_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold just be marked? */
static int sfq_hard_mark(const struct sfq_sched_data *q)
{
        return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
}

static int sfq_headdrop(const struct sfq_sched_data *q)
{
        return q->headdrop;
}

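/* Enqueue path: classify the skb into a flow slot, run the optional
 * per-flow RED logic on that slot, then tail-add the skb; if the flow
 * just became active, it is appended to the round-robin ring behind
 * q->tail.
 */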
static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned int hash, dropped;
        sfq_index x, qlen;
        struct sfq_slot *slot;
        int uninitialized_var(ret);
        struct sk_buff *head;
        int delta;

        hash = sfq_classify(skb, sch, &ret);
        if (hash == 0) {
                if (ret & __NET_XMIT_BYPASS)
                        qdisc_qstats_drop(sch);
                __qdisc_drop(skb, to_free);
                return ret;
        }
        hash--;

        x = q->ht[hash];
        slot = &q->slots[x];
        if (x == SFQ_EMPTY_SLOT) {
                x = q->dep[0].next; /* get a free slot */
                if (x >= SFQ_MAX_FLOWS)
                        return qdisc_drop(skb, sch, to_free);
                q->ht[hash] = x;
                slot = &q->slots[x];
                slot->hash = hash;
                slot->backlog = 0; /* should already be 0 anyway... */
                red_set_vars(&slot->vars);
                goto enqueue;
        }
        if (q->red_parms) {
                slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
                                                        &slot->vars,
                                                        slot->backlog);
                switch (red_action(q->red_parms,
                                   &slot->vars,
                                   slot->vars.qavg)) {
                case RED_DONT_MARK:
                        break;

                case RED_PROB_MARK:
                        qdisc_qstats_overlimit(sch);
                        if (sfq_prob_mark(q)) {
                                /* We know we have at least one packet in queue */
                                if (sfq_headdrop(q) &&
                                    INET_ECN_set_ce(slot->skblist_next)) {
                                        q->stats.prob_mark_head++;
                                        break;
                                }
                                if (INET_ECN_set_ce(skb)) {
                                        q->stats.prob_mark++;
                                        break;
                                }
                        }
                        q->stats.prob_drop++;
                        goto congestion_drop;

                case RED_HARD_MARK:
                        qdisc_qstats_overlimit(sch);
                        if (sfq_hard_mark(q)) {
                                /* We know we have at least one packet in queue */
                                if (sfq_headdrop(q) &&
                                    INET_ECN_set_ce(slot->skblist_next)) {
                                        q->stats.forced_mark_head++;
                                        break;
                                }
                                if (INET_ECN_set_ce(skb)) {
                                        q->stats.forced_mark++;
                                        break;
                                }
                        }
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }
        }

        if (slot->qlen >= q->maxdepth) {
congestion_drop:
                if (!sfq_headdrop(q))
                        return qdisc_drop(skb, sch, to_free);

                /* We know we have at least one packet in queue */
                head = slot_dequeue_head(slot);
                delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
                sch->qstats.backlog -= delta;
                slot->backlog -= delta;
                qdisc_drop(head, sch, to_free);

                slot_queue_add(slot, skb);
                qdisc_tree_reduce_backlog(sch, 0, delta);
                return NET_XMIT_CN;
        }

enqueue:
        qdisc_qstats_backlog_inc(sch, skb);
        slot->backlog += qdisc_pkt_len(skb);
        slot_queue_add(slot, skb);
        sfq_inc(q, x);
        if (slot->qlen == 1) {          /* The flow is new */
                if (q->tail == NULL) {  /* It is the first flow */
                        slot->next = x;
                } else {
                        slot->next = q->tail->next;
                        q->tail->next = x;
                }
                /* We put this flow at the end of our flow list.
                 * It might sound unfair for a new flow to wait after old ones,
                 * but otherwise we could end up servicing only new flows and
                 * freezing the old ones.
                 */
                q->tail = slot;
                /* We could use a bigger initial quantum for new flows */
                slot->allot = q->scaled_quantum;
        }
        if (++sch->q.qlen <= q->limit)
                return NET_XMIT_SUCCESS;

        qlen = slot->qlen;
        dropped = sfq_drop(sch, to_free);
        /* Return Congestion Notification only if we dropped a packet
         * from this flow.
         */
        if (qlen != slot->qlen) {
                qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
                return NET_XMIT_CN;
        }

        /* As we dropped a packet, better let the upper stack know about it. */
        qdisc_tree_reduce_backlog(sch, 1, dropped);
        return NET_XMIT_SUCCESS;
}

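/* Dequeue path: serve the round-robin ring starting after q->tail. A
 * slot whose credit is exhausted is skipped after being refilled with
 * scaled_quantum; a slot that empties is unlinked from the ring and its
 * hash bucket is cleared.
 */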
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        sfq_index a, next_a;
        struct sfq_slot *slot;

        /* No active slots */
        if (q->tail == NULL)
                return NULL;

next_slot:
        a = q->tail->next;
        slot = &q->slots[a];
        if (slot->allot <= 0) {
                q->tail = slot;
                slot->allot += q->scaled_quantum;
                goto next_slot;
        }
        skb = slot_dequeue_head(slot);
        sfq_dec(q, a);
        qdisc_bstats_update(sch, skb);
        sch->q.qlen--;
        qdisc_qstats_backlog_dec(sch, skb);
        slot->backlog -= qdisc_pkt_len(skb);
        /* Is the slot empty? */
        if (slot->qlen == 0) {
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
                next_a = slot->next;
                if (a == next_a) {
                        q->tail = NULL; /* no more active slots */
                        return skb;
                }
                q->tail->next = next_a;
        } else {
                slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
        }
        return skb;
}

static void
sfq_reset(struct Qdisc *sch)
{
        struct sk_buff *skb;

        while ((skb = sfq_dequeue(sch)) != NULL)
                rtnl_kfree_skbs(skb, skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to
 * change counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        int i;
        struct sfq_slot *slot;
        struct sk_buff_head list;
        int dropped = 0;
        unsigned int drop_len = 0;

        __skb_queue_head_init(&list);

        for (i = 0; i < q->maxflows; i++) {
                slot = &q->slots[i];
                if (!slot->qlen)
                        continue;
                while (slot->qlen) {
                        skb = slot_dequeue_head(slot);
                        sfq_dec(q, i);
                        __skb_queue_tail(&list, skb);
                }
                slot->backlog = 0;
                red_set_vars(&slot->vars);
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
        }
        q->tail = NULL;

        while ((skb = __skb_dequeue(&list)) != NULL) {
                unsigned int hash = sfq_hash(q, skb);
                sfq_index x = q->ht[hash];

                slot = &q->slots[x];
                if (x == SFQ_EMPTY_SLOT) {
                        x = q->dep[0].next; /* get a free slot */
                        if (x >= SFQ_MAX_FLOWS) {
drop:
                                qdisc_qstats_backlog_dec(sch, skb);
                                drop_len += qdisc_pkt_len(skb);
                                kfree_skb(skb);
                                dropped++;
                                continue;
                        }
                        q->ht[hash] = x;
                        slot = &q->slots[x];
                        slot->hash = hash;
                }
                if (slot->qlen >= q->maxdepth)
                        goto drop;
                slot_queue_add(slot, skb);
                if (q->red_parms)
                        slot->vars.qavg = red_calc_qavg(q->red_parms,
                                                        &slot->vars,
                                                        slot->backlog);
                slot->backlog += qdisc_pkt_len(skb);
                sfq_inc(q, x);
                if (slot->qlen == 1) {          /* The flow is new */
                        if (q->tail == NULL) {  /* It is the first flow */
                                slot->next = x;
                        } else {
                                slot->next = q->tail->next;
                                q->tail->next = x;
                        }
                        q->tail = slot;
                        slot->allot = q->scaled_quantum;
                }
        }
        sch->q.qlen -= dropped;
        qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}

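/* Timer callback: generate a fresh siphash key so that flows which
 * happened to collide in ht[] are separated, then rehash the queued
 * packets under the qdisc root lock so per-flow ordering is preserved.
 */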
static void sfq_perturbation(struct timer_list *t)
{
        struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
        siphash_key_t nkey;

        get_random_bytes(&nkey, sizeof(nkey));
        spin_lock(root_lock);
        q->perturbation = nkey;
        if (!q->filter_list && q->tail)
                sfq_rehash(sch);
        spin_unlock(root_lock);

        if (q->perturb_period)
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

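/* Apply a TCA_OPTIONS update. The v1 layout (struct tc_sfq_qopt_v1)
 * extends the legacy struct tc_sfq_qopt with depth/headdrop and the RED
 * parameters, so its presence is detected purely by attribute size.
 */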
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tc_sfq_qopt *ctl = nla_data(opt);
        struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
        unsigned int qlen, dropped = 0;
        struct red_parms *p = NULL;
        struct sk_buff *to_free = NULL;
        struct sk_buff *tail = NULL;

        if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
                return -EINVAL;
        if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
                ctl_v1 = nla_data(opt);
        if (ctl->divisor &&
            (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
                return -EINVAL;

        /* slot->allot is a short, make sure quantum is not too big. */
        if (ctl->quantum) {
                unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);

                if (scaled <= 0 || scaled > SHRT_MAX)
                        return -EINVAL;
        }

        if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
                                        ctl_v1->Wlog))
                return -EINVAL;
        if (ctl_v1 && ctl_v1->qth_min) {
                p = kmalloc(sizeof(*p), GFP_KERNEL);
                if (!p)
                        return -ENOMEM;
        }
        sch_tree_lock(sch);
        if (ctl->quantum) {
                q->quantum = ctl->quantum;
                q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        }
        q->perturb_period = ctl->perturb_period * HZ;
        if (ctl->flows)
                q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
        if (ctl->divisor) {
                q->divisor = ctl->divisor;
                q->maxflows = min_t(u32, q->maxflows, q->divisor);
        }
        if (ctl_v1) {
                if (ctl_v1->depth)
                        q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
                if (p) {
                        swap(q->red_parms, p);
                        red_set_parms(q->red_parms,
                                      ctl_v1->qth_min, ctl_v1->qth_max,
                                      ctl_v1->Wlog,
                                      ctl_v1->Plog, ctl_v1->Scell_log,
                                      NULL,
                                      ctl_v1->max_P);
                }
                q->flags = ctl_v1->flags;
                q->headdrop = ctl_v1->headdrop;
        }
        if (ctl->limit) {
                q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
                q->maxflows = min_t(u32, q->maxflows, q->limit);
        }

        qlen = sch->q.qlen;
        while (sch->q.qlen > q->limit) {
                dropped += sfq_drop(sch, &to_free);
                if (!tail)
                        tail = to_free;
        }

        rtnl_kfree_skbs(to_free, tail);
        qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

        del_timer(&q->perturb_timer);
        if (q->perturb_period) {
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
                get_random_bytes(&q->perturbation, sizeof(q->perturbation));
        }
        sch_tree_unlock(sch);
        kfree(p);
        return 0;
}

static void *sfq_alloc(size_t sz)
{
        return kvmalloc(sz, GFP_KERNEL);
}

static void sfq_free(void *addr)
{
        kvfree(addr);
}

static void sfq_destroy(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);

        tcf_block_put(q->block);
        q->perturb_period = 0;
        del_timer_sync(&q->perturb_timer);
        sfq_free(q->ht);
        sfq_free(q->slots);
        kfree(q->red_parms);
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        int i;
        int err;

        q->sch = sch;
        timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);

        err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
        if (err)
                return err;

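        /* Make every dep[] depth list initially empty: each head is
         * self-linked through its own index in the dep[] 'pointer'
         * range [SFQ_MAX_FLOWS, SFQ_MAX_FLOWS + SFQ_MAX_DEPTH].
         */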
        for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
                q->dep[i].next = i + SFQ_MAX_FLOWS;
                q->dep[i].prev = i + SFQ_MAX_FLOWS;
        }

        q->limit = SFQ_MAX_DEPTH;
        q->maxdepth = SFQ_MAX_DEPTH;
        q->cur_depth = 0;
        q->tail = NULL;
        q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
        q->maxflows = SFQ_DEFAULT_FLOWS;
        q->quantum = psched_mtu(qdisc_dev(sch));
        q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        q->perturb_period = 0;
        get_random_bytes(&q->perturbation, sizeof(q->perturbation));

        if (opt) {
                int err = sfq_change(sch, opt);

                if (err)
                        return err;
        }

        q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
        q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
        if (!q->ht || !q->slots) {
                /* Note: sfq_destroy() will be called by our caller */
                return -ENOMEM;
        }

        for (i = 0; i < q->divisor; i++)
                q->ht[i] = SFQ_EMPTY_SLOT;

        for (i = 0; i < q->maxflows; i++) {
                slot_queue_init(&q->slots[i]);
                sfq_link(q, i);
        }
        if (q->limit >= 1)
                sch->flags |= TCQ_F_CAN_BYPASS;
        else
                sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
}

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_sfq_qopt_v1 opt;
        struct red_parms *p = q->red_parms;

        memset(&opt, 0, sizeof(opt));
        opt.v0.quantum  = q->quantum;
        opt.v0.perturb_period = q->perturb_period / HZ;
        opt.v0.limit    = q->limit;
        opt.v0.divisor  = q->divisor;
        opt.v0.flows    = q->maxflows;
        opt.depth       = q->maxdepth;
        opt.headdrop    = q->headdrop;

        if (p) {
                opt.qth_min     = p->qth_min >> p->Wlog;
                opt.qth_max     = p->qth_max >> p->Wlog;
                opt.Wlog        = p->Wlog;
                opt.Plog        = p->Plog;
                opt.Scell_log   = p->Scell_log;
                opt.max_P       = p->max_P;
        }
        memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
        opt.flags       = q->flags;

        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
        return NULL;
}

static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
{
        return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
                              u32 classid)
{
        return 0;
}

static void sfq_unbind(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
                                       struct netlink_ext_ack *extack)
{
        struct sfq_sched_data *q = qdisc_priv(sch);

        if (cl)
                return NULL;
        return q->block;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        tcm->tcm_handle |= TC_H_MIN(cl);
        return 0;
}

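/* Class ids visible to userspace are hash bucket indexes plus one
 * (0 means "not classified", see sfq_classify()), hence the cl - 1
 * below.
 */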
static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                                struct gnet_dump *d)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index idx = q->ht[cl - 1];
        struct gnet_stats_queue qs = { 0 };
        struct tc_sfq_xstats xstats = { 0 };

        if (idx != SFQ_EMPTY_SLOT) {
                const struct sfq_slot *slot = &q->slots[idx];

                xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
                qs.qlen = slot->qlen;
                qs.backlog = slot->backlog;
        }
        if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
                return -1;
        return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned int i;

        if (arg->stop)
                return;

        for (i = 0; i < q->divisor; i++) {
                if (q->ht[i] == SFQ_EMPTY_SLOT ||
                    arg->count < arg->skip) {
                        arg->count++;
                        continue;
                }
                if (arg->fn(sch, i + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
                arg->count++;
        }
}

static const struct Qdisc_class_ops sfq_class_ops = {
        .leaf           =       sfq_leaf,
        .find           =       sfq_find,
        .tcf_block      =       sfq_tcf_block,
        .bind_tcf       =       sfq_bind,
        .unbind_tcf     =       sfq_unbind,
        .dump           =       sfq_dump_class,
        .dump_stats     =       sfq_dump_class_stats,
        .walk           =       sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
        .cl_ops         =       &sfq_class_ops,
        .id             =       "sfq",
        .priv_size      =       sizeof(struct sfq_sched_data),
        .enqueue        =       sfq_enqueue,
        .dequeue        =       sfq_dequeue,
        .peek           =       qdisc_peek_dequeued,
        .init           =       sfq_init,
        .reset          =       sfq_reset,
        .destroy        =       sfq_destroy,
        .change         =       NULL,
        .dump           =       sfq_dump,
        .owner          =       THIS_MODULE,
};

static int __init sfq_module_init(void)
{
        return register_qdisc(&sfq_qdisc_ops);
}

static void __exit sfq_module_exit(void)
{
        unregister_qdisc(&sfq_qdisc_ops);
}

module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
