root/net/sched/sch_api.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. register_qdisc
  2. unregister_qdisc
  3. qdisc_get_default
  4. qdisc_lookup_default
  5. qdisc_set_default
  6. sch_default_qdisc
  7. qdisc_match_from_root
  8. qdisc_hash_add
  9. qdisc_hash_del
  10. qdisc_lookup
  11. qdisc_lookup_rcu
  12. qdisc_leaf
  13. qdisc_lookup_ops
  14. __detect_linklayer
  15. qdisc_get_rtab
  16. qdisc_put_rtab
  17. qdisc_get_stab
  18. qdisc_put_stab
  19. qdisc_dump_stab
  20. __qdisc_calculate_pkt_len
  21. qdisc_warn_nonwc
  22. qdisc_watchdog
  23. qdisc_watchdog_init_clockid
  24. qdisc_watchdog_init
  25. qdisc_watchdog_schedule_ns
  26. qdisc_watchdog_cancel
  27. qdisc_class_hash_alloc
  28. qdisc_class_hash_grow
  29. qdisc_class_hash_init
  30. qdisc_class_hash_destroy
  31. qdisc_class_hash_insert
  32. qdisc_class_hash_remove
  33. qdisc_alloc_handle
  34. qdisc_tree_reduce_backlog
  35. qdisc_offload_dump_helper
  36. qdisc_offload_graft_helper
  37. qdisc_offload_graft_root
  38. tc_fill_qdisc
  39. tc_qdisc_dump_ignore
  40. qdisc_notify
  41. notify_and_destroy
  42. qdisc_clear_nolock
  43. qdisc_graft
  44. qdisc_block_indexes_set
  45. qdisc_create
  46. qdisc_change
  47. check_loop
  48. check_loop_fn
  49. tc_get_qdisc
  50. tc_modify_qdisc
  51. tc_dump_qdisc_root
  52. tc_dump_qdisc
  53. tc_fill_tclass
  54. tclass_notify
  55. tclass_del_notify
  56. tcf_node_bind
  57. tc_bind_class_walker
  58. tc_bind_tclass
  59. tc_bind_tclass
  60. tc_ctl_tclass
  61. qdisc_class_dump
  62. tc_dump_tclass_qdisc
  63. tc_dump_tclass_root
  64. tc_dump_tclass
  65. psched_show
  66. psched_net_init
  67. psched_net_exit
  68. psched_net_init
  69. psched_net_exit
  70. pktsched_init

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * net/sched/sch_api.c  Packet scheduler API.
   4  *
   5  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   6  *
   7  * Fixes:
   8  *
   9  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
  10  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
  11  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
  12  */
  13 
  14 #include <linux/module.h>
  15 #include <linux/types.h>
  16 #include <linux/kernel.h>
  17 #include <linux/string.h>
  18 #include <linux/errno.h>
  19 #include <linux/skbuff.h>
  20 #include <linux/init.h>
  21 #include <linux/proc_fs.h>
  22 #include <linux/seq_file.h>
  23 #include <linux/kmod.h>
  24 #include <linux/list.h>
  25 #include <linux/hrtimer.h>
  26 #include <linux/slab.h>
  27 #include <linux/hashtable.h>
  28 
  29 #include <net/net_namespace.h>
  30 #include <net/sock.h>
  31 #include <net/netlink.h>
  32 #include <net/pkt_sched.h>
  33 #include <net/pkt_cls.h>
  34 
  35 /*
  36 
  37    Short review.
  38    -------------
  39 
  40    This file consists of two interrelated parts:
  41 
  42    1. queueing disciplines manager frontend.
  43    2. traffic classes manager frontend.
  44 
  45    Generally, queueing discipline ("qdisc") is a black box,
  46    which is able to enqueue packets and to dequeue them (when
  47    device is ready to send something) in order and at times
  48    determined by algorithm hidden in it.
  49 
   qdiscs are divided into two categories:
  51    - "queues", which have no internal structure visible from outside.
  52    - "schedulers", which split all the packets to "traffic classes",
  53      using "packet classifiers" (look at cls_api.c)
  54 
   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.
  57 
   The goal of the routines in this file is to translate
   the handle-based information supplied by the user into a form
   more intelligible to the kernel, to perform some sanity checks
   and the work common to all qdiscs, and to provide rtnetlink
   notifications.
  63 
  64    All real intelligent work is done inside qdisc modules.
  65 
  66 
  67 
  68    Every discipline has two major routines: enqueue and dequeue.
  69 
  70    ---dequeue
  71 
  72    dequeue usually returns a skb to send. It is allowed to return NULL,
  73    but it does not mean that queue is empty, it just means that
  74    discipline does not want to send anything this time.
  75    Queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   a real packet queue, but q->q.qlen must still be valid.
  78 
  79    ---enqueue
  80 
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
  84    NET_XMIT_DROP        - this packet dropped
  85      Expected action: do not backoff, but wait until queue will clear.
  86    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
  87      Expected action: backoff or ignore
  88 
  89    Auxiliary routines:
  90 
  91    ---peek
  92 
  93    like dequeue but without removing a packet from the queue
  94 
  95    ---reset
  96 
  97    returns qdisc to initial state: purge all buffers, clear all
  98    timers, counters (except for statistics) etc.
  99 
 100    ---init
 101 
 102    initializes newly created qdisc.
 103 
 104    ---destroy
 105 
 106    destroys resources allocated by init and during lifetime of qdisc.
 107 
 108    ---change
 109 
 110    changes qdisc parameters.
 111  */
 112 
 113 /* Protects list of registered TC modules. It is pure SMP lock. */
 114 static DEFINE_RWLOCK(qdisc_mod_lock);
 115 
 116 
 117 /************************************************
 118  *      Queueing disciplines manipulation.      *
 119  ************************************************/
 120 
 121 
 122 /* The list of all installed queueing disciplines. */
 123 
 124 static struct Qdisc_ops *qdisc_base;
 125 
 126 /* Register/unregister queueing discipline */
 127 
/* Register a qdisc type with the packet scheduler core.
 *
 * Returns -EEXIST if ops with the same id is already registered and
 * -EINVAL if mandatory callbacks are missing or inconsistent.  Missing
 * enqueue/dequeue/peek callbacks are filled in from noop_qdisc_ops.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Reject duplicate ids; the walk also finds the list tail. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		/* A qdisc with a real dequeue must supply its own peek. */
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must support lookup, walk and leaf access. */
		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		/* A tcf_block also requires filter (un)bind support. */
		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	/* Link the new ops at the tail of the global list. */
	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
 171 
 172 int unregister_qdisc(struct Qdisc_ops *qops)
 173 {
 174         struct Qdisc_ops *q, **qp;
 175         int err = -ENOENT;
 176 
 177         write_lock(&qdisc_mod_lock);
 178         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
 179                 if (q == qops)
 180                         break;
 181         if (q) {
 182                 *qp = q->next;
 183                 q->next = NULL;
 184                 err = 0;
 185         }
 186         write_unlock(&qdisc_mod_lock);
 187         return err;
 188 }
 189 EXPORT_SYMBOL(unregister_qdisc);
 190 
 191 /* Get default qdisc if not otherwise specified */
/* Copy the id of the current default qdisc into @name (at most @len bytes).
 * qdisc_mod_lock keeps default_qdisc_ops stable during the copy.
 */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
 198 
 199 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
 200 {
 201         struct Qdisc_ops *q = NULL;
 202 
 203         for (q = qdisc_base; q; q = q->next) {
 204                 if (!strcmp(name, q->id)) {
 205                         if (!try_module_get(q->owner))
 206                                 q = NULL;
 207                         break;
 208                 }
 209         }
 210 
 211         return q;
 212 }
 213 
 214 /* Set new default qdisc to use */
/* Set new default qdisc to use.
 *
 * On success the module reference taken by qdisc_lookup_default() is
 * kept by default_qdisc_ops and the previous default's reference is
 * dropped.  Returns -EPERM without CAP_NET_ADMIN, -ENOENT if no such
 * qdisc can be found or loaded.
 */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found; request_module() may sleep, so the lock must
		 * be dropped around it and the lookup retried afterwards.
		 */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default; release the old default's module ref. */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
 242 
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config.  Runs as a late initcall so
 * built-in sch_* modules have had a chance to register first.
 */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
 251 
 252 /* We know handle. Find qdisc among all qdisc's attached to device
 253  * (root qdisc, all its children, children of children etc.)
 254  * Note: caller either uses rtnl or rcu_read_lock()
 255  */
 256 
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	/* A device-less root has no per-device hash to search. */
	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	/* Builtin qdiscs (e.g. noop) never match by handle. */
	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	/* Non-root children are tracked in the per-device hash,
	 * see qdisc_hash_add().
	 */
	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
 274 
/* Add a qdisc to the per-device handle hash used by qdisc_lookup().
 * Root and ingress qdiscs are not hashed; they are found directly.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		/* Invisible qdiscs are hidden from default dumps. */
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);
 285 
/* Remove a qdisc from the per-device handle hash; mirror of
 * qdisc_hash_add(), so root/ingress qdiscs are skipped here too.
 */
void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);
 294 
 295 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 296 {
 297         struct Qdisc *q;
 298 
 299         if (!handle)
 300                 return NULL;
 301         q = qdisc_match_from_root(dev->qdisc, handle);
 302         if (q)
 303                 goto out;
 304 
 305         if (dev_ingress_queue(dev))
 306                 q = qdisc_match_from_root(
 307                         dev_ingress_queue(dev)->qdisc_sleeping,
 308                         handle);
 309 out:
 310         return q;
 311 }
 312 
 313 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
 314 {
 315         struct netdev_queue *nq;
 316         struct Qdisc *q;
 317 
 318         if (!handle)
 319                 return NULL;
 320         q = qdisc_match_from_root(dev->qdisc, handle);
 321         if (q)
 322                 goto out;
 323 
 324         nq = dev_ingress_queue_rcu(dev);
 325         if (nq)
 326                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
 327 out:
 328         return q;
 329 }
 330 
 331 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 332 {
 333         unsigned long cl;
 334         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
 335 
 336         if (cops == NULL)
 337                 return NULL;
 338         cl = cops->find(p, classid);
 339 
 340         if (cl == 0)
 341                 return NULL;
 342         return cops->leaf(p, cl);
 343 }
 344 
 345 /* Find queueing discipline by name */
 346 
 347 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
 348 {
 349         struct Qdisc_ops *q = NULL;
 350 
 351         if (kind) {
 352                 read_lock(&qdisc_mod_lock);
 353                 for (q = qdisc_base; q; q = q->next) {
 354                         if (nla_strcmp(kind, q->id) == 0) {
 355                                 if (!try_module_get(q->owner))
 356                                         q = NULL;
 357                                 break;
 358                         }
 359                 }
 360                 read_unlock(&qdisc_mod_lock);
 361         }
 362         return q;
 363 }
 364 
/* The linklayer setting was not transferred from iproute2 in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the
 * rate table was modified.
 370  *
 371  * For linklayer ATM table entries, the rate table will be aligned to
 372  * 48 bytes, thus some table entries will contain the same value.  The
 373  * mpu (min packet unit) is also encoded into the old rate table, thus
 374  * starting from the mpu, we find low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 377  *
 378  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
 379  * and then roundup to the next cell, calc the table entry one below,
 380  * and compare.
 381  */
/* Heuristically detect whether @rtab was generated for linklayer ATM.
 * See the block comment above for the rationale.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);	/* mpu rounded up to a 48-byte ATM cell */
	int high      = roundup(low+1, 48);	/* start of the next cell */
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	/* Equal entries across one cell boundary indicate ATM alignment. */
	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
 403 
 404 static struct qdisc_rate_table *qdisc_rtab_list;
 405 
 406 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 407                                         struct nlattr *tab,
 408                                         struct netlink_ext_ack *extack)
 409 {
 410         struct qdisc_rate_table *rtab;
 411 
 412         if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
 413             nla_len(tab) != TC_RTAB_SIZE) {
 414                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
 415                 return NULL;
 416         }
 417 
 418         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
 419                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
 420                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
 421                         rtab->refcnt++;
 422                         return rtab;
 423                 }
 424         }
 425 
 426         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
 427         if (rtab) {
 428                 rtab->rate = *r;
 429                 rtab->refcnt = 1;
 430                 memcpy(rtab->data, nla_data(tab), 1024);
 431                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
 432                         r->linklayer = __detect_linklayer(r, rtab->data);
 433                 rtab->next = qdisc_rtab_list;
 434                 qdisc_rtab_list = rtab;
 435         } else {
 436                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
 437         }
 438         return rtab;
 439 }
 440 EXPORT_SYMBOL(qdisc_get_rtab);
 441 
 442 void qdisc_put_rtab(struct qdisc_rate_table *tab)
 443 {
 444         struct qdisc_rate_table *rtab, **rtabp;
 445 
 446         if (!tab || --tab->refcnt)
 447                 return;
 448 
 449         for (rtabp = &qdisc_rtab_list;
 450              (rtab = *rtabp) != NULL;
 451              rtabp = &rtab->next) {
 452                 if (rtab == tab) {
 453                         *rtabp = rtab->next;
 454                         kfree(rtab);
 455                         return;
 456                 }
 457         }
 458 }
 459 EXPORT_SYMBOL(qdisc_put_rtab);
 460 
 461 static LIST_HEAD(qdisc_stab_list);
 462 
 463 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
 464         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
 465         [TCA_STAB_DATA] = { .type = NLA_BINARY },
 466 };
 467 
/* Parse a TCA_STAB attribute into a (possibly shared) size table.
 *
 * Size tables map packet length to a scheduling length (see
 * __qdisc_calculate_pkt_len()).  Identical tables are shared via a
 * refcount.  Returns an ERR_PTR on parse/validation/alloc failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A non-zero tsize in the base spec requires matching raw data. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The declared size must agree with the supplied data length. */
	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	/* Share an existing identical table if there is one. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}
 525 
 526 void qdisc_put_stab(struct qdisc_size_table *tab)
 527 {
 528         if (!tab)
 529                 return;
 530 
 531         if (--tab->refcnt == 0) {
 532                 list_del(&tab->list);
 533                 kfree_rcu(tab, rcu);
 534         }
 535 }
 536 EXPORT_SYMBOL(qdisc_put_stab);
 537 
 538 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
 539 {
 540         struct nlattr *nest;
 541 
 542         nest = nla_nest_start_noflag(skb, TCA_STAB);
 543         if (nest == NULL)
 544                 goto nla_put_failure;
 545         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
 546                 goto nla_put_failure;
 547         nla_nest_end(skb, nest);
 548 
 549         return skb->len;
 550 
 551 nla_put_failure:
 552         return -1;
 553 }
 554 
/* Compute the scheduling length of @skb through size table @stab and
 * store it in the qdisc cb.  With an empty table only the overhead is
 * applied.  Result is clamped to at least 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	/* Select the table slot for this (aligned) length. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Beyond the table: extrapolate from the last entry. */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 583 
/* Warn (once per qdisc, gated by TCQ_F_WARN_NONWC) that a qdisc
 * appears to be non-work-conserving.
 */
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);
 593 
/* hrtimer callback: reschedule the watchdog's root qdisc for
 * transmission.  One-shot; qdisc_watchdog_schedule_ns() re-arms it.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}
 605 
/* Initialize a qdisc watchdog on the given clock; the timer fires in
 * absolute time, pinned to the arming CPU.
 */
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
 614 
/* Convenience wrapper: initialize a watchdog on CLOCK_MONOTONIC. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);
 620 
/* Arm the watchdog to fire at absolute time @expires (ns).
 * No-op if the root qdisc is deactivated or the timer is already set
 * to the same expiry.
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	/* Avoid hrtimer churn when the expiry has not changed. */
	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
 636 
/* Cancel a pending watchdog timer, waiting for a running callback. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
 642 
 643 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
 644 {
 645         struct hlist_head *h;
 646         unsigned int i;
 647 
 648         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
 649 
 650         if (h != NULL) {
 651                 for (i = 0; i < n; i++)
 652                         INIT_HLIST_HEAD(&h[i]);
 653         }
 654         return h;
 655 }
 656 
/* Double the class hash table of @sch when its load factor exceeds
 * 0.75.  The rehash runs under sch_tree_lock() so lookups see either
 * the old or the new table consistently; allocation failure simply
 * leaves the old table in place.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* Move every class into its bucket under the new mask. */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
 692 
 693 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
 694 {
 695         unsigned int size = 4;
 696 
 697         clhash->hash = qdisc_class_hash_alloc(size);
 698         if (!clhash->hash)
 699                 return -ENOMEM;
 700         clhash->hashsize  = size;
 701         clhash->hashmask  = size - 1;
 702         clhash->hashelems = 0;
 703         return 0;
 704 }
 705 EXPORT_SYMBOL(qdisc_class_hash_init);
 706 
/* Free the bucket array of a class hash (classes themselves are
 * owned and freed by the qdisc).
 */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
 712 
/* Insert @cl into @clhash, bucketed by classid. */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
 724 
/* Remove @cl from @clhash; @cl must currently be hashed. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
 732 
 733 /* Allocate an unique handle from space managed by kernel
 734  * Possible range is [8000-FFFF]:0000 (0x8000 values)
 735  */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	/* Try at most the whole 0x8000-entry space before giving up. */
	int i = 0x8000;
	/* Rotating cursor shared across devices; protected by RTNL,
	 * under which qdisc creation runs.
	 */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap around, skipping the reserved TC_H_ROOT value. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}
 752 
/* Propagate a decrease of @n packets / @len bytes up the qdisc tree,
 * adjusting qlen/backlog of every ancestor and notifying parent
 * classes that became empty.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		/* Ingress qdiscs have no meaningful ancestry to walk. */
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
 801 
/* Query the device's offload state for @sch during a dump, setting or
 * clearing TCQ_F_OFFLOADED accordingly.  Drivers lacking the ndo or
 * returning -EOPNOTSUPP are treated as "not offloaded", not an error.
 */
int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);
 822 
/* Relay a qdisc graft operation to the device driver for offload.
 * Errors are only surfaced via extack when the graft involves at
 * least one offloaded qdisc and is not part of a destroy.
 */
void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);
 851 
 852 static void qdisc_offload_graft_root(struct net_device *dev,
 853                                      struct Qdisc *new, struct Qdisc *old,
 854                                      struct netlink_ext_ack *extack)
 855 {
 856         struct tc_root_qopt_offload graft_offload = {
 857                 .command        = TC_ROOT_GRAFT,
 858                 .handle         = new ? new->handle : 0,
 859                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
 860                                   (old && old->flags & TCQ_F_INGRESS),
 861         };
 862 
 863         qdisc_offload_graft_helper(dev, NULL, new, old,
 864                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
 865 }
 866 
/* Build a RTM_NEWQDISC/RTM_DELQDISC netlink message describing qdisc 'q'.
 *
 * Fills in the tcmsg header, the qdisc kind and its private options, the
 * optional ingress/egress shared-block indexes, the size table, and the
 * statistics blocks (basic, rate estimator, queue).
 *
 * Returns skb->len on success; on any failure the partially written data
 * is trimmed back and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	/* Dumps can iterate many qdiscs under RTNL; give others a chance. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info is (ab)used to report the current reference count */
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Shared block indexes are reported only when non-zero */
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Lockless qdiscs keep their stats in per-CPU buffers */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Undo everything written since 'b' so the skb stays consistent */
	nlmsg_trim(skb, b);
	return -1;
}
 945 
 946 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
 947 {
 948         if (q->flags & TCQ_F_BUILTIN)
 949                 return true;
 950         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
 951                 return true;
 952 
 953         return false;
 954 }
 955 
 956 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 957                         struct nlmsghdr *n, u32 clid,
 958                         struct Qdisc *old, struct Qdisc *new)
 959 {
 960         struct sk_buff *skb;
 961         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 962 
 963         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 964         if (!skb)
 965                 return -ENOBUFS;
 966 
 967         if (old && !tc_qdisc_dump_ignore(old, false)) {
 968                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
 969                                   0, RTM_DELQDISC) < 0)
 970                         goto err_out;
 971         }
 972         if (new && !tc_qdisc_dump_ignore(new, false)) {
 973                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
 974                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 975                         goto err_out;
 976         }
 977 
 978         if (skb->len)
 979                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 980                                       n->nlmsg_flags & NLM_F_ECHO);
 981 
 982 err_out:
 983         kfree_skb(skb);
 984         return -EINVAL;
 985 }
 986 
 987 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
 988                                struct nlmsghdr *n, u32 clid,
 989                                struct Qdisc *old, struct Qdisc *new)
 990 {
 991         if (new || old)
 992                 qdisc_notify(net, skb, n, clid, old, new);
 993 
 994         if (old)
 995                 qdisc_put(old);
 996 }
 997 
 998 static void qdisc_clear_nolock(struct Qdisc *sch)
 999 {
1000         sch->flags &= ~TCQ_F_NOLOCK;
1001         if (!(sch->flags & TCQ_F_CPUSTATS))
1002                 return;
1003 
1004         free_percpu(sch->cpu_bstats);
1005         free_percpu(sch->cpu_qstats);
1006         sch->cpu_bstats = NULL;
1007         sch->cpu_qstats = NULL;
1008         sch->flags &= ~TCQ_F_CPUSTATS;
1009 }
1010 
1011 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1012  * to device "dev".
1013  *
1014  * When appropriate send a netlink notification using 'skb'
1015  * and "n".
1016  *
1017  * On success, destroy old qdisc.
1018  */
1019 
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		/* Root graft: attach to every TX queue, or to the single
		 * ingress queue when either side is an ingress qdisc.
		 */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		/* Quiesce traffic while the queues are re-pointed */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* Qdiscs with an ->attach() hook (e.g. mq/mqprio) spread
		 * per-queue children themselves; skip the manual loop.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* one extra reference per queue beyond the first */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			/* Notify against the old dev->qdisc before replacing
			 * it; notify_and_destroy() drops its reference.
			 */
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft into a class of an existing classful qdisc */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		/* ->graft() returns the displaced child in 'old' */
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}
1107 
1108 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1109                                    struct netlink_ext_ack *extack)
1110 {
1111         u32 block_index;
1112 
1113         if (tca[TCA_INGRESS_BLOCK]) {
1114                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1115 
1116                 if (!block_index) {
1117                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1118                         return -EINVAL;
1119                 }
1120                 if (!sch->ops->ingress_block_set) {
1121                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1122                         return -EOPNOTSUPP;
1123                 }
1124                 sch->ops->ingress_block_set(sch, block_index);
1125         }
1126         if (tca[TCA_EGRESS_BLOCK]) {
1127                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1128 
1129                 if (!block_index) {
1130                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1131                         return -EINVAL;
1132                 }
1133                 if (!sch->ops->egress_block_set) {
1134                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1135                         return -EOPNOTSUPP;
1136                 }
1137                 sch->ops->egress_block_set(sch, block_index);
1138         }
1139         return 0;
1140 }
1141 
1142 /*
1143    Allocate and initialize new qdisc.
1144 
1145    Parameters are passed via opt.
1146  */
1147 
/* Allocate and initialize a new qdisc of the kind given in tca[TCA_KIND],
 * attached to 'dev_queue' with parent 'parent' and handle 'handle'.
 *
 * On success returns the new qdisc (already hashed).  On failure returns
 * NULL and stores a negative errno in *errp; -EAGAIN means a module was
 * loaded and the caller must replay the whole request under RTNL.
 */
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	/* qdisc_alloc() takes a reference on ops->owner and on 'dev' */
	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			/* auto-allocate a free major handle */
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	/* Shared blocks must be set up before ->init() runs */
	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		/* Pick the seqcount the estimator samples under: the root's
		 * unless this qdisc sits below a multi-queue root or is
		 * ingress, in which case its own is used.
		 */
		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	/* undo the dev reference taken by qdisc_alloc() */
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1305 
1306 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1307                         struct netlink_ext_ack *extack)
1308 {
1309         struct qdisc_size_table *ostab, *stab = NULL;
1310         int err = 0;
1311 
1312         if (tca[TCA_OPTIONS]) {
1313                 if (!sch->ops->change) {
1314                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1315                         return -EINVAL;
1316                 }
1317                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1318                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1319                         return -EOPNOTSUPP;
1320                 }
1321                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1322                 if (err)
1323                         return err;
1324         }
1325 
1326         if (tca[TCA_STAB]) {
1327                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1328                 if (IS_ERR(stab))
1329                         return PTR_ERR(stab);
1330         }
1331 
1332         ostab = rtnl_dereference(sch->stab);
1333         rcu_assign_pointer(sch->stab, stab);
1334         qdisc_put_stab(ostab);
1335 
1336         if (tca[TCA_RATE]) {
1337                 /* NB: ignores errors from replace_estimator
1338                    because change can't be undone. */
1339                 if (sch->flags & TCQ_F_MQROOT)
1340                         goto out;
1341                 gen_replace_estimator(&sch->bstats,
1342                                       sch->cpu_bstats,
1343                                       &sch->rate_est,
1344                                       NULL,
1345                                       qdisc_root_sleeping_running(sch),
1346                                       tca[TCA_RATE]);
1347         }
1348 out:
1349         return 0;
1350 }
1351 
/* Walker state for loop detection while grafting: 'p' is the qdisc being
 * grafted, 'depth' the current recursion depth in the class tree.
 */
struct check_loop_arg {
	struct qdisc_walker	w;	/* must stay first: check_loop_fn() casts back */
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
1360 
1361 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1362 {
1363         struct check_loop_arg   arg;
1364 
1365         if (q->ops->cl_ops == NULL)
1366                 return 0;
1367 
1368         arg.w.stop = arg.w.skip = arg.w.count = 0;
1369         arg.w.fn = check_loop_fn;
1370         arg.depth = depth;
1371         arg.p = p;
1372         q->ops->cl_ops->walk(q, &arg.w);
1373         return arg.w.stop ? -ELOOP : 0;
1374 }
1375 
1376 static int
1377 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1378 {
1379         struct Qdisc *leaf;
1380         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1381         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1382 
1383         leaf = cops->leaf(q, cl);
1384         if (leaf) {
1385                 if (leaf == arg->p || arg->depth > 7)
1386                         return -ELOOP;
1387                 return check_loop(leaf, arg->p, arg->depth + 1);
1388         }
1389         return 0;
1390 }
1391 
/* Netlink attribute policy shared by the RTM_*QDISC and RTM_*TCLASS
 * message handlers.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
1402 
1403 /*
1404  * Delete/get qdisc.
1405  */
1406 
/* Handle RTM_DELQDISC and RTM_GETQDISC requests.
 *
 * The target qdisc is located either by its parent classid (tcm_parent,
 * possibly TC_H_ROOT or an ingress major) or directly by tcm_handle.
 * For RTM_DELQDISC the qdisc is detached via qdisc_graft(); for
 * RTM_GETQDISC a description is sent back with qdisc_notify().
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deletion requires admin capability; plain GET does not */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* locate the parent, then its child leaf */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		/* If a handle was also given, it must match what we found */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		/* graft NULL in place of 'q', which detaches and frees it */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1487 
1488 /*
1489  * Create/change qdisc.
1490  */
1491 
1492 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1493                            struct netlink_ext_ack *extack)
1494 {
1495         struct net *net = sock_net(skb->sk);
1496         struct tcmsg *tcm;
1497         struct nlattr *tca[TCA_MAX + 1];
1498         struct net_device *dev;
1499         u32 clid;
1500         struct Qdisc *q, *p;
1501         int err;
1502 
1503         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1504                 return -EPERM;
1505 
1506 replay:
1507         /* Reinit, just in case something touches this. */
1508         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1509                                      rtm_tca_policy, extack);
1510         if (err < 0)
1511                 return err;
1512 
1513         tcm = nlmsg_data(n);
1514         clid = tcm->tcm_parent;
1515         q = p = NULL;
1516 
1517         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1518         if (!dev)
1519                 return -ENODEV;
1520 
1521 
1522         if (clid) {
1523                 if (clid != TC_H_ROOT) {
1524                         if (clid != TC_H_INGRESS) {
1525                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1526                                 if (!p) {
1527                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1528                                         return -ENOENT;
1529                                 }
1530                                 q = qdisc_leaf(p, clid);
1531                         } else if (dev_ingress_queue_create(dev)) {
1532                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1533                         }
1534                 } else {
1535                         q = dev->qdisc;
1536                 }
1537 
1538                 /* It may be default qdisc, ignore it */
1539                 if (q && q->handle == 0)
1540                         q = NULL;
1541 
1542                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1543                         if (tcm->tcm_handle) {
1544                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1545                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1546                                         return -EEXIST;
1547                                 }
1548                                 if (TC_H_MIN(tcm->tcm_handle)) {
1549                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1550                                         return -EINVAL;
1551                                 }
1552                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1553                                 if (!q)
1554                                         goto create_n_graft;
1555                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1556                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1557                                         return -EEXIST;
1558                                 }
1559                                 if (tca[TCA_KIND] &&
1560                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1561                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1562                                         return -EINVAL;
1563                                 }
1564                                 if (q == p ||
1565                                     (p && check_loop(q, p, 0))) {
1566                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1567                                         return -ELOOP;
1568                                 }
1569                                 qdisc_refcount_inc(q);
1570                                 goto graft;
1571                         } else {
1572                                 if (!q)
1573                                         goto create_n_graft;
1574 
1575                                 /* This magic test requires explanation.
1576                                  *
1577                                  *   We know, that some child q is already
1578                                  *   attached to this parent and have choice:
1579                                  *   either to change it or to create/graft new one.
1580                                  *
1581                                  *   1. We are allowed to create/graft only
1582                                  *   if CREATE and REPLACE flags are set.
1583                                  *
1584                                  *   2. If EXCL is set, requestor wanted to say,
1585                                  *   that qdisc tcm_handle is not expected
1586                                  *   to exist, so that we choose create/graft too.
1587                                  *
1588                                  *   3. The last case is when no flags are set.
1589                                  *   Alas, it is sort of hole in API, we
1590                                  *   cannot decide what to do unambiguously.
1591                                  *   For now we select create/graft, if
1592                                  *   user gave KIND, which does not match existing.
1593                                  */
1594                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1595                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1596                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1597                                      (tca[TCA_KIND] &&
1598                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1599                                         goto create_n_graft;
1600                         }
1601                 }
1602         } else {
1603                 if (!tcm->tcm_handle) {
1604                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1605                         return -EINVAL;
1606                 }
1607                 q = qdisc_lookup(dev, tcm->tcm_handle);
1608         }
1609 
1610         /* Change qdisc parameters */
1611         if (!q) {
1612                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1613                 return -ENOENT;
1614         }
1615         if (n->nlmsg_flags & NLM_F_EXCL) {
1616                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1617                 return -EEXIST;
1618         }
1619         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1620                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1621                 return -EINVAL;
1622         }
1623         err = qdisc_change(q, tca, extack);
1624         if (err == 0)
1625                 qdisc_notify(net, skb, n, clid, NULL, q);
1626         return err;
1627 
1628 create_n_graft:
1629         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1630                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1631                 return -ENOENT;
1632         }
1633         if (clid == TC_H_INGRESS) {
1634                 if (dev_ingress_queue(dev)) {
1635                         q = qdisc_create(dev, dev_ingress_queue(dev), p,
1636                                          tcm->tcm_parent, tcm->tcm_parent,
1637                                          tca, &err, extack);
1638                 } else {
1639                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1640                         err = -ENOENT;
1641                 }
1642         } else {
1643                 struct netdev_queue *dev_queue;
1644 
1645                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1646                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1647                 else if (p)
1648                         dev_queue = p->dev_queue;
1649                 else
1650                         dev_queue = netdev_get_tx_queue(dev, 0);
1651 
1652                 q = qdisc_create(dev, dev_queue, p,
1653                                  tcm->tcm_parent, tcm->tcm_handle,
1654                                  tca, &err, extack);
1655         }
1656         if (q == NULL) {
1657                 if (err == -EAGAIN)
1658                         goto replay;
1659                 return err;
1660         }
1661 
1662 graft:
1663         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1664         if (err) {
1665                 if (q)
1666                         qdisc_put(q);
1667                 return err;
1668         }
1669 
1670         return 0;
1671 }
1672 
/* Dump the qdisc tree rooted at @root into @skb for an RTM_GETQDISC dump.
 *
 * @q_idx_p:	in/out running index of qdiscs visited for this device
 * @s_q_idx:	index to resume from; entries below it are skipped (counted
 *		but not re-dumped) so interrupted dumps restart correctly
 * @recur:	also walk the device's qdisc hash table; false when @root is
 *		a singleton (e.g. the ingress qdisc) already dumped on its own
 * @dump_invisible: include qdiscs that tc_qdisc_dump_ignore() would hide
 *
 * Returns 0 when everything fit, -1 when @skb ran out of space (the
 * caller stores *q_idx_p in cb->args so the dump can be resumed).
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	/* First the root itself ... */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	/* ... then every qdisc registered in the device's hash table. */
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;	/* skb full; resume later from *q_idx_p */
	goto out;
}
1726 
/* Netlink dump callback for RTM_GETQDISC: walk every device in the
 * namespace and dump its root qdisc tree plus its ingress qdisc.
 *
 * Resume state lives in cb->args: args[0] is the device index to resume
 * at, args[1] the qdisc index within that device.  Returns skb->len so
 * the netlink core keeps calling until a pass completes without filling
 * the skb.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	/* Parsed only for TCA_DUMP_INVISIBLE below. */
	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;	/* new device: dump all its qdiscs */
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		/* The ingress qdisc is a singleton, not in the hash (recur=false). */
		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1778 
1779 
1780 
1781 /************************************************
1782  *      Traffic classes manipulation.           *
1783  ************************************************/
1784 
/* Build one RTM_{NEW,DEL}TCLASS message describing class @cl of qdisc @q.
 *
 * Emits the tcmsg header, TCA_KIND, the class's own attributes via
 * cl_ops->dump(), and its statistics via cl_ops->dump_stats() inside a
 * TCA_STATS2/TCA_STATS compat stats dump.
 *
 * Returns skb->len on success; on any failure the partially written
 * message is trimmed off @skb and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	/* Called in a loop from class walkers; be nice to the scheduler. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;	/* cl_ops->dump() overwrites with the class id */
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1830 
1831 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1832                          struct nlmsghdr *n, struct Qdisc *q,
1833                          unsigned long cl, int event)
1834 {
1835         struct sk_buff *skb;
1836         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1837         int err = 0;
1838 
1839         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1840         if (!skb)
1841                 return -ENOBUFS;
1842 
1843         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1844                 kfree_skb(skb);
1845                 return -EINVAL;
1846         }
1847 
1848         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1849                              n->nlmsg_flags & NLM_F_ECHO);
1850         if (err > 0)
1851                 err = 0;
1852         return err;
1853 }
1854 
/* Delete class @cl of @q and notify RTNLGRP_TC about it.
 *
 * The RTM_DELTCLASS message is built *before* cops->delete() runs
 * because the class (and the data tc_fill_tclass() reads from it) is
 * gone afterwards.  If delete fails, the prepared message is dropped.
 *
 * Returns 0 on success or a negative errno.
 */
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	/* Fill first: the class must still exist while dumping it. */
	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}
1889 
1890 #ifdef CONFIG_NET_CLS
1891 
/* Per-filter walk context for tcf_node_bind(): rebind filter results
 * pointing at class @classid to internal class handle @cl.
 */
struct tcf_bind_args {
	struct tcf_walker w;	/* must be first: walkers cast back to this */
	unsigned long base;	/* base class handle passed to bind_class() */
	unsigned long cl;	/* new internal class to bind to (0 = unbind) */
	u32 classid;		/* user-visible class id being (re)bound */
};
1898 
1899 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1900 {
1901         struct tcf_bind_args *a = (void *)arg;
1902 
1903         if (tp->ops->bind_class) {
1904                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1905 
1906                 sch_tree_lock(q);
1907                 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1908                 sch_tree_unlock(q);
1909         }
1910         return 0;
1911 }
1912 
/* Per-class walk context for tc_bind_class_walker(): rebind all filters
 * referencing class @clid to the new internal handle @new_cl.
 */
struct tc_bind_class_args {
	struct qdisc_walker w;	/* must be first: walkers cast back to this */
	unsigned long new_cl;	/* new internal class handle (0 = unbind) */
	u32 portid;		/* requesting socket's portid */
	u32 clid;		/* user-visible class id whose filters move */
};
1919 
/* qdisc_walker callback: for class @cl of @q, walk every filter chain
 * and every proto of its tcf block, rebinding each filter node whose
 * classid matches a->clid to a->new_cl (via tcf_node_bind()).
 *
 * Always returns 0 so the class walk visits every class.
 */
static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		/* rtnl_held=true: we run under RTNL from tc_ctl_tclass(). */
		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}
1950 
1951 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1952                            unsigned long new_cl)
1953 {
1954         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1955         struct tc_bind_class_args args = {};
1956 
1957         if (!cops->tcf_block)
1958                 return;
1959         args.portid = portid;
1960         args.clid = clid;
1961         args.new_cl = new_cl;
1962         args.w.fn = tc_bind_class_walker;
1963         q->ops->cl_ops->walk(q, &args.w);
1964 }
1965 
1966 #else
1967 
/* Stub for !CONFIG_NET_CLS: no filters exist, so nothing to rebind. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1972 
1973 #endif
1974 
/* Handle RTM_{NEW,DEL,GET}TCLASS requests: locate the owning qdisc from
 * tcm_parent/tcm_handle, resolve the class, then create/change, delete,
 * or dump it.  Runs under RTNL.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - clear.
	   handle == X:0         - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		/* Class does not exist: only NEW with NLM_F_CREATE proceeds. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class with filters with 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just create a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
2111 
/* Context carried through a class walk while dumping classes of one
 * qdisc into a netlink dump skb.
 */
struct qdisc_dump_args {
	struct qdisc_walker	w;	/* must be first: walkers cast back to this */
	struct sk_buff		*skb;	/* dump skb being filled */
	struct netlink_callback	*cb;	/* dump state (portid, seq) */
};
2117 
2118 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2119                             struct qdisc_walker *arg)
2120 {
2121         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2122 
2123         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2124                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2125                               RTM_NEWTCLASS);
2126 }
2127 
/* Dump all classes of one qdisc @q for an RTM_GETTCLASS dump.
 *
 * @t_p/@s_t implement dump resumption across qdiscs: qdiscs with index
 * below @s_t are skipped (but counted).  cb->args[1] holds the class
 * count already dumped for the qdisc being resumed; it is cleared when
 * moving on to a fresh qdisc.
 *
 * Returns 0 to continue, -1 when the skb filled up mid-walk.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip hidden/classless qdiscs and, when the request names a
	 * parent, any qdisc other than the requested one.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];	/* classes already dumped last pass */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
2156 
/* Dump classes of @root and, unless a specific parent was requested,
 * of every qdisc in the device's hash table.
 *
 * When tcm->tcm_parent is set, only the single matching qdisc (found
 * via qdisc_match_from_root()) is dumped in addition to @root.
 *
 * Returns 0 on completion, -1 when the skb filled up.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	/* Singletons (e.g. ingress) have no device/hash to walk. */
	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
2187 
/* Netlink dump callback for RTM_GETTCLASS: dump classes of the device
 * named by tcm_ifindex, covering both the root qdisc tree and the
 * ingress qdisc.  Resume state (qdisc index) lives in cb->args[0];
 * cb->args[1] is managed by tc_dump_tclass_qdisc().
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	/* Takes a reference; released via dev_put() below. */
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
2220 
2221 #ifdef CONFIG_PROC_FS
/* /proc/net/psched: four hex words read by iproute2 to convert between
 * userspace ticks and time.  Fields: ns per psched "us" tick, ns per
 * internal PSCHED tick, legacy 1MHz clock resolution constant, and the
 * hrtimer resolution in Hz.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,	/* historical fixed value kept for ABI */
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
2231 
2232 static int __net_init psched_net_init(struct net *net)
2233 {
2234         struct proc_dir_entry *e;
2235 
2236         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2237         if (e == NULL)
2238                 return -ENOMEM;
2239 
2240         return 0;
2241 }
2242 
/* Per-netns teardown: remove /proc/net/psched. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2247 #else
/* Stub for !CONFIG_PROC_FS: nothing to create. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2252 
/* Stub for !CONFIG_PROC_FS: nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2256 #endif
2257 
/* Per-network-namespace hooks for the /proc/net/psched entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2262 
/* Subsystem init: register pernet ops, the built-in qdiscs, and the
 * rtnetlink handlers for qdisc and class messages.
 *
 * The built-in register_qdisc() calls cannot clash with each other at
 * boot, so their return values are not checked here.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* GET handlers also get a dump callback; NEW/DEL are doit-only. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}
2292 
2293 subsys_initcall(pktsched_init);

/* [<][>][^][v][top][bottom][index][help] */