/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#include "sched.h"

#include <linux/slab.h>
#include <linux/irq_work.h>

int sched_rr_timeslice = RR_TIMESLICE;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

/*
 * Periodic replenishment timer for an rt_bandwidth.  The bandwidth lock is
 * dropped around do_sched_rt_period_timer() because that path takes per-rq
 * locks; the timer re-arms itself (HRTIMER_RESTART) until the replenish
 * pass reports the domain idle.
 */
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;
	int overrun;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, rt_b->rt_period);
		if (!overrun)
			break;

		/* Drop the lock: the replenish path acquires rq->lock. */
		raw_spin_unlock(&rt_b->rt_runtime_lock);
		idle = do_sched_rt_period_timer(rt_b, overrun);
		raw_spin_lock(&rt_b->rt_runtime_lock);
	}
	if (idle)
		rt_b->rt_period_active = 0;
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

/*
 * Initialize an rt_bandwidth with the given period/runtime (both in ns;
 * the period is stored as ktime) and set up its replenishment timer.
 */
void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
	rt_b->rt_period = ns_to_ktime(period);
	rt_b->rt_runtime = runtime;

	raw_spin_lock_init(&rt_b->rt_runtime_lock);

	hrtimer_init(&rt_b->rt_period_timer,
			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
}

/*
 * Arm the replenishment timer if throttling is in effect and it is not
 * already active.  rt_period_active guards against double-starting.
 */
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}

#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
static void push_irq_work_func(struct irq_work *work);
#endif

/*
 * Initialize a per-rq (or per-group) RT runqueue: empty priority array,
 * SMP push/pull state, and zeroed runtime accounting.
 */
void init_rt_rq(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array;
	int i;

	array = &rt_rq->active;
	for (i = 0; i < MAX_RT_PRIO; i++) {
		INIT_LIST_HEAD(array->queue + i);
		__clear_bit(i, array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->highest_prio.next = MAX_RT_PRIO;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);

#ifdef HAVE_RT_PUSH_IPI
	rt_rq->push_flags = 0;
	rt_rq->push_cpu = nr_cpu_ids;
	raw_spin_lock_init(&rt_rq->push_lock);
	init_irq_work(&rt_rq->push_work, push_irq_work_func);
#endif
#endif /* CONFIG_SMP */
	/* We start in dequeued state, because no RT tasks are queued */
	rt_rq->rt_queued = 0;

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	hrtimer_cancel(&rt_b->rt_period_timer);
}

/* A group entity has my_q set; a task entity does not. */
#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_se->rt_rq;

	return rt_rq->rq;
}

/*
 * Free all per-cpu rt_rq/rt_se of a task group.  Safe on a partially
 * constructed group: each array and element is checked before kfree.
 */
void free_rt_sched_group(struct task_group *tg)
{
	int i;

	if (tg->rt_se)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

	for_each_possible_cpu(i) {
		if (tg->rt_rq)
			kfree(tg->rt_rq[i]);
		if (tg->rt_se)
			kfree(tg->rt_se[i]);
	}

	kfree(tg->rt_rq);
	kfree(tg->rt_se);
}

/*
 * Wire one cpu's rt_rq and rt_se into a task group's hierarchy.
 * A NULL rt_se means this is the root group's queue on that cpu.
 */
void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	if (!parent)
		rt_se->rt_rq = &rq->rt;
	else
		rt_se->rt_rq = parent->my_q;

	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}

/*
 * Allocate per-cpu rt_rq/rt_se for a new task group.  Returns 1 on
 * success, 0 on failure (caller frees via free_rt_sched_group()).
 */
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	/* NOTE(review): kcalloc(nr_cpu_ids, sizeof(rt_rq), ...) would add
	 * overflow checking here — confirm against tree conventions. */
	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq);
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	kfree(rt_rq);
err:
	return 0;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);

	return task_rq(p);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	return rq->rt.highest_prio.curr > prev->prio;
}

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

/*
 * An rq is "overloaded" when it has more than one RT task queued and at
 * least one of them can migrate; update the root-domain mask to match.
 */
static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	/* Migration state is tracked on the top-level (per-rq) rt_rq only. */
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	/* Migration state is tracked on the top-level (per-rq) rt_rq only. */
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);

static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);

static inline void queue_push_tasks(struct rq *rq)
{
	if (!has_pushable_tasks(rq))
		return;

	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}

static inline void queue_pull_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	/* plist nodes must be re-initialized to change their priority key. */
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the highest prio pushable task */
	if (p->prio < rq->rt.highest_prio.next)
		rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the new highest prio pushable task */
	if (has_pushable_tasks(rq)) {
		p = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);
		rq->rt.highest_prio.next = p->prio;
	} else
		rq->rt.highest_prio.next = MAX_RT_PRIO;
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	return false;
}

static inline void pull_rt_task(struct rq *this_rq)
{
}

static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return !list_empty(&rt_se->run_list);
}

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

/*
 * Advance to the next task group on the global list, skipping autogroups;
 * returns NULL at the end of the list.
 */
static inline struct task_group *next_task_group(struct task_group *tg)
{
	do {
		tg = list_entry_rcu(tg->list.next,
			typeof(struct task_group), list);
	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));

	if (&tg->list == &task_groups)
		tg = NULL;

	return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)					\
	for (iter = container_of(&task_groups, typeof(*iter), list);	\
		(iter = next_task_group(iter)) &&			\
		(rt_rq = iter->rt_rq[cpu_of(rq)]);)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);

/* Re-queue a (previously throttled) group rt_rq and preempt if needed. */
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct rq *rq = rq_of_rt_rq(rt_rq);
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq);

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (!rt_se)
			enqueue_top_rt_rq(rt_rq);
		else if (!on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, false);

		if (rt_rq->highest_prio.curr < curr->prio)
			resched_curr(rq);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	struct sched_rt_entity *rt_se;
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se)
		dequeue_top_rt_rq(rt_rq);
	else if (on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se);
}

/* A boosted (PI) entity keeps the group runnable even when throttled. */
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}
527static int rt_se_boosted(struct sched_rt_entity *rt_se) 528{ 529 struct rt_rq *rt_rq = group_rt_rq(rt_se); 530 struct task_struct *p; 531 532 if (rt_rq) 533 return !!rt_rq->rt_nr_boosted; 534 535 p = rt_task_of(rt_se); 536 return p->prio != p->normal_prio; 537} 538 539#ifdef CONFIG_SMP 540static inline const struct cpumask *sched_rt_period_mask(void) 541{ 542 return this_rq()->rd->span; 543} 544#else 545static inline const struct cpumask *sched_rt_period_mask(void) 546{ 547 return cpu_online_mask; 548} 549#endif 550 551static inline 552struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 553{ 554 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; 555} 556 557static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 558{ 559 return &rt_rq->tg->rt_bandwidth; 560} 561 562#else /* !CONFIG_RT_GROUP_SCHED */ 563 564static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 565{ 566 return rt_rq->rt_runtime; 567} 568 569static inline u64 sched_rt_period(struct rt_rq *rt_rq) 570{ 571 return ktime_to_ns(def_rt_bandwidth.rt_period); 572} 573 574typedef struct rt_rq *rt_rq_iter_t; 575 576#define for_each_rt_rq(rt_rq, iter, rq) \ 577 for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) 578 579#define for_each_sched_rt_entity(rt_se) \ 580 for (; rt_se; rt_se = NULL) 581 582static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 583{ 584 return NULL; 585} 586 587static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 588{ 589 struct rq *rq = rq_of_rt_rq(rt_rq); 590 591 if (!rt_rq->rt_nr_running) 592 return; 593 594 enqueue_top_rt_rq(rt_rq); 595 resched_curr(rq); 596} 597 598static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 599{ 600 dequeue_top_rt_rq(rt_rq); 601} 602 603static inline int rt_rq_throttled(struct rt_rq *rt_rq) 604{ 605 return rt_rq->rt_throttled; 606} 607 608static inline const struct cpumask *sched_rt_period_mask(void) 609{ 610 return cpu_online_mask; 611} 612 613static inline 
614struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 615{ 616 return &cpu_rq(cpu)->rt; 617} 618 619static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 620{ 621 return &def_rt_bandwidth; 622} 623 624#endif /* CONFIG_RT_GROUP_SCHED */ 625 626bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) 627{ 628 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 629 630 return (hrtimer_active(&rt_b->rt_period_timer) || 631 rt_rq->rt_time < rt_b->rt_runtime); 632} 633 634#ifdef CONFIG_SMP 635/* 636 * We ran out of runtime, see if we can borrow some from our neighbours. 637 */ 638static void do_balance_runtime(struct rt_rq *rt_rq) 639{ 640 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 641 struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; 642 int i, weight; 643 u64 rt_period; 644 645 weight = cpumask_weight(rd->span); 646 647 raw_spin_lock(&rt_b->rt_runtime_lock); 648 rt_period = ktime_to_ns(rt_b->rt_period); 649 for_each_cpu(i, rd->span) { 650 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 651 s64 diff; 652 653 if (iter == rt_rq) 654 continue; 655 656 raw_spin_lock(&iter->rt_runtime_lock); 657 /* 658 * Either all rqs have inf runtime and there's nothing to steal 659 * or __disable_runtime() below sets a specific rq to inf to 660 * indicate its been disabled and disalow stealing. 661 */ 662 if (iter->rt_runtime == RUNTIME_INF) 663 goto next; 664 665 /* 666 * From runqueues with spare time, take 1/n part of their 667 * spare time, but no more than our period. 
668 */ 669 diff = iter->rt_runtime - iter->rt_time; 670 if (diff > 0) { 671 diff = div_u64((u64)diff, weight); 672 if (rt_rq->rt_runtime + diff > rt_period) 673 diff = rt_period - rt_rq->rt_runtime; 674 iter->rt_runtime -= diff; 675 rt_rq->rt_runtime += diff; 676 if (rt_rq->rt_runtime == rt_period) { 677 raw_spin_unlock(&iter->rt_runtime_lock); 678 break; 679 } 680 } 681next: 682 raw_spin_unlock(&iter->rt_runtime_lock); 683 } 684 raw_spin_unlock(&rt_b->rt_runtime_lock); 685} 686 687/* 688 * Ensure this RQ takes back all the runtime it lend to its neighbours. 689 */ 690static void __disable_runtime(struct rq *rq) 691{ 692 struct root_domain *rd = rq->rd; 693 rt_rq_iter_t iter; 694 struct rt_rq *rt_rq; 695 696 if (unlikely(!scheduler_running)) 697 return; 698 699 for_each_rt_rq(rt_rq, iter, rq) { 700 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 701 s64 want; 702 int i; 703 704 raw_spin_lock(&rt_b->rt_runtime_lock); 705 raw_spin_lock(&rt_rq->rt_runtime_lock); 706 /* 707 * Either we're all inf and nobody needs to borrow, or we're 708 * already disabled and thus have nothing to do, or we have 709 * exactly the right amount of runtime to take out. 710 */ 711 if (rt_rq->rt_runtime == RUNTIME_INF || 712 rt_rq->rt_runtime == rt_b->rt_runtime) 713 goto balanced; 714 raw_spin_unlock(&rt_rq->rt_runtime_lock); 715 716 /* 717 * Calculate the difference between what we started out with 718 * and what we current have, that's the amount of runtime 719 * we lend and now have to reclaim. 720 */ 721 want = rt_b->rt_runtime - rt_rq->rt_runtime; 722 723 /* 724 * Greedy reclaim, take back as much as we can. 725 */ 726 for_each_cpu(i, rd->span) { 727 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 728 s64 diff; 729 730 /* 731 * Can't reclaim from ourselves or disabled runqueues. 
732 */ 733 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) 734 continue; 735 736 raw_spin_lock(&iter->rt_runtime_lock); 737 if (want > 0) { 738 diff = min_t(s64, iter->rt_runtime, want); 739 iter->rt_runtime -= diff; 740 want -= diff; 741 } else { 742 iter->rt_runtime -= want; 743 want -= want; 744 } 745 raw_spin_unlock(&iter->rt_runtime_lock); 746 747 if (!want) 748 break; 749 } 750 751 raw_spin_lock(&rt_rq->rt_runtime_lock); 752 /* 753 * We cannot be left wanting - that would mean some runtime 754 * leaked out of the system. 755 */ 756 BUG_ON(want); 757balanced: 758 /* 759 * Disable all the borrow logic by pretending we have inf 760 * runtime - in which case borrowing doesn't make sense. 761 */ 762 rt_rq->rt_runtime = RUNTIME_INF; 763 rt_rq->rt_throttled = 0; 764 raw_spin_unlock(&rt_rq->rt_runtime_lock); 765 raw_spin_unlock(&rt_b->rt_runtime_lock); 766 767 /* Make rt_rq available for pick_next_task() */ 768 sched_rt_rq_enqueue(rt_rq); 769 } 770} 771 772static void __enable_runtime(struct rq *rq) 773{ 774 rt_rq_iter_t iter; 775 struct rt_rq *rt_rq; 776 777 if (unlikely(!scheduler_running)) 778 return; 779 780 /* 781 * Reset each runqueue's bandwidth settings 782 */ 783 for_each_rt_rq(rt_rq, iter, rq) { 784 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 785 786 raw_spin_lock(&rt_b->rt_runtime_lock); 787 raw_spin_lock(&rt_rq->rt_runtime_lock); 788 rt_rq->rt_runtime = rt_b->rt_runtime; 789 rt_rq->rt_time = 0; 790 rt_rq->rt_throttled = 0; 791 raw_spin_unlock(&rt_rq->rt_runtime_lock); 792 raw_spin_unlock(&rt_b->rt_runtime_lock); 793 } 794} 795 796static void balance_runtime(struct rt_rq *rt_rq) 797{ 798 if (!sched_feat(RT_RUNTIME_SHARE)) 799 return; 800 801 if (rt_rq->rt_time > rt_rq->rt_runtime) { 802 raw_spin_unlock(&rt_rq->rt_runtime_lock); 803 do_balance_runtime(rt_rq); 804 raw_spin_lock(&rt_rq->rt_runtime_lock); 805 } 806} 807#else /* !CONFIG_SMP */ 808static inline void balance_runtime(struct rt_rq *rt_rq) {} 809#endif /* CONFIG_SMP */ 810 811static 
int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) 812{ 813 int i, idle = 1, throttled = 0; 814 const struct cpumask *span; 815 816 span = sched_rt_period_mask(); 817#ifdef CONFIG_RT_GROUP_SCHED 818 /* 819 * FIXME: isolated CPUs should really leave the root task group, 820 * whether they are isolcpus or were isolated via cpusets, lest 821 * the timer run on a CPU which does not service all runqueues, 822 * potentially leaving other CPUs indefinitely throttled. If 823 * isolation is really required, the user will turn the throttle 824 * off to kill the perturbations it causes anyway. Meanwhile, 825 * this maintains functionality for boot and/or troubleshooting. 826 */ 827 if (rt_b == &root_task_group.rt_bandwidth) 828 span = cpu_online_mask; 829#endif 830 for_each_cpu(i, span) { 831 int enqueue = 0; 832 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 833 struct rq *rq = rq_of_rt_rq(rt_rq); 834 835 raw_spin_lock(&rq->lock); 836 if (rt_rq->rt_time) { 837 u64 runtime; 838 839 raw_spin_lock(&rt_rq->rt_runtime_lock); 840 if (rt_rq->rt_throttled) 841 balance_runtime(rt_rq); 842 runtime = rt_rq->rt_runtime; 843 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); 844 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { 845 rt_rq->rt_throttled = 0; 846 enqueue = 1; 847 848 /* 849 * When we're idle and a woken (rt) task is 850 * throttled check_preempt_curr() will set 851 * skip_update and the time between the wakeup 852 * and this unthrottle will get accounted as 853 * 'runtime'. 
854 */ 855 if (rt_rq->rt_nr_running && rq->curr == rq->idle) 856 rq_clock_skip_update(rq, false); 857 } 858 if (rt_rq->rt_time || rt_rq->rt_nr_running) 859 idle = 0; 860 raw_spin_unlock(&rt_rq->rt_runtime_lock); 861 } else if (rt_rq->rt_nr_running) { 862 idle = 0; 863 if (!rt_rq_throttled(rt_rq)) 864 enqueue = 1; 865 } 866 if (rt_rq->rt_throttled) 867 throttled = 1; 868 869 if (enqueue) 870 sched_rt_rq_enqueue(rt_rq); 871 raw_spin_unlock(&rq->lock); 872 } 873 874 if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) 875 return 1; 876 877 return idle; 878} 879 880static inline int rt_se_prio(struct sched_rt_entity *rt_se) 881{ 882#ifdef CONFIG_RT_GROUP_SCHED 883 struct rt_rq *rt_rq = group_rt_rq(rt_se); 884 885 if (rt_rq) 886 return rt_rq->highest_prio.curr; 887#endif 888 889 return rt_task_of(rt_se)->prio; 890} 891 892static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) 893{ 894 u64 runtime = sched_rt_runtime(rt_rq); 895 896 if (rt_rq->rt_throttled) 897 return rt_rq_throttled(rt_rq); 898 899 if (runtime >= sched_rt_period(rt_rq)) 900 return 0; 901 902 balance_runtime(rt_rq); 903 runtime = sched_rt_runtime(rt_rq); 904 if (runtime == RUNTIME_INF) 905 return 0; 906 907 if (rt_rq->rt_time > runtime) { 908 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 909 910 /* 911 * Don't actually throttle groups that have no runtime assigned 912 * but accrue some time due to boosting. 913 */ 914 if (likely(rt_b->rt_runtime)) { 915 rt_rq->rt_throttled = 1; 916 printk_deferred_once("sched: RT throttling activated\n"); 917 } else { 918 /* 919 * In case we did anyway, make it go away, 920 * replenishment is a joke, since it will replenish us 921 * with exactly 0 ns. 922 */ 923 rt_rq->rt_time = 0; 924 } 925 926 if (rt_rq_throttled(rt_rq)) { 927 sched_rt_rq_dequeue(rt_rq); 928 return 1; 929 } 930 } 931 932 return 0; 933} 934 935/* 936 * Update the current task's runtime statistics. Skip current tasks that 937 * are not in our scheduling class. 
938 */ 939static void update_curr_rt(struct rq *rq) 940{ 941 struct task_struct *curr = rq->curr; 942 struct sched_rt_entity *rt_se = &curr->rt; 943 u64 delta_exec; 944 945 if (curr->sched_class != &rt_sched_class) 946 return; 947 948 delta_exec = rq_clock_task(rq) - curr->se.exec_start; 949 if (unlikely((s64)delta_exec <= 0)) 950 return; 951 952 schedstat_set(curr->se.statistics.exec_max, 953 max(curr->se.statistics.exec_max, delta_exec)); 954 955 curr->se.sum_exec_runtime += delta_exec; 956 account_group_exec_runtime(curr, delta_exec); 957 958 curr->se.exec_start = rq_clock_task(rq); 959 cpuacct_charge(curr, delta_exec); 960 961 sched_rt_avg_update(rq, delta_exec); 962 963 if (!rt_bandwidth_enabled()) 964 return; 965 966 for_each_sched_rt_entity(rt_se) { 967 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 968 969 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 970 raw_spin_lock(&rt_rq->rt_runtime_lock); 971 rt_rq->rt_time += delta_exec; 972 if (sched_rt_runtime_exceeded(rt_rq)) 973 resched_curr(rq); 974 raw_spin_unlock(&rt_rq->rt_runtime_lock); 975 } 976 } 977} 978 979static void 980dequeue_top_rt_rq(struct rt_rq *rt_rq) 981{ 982 struct rq *rq = rq_of_rt_rq(rt_rq); 983 984 BUG_ON(&rq->rt != rt_rq); 985 986 if (!rt_rq->rt_queued) 987 return; 988 989 BUG_ON(!rq->nr_running); 990 991 sub_nr_running(rq, rt_rq->rt_nr_running); 992 rt_rq->rt_queued = 0; 993} 994 995static void 996enqueue_top_rt_rq(struct rt_rq *rt_rq) 997{ 998 struct rq *rq = rq_of_rt_rq(rt_rq); 999 1000 BUG_ON(&rq->rt != rt_rq); 1001 1002 if (rt_rq->rt_queued) 1003 return; 1004 if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) 1005 return; 1006 1007 add_nr_running(rq, rt_rq->rt_nr_running); 1008 rt_rq->rt_queued = 1; 1009} 1010 1011#if defined CONFIG_SMP 1012 1013static void 1014inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 1015{ 1016 struct rq *rq = rq_of_rt_rq(rt_rq); 1017 1018#ifdef CONFIG_RT_GROUP_SCHED 1019 /* 1020 * Change rq's cpupri only if rt_rq is the top queue. 
1021 */ 1022 if (&rq->rt != rt_rq) 1023 return; 1024#endif 1025 if (rq->online && prio < prev_prio) 1026 cpupri_set(&rq->rd->cpupri, rq->cpu, prio); 1027} 1028 1029static void 1030dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 1031{ 1032 struct rq *rq = rq_of_rt_rq(rt_rq); 1033 1034#ifdef CONFIG_RT_GROUP_SCHED 1035 /* 1036 * Change rq's cpupri only if rt_rq is the top queue. 1037 */ 1038 if (&rq->rt != rt_rq) 1039 return; 1040#endif 1041 if (rq->online && rt_rq->highest_prio.curr != prev_prio) 1042 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); 1043} 1044 1045#else /* CONFIG_SMP */ 1046 1047static inline 1048void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 1049static inline 1050void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 1051 1052#endif /* CONFIG_SMP */ 1053 1054#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 1055static void 1056inc_rt_prio(struct rt_rq *rt_rq, int prio) 1057{ 1058 int prev_prio = rt_rq->highest_prio.curr; 1059 1060 if (prio < prev_prio) 1061 rt_rq->highest_prio.curr = prio; 1062 1063 inc_rt_prio_smp(rt_rq, prio, prev_prio); 1064} 1065 1066static void 1067dec_rt_prio(struct rt_rq *rt_rq, int prio) 1068{ 1069 int prev_prio = rt_rq->highest_prio.curr; 1070 1071 if (rt_rq->rt_nr_running) { 1072 1073 WARN_ON(prio < prev_prio); 1074 1075 /* 1076 * This may have been our highest task, and therefore 1077 * we may have some recomputation to do 1078 */ 1079 if (prio == prev_prio) { 1080 struct rt_prio_array *array = &rt_rq->active; 1081 1082 rt_rq->highest_prio.curr = 1083 sched_find_first_bit(array->bitmap); 1084 } 1085 1086 } else 1087 rt_rq->highest_prio.curr = MAX_RT_PRIO; 1088 1089 dec_rt_prio_smp(rt_rq, prio, prev_prio); 1090} 1091 1092#else 1093 1094static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} 1095static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} 1096 1097#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ 1098 1099#ifdef 
CONFIG_RT_GROUP_SCHED

/*
 * Group scheduling variant: account PI-boosted entities on this rt_rq and
 * make sure the group's bandwidth replenishment timer is running.
 */
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted--;

	/* A boosted count with no queued tasks means broken accounting */
	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

/* !RT_GROUP_SCHED: only the global default RT bandwidth needs starting */
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

/*
 * Number of tasks @rt_se contributes to its rt_rq: a group entity counts
 * every task queued below it, a plain task entity counts as one.
 */
static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);

	if (group_rq)
		return group_rq->rt_nr_running;
	else
		return 1;
}

/* Update all per-rt_rq accounting (nr_running, prio, migration, group) on enqueue */
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(prio));
	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);

	inc_rt_prio(rt_rq, prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

/* Mirror of inc_rt_tasks() for dequeue */
static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);

	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}

/* Put @rt_se on its priority queue; @head selects head vs tail insertion */
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if its throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * get throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
		return;

	if (head)
		list_add(&rt_se->run_list, queue);
	else
		list_add_tail(&rt_se->run_list, queue);
	__set_bit(rt_se_prio(rt_se), array->bitmap);

	inc_rt_tasks(rt_se, rt_rq);
}

/* Remove @rt_se from its queue, clearing the prio bit if it was the last entry */
static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	list_del_init(&rt_se->run_list);
	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	dec_rt_tasks(rt_se, rt_rq);
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
{
	struct sched_rt_entity *back = NULL;

	/* Link the hierarchy bottom-up so it can be walked top-down below */
	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	dequeue_top_rt_rq(rt_rq_of_se(back));

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se);
	}
}

/* Enqueue @rt_se and every ancestor entity, then refresh rq-level state */
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, head);
	enqueue_top_rt_rq(&rq->rt);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se);

	/* Re-add ancestor group entities that still have runnable tasks */
	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, false);
	}
	enqueue_top_rt_rq(&rq->rt);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	/* A wakeup resets the RLIMIT_RTTIME watchdog accounting */
	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se);

	dequeue_pushable_task(rq, p);
}

/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}

/* Requeue @p at every level of its scheduling hierarchy */
static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

/* sched_yield() for RT: move current to the tail of its priority queue */
static void yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr, 0);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *curr;
	struct rq *rq;

	/* For anything but wake ups, just return the task_cpu */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher task
	 * around just because it gave up its CPU, perhaps for a
	 * lock?
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
1346 * 1347 * Otherwise, just let it ride on the affined RQ and the 1348 * post-schedule router will push the preempted task away 1349 * 1350 * This test is optimistic, if we get it wrong the load-balancer 1351 * will have to sort it out. 1352 */ 1353 if (curr && unlikely(rt_task(curr)) && 1354 (curr->nr_cpus_allowed < 2 || 1355 curr->prio <= p->prio)) { 1356 int target = find_lowest_rq(p); 1357 1358 /* 1359 * Don't bother moving it if the destination CPU is 1360 * not running a lower priority task. 1361 */ 1362 if (target != -1 && 1363 p->prio < cpu_rq(target)->rt.highest_prio.curr) 1364 cpu = target; 1365 } 1366 rcu_read_unlock(); 1367 1368out: 1369 return cpu; 1370} 1371 1372static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1373{ 1374 /* 1375 * Current can't be migrated, useless to reschedule, 1376 * let's hope p can move out. 1377 */ 1378 if (rq->curr->nr_cpus_allowed == 1 || 1379 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) 1380 return; 1381 1382 /* 1383 * p is migratable, so let's not schedule it and 1384 * see if it is pushed or pulled somewhere else. 
1385 */ 1386 if (p->nr_cpus_allowed != 1 1387 && cpupri_find(&rq->rd->cpupri, p, NULL)) 1388 return; 1389 1390 /* 1391 * There appears to be other cpus that can accept 1392 * current and none to run 'p', so lets reschedule 1393 * to try and push current away: 1394 */ 1395 requeue_task_rt(rq, p, 1); 1396 resched_curr(rq); 1397} 1398 1399#endif /* CONFIG_SMP */ 1400 1401/* 1402 * Preempt the current task with a newly woken task if needed: 1403 */ 1404static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) 1405{ 1406 if (p->prio < rq->curr->prio) { 1407 resched_curr(rq); 1408 return; 1409 } 1410 1411#ifdef CONFIG_SMP 1412 /* 1413 * If: 1414 * 1415 * - the newly woken task is of equal priority to the current task 1416 * - the newly woken task is non-migratable while current is migratable 1417 * - current will be preempted on the next reschedule 1418 * 1419 * we should check to see if current can readily move to a different 1420 * cpu. If so, we will reschedule to allow the push logic to try 1421 * to move current somewhere else, making room for our non-migratable 1422 * task. 
1423 */ 1424 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) 1425 check_preempt_equal_prio(rq, p); 1426#endif 1427} 1428 1429static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, 1430 struct rt_rq *rt_rq) 1431{ 1432 struct rt_prio_array *array = &rt_rq->active; 1433 struct sched_rt_entity *next = NULL; 1434 struct list_head *queue; 1435 int idx; 1436 1437 idx = sched_find_first_bit(array->bitmap); 1438 BUG_ON(idx >= MAX_RT_PRIO); 1439 1440 queue = array->queue + idx; 1441 next = list_entry(queue->next, struct sched_rt_entity, run_list); 1442 1443 return next; 1444} 1445 1446static struct task_struct *_pick_next_task_rt(struct rq *rq) 1447{ 1448 struct sched_rt_entity *rt_se; 1449 struct task_struct *p; 1450 struct rt_rq *rt_rq = &rq->rt; 1451 1452 do { 1453 rt_se = pick_next_rt_entity(rq, rt_rq); 1454 BUG_ON(!rt_se); 1455 rt_rq = group_rt_rq(rt_se); 1456 } while (rt_rq); 1457 1458 p = rt_task_of(rt_se); 1459 p->se.exec_start = rq_clock_task(rq); 1460 1461 return p; 1462} 1463 1464static struct task_struct * 1465pick_next_task_rt(struct rq *rq, struct task_struct *prev) 1466{ 1467 struct task_struct *p; 1468 struct rt_rq *rt_rq = &rq->rt; 1469 1470 if (need_pull_rt_task(rq, prev)) { 1471 /* 1472 * This is OK, because current is on_cpu, which avoids it being 1473 * picked for load-balance and preemption/IRQs are still 1474 * disabled avoiding further scheduler activity on it and we're 1475 * being very careful to re-start the picking loop. 1476 */ 1477 lockdep_unpin_lock(&rq->lock); 1478 pull_rt_task(rq); 1479 lockdep_pin_lock(&rq->lock); 1480 /* 1481 * pull_rt_task() can drop (and re-acquire) rq->lock; this 1482 * means a dl or stop task can slip in, in which case we need 1483 * to re-start task selection. 1484 */ 1485 if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) || 1486 rq->dl.dl_nr_running)) 1487 return RETRY_TASK; 1488 } 1489 1490 /* 1491 * We may dequeue prev's rt_rq in put_prev_task(). 
	 * So, we update time before rt_nr_running check.
	 */
	if (prev->sched_class == &rt_sched_class)
		update_curr_rt(rq);

	if (!rt_rq->rt_queued)
		return NULL;

	put_prev_task(rq, prev);

	p = _pick_next_task_rt(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);

	queue_push_tasks(rq);

	return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active
	 */
	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

/* Can @p run on @cpu: not currently executing and @cpu is in its mask */
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
		return 1;
	return 0;
}

/*
 * Return the highest pushable rq's task, which is suitable to be executed
 * on the cpu, NULL otherwise
 */
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	/* plist is sorted by prio, so the first match is the highest one */
	plist_for_each_entry(p, head, pushable_tasks) {
		if (pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

/* Per-cpu scratch mask for find_lowest_rq(), allocated in init_sched_rt_class() */
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of cpus representing the
	 * lowest priority tasks in the system. Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last cpu that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which cpu is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;
	return -1;
}

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		/* Unlocked peek before paying for double_lock_balance() */
		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			lowest_rq = NULL;
			break;
		}

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu,
						       tsk_cpus_allowed(task)) ||
				     task_running(rq, task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it.
 */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

/* Highest-priority task on @rq's pushable list, or NULL if there is none */
static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	/* Sanity: a pushable task is queued here, migratable and not running */
	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!rt_task(p));

	return p;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that the next_task slipped in of
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
1758 */ 1759 task = pick_next_pushable_task(rq); 1760 if (task_cpu(next_task) == rq->cpu && task == next_task) { 1761 /* 1762 * The task hasn't migrated, and is still the next 1763 * eligible task, but we failed to find a run-queue 1764 * to push it to. Do not retry in this case, since 1765 * other cpus will pull from us when ready. 1766 */ 1767 goto out; 1768 } 1769 1770 if (!task) 1771 /* No more tasks, just exit */ 1772 goto out; 1773 1774 /* 1775 * Something has shifted, try again. 1776 */ 1777 put_task_struct(next_task); 1778 next_task = task; 1779 goto retry; 1780 } 1781 1782 deactivate_task(rq, next_task, 0); 1783 set_task_cpu(next_task, lowest_rq->cpu); 1784 activate_task(lowest_rq, next_task, 0); 1785 ret = 1; 1786 1787 resched_curr(lowest_rq); 1788 1789 double_unlock_balance(rq, lowest_rq); 1790 1791out: 1792 put_task_struct(next_task); 1793 1794 return ret; 1795} 1796 1797static void push_rt_tasks(struct rq *rq) 1798{ 1799 /* push_rt_task will return true if it moved an RT */ 1800 while (push_rt_task(rq)) 1801 ; 1802} 1803 1804#ifdef HAVE_RT_PUSH_IPI 1805/* 1806 * The search for the next cpu always starts at rq->cpu and ends 1807 * when we reach rq->cpu again. It will never return rq->cpu. 1808 * This returns the next cpu to check, or nr_cpu_ids if the loop 1809 * is complete. 1810 * 1811 * rq->rt.push_cpu holds the last cpu returned by this function, 1812 * or if this is the first instance, it must hold rq->cpu. 1813 */ 1814static int rto_next_cpu(struct rq *rq) 1815{ 1816 int prev_cpu = rq->rt.push_cpu; 1817 int cpu; 1818 1819 cpu = cpumask_next(prev_cpu, rq->rd->rto_mask); 1820 1821 /* 1822 * If the previous cpu is less than the rq's CPU, then it already 1823 * passed the end of the mask, and has started from the beginning. 1824 * We end if the next CPU is greater or equal to rq's CPU. 
1825 */ 1826 if (prev_cpu < rq->cpu) { 1827 if (cpu >= rq->cpu) 1828 return nr_cpu_ids; 1829 1830 } else if (cpu >= nr_cpu_ids) { 1831 /* 1832 * We passed the end of the mask, start at the beginning. 1833 * If the result is greater or equal to the rq's CPU, then 1834 * the loop is finished. 1835 */ 1836 cpu = cpumask_first(rq->rd->rto_mask); 1837 if (cpu >= rq->cpu) 1838 return nr_cpu_ids; 1839 } 1840 rq->rt.push_cpu = cpu; 1841 1842 /* Return cpu to let the caller know if the loop is finished or not */ 1843 return cpu; 1844} 1845 1846static int find_next_push_cpu(struct rq *rq) 1847{ 1848 struct rq *next_rq; 1849 int cpu; 1850 1851 while (1) { 1852 cpu = rto_next_cpu(rq); 1853 if (cpu >= nr_cpu_ids) 1854 break; 1855 next_rq = cpu_rq(cpu); 1856 1857 /* Make sure the next rq can push to this rq */ 1858 if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr) 1859 break; 1860 } 1861 1862 return cpu; 1863} 1864 1865#define RT_PUSH_IPI_EXECUTING 1 1866#define RT_PUSH_IPI_RESTART 2 1867 1868static void tell_cpu_to_push(struct rq *rq) 1869{ 1870 int cpu; 1871 1872 if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { 1873 raw_spin_lock(&rq->rt.push_lock); 1874 /* Make sure it's still executing */ 1875 if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { 1876 /* 1877 * Tell the IPI to restart the loop as things have 1878 * changed since it started. 
1879 */ 1880 rq->rt.push_flags |= RT_PUSH_IPI_RESTART; 1881 raw_spin_unlock(&rq->rt.push_lock); 1882 return; 1883 } 1884 raw_spin_unlock(&rq->rt.push_lock); 1885 } 1886 1887 /* When here, there's no IPI going around */ 1888 1889 rq->rt.push_cpu = rq->cpu; 1890 cpu = find_next_push_cpu(rq); 1891 if (cpu >= nr_cpu_ids) 1892 return; 1893 1894 rq->rt.push_flags = RT_PUSH_IPI_EXECUTING; 1895 1896 irq_work_queue_on(&rq->rt.push_work, cpu); 1897} 1898 1899/* Called from hardirq context */ 1900static void try_to_push_tasks(void *arg) 1901{ 1902 struct rt_rq *rt_rq = arg; 1903 struct rq *rq, *src_rq; 1904 int this_cpu; 1905 int cpu; 1906 1907 this_cpu = rt_rq->push_cpu; 1908 1909 /* Paranoid check */ 1910 BUG_ON(this_cpu != smp_processor_id()); 1911 1912 rq = cpu_rq(this_cpu); 1913 src_rq = rq_of_rt_rq(rt_rq); 1914 1915again: 1916 if (has_pushable_tasks(rq)) { 1917 raw_spin_lock(&rq->lock); 1918 push_rt_task(rq); 1919 raw_spin_unlock(&rq->lock); 1920 } 1921 1922 /* Pass the IPI to the next rt overloaded queue */ 1923 raw_spin_lock(&rt_rq->push_lock); 1924 /* 1925 * If the source queue changed since the IPI went out, 1926 * we need to restart the search from that CPU again. 1927 */ 1928 if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) { 1929 rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART; 1930 rt_rq->push_cpu = src_rq->cpu; 1931 } 1932 1933 cpu = find_next_push_cpu(src_rq); 1934 1935 if (cpu >= nr_cpu_ids) 1936 rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING; 1937 raw_spin_unlock(&rt_rq->push_lock); 1938 1939 if (cpu >= nr_cpu_ids) 1940 return; 1941 1942 /* 1943 * It is possible that a restart caused this CPU to be 1944 * chosen again. Don't bother with an IPI, just see if we 1945 * have more to push. 
1946 */ 1947 if (unlikely(cpu == rq->cpu)) 1948 goto again; 1949 1950 /* Try the next RT overloaded CPU */ 1951 irq_work_queue_on(&rt_rq->push_work, cpu); 1952} 1953 1954static void push_irq_work_func(struct irq_work *work) 1955{ 1956 struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work); 1957 1958 try_to_push_tasks(rt_rq); 1959} 1960#endif /* HAVE_RT_PUSH_IPI */ 1961 1962static void pull_rt_task(struct rq *this_rq) 1963{ 1964 int this_cpu = this_rq->cpu, cpu; 1965 bool resched = false; 1966 struct task_struct *p; 1967 struct rq *src_rq; 1968 1969 if (likely(!rt_overloaded(this_rq))) 1970 return; 1971 1972 /* 1973 * Match the barrier from rt_set_overloaded; this guarantees that if we 1974 * see overloaded we must also see the rto_mask bit. 1975 */ 1976 smp_rmb(); 1977 1978#ifdef HAVE_RT_PUSH_IPI 1979 if (sched_feat(RT_PUSH_IPI)) { 1980 tell_cpu_to_push(this_rq); 1981 return; 1982 } 1983#endif 1984 1985 for_each_cpu(cpu, this_rq->rd->rto_mask) { 1986 if (this_cpu == cpu) 1987 continue; 1988 1989 src_rq = cpu_rq(cpu); 1990 1991 /* 1992 * Don't bother taking the src_rq->lock if the next highest 1993 * task is known to be lower-priority than our current task. 1994 * This may look racy, but if this value is about to go 1995 * logically higher, the src_rq will push this task away. 1996 * And if its going logically lower, we do not care 1997 */ 1998 if (src_rq->rt.highest_prio.next >= 1999 this_rq->rt.highest_prio.curr) 2000 continue; 2001 2002 /* 2003 * We can potentially drop this_rq's lock in 2004 * double_lock_balance, and another CPU could 2005 * alter this_rq 2006 */ 2007 double_lock_balance(this_rq, src_rq); 2008 2009 /* 2010 * We can pull only a task, which is pushable 2011 * on its rq, and no others. 2012 */ 2013 p = pick_highest_pushable_task(src_rq, this_cpu); 2014 2015 /* 2016 * Do we have an RT task that preempts 2017 * the to-be-scheduled task? 
2018 */ 2019 if (p && (p->prio < this_rq->rt.highest_prio.curr)) { 2020 WARN_ON(p == src_rq->curr); 2021 WARN_ON(!task_on_rq_queued(p)); 2022 2023 /* 2024 * There's a chance that p is higher in priority 2025 * than what's currently running on its cpu. 2026 * This is just that p is wakeing up and hasn't 2027 * had a chance to schedule. We only pull 2028 * p if it is lower in priority than the 2029 * current task on the run queue 2030 */ 2031 if (p->prio < src_rq->curr->prio) 2032 goto skip; 2033 2034 resched = true; 2035 2036 deactivate_task(src_rq, p, 0); 2037 set_task_cpu(p, this_cpu); 2038 activate_task(this_rq, p, 0); 2039 /* 2040 * We continue with the search, just in 2041 * case there's an even higher prio task 2042 * in another runqueue. (low likelihood 2043 * but possible) 2044 */ 2045 } 2046skip: 2047 double_unlock_balance(this_rq, src_rq); 2048 } 2049 2050 if (resched) 2051 resched_curr(this_rq); 2052} 2053 2054/* 2055 * If we are not running and we are not going to reschedule soon, we should 2056 * try to push tasks away now 2057 */ 2058static void task_woken_rt(struct rq *rq, struct task_struct *p) 2059{ 2060 if (!task_running(rq, p) && 2061 !test_tsk_need_resched(rq->curr) && 2062 p->nr_cpus_allowed > 1 && 2063 (dl_task(rq->curr) || rt_task(rq->curr)) && 2064 (rq->curr->nr_cpus_allowed < 2 || 2065 rq->curr->prio <= p->prio)) 2066 push_rt_tasks(rq); 2067} 2068 2069/* Assumes rq->lock is held */ 2070static void rq_online_rt(struct rq *rq) 2071{ 2072 if (rq->rt.overloaded) 2073 rt_set_overload(rq); 2074 2075 __enable_runtime(rq); 2076 2077 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); 2078} 2079 2080/* Assumes rq->lock is held */ 2081static void rq_offline_rt(struct rq *rq) 2082{ 2083 if (rq->rt.overloaded) 2084 rt_clear_overload(rq); 2085 2086 __disable_runtime(rq); 2087 2088 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); 2089} 2090 2091/* 2092 * When switch from the rt queue, we bring ourselves to a position 2093 * that we might 
want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
		return;

	queue_pull_task(rq);
}

void __init init_sched_rt_class(void)
{
	unsigned int i;

	/* Allocate the per-cpu scratch mask used by find_lowest_rq() */
	for_each_possible_cpu(i) {
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
	}
}
#endif /* CONFIG_SMP */

/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are already running, then there's nothing
	 * that needs to be done. But if we are not running
	 * we may need to preempt the current running task.
	 * If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			queue_push_tasks(rq);
#else
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	}
}

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (rq->curr == p) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if it is
		 * greater than the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
	}
}

/* Enforce RLIMIT_RTTIME: arm the posix-cpu-timer expiry once the soft limit hits */
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		/* Count at most one timeout tick per jiffy */
		if (p->rt.watchdog_stamp != jiffies) {
			p->rt.timeout++;
			p->rt.watchdog_stamp = jiffies;
		}

		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next)
			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
	}
}

static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
2220 */ 2221 if (p->policy != SCHED_RR) 2222 return; 2223 2224 if (--p->rt.time_slice) 2225 return; 2226 2227 p->rt.time_slice = sched_rr_timeslice; 2228 2229 /* 2230 * Requeue to the end of queue if we (and all of our ancestors) are not 2231 * the only element on the queue 2232 */ 2233 for_each_sched_rt_entity(rt_se) { 2234 if (rt_se->run_list.prev != rt_se->run_list.next) { 2235 requeue_task_rt(rq, p, 0); 2236 resched_curr(rq); 2237 return; 2238 } 2239 } 2240} 2241 2242static void set_curr_task_rt(struct rq *rq) 2243{ 2244 struct task_struct *p = rq->curr; 2245 2246 p->se.exec_start = rq_clock_task(rq); 2247 2248 /* The running task is never eligible for pushing */ 2249 dequeue_pushable_task(rq, p); 2250} 2251 2252static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) 2253{ 2254 /* 2255 * Time slice is 0 for SCHED_FIFO tasks 2256 */ 2257 if (task->policy == SCHED_RR) 2258 return sched_rr_timeslice; 2259 else 2260 return 0; 2261} 2262 2263const struct sched_class rt_sched_class = { 2264 .next = &fair_sched_class, 2265 .enqueue_task = enqueue_task_rt, 2266 .dequeue_task = dequeue_task_rt, 2267 .yield_task = yield_task_rt, 2268 2269 .check_preempt_curr = check_preempt_curr_rt, 2270 2271 .pick_next_task = pick_next_task_rt, 2272 .put_prev_task = put_prev_task_rt, 2273 2274#ifdef CONFIG_SMP 2275 .select_task_rq = select_task_rq_rt, 2276 2277 .set_cpus_allowed = set_cpus_allowed_common, 2278 .rq_online = rq_online_rt, 2279 .rq_offline = rq_offline_rt, 2280 .task_woken = task_woken_rt, 2281 .switched_from = switched_from_rt, 2282#endif 2283 2284 .set_curr_task = set_curr_task_rt, 2285 .task_tick = task_tick_rt, 2286 2287 .get_rr_interval = get_rr_interval_rt, 2288 2289 .prio_changed = prio_changed_rt, 2290 .switched_to = switched_to_rt, 2291 2292 .update_curr = update_curr_rt, 2293}; 2294 2295#ifdef CONFIG_SCHED_DEBUG 2296extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); 2297 2298void print_rt_stats(struct seq_file *m, 
int cpu) 2299{ 2300 rt_rq_iter_t iter; 2301 struct rt_rq *rt_rq; 2302 2303 rcu_read_lock(); 2304 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) 2305 print_rt_rq(m, cpu, rt_rq); 2306 rcu_read_unlock(); 2307} 2308#endif /* CONFIG_SCHED_DEBUG */ 2309