This source file includes the following definitions.
- __task_rq_lock
- task_rq_lock
- update_rq_clock_task
- update_rq_clock
- hrtick_clear
- hrtick
- __hrtick_restart
- __hrtick_start
- hrtick_start
- hrtick_start
- hrtick_rq_init
- hrtick_clear
- hrtick_rq_init
- set_nr_and_not_polling
- set_nr_if_polling
- set_nr_and_not_polling
- set_nr_if_polling
- __wake_q_add
- wake_q_add
- wake_q_add_safe
- wake_up_q
- resched_curr
- resched_cpu
- get_nohz_timer_target
- wake_up_idle_cpu
- wake_up_full_nohz_cpu
- wake_up_nohz_cpu
- got_nohz_idle_kick
- got_nohz_idle_kick
- sched_can_stop_tick
- walk_tg_tree_from
- tg_nop
- set_load_weight
- uclamp_bucket_id
- uclamp_bucket_base_value
- uclamp_none
- uclamp_se_set
- uclamp_idle_value
- uclamp_idle_reset
- uclamp_rq_max_value
- uclamp_tg_restrict
- uclamp_eff_get
- uclamp_eff_value
- uclamp_rq_inc_id
- uclamp_rq_dec_id
- uclamp_rq_inc
- uclamp_rq_dec
- uclamp_update_active
- uclamp_update_active_tasks
- uclamp_update_root_tg
- uclamp_update_root_tg
- sysctl_sched_uclamp_handler
- uclamp_validate
- __setscheduler_uclamp
- uclamp_fork
- init_uclamp
- uclamp_rq_inc
- uclamp_rq_dec
- uclamp_validate
- __setscheduler_uclamp
- uclamp_fork
- init_uclamp
- enqueue_task
- dequeue_task
- activate_task
- deactivate_task
- __normal_prio
- normal_prio
- effective_prio
- task_curr
- check_class_changed
- check_preempt_curr
- is_per_cpu_kthread
- is_cpu_allowed
- move_queued_task
- __migrate_task
- migration_cpu_stop
- set_cpus_allowed_common
- do_set_cpus_allowed
- __set_cpus_allowed_ptr
- set_cpus_allowed_ptr
- set_task_cpu
- __migrate_swap_task
- migrate_swap_stop
- migrate_swap
- wait_task_inactive
- kick_process
- select_fallback_rq
- select_task_rq
- update_avg
- sched_set_stop_task
- __set_cpus_allowed_ptr
- ttwu_stat
- ttwu_do_wakeup
- ttwu_do_activate
- ttwu_remote
- sched_ttwu_pending
- scheduler_ipi
- ttwu_queue_remote
- wake_up_if_idle
- cpus_share_cache
- ttwu_queue
- try_to_wake_up
- wake_up_process
- wake_up_state
- __sched_fork
- set_numabalancing_state
- sysctl_numa_balancing
- set_schedstats
- force_schedstat_enabled
- setup_schedstats
- init_schedstats
- sysctl_schedstats
- init_schedstats
- sched_fork
- to_ratio
- wake_up_new_task
- preempt_notifier_inc
- preempt_notifier_dec
- preempt_notifier_register
- preempt_notifier_unregister
- __fire_sched_in_preempt_notifiers
- fire_sched_in_preempt_notifiers
- __fire_sched_out_preempt_notifiers
- fire_sched_out_preempt_notifiers
- fire_sched_in_preempt_notifiers
- fire_sched_out_preempt_notifiers
- prepare_task
- finish_task
- prepare_lock_switch
- finish_lock_switch
- prepare_task_switch
- finish_task_switch
- __balance_callback
- balance_callback
- balance_callback
- schedule_tail
- context_switch
- nr_running
- single_task_running
- nr_context_switches
- nr_iowait_cpu
- nr_iowait
- sched_exec
- prefetch_curr_exec_start
- task_sched_runtime
- scheduler_tick
- sched_tick_remote
- sched_tick_start
- sched_tick_stop
- sched_tick_offload_init
- sched_tick_start
- sched_tick_stop
- preempt_latency_start
- preempt_count_add
- preempt_latency_stop
- preempt_count_sub
- preempt_latency_start
- preempt_latency_stop
- get_preempt_disable_ip
- __schedule_bug
- schedule_debug
- pick_next_task
- __schedule
- do_task_dead
- sched_submit_work
- sched_update_worker
- schedule
- schedule_idle
- schedule_user
- schedule_preempt_disabled
- preempt_schedule_common
- preempt_schedule
- preempt_schedule_notrace
- preempt_schedule_irq
- default_wake_function
- __rt_effective_prio
- rt_effective_prio
- rt_mutex_setprio
- rt_effective_prio
- set_user_nice
- can_nice
- SYSCALL_DEFINE1
- task_prio
- idle_cpu
- available_idle_cpu
- idle_task
- find_process_by_pid
- __setscheduler_params
- __setscheduler
- check_same_owner
- __sched_setscheduler
- _sched_setscheduler
- sched_setscheduler
- sched_setattr
- sched_setattr_nocheck
- sched_setscheduler_nocheck
- do_sched_setscheduler
- sched_copy_attr
- SYSCALL_DEFINE3
- SYSCALL_DEFINE2
- SYSCALL_DEFINE3
- SYSCALL_DEFINE1
- SYSCALL_DEFINE2
- sched_attr_copy_to_user
- SYSCALL_DEFINE4
- sched_setaffinity
- get_user_cpu_mask
- SYSCALL_DEFINE3
- sched_getaffinity
- SYSCALL_DEFINE3
- do_sched_yield
- SYSCALL_DEFINE0
- _cond_resched
- __cond_resched_lock
- yield
- yield_to
- io_schedule_prepare
- io_schedule_finish
- io_schedule_timeout
- io_schedule
- SYSCALL_DEFINE1
- SYSCALL_DEFINE1
- sched_rr_get_interval
- SYSCALL_DEFINE2
- SYSCALL_DEFINE2
- sched_show_task
- state_filter_match
- show_state_filter
- init_idle
- cpuset_cpumask_can_shrink
- task_can_attach
- migrate_task_to
- sched_setnuma
- idle_task_exit
- calc_load_migrate
- __pick_migrate_task
- migrate_tasks
- set_rq_online
- set_rq_offline
- cpuset_cpu_active
- cpuset_cpu_inactive
- sched_cpu_activate
- sched_cpu_deactivate
- sched_rq_cpu_starting
- sched_cpu_starting
- sched_cpu_dying
- sched_init_smp
- migration_init
- sched_init_smp
- in_sched_functions
- sched_init
- preempt_count_equals
- __might_sleep
- ___might_sleep
- __cant_sleep
- normalize_rt_tasks
- curr_task
- ia64_set_curr_task
- alloc_uclamp_sched_group
- sched_free_group
- sched_create_group
- sched_online_group
- sched_free_group_rcu
- sched_destroy_group
- sched_offline_group
- sched_change_group
- sched_move_task
- css_tg
- cpu_cgroup_css_alloc
- cpu_cgroup_css_online
- cpu_cgroup_css_released
- cpu_cgroup_css_free
- cpu_cgroup_fork
- cpu_cgroup_can_attach
- cpu_cgroup_attach
- cpu_util_update_eff
- capacity_from_percent
- cpu_uclamp_write
- cpu_uclamp_min_write
- cpu_uclamp_max_write
- cpu_uclamp_print
- cpu_uclamp_min_show
- cpu_uclamp_max_show
- cpu_shares_write_u64
- cpu_shares_read_u64
- tg_set_cfs_bandwidth
- tg_set_cfs_quota
- tg_get_cfs_quota
- tg_set_cfs_period
- tg_get_cfs_period
- cpu_cfs_quota_read_s64
- cpu_cfs_quota_write_s64
- cpu_cfs_period_read_u64
- cpu_cfs_period_write_u64
- normalize_cfs_quota
- tg_cfs_schedulable_down
- __cfs_schedulable
- cpu_cfs_stat_show
- cpu_rt_runtime_write
- cpu_rt_runtime_read
- cpu_rt_period_write_uint
- cpu_rt_period_read_uint
- cpu_extra_stat_show
- cpu_weight_read_u64
- cpu_weight_write_u64
- cpu_weight_nice_read_s64
- cpu_weight_nice_write_s64
- cpu_period_quota_print
- cpu_period_quota_parse
- cpu_max_show
- cpu_max_write
- dump_cpu_task
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 #include "sched.h"
  10 
  11 #include <linux/nospec.h>
  12 
  13 #include <linux/kcov.h>
  14 
  15 #include <asm/switch_to.h>
  16 #include <asm/tlb.h>
  17 
  18 #include "../workqueue_internal.h"
  19 #include "../smpboot.h"
  20 
  21 #include "pelt.h"
  22 
  23 #define CREATE_TRACE_POINTS
  24 #include <trace/events/sched.h>
  25 
  26 
  27 
  28 
  29 
  30 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp);
  31 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
  32 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
  33 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
  34 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
  35 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
  36 
  37 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  38 
  39 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
  40 
  41 
  42 
  43 
  44 
  45 
  46 
  47 #define SCHED_FEAT(name, enabled)       \
  48         (1UL << __SCHED_FEAT_##name) * enabled |
  49 const_debug unsigned int sysctl_sched_features =
  50 #include "features.h"
  51         0;
  52 #undef SCHED_FEAT
  53 #endif
  54 
  55 
  56 
  57 
  58 
  59 const_debug unsigned int sysctl_sched_nr_migrate = 32;
  60 
  61 
  62 
  63 
  64 
  65 unsigned int sysctl_sched_rt_period = 1000000;
  66 
  67 __read_mostly int scheduler_running;
  68 
  69 
  70 
  71 
  72 
  73 int sysctl_sched_rt_runtime = 950000;
  74 
  75 
  76 
  77 
  78 struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
  79         __acquires(rq->lock)
  80 {
  81         struct rq *rq;
  82 
  83         lockdep_assert_held(&p->pi_lock);
  84 
  85         for (;;) {
  86                 rq = task_rq(p);
  87                 raw_spin_lock(&rq->lock);
  88                 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
  89                         rq_pin_lock(rq, rf);
  90                         return rq;
  91                 }
  92                 raw_spin_unlock(&rq->lock);
  93 
  94                 while (unlikely(task_on_rq_migrating(p)))
  95                         cpu_relax();
  96         }
  97 }
  98 
  99 
 100 
 101 
 102 struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 103         __acquires(p->pi_lock)
 104         __acquires(rq->lock)
 105 {
 106         struct rq *rq;
 107 
 108         for (;;) {
 109                 raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
 110                 rq = task_rq(p);
 111                 raw_spin_lock(&rq->lock);
 112                 
 113 
 114 
 115 
 116 
 117 
 118 
 119 
 120 
 121 
 122 
 123 
 124 
 125 
 126 
 127 
 128 
 129                 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
 130                         rq_pin_lock(rq, rf);
 131                         return rq;
 132                 }
 133                 raw_spin_unlock(&rq->lock);
 134                 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 135 
 136                 while (unlikely(task_on_rq_migrating(p)))
 137                         cpu_relax();
 138         }
 139 }
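/*
 * Illustrative sketch (editor's note, not part of the original file): the
 * usual pairing for task_rq_lock()/task_rq_unlock() as used by callers
 * later in this file, e.g. uclamp_update_active():
 *
 *	struct rq_flags rf;
 *	struct rq *rq;
 *
 *	rq = task_rq_lock(p, &rf);	// takes p->pi_lock + rq->lock, pins rq
 *	// ... operate on a stable task <-> rq association ...
 *	task_rq_unlock(rq, p, &rf);	// unpins and releases both locks
 */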
 140 
 141 
 142 
 143 
 144 
 145 static void update_rq_clock_task(struct rq *rq, s64 delta)
 146 {
 147 
 148 
 149 
 150 
 151         s64 __maybe_unused steal = 0, irq_delta = 0;
 152 
 153 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 154         irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
 155 
 156         
 157 
 158 
 159 
 160 
 161 
 162 
 163 
 164 
 165 
 166 
 167 
 168 
 169 
 170 
 171         if (irq_delta > delta)
 172                 irq_delta = delta;
 173 
 174         rq->prev_irq_time += irq_delta;
 175         delta -= irq_delta;
 176 #endif
 177 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
  178         if (static_key_false((&paravirt_steal_rq_enabled))) {
 179                 steal = paravirt_steal_clock(cpu_of(rq));
 180                 steal -= rq->prev_steal_time_rq;
 181 
 182                 if (unlikely(steal > delta))
 183                         steal = delta;
 184 
 185                 rq->prev_steal_time_rq += steal;
 186                 delta -= steal;
 187         }
 188 #endif
 189 
 190         rq->clock_task += delta;
 191 
 192 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 193         if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 194                 update_irq_load_avg(rq, irq_delta + steal);
 195 #endif
 196         update_rq_clock_pelt(rq, delta);
 197 }
 198 
 199 void update_rq_clock(struct rq *rq)
 200 {
 201         s64 delta;
 202 
 203         lockdep_assert_held(&rq->lock);
 204 
 205         if (rq->clock_update_flags & RQCF_ACT_SKIP)
 206                 return;
 207 
 208 #ifdef CONFIG_SCHED_DEBUG
 209         if (sched_feat(WARN_DOUBLE_CLOCK))
 210                 SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
 211         rq->clock_update_flags |= RQCF_UPDATED;
 212 #endif
 213 
 214         delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
 215         if (delta < 0)
 216                 return;
 217         rq->clock += delta;
 218         update_rq_clock_task(rq, delta);
 219 }
 220 
 221 
 222 #ifdef CONFIG_SCHED_HRTICK
 223 
 224 
 225 
 226 
 227 static void hrtick_clear(struct rq *rq)
 228 {
 229         if (hrtimer_active(&rq->hrtick_timer))
 230                 hrtimer_cancel(&rq->hrtick_timer);
 231 }
 232 
 233 
 234 
 235 
 236 
 237 static enum hrtimer_restart hrtick(struct hrtimer *timer)
 238 {
 239         struct rq *rq = container_of(timer, struct rq, hrtick_timer);
 240         struct rq_flags rf;
 241 
 242         WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 243 
 244         rq_lock(rq, &rf);
 245         update_rq_clock(rq);
 246         rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 247         rq_unlock(rq, &rf);
 248 
 249         return HRTIMER_NORESTART;
 250 }
 251 
 252 #ifdef CONFIG_SMP
 253 
 254 static void __hrtick_restart(struct rq *rq)
 255 {
 256         struct hrtimer *timer = &rq->hrtick_timer;
 257 
 258         hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 259 }
 260 
 261 
 262 
 263 
 264 static void __hrtick_start(void *arg)
 265 {
 266         struct rq *rq = arg;
 267         struct rq_flags rf;
 268 
 269         rq_lock(rq, &rf);
 270         __hrtick_restart(rq);
 271         rq->hrtick_csd_pending = 0;
 272         rq_unlock(rq, &rf);
 273 }
 274 
 275 
 276 
 277 
 278 
 279 
 280 void hrtick_start(struct rq *rq, u64 delay)
 281 {
 282         struct hrtimer *timer = &rq->hrtick_timer;
 283         ktime_t time;
 284         s64 delta;
 285 
 286         
 287 
 288 
 289 
 290         delta = max_t(s64, delay, 10000LL);
 291         time = ktime_add_ns(timer->base->get_time(), delta);
 292 
 293         hrtimer_set_expires(timer, time);
 294 
 295         if (rq == this_rq()) {
 296                 __hrtick_restart(rq);
 297         } else if (!rq->hrtick_csd_pending) {
 298                 smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd);
 299                 rq->hrtick_csd_pending = 1;
 300         }
 301 }
 302 
 303 #else
 304 
 305 
 306 
 307 
 308 
 309 void hrtick_start(struct rq *rq, u64 delay)
 310 {
 311         
 312 
 313 
 314 
 315         delay = max_t(u64, delay, 10000LL);
 316         hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
 317                       HRTIMER_MODE_REL_PINNED_HARD);
 318 }
 319 #endif 
 320 
 321 static void hrtick_rq_init(struct rq *rq)
 322 {
 323 #ifdef CONFIG_SMP
 324         rq->hrtick_csd_pending = 0;
 325 
 326         rq->hrtick_csd.flags = 0;
 327         rq->hrtick_csd.func = __hrtick_start;
 328         rq->hrtick_csd.info = rq;
 329 #endif
 330 
 331         hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 332         rq->hrtick_timer.function = hrtick;
 333 }
 334 #else   
 335 static inline void hrtick_clear(struct rq *rq)
 336 {
 337 }
 338 
 339 static inline void hrtick_rq_init(struct rq *rq)
 340 {
 341 }
 342 #endif  
 343 
 344 
 345 
 346 
 347 #define fetch_or(ptr, mask)                                             \
 348         ({                                                              \
 349                 typeof(ptr) _ptr = (ptr);                               \
 350                 typeof(mask) _mask = (mask);                            \
 351                 typeof(*_ptr) _old, _val = *_ptr;                       \
 352                                                                         \
 353                 for (;;) {                                              \
 354                         _old = cmpxchg(_ptr, _val, _val | _mask);       \
 355                         if (_old == _val)                               \
 356                                 break;                                  \
 357                         _val = _old;                                    \
 358                 }                                                       \
 359         _old;                                                           \
 360 })
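/*
 * Editor's note: fetch_or() atomically ORs @mask into *@ptr and evaluates to
 * the value *@ptr held beforehand; e.g. with *flags == 0x1 and mask == 0x4 it
 * leaves *flags == 0x5 and returns 0x1. set_nr_and_not_polling() below relies
 * on this to set TIF_NEED_RESCHED and test TIF_POLLING_NRFLAG in a single
 * atomic operation.
 */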
 361 
 362 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 363 
 364 
 365 
 366 
 367 
 368 static bool set_nr_and_not_polling(struct task_struct *p)
 369 {
 370         struct thread_info *ti = task_thread_info(p);
 371         return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
 372 }
 373 
 374 
 375 
 376 
 377 
 378 
 379 
 380 static bool set_nr_if_polling(struct task_struct *p)
 381 {
 382         struct thread_info *ti = task_thread_info(p);
 383         typeof(ti->flags) old, val = READ_ONCE(ti->flags);
 384 
 385         for (;;) {
 386                 if (!(val & _TIF_POLLING_NRFLAG))
 387                         return false;
 388                 if (val & _TIF_NEED_RESCHED)
 389                         return true;
 390                 old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
 391                 if (old == val)
 392                         break;
 393                 val = old;
 394         }
 395         return true;
 396 }
 397 
 398 #else
 399 static bool set_nr_and_not_polling(struct task_struct *p)
 400 {
 401         set_tsk_need_resched(p);
 402         return true;
 403 }
 404 
 405 #ifdef CONFIG_SMP
 406 static bool set_nr_if_polling(struct task_struct *p)
 407 {
 408         return false;
 409 }
 410 #endif
 411 #endif
 412 
 413 static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
 414 {
 415         struct wake_q_node *node = &task->wake_q;
 416 
 417         
 418 
 419 
 420 
 421 
 422 
 423 
 424 
 425         smp_mb__before_atomic();
 426         if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
 427                 return false;
 428 
 429         
 430 
 431 
 432         *head->lastp = node;
 433         head->lastp = &node->next;
 434         return true;
 435 }
 436 
 437 
 438 
 439 
 440 
 441 
 442 
 443 
 444 
 445 
 446 
 447 
 448 
 449 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 450 {
 451         if (__wake_q_add(head, task))
 452                 get_task_struct(task);
 453 }
 454 
 455 
 456 
 457 
 458 
 459 
 460 
 461 
 462 
 463 
 464 
 465 
 466 
 467 
 468 
 469 
 470 
 471 
 472 void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
 473 {
 474         if (!__wake_q_add(head, task))
 475                 put_task_struct(task);
 476 }
 477 
 478 void wake_up_q(struct wake_q_head *head)
 479 {
 480         struct wake_q_node *node = head->first;
 481 
 482         while (node != WAKE_Q_TAIL) {
 483                 struct task_struct *task;
 484 
 485                 task = container_of(node, struct task_struct, wake_q);
 486                 BUG_ON(!task);
 487                 
 488                 node = node->next;
 489                 task->wake_q.next = NULL;
 490 
 491                 
 492 
 493 
 494 
 495                 wake_up_process(task);
 496                 put_task_struct(task);
 497         }
 498 }
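/*
 * Illustrative sketch (editor's note, not part of the original file): the
 * typical wake_q pattern — queue wakeups while holding a lock, then issue
 * them once the lock is dropped so wake_up_process() never runs under it.
 * 'some_lock' and 'waiter_task' are stand-ins:
 *
 *	DEFINE_WAKE_Q(wake_q);
 *
 *	raw_spin_lock(&some_lock);
 *	wake_q_add(&wake_q, waiter_task);	// takes a task reference
 *	raw_spin_unlock(&some_lock);
 *
 *	wake_up_q(&wake_q);			// wakes tasks, drops references
 */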
 499 
 500 
 501 
 502 
 503 
 504 
 505 
 506 
 507 void resched_curr(struct rq *rq)
 508 {
 509         struct task_struct *curr = rq->curr;
 510         int cpu;
 511 
 512         lockdep_assert_held(&rq->lock);
 513 
 514         if (test_tsk_need_resched(curr))
 515                 return;
 516 
 517         cpu = cpu_of(rq);
 518 
 519         if (cpu == smp_processor_id()) {
 520                 set_tsk_need_resched(curr);
 521                 set_preempt_need_resched();
 522                 return;
 523         }
 524 
 525         if (set_nr_and_not_polling(curr))
 526                 smp_send_reschedule(cpu);
 527         else
 528                 trace_sched_wake_idle_without_ipi(cpu);
 529 }
 530 
 531 void resched_cpu(int cpu)
 532 {
 533         struct rq *rq = cpu_rq(cpu);
 534         unsigned long flags;
 535 
 536         raw_spin_lock_irqsave(&rq->lock, flags);
 537         if (cpu_online(cpu) || cpu == smp_processor_id())
 538                 resched_curr(rq);
 539         raw_spin_unlock_irqrestore(&rq->lock, flags);
 540 }
 541 
 542 #ifdef CONFIG_SMP
 543 #ifdef CONFIG_NO_HZ_COMMON
 544 
 545 
 546 
 547 
 548 
 549 
 550 
 551 
 552 int get_nohz_timer_target(void)
 553 {
 554         int i, cpu = smp_processor_id();
 555         struct sched_domain *sd;
 556 
 557         if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
 558                 return cpu;
 559 
 560         rcu_read_lock();
 561         for_each_domain(cpu, sd) {
 562                 for_each_cpu(i, sched_domain_span(sd)) {
 563                         if (cpu == i)
 564                                 continue;
 565 
 566                         if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
 567                                 cpu = i;
 568                                 goto unlock;
 569                         }
 570                 }
 571         }
 572 
 573         if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
 574                 cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
 575 unlock:
 576         rcu_read_unlock();
 577         return cpu;
 578 }
 579 
 580 
 581 
 582 
 583 
 584 
 585 
 586 
 587 
 588 
 589 
 590 static void wake_up_idle_cpu(int cpu)
 591 {
 592         struct rq *rq = cpu_rq(cpu);
 593 
 594         if (cpu == smp_processor_id())
 595                 return;
 596 
 597         if (set_nr_and_not_polling(rq->idle))
 598                 smp_send_reschedule(cpu);
 599         else
 600                 trace_sched_wake_idle_without_ipi(cpu);
 601 }
 602 
 603 static bool wake_up_full_nohz_cpu(int cpu)
 604 {
 605         
 606 
 607 
 608 
 609 
 610 
 611         if (cpu_is_offline(cpu))
 612                 return true;  
 613         if (tick_nohz_full_cpu(cpu)) {
 614                 if (cpu != smp_processor_id() ||
 615                     tick_nohz_tick_stopped())
 616                         tick_nohz_full_kick_cpu(cpu);
 617                 return true;
 618         }
 619 
 620         return false;
 621 }
 622 
 623 
 624 
 625 
 626 
 627 
 628 void wake_up_nohz_cpu(int cpu)
 629 {
 630         if (!wake_up_full_nohz_cpu(cpu))
 631                 wake_up_idle_cpu(cpu);
 632 }
 633 
 634 static inline bool got_nohz_idle_kick(void)
 635 {
 636         int cpu = smp_processor_id();
 637 
 638         if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
 639                 return false;
 640 
 641         if (idle_cpu(cpu) && !need_resched())
 642                 return true;
 643 
 644         
 645 
 646 
 647 
 648         atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
 649         return false;
 650 }
 651 
 652 #else 
 653 
 654 static inline bool got_nohz_idle_kick(void)
 655 {
 656         return false;
 657 }
 658 
 659 #endif 
 660 
 661 #ifdef CONFIG_NO_HZ_FULL
 662 bool sched_can_stop_tick(struct rq *rq)
 663 {
 664         int fifo_nr_running;
 665 
 666         
 667         if (rq->dl.dl_nr_running)
 668                 return false;
 669 
 670         
 671 
 672 
 673 
 674         if (rq->rt.rr_nr_running) {
 675                 if (rq->rt.rr_nr_running == 1)
 676                         return true;
 677                 else
 678                         return false;
 679         }
 680 
 681         
 682 
 683 
 684 
 685         fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
 686         if (fifo_nr_running)
 687                 return true;
 688 
 689         
 690 
 691 
 692 
 693 
 694         if (rq->nr_running > 1)
 695                 return false;
 696 
 697         return true;
 698 }
 699 #endif 
 700 #endif 
 701 
 702 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
 703                         (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
 704 
 705 
 706 
 707 
 708 
 709 
 710 int walk_tg_tree_from(struct task_group *from,
 711                              tg_visitor down, tg_visitor up, void *data)
 712 {
 713         struct task_group *parent, *child;
 714         int ret;
 715 
 716         parent = from;
 717 
 718 down:
 719         ret = (*down)(parent, data);
 720         if (ret)
 721                 goto out;
 722         list_for_each_entry_rcu(child, &parent->children, siblings) {
 723                 parent = child;
 724                 goto down;
 725 
 726 up:
 727                 continue;
 728         }
 729         ret = (*up)(parent, data);
 730         if (ret || parent == from)
 731                 goto out;
 732 
 733         child = parent;
 734         parent = parent->parent;
 735         if (parent)
 736                 goto up;
 737 out:
 738         return ret;
 739 }
 740 
 741 int tg_nop(struct task_group *tg, void *data)
 742 {
 743         return 0;
 744 }
 745 #endif
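/*
 * Illustrative sketch (editor's note, not part of the original file):
 * walk_tg_tree_from() is driven with a "down" and an "up" visitor, with
 * tg_nop() as the no-op visitor. For instance, __cfs_schedulable() later in
 * this file walks the whole hierarchy roughly like this:
 *
 *	rcu_read_lock();
 *	ret = walk_tg_tree_from(&root_task_group,
 *				tg_cfs_schedulable_down, tg_nop, &data);
 *	rcu_read_unlock();
 */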
 746 
 747 static void set_load_weight(struct task_struct *p, bool update_load)
 748 {
 749         int prio = p->static_prio - MAX_RT_PRIO;
 750         struct load_weight *load = &p->se.load;
 751 
 752         
 753 
 754 
 755         if (task_has_idle_policy(p)) {
 756                 load->weight = scale_load(WEIGHT_IDLEPRIO);
 757                 load->inv_weight = WMULT_IDLEPRIO;
 758                 p->se.runnable_weight = load->weight;
 759                 return;
 760         }
 761 
 762         
 763 
 764 
 765 
 766         if (update_load && p->sched_class == &fair_sched_class) {
 767                 reweight_task(p, prio);
 768         } else {
 769                 load->weight = scale_load(sched_prio_to_weight[prio]);
 770                 load->inv_weight = sched_prio_to_wmult[prio];
 771                 p->se.runnable_weight = load->weight;
 772         }
 773 }
 774 
 775 #ifdef CONFIG_UCLAMP_TASK
 776 
 777 
 778 
 779 
 780 
 781 
 782 
 783 
 784 
 785 
 786 static DEFINE_MUTEX(uclamp_mutex);
 787 
 788 
 789 unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
 790 
 791 
 792 unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
 793 
 794 
 795 static struct uclamp_se uclamp_default[UCLAMP_CNT];
 796 
 797 
 798 #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
 799 
 800 #define for_each_clamp_id(clamp_id) \
 801         for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
 802 
 803 static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
 804 {
 805         return clamp_value / UCLAMP_BUCKET_DELTA;
 806 }
 807 
 808 static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
 809 {
 810         return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
 811 }
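/*
 * Worked example (editor's note, assuming the default UCLAMP_BUCKETS of 5
 * and SCHED_CAPACITY_SCALE of 1024): UCLAMP_BUCKET_DELTA is
 * DIV_ROUND_CLOSEST(1024, 5) = 205, so a clamp value of 300 maps to
 * uclamp_bucket_id(300) = 300 / 205 = 1 and
 * uclamp_bucket_base_value(300) = 205 * 1 = 205.
 */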
 812 
 813 static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
 814 {
 815         if (clamp_id == UCLAMP_MIN)
 816                 return 0;
 817         return SCHED_CAPACITY_SCALE;
 818 }
 819 
 820 static inline void uclamp_se_set(struct uclamp_se *uc_se,
 821                                  unsigned int value, bool user_defined)
 822 {
 823         uc_se->value = value;
 824         uc_se->bucket_id = uclamp_bucket_id(value);
 825         uc_se->user_defined = user_defined;
 826 }
 827 
 828 static inline unsigned int
 829 uclamp_idle_value(struct rq *rq, enum uclamp_id clamp_id,
 830                   unsigned int clamp_value)
 831 {
 832         
 833 
 834 
 835 
 836 
 837         if (clamp_id == UCLAMP_MAX) {
 838                 rq->uclamp_flags |= UCLAMP_FLAG_IDLE;
 839                 return clamp_value;
 840         }
 841 
 842         return uclamp_none(UCLAMP_MIN);
 843 }
 844 
 845 static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
 846                                      unsigned int clamp_value)
 847 {
 848         
 849         if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
 850                 return;
 851 
 852         WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
 853 }
 854 
 855 static inline
 856 unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
 857                                    unsigned int clamp_value)
 858 {
 859         struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
 860         int bucket_id = UCLAMP_BUCKETS - 1;
 861 
 862         
 863 
 864 
 865 
 866         for ( ; bucket_id >= 0; bucket_id--) {
 867                 if (!bucket[bucket_id].tasks)
 868                         continue;
 869                 return bucket[bucket_id].value;
 870         }
 871 
 872         
 873         return uclamp_idle_value(rq, clamp_id, clamp_value);
 874 }
 875 
 876 static inline struct uclamp_se
 877 uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
 878 {
 879         struct uclamp_se uc_req = p->uclamp_req[clamp_id];
 880 #ifdef CONFIG_UCLAMP_TASK_GROUP
 881         struct uclamp_se uc_max;
 882 
 883         
 884 
 885 
 886 
 887         if (task_group_is_autogroup(task_group(p)))
 888                 return uc_req;
 889         if (task_group(p) == &root_task_group)
 890                 return uc_req;
 891 
 892         uc_max = task_group(p)->uclamp[clamp_id];
 893         if (uc_req.value > uc_max.value || !uc_req.user_defined)
 894                 return uc_max;
 895 #endif
 896 
 897         return uc_req;
 898 }
 899 
 900 
 901 
 902 
 903 
 904 
 905 
 906 
 907 
 908 static inline struct uclamp_se
 909 uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
 910 {
 911         struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id);
 912         struct uclamp_se uc_max = uclamp_default[clamp_id];
 913 
 914         
 915         if (unlikely(uc_req.value > uc_max.value))
 916                 return uc_max;
 917 
 918         return uc_req;
 919 }
 920 
 921 unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
 922 {
 923         struct uclamp_se uc_eff;
 924 
 925         
 926         if (p->uclamp[clamp_id].active)
 927                 return p->uclamp[clamp_id].value;
 928 
 929         uc_eff = uclamp_eff_get(p, clamp_id);
 930 
 931         return uc_eff.value;
 932 }
 933 
 934 
 935 
 936 
 937 
 938 
 939 
 940 
 941 
 942 
 943 
 944 static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
 945                                     enum uclamp_id clamp_id)
 946 {
 947         struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
 948         struct uclamp_se *uc_se = &p->uclamp[clamp_id];
 949         struct uclamp_bucket *bucket;
 950 
 951         lockdep_assert_held(&rq->lock);
 952 
 953         
 954         p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id);
 955 
 956         bucket = &uc_rq->bucket[uc_se->bucket_id];
 957         bucket->tasks++;
 958         uc_se->active = true;
 959 
 960         uclamp_idle_reset(rq, clamp_id, uc_se->value);
 961 
 962         
 963 
 964 
 965 
 966         if (bucket->tasks == 1 || uc_se->value > bucket->value)
 967                 bucket->value = uc_se->value;
 968 
 969         if (uc_se->value > READ_ONCE(uc_rq->value))
 970                 WRITE_ONCE(uc_rq->value, uc_se->value);
 971 }
 972 
 973 
 974 
 975 
 976 
 977 
 978 
 979 
 980 
 981 
 982 static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
 983                                     enum uclamp_id clamp_id)
 984 {
 985         struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
 986         struct uclamp_se *uc_se = &p->uclamp[clamp_id];
 987         struct uclamp_bucket *bucket;
 988         unsigned int bkt_clamp;
 989         unsigned int rq_clamp;
 990 
 991         lockdep_assert_held(&rq->lock);
 992 
 993         bucket = &uc_rq->bucket[uc_se->bucket_id];
 994         SCHED_WARN_ON(!bucket->tasks);
 995         if (likely(bucket->tasks))
 996                 bucket->tasks--;
 997         uc_se->active = false;
 998 
 999         
1000 
1001 
1002 
1003 
1004 
1005         if (likely(bucket->tasks))
1006                 return;
1007 
1008         rq_clamp = READ_ONCE(uc_rq->value);
1009         
1010 
1011 
1012 
1013         SCHED_WARN_ON(bucket->value > rq_clamp);
1014         if (bucket->value >= rq_clamp) {
1015                 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
1016                 WRITE_ONCE(uc_rq->value, bkt_clamp);
1017         }
1018 }
1019 
1020 static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
1021 {
1022         enum uclamp_id clamp_id;
1023 
1024         if (unlikely(!p->sched_class->uclamp_enabled))
1025                 return;
1026 
1027         for_each_clamp_id(clamp_id)
1028                 uclamp_rq_inc_id(rq, p, clamp_id);
1029 
1030         
1031         if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
1032                 rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
1033 }
1034 
1035 static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
1036 {
1037         enum uclamp_id clamp_id;
1038 
1039         if (unlikely(!p->sched_class->uclamp_enabled))
1040                 return;
1041 
1042         for_each_clamp_id(clamp_id)
1043                 uclamp_rq_dec_id(rq, p, clamp_id);
1044 }
1045 
1046 static inline void
1047 uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
1048 {
1049         struct rq_flags rf;
1050         struct rq *rq;
1051 
1052         
1053 
1054 
1055 
1056 
1057 
1058 
1059 
1060         rq = task_rq_lock(p, &rf);
1061 
1062         
1063 
1064 
1065 
1066 
1067 
1068         if (p->uclamp[clamp_id].active) {
1069                 uclamp_rq_dec_id(rq, p, clamp_id);
1070                 uclamp_rq_inc_id(rq, p, clamp_id);
1071         }
1072 
1073         task_rq_unlock(rq, p, &rf);
1074 }
1075 
1076 #ifdef CONFIG_UCLAMP_TASK_GROUP
1077 static inline void
1078 uclamp_update_active_tasks(struct cgroup_subsys_state *css,
1079                            unsigned int clamps)
1080 {
1081         enum uclamp_id clamp_id;
1082         struct css_task_iter it;
1083         struct task_struct *p;
1084 
1085         css_task_iter_start(css, 0, &it);
1086         while ((p = css_task_iter_next(&it))) {
1087                 for_each_clamp_id(clamp_id) {
1088                         if ((0x1 << clamp_id) & clamps)
1089                                 uclamp_update_active(p, clamp_id);
1090                 }
1091         }
1092         css_task_iter_end(&it);
1093 }
1094 
1095 static void cpu_util_update_eff(struct cgroup_subsys_state *css);
1096 static void uclamp_update_root_tg(void)
1097 {
1098         struct task_group *tg = &root_task_group;
1099 
1100         uclamp_se_set(&tg->uclamp_req[UCLAMP_MIN],
1101                       sysctl_sched_uclamp_util_min, false);
1102         uclamp_se_set(&tg->uclamp_req[UCLAMP_MAX],
1103                       sysctl_sched_uclamp_util_max, false);
1104 
1105         rcu_read_lock();
1106         cpu_util_update_eff(&root_task_group.css);
1107         rcu_read_unlock();
1108 }
1109 #else
1110 static void uclamp_update_root_tg(void) { }
1111 #endif
1112 
1113 int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
1114                                 void __user *buffer, size_t *lenp,
1115                                 loff_t *ppos)
1116 {
1117         bool update_root_tg = false;
1118         int old_min, old_max;
1119         int result;
1120 
1121         mutex_lock(&uclamp_mutex);
1122         old_min = sysctl_sched_uclamp_util_min;
1123         old_max = sysctl_sched_uclamp_util_max;
1124 
1125         result = proc_dointvec(table, write, buffer, lenp, ppos);
1126         if (result)
1127                 goto undo;
1128         if (!write)
1129                 goto done;
1130 
1131         if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
1132             sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
1133                 result = -EINVAL;
1134                 goto undo;
1135         }
1136 
1137         if (old_min != sysctl_sched_uclamp_util_min) {
1138                 uclamp_se_set(&uclamp_default[UCLAMP_MIN],
1139                               sysctl_sched_uclamp_util_min, false);
1140                 update_root_tg = true;
1141         }
1142         if (old_max != sysctl_sched_uclamp_util_max) {
1143                 uclamp_se_set(&uclamp_default[UCLAMP_MAX],
1144                               sysctl_sched_uclamp_util_max, false);
1145                 update_root_tg = true;
1146         }
1147 
1148         if (update_root_tg)
1149                 uclamp_update_root_tg();
1150 
1151         
1152 
1153 
1154 
1155 
1156 
1157         goto done;
1158 
1159 undo:
1160         sysctl_sched_uclamp_util_min = old_min;
1161         sysctl_sched_uclamp_util_max = old_max;
1162 done:
1163         mutex_unlock(&uclamp_mutex);
1164 
1165         return result;
1166 }
1167 
1168 static int uclamp_validate(struct task_struct *p,
1169                            const struct sched_attr *attr)
1170 {
1171         unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
1172         unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
1173 
1174         if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
1175                 lower_bound = attr->sched_util_min;
1176         if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
1177                 upper_bound = attr->sched_util_max;
1178 
1179         if (lower_bound > upper_bound)
1180                 return -EINVAL;
1181         if (upper_bound > SCHED_CAPACITY_SCALE)
1182                 return -EINVAL;
1183 
1184         return 0;
1185 }
1186 
1187 static void __setscheduler_uclamp(struct task_struct *p,
1188                                   const struct sched_attr *attr)
1189 {
1190         enum uclamp_id clamp_id;
1191 
1192         
1193 
1194 
1195 
1196         for_each_clamp_id(clamp_id) {
1197                 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
1198                 unsigned int clamp_value = uclamp_none(clamp_id);
1199 
1200                 
1201                 if (uc_se->user_defined)
1202                         continue;
1203 
1204                 
1205                 if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
1206                         clamp_value = uclamp_none(UCLAMP_MAX);
1207 
1208                 uclamp_se_set(uc_se, clamp_value, false);
1209         }
1210 
1211         if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
1212                 return;
1213 
1214         if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
1215                 uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
1216                               attr->sched_util_min, true);
1217         }
1218 
1219         if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1220                 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
1221                               attr->sched_util_max, true);
1222         }
1223 }
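/*
 * Illustrative sketch (editor's note, not part of the original file): the
 * per-task clamps consumed above are requested from userspace through
 * sched_setattr() with the SCHED_FLAG_UTIL_CLAMP_* flags, roughly:
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_flags	= SCHED_FLAG_KEEP_ALL |
 *				  SCHED_FLAG_UTIL_CLAMP_MIN |
 *				  SCHED_FLAG_UTIL_CLAMP_MAX,
 *		.sched_util_min	= 128,	// example values, range 0..1024
 *		.sched_util_max	= 512,
 *	};
 *	syscall(__NR_sched_setattr, pid, &attr, 0);
 */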
1224 
1225 static void uclamp_fork(struct task_struct *p)
1226 {
1227         enum uclamp_id clamp_id;
1228 
1229         for_each_clamp_id(clamp_id)
1230                 p->uclamp[clamp_id].active = false;
1231 
1232         if (likely(!p->sched_reset_on_fork))
1233                 return;
1234 
1235         for_each_clamp_id(clamp_id) {
1236                 uclamp_se_set(&p->uclamp_req[clamp_id],
1237                               uclamp_none(clamp_id), false);
1238         }
1239 }
1240 
1241 static void __init init_uclamp(void)
1242 {
1243         struct uclamp_se uc_max = {};
1244         enum uclamp_id clamp_id;
1245         int cpu;
1246 
1247         mutex_init(&uclamp_mutex);
1248 
1249         for_each_possible_cpu(cpu) {
1250                 memset(&cpu_rq(cpu)->uclamp, 0,
1251                                 sizeof(struct uclamp_rq)*UCLAMP_CNT);
1252                 cpu_rq(cpu)->uclamp_flags = 0;
1253         }
1254 
1255         for_each_clamp_id(clamp_id) {
1256                 uclamp_se_set(&init_task.uclamp_req[clamp_id],
1257                               uclamp_none(clamp_id), false);
1258         }
1259 
1260         
1261         uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false);
1262         for_each_clamp_id(clamp_id) {
1263                 uclamp_default[clamp_id] = uc_max;
1264 #ifdef CONFIG_UCLAMP_TASK_GROUP
1265                 root_task_group.uclamp_req[clamp_id] = uc_max;
1266                 root_task_group.uclamp[clamp_id] = uc_max;
1267 #endif
1268         }
1269 }
1270 
1271 #else 
1272 static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
1273 static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
1274 static inline int uclamp_validate(struct task_struct *p,
1275                                   const struct sched_attr *attr)
1276 {
1277         return -EOPNOTSUPP;
1278 }
1279 static void __setscheduler_uclamp(struct task_struct *p,
1280                                   const struct sched_attr *attr) { }
1281 static inline void uclamp_fork(struct task_struct *p) { }
1282 static inline void init_uclamp(void) { }
1283 #endif 
1284 
1285 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
1286 {
1287         if (!(flags & ENQUEUE_NOCLOCK))
1288                 update_rq_clock(rq);
1289 
1290         if (!(flags & ENQUEUE_RESTORE)) {
1291                 sched_info_queued(rq, p);
1292                 psi_enqueue(p, flags & ENQUEUE_WAKEUP);
1293         }
1294 
1295         uclamp_rq_inc(rq, p);
1296         p->sched_class->enqueue_task(rq, p, flags);
1297 }
1298 
1299 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
1300 {
1301         if (!(flags & DEQUEUE_NOCLOCK))
1302                 update_rq_clock(rq);
1303 
1304         if (!(flags & DEQUEUE_SAVE)) {
1305                 sched_info_dequeued(rq, p);
1306                 psi_dequeue(p, flags & DEQUEUE_SLEEP);
1307         }
1308 
1309         uclamp_rq_dec(rq, p);
1310         p->sched_class->dequeue_task(rq, p, flags);
1311 }
1312 
1313 void activate_task(struct rq *rq, struct task_struct *p, int flags)
1314 {
1315         if (task_contributes_to_load(p))
1316                 rq->nr_uninterruptible--;
1317 
1318         enqueue_task(rq, p, flags);
1319 
1320         p->on_rq = TASK_ON_RQ_QUEUED;
1321 }
1322 
1323 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
1324 {
1325         p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
1326 
1327         if (task_contributes_to_load(p))
1328                 rq->nr_uninterruptible++;
1329 
1330         dequeue_task(rq, p, flags);
1331 }
1332 
1333 
1334 
1335 
1336 static inline int __normal_prio(struct task_struct *p)
1337 {
1338         return p->static_prio;
1339 }
1340 
1341 
1342 
1343 
1344 
1345 
1346 
1347 
1348 static inline int normal_prio(struct task_struct *p)
1349 {
1350         int prio;
1351 
1352         if (task_has_dl_policy(p))
1353                 prio = MAX_DL_PRIO-1;
1354         else if (task_has_rt_policy(p))
1355                 prio = MAX_RT_PRIO-1 - p->rt_priority;
1356         else
1357                 prio = __normal_prio(p);
1358         return prio;
1359 }
1360 
1361 
1362 
1363 
1364 
1365 
1366 
1367 
1368 static int effective_prio(struct task_struct *p)
1369 {
1370         p->normal_prio = normal_prio(p);
1371         
1372 
1373 
1374 
1375 
1376         if (!rt_prio(p->prio))
1377                 return p->normal_prio;
1378         return p->prio;
1379 }
1380 
1381 
1382 
1383 
1384 
1385 
1386 
1387 inline int task_curr(const struct task_struct *p)
1388 {
1389         return cpu_curr(task_cpu(p)) == p;
1390 }
1391 
1392 
1393 
1394 
1395 
1396 
1397 
1398 
1399 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1400                                        const struct sched_class *prev_class,
1401                                        int oldprio)
1402 {
1403         if (prev_class != p->sched_class) {
1404                 if (prev_class->switched_from)
1405                         prev_class->switched_from(rq, p);
1406 
1407                 p->sched_class->switched_to(rq, p);
1408         } else if (oldprio != p->prio || dl_task(p))
1409                 p->sched_class->prio_changed(rq, p, oldprio);
1410 }
1411 
1412 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
1413 {
1414         const struct sched_class *class;
1415 
1416         if (p->sched_class == rq->curr->sched_class) {
1417                 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
1418         } else {
1419                 for_each_class(class) {
1420                         if (class == rq->curr->sched_class)
1421                                 break;
1422                         if (class == p->sched_class) {
1423                                 resched_curr(rq);
1424                                 break;
1425                         }
1426                 }
1427         }
1428 
1429         
1430 
1431 
1432 
1433         if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
1434                 rq_clock_skip_update(rq);
1435 }
1436 
1437 #ifdef CONFIG_SMP
1438 
1439 static inline bool is_per_cpu_kthread(struct task_struct *p)
1440 {
1441         if (!(p->flags & PF_KTHREAD))
1442                 return false;
1443 
1444         if (p->nr_cpus_allowed != 1)
1445                 return false;
1446 
1447         return true;
1448 }
1449 
1450 
1451 
1452 
1453 
1454 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
1455 {
1456         if (!cpumask_test_cpu(cpu, p->cpus_ptr))
1457                 return false;
1458 
1459         if (is_per_cpu_kthread(p))
1460                 return cpu_online(cpu);
1461 
1462         return cpu_active(cpu);
1463 }
1464 
1465 
1466 
1467 
1468 
1469 
1470 
1471 
1472 
1473 
1474 
1475 
1476 
1477 
1478 
1479 
1480 
1481 
1482 
1483 
1484 static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
1485                                    struct task_struct *p, int new_cpu)
1486 {
1487         lockdep_assert_held(&rq->lock);
1488 
1489         WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
1490         dequeue_task(rq, p, DEQUEUE_NOCLOCK);
1491         set_task_cpu(p, new_cpu);
1492         rq_unlock(rq, rf);
1493 
1494         rq = cpu_rq(new_cpu);
1495 
1496         rq_lock(rq, rf);
1497         BUG_ON(task_cpu(p) != new_cpu);
1498         enqueue_task(rq, p, 0);
1499         p->on_rq = TASK_ON_RQ_QUEUED;
1500         check_preempt_curr(rq, p, 0);
1501 
1502         return rq;
1503 }
1504 
1505 struct migration_arg {
1506         struct task_struct *task;
1507         int dest_cpu;
1508 };
1509 
1510 
1511 
1512 
1513 
1514 
1515 
1516 
1517 
1518 
1519 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
1520                                  struct task_struct *p, int dest_cpu)
1521 {
1522         
1523         if (!is_cpu_allowed(p, dest_cpu))
1524                 return rq;
1525 
1526         update_rq_clock(rq);
1527         rq = move_queued_task(rq, rf, p, dest_cpu);
1528 
1529         return rq;
1530 }
1531 
1532 
1533 
1534 
1535 
1536 
1537 static int migration_cpu_stop(void *data)
1538 {
1539         struct migration_arg *arg = data;
1540         struct task_struct *p = arg->task;
1541         struct rq *rq = this_rq();
1542         struct rq_flags rf;
1543 
1544         
1545 
1546 
1547 
1548         local_irq_disable();
1549         
1550 
1551 
1552 
1553 
1554         sched_ttwu_pending();
1555 
1556         raw_spin_lock(&p->pi_lock);
1557         rq_lock(rq, &rf);
1558         
1559 
1560 
1561 
1562 
1563         if (task_rq(p) == rq) {
1564                 if (task_on_rq_queued(p))
1565                         rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
1566                 else
1567                         p->wake_cpu = arg->dest_cpu;
1568         }
1569         rq_unlock(rq, &rf);
1570         raw_spin_unlock(&p->pi_lock);
1571 
1572         local_irq_enable();
1573         return 0;
1574 }
1575 
1576 
1577 
1578 
1579 
1580 void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
1581 {
1582         cpumask_copy(&p->cpus_mask, new_mask);
1583         p->nr_cpus_allowed = cpumask_weight(new_mask);
1584 }
1585 
1586 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
1587 {
1588         struct rq *rq = task_rq(p);
1589         bool queued, running;
1590 
1591         lockdep_assert_held(&p->pi_lock);
1592 
1593         queued = task_on_rq_queued(p);
1594         running = task_current(rq, p);
1595 
1596         if (queued) {
1597                 
1598 
1599 
1600 
1601                 lockdep_assert_held(&rq->lock);
1602                 dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
1603         }
1604         if (running)
1605                 put_prev_task(rq, p);
1606 
1607         p->sched_class->set_cpus_allowed(p, new_mask);
1608 
1609         if (queued)
1610                 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
1611         if (running)
1612                 set_next_task(rq, p);
1613 }
1614 
1615 
1616 
1617 
1618 
1619 
1620 
1621 
1622 
1623 
1624 static int __set_cpus_allowed_ptr(struct task_struct *p,
1625                                   const struct cpumask *new_mask, bool check)
1626 {
1627         const struct cpumask *cpu_valid_mask = cpu_active_mask;
1628         unsigned int dest_cpu;
1629         struct rq_flags rf;
1630         struct rq *rq;
1631         int ret = 0;
1632 
1633         rq = task_rq_lock(p, &rf);
1634         update_rq_clock(rq);
1635 
1636         if (p->flags & PF_KTHREAD) {
1637                 
1638 
1639 
1640                 cpu_valid_mask = cpu_online_mask;
1641         }
1642 
1643         
1644 
1645 
1646 
1647         if (check && (p->flags & PF_NO_SETAFFINITY)) {
1648                 ret = -EINVAL;
1649                 goto out;
1650         }
1651 
1652         if (cpumask_equal(p->cpus_ptr, new_mask))
1653                 goto out;
1654 
1655         dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
1656         if (dest_cpu >= nr_cpu_ids) {
1657                 ret = -EINVAL;
1658                 goto out;
1659         }
1660 
1661         do_set_cpus_allowed(p, new_mask);
1662 
1663         if (p->flags & PF_KTHREAD) {
1664                 
1665 
1666 
1667 
1668                 WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
1669                         !cpumask_intersects(new_mask, cpu_active_mask) &&
1670                         p->nr_cpus_allowed != 1);
1671         }
1672 
1673         
1674         if (cpumask_test_cpu(task_cpu(p), new_mask))
1675                 goto out;
1676 
1677         if (task_running(rq, p) || p->state == TASK_WAKING) {
1678                 struct migration_arg arg = { p, dest_cpu };
1679                 
1680                 task_rq_unlock(rq, p, &rf);
1681                 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
1682                 return 0;
1683         } else if (task_on_rq_queued(p)) {
1684                 
1685 
1686 
1687 
1688                 rq = move_queued_task(rq, &rf, p, dest_cpu);
1689         }
1690 out:
1691         task_rq_unlock(rq, p, &rf);
1692 
1693         return ret;
1694 }
1695 
1696 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
1697 {
1698         return __set_cpus_allowed_ptr(p, new_mask, false);
1699 }
1700 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
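/*
 * Illustrative sketch (editor's note, not part of the original file): a
 * kernel-side caller pinning a task to one CPU; 'target_cpu' is a stand-in
 * and cpumask_of() yields a single-CPU mask:
 *
 *	int err = set_cpus_allowed_ptr(p, cpumask_of(target_cpu));
 *	if (err)
 *		pr_warn("could not move task to CPU %d: %d\n", target_cpu, err);
 */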
1701 
1702 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1703 {
1704 #ifdef CONFIG_SCHED_DEBUG
1705         
1706 
1707 
1708 
1709         WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
1710                         !p->on_rq);
1711 
1712         
1713 
1714 
1715 
1716 
1717         WARN_ON_ONCE(p->state == TASK_RUNNING &&
1718                      p->sched_class == &fair_sched_class &&
1719                      (p->on_rq && !task_on_rq_migrating(p)));
1720 
1721 #ifdef CONFIG_LOCKDEP
1722         
1723 
1724 
1725 
1726 
1727 
1728 
1729 
1730 
1731 
1732         WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
1733                                       lockdep_is_held(&task_rq(p)->lock)));
1734 #endif
1735         
1736 
1737 
1738         WARN_ON_ONCE(!cpu_online(new_cpu));
1739 #endif
1740 
1741         trace_sched_migrate_task(p, new_cpu);
1742 
1743         if (task_cpu(p) != new_cpu) {
1744                 if (p->sched_class->migrate_task_rq)
1745                         p->sched_class->migrate_task_rq(p, new_cpu);
1746                 p->se.nr_migrations++;
1747                 rseq_migrate(p);
1748                 perf_event_task_migrate(p);
1749         }
1750 
1751         __set_task_cpu(p, new_cpu);
1752 }
1753 
1754 #ifdef CONFIG_NUMA_BALANCING
1755 static void __migrate_swap_task(struct task_struct *p, int cpu)
1756 {
1757         if (task_on_rq_queued(p)) {
1758                 struct rq *src_rq, *dst_rq;
1759                 struct rq_flags srf, drf;
1760 
1761                 src_rq = task_rq(p);
1762                 dst_rq = cpu_rq(cpu);
1763 
1764                 rq_pin_lock(src_rq, &srf);
1765                 rq_pin_lock(dst_rq, &drf);
1766 
1767                 deactivate_task(src_rq, p, 0);
1768                 set_task_cpu(p, cpu);
1769                 activate_task(dst_rq, p, 0);
1770                 check_preempt_curr(dst_rq, p, 0);
1771 
1772                 rq_unpin_lock(dst_rq, &drf);
1773                 rq_unpin_lock(src_rq, &srf);
1774 
1775         } else {
1776                 
1777 
1778 
1779 
1780 
1781                 p->wake_cpu = cpu;
1782         }
1783 }
1784 
1785 struct migration_swap_arg {
1786         struct task_struct *src_task, *dst_task;
1787         int src_cpu, dst_cpu;
1788 };
1789 
1790 static int migrate_swap_stop(void *data)
1791 {
1792         struct migration_swap_arg *arg = data;
1793         struct rq *src_rq, *dst_rq;
1794         int ret = -EAGAIN;
1795 
1796         if (!cpu_active(arg->src_cpu) || !cpu_active(arg->dst_cpu))
1797                 return -EAGAIN;
1798 
1799         src_rq = cpu_rq(arg->src_cpu);
1800         dst_rq = cpu_rq(arg->dst_cpu);
1801 
1802         double_raw_lock(&arg->src_task->pi_lock,
1803                         &arg->dst_task->pi_lock);
1804         double_rq_lock(src_rq, dst_rq);
1805 
1806         if (task_cpu(arg->dst_task) != arg->dst_cpu)
1807                 goto unlock;
1808 
1809         if (task_cpu(arg->src_task) != arg->src_cpu)
1810                 goto unlock;
1811 
1812         if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
1813                 goto unlock;
1814 
1815         if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
1816                 goto unlock;
1817 
1818         __migrate_swap_task(arg->src_task, arg->dst_cpu);
1819         __migrate_swap_task(arg->dst_task, arg->src_cpu);
1820 
1821         ret = 0;
1822 
1823 unlock:
1824         double_rq_unlock(src_rq, dst_rq);
1825         raw_spin_unlock(&arg->dst_task->pi_lock);
1826         raw_spin_unlock(&arg->src_task->pi_lock);
1827 
1828         return ret;
1829 }
1830 
1831 
1832 
1833 
1834 int migrate_swap(struct task_struct *cur, struct task_struct *p,
1835                 int target_cpu, int curr_cpu)
1836 {
1837         struct migration_swap_arg arg;
1838         int ret = -EINVAL;
1839 
1840         arg = (struct migration_swap_arg){
1841                 .src_task = cur,
1842                 .src_cpu = curr_cpu,
1843                 .dst_task = p,
1844                 .dst_cpu = target_cpu,
1845         };
1846 
1847         if (arg.src_cpu == arg.dst_cpu)
1848                 goto out;
1849 
1850         
1851 
1852 
1853 
1854         if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
1855                 goto out;
1856 
1857         if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
1858                 goto out;
1859 
1860         if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
1861                 goto out;
1862 
1863         trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
1864         ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
1865 
1866 out:
1867         return ret;
1868 }
1869 #endif 
1870 
1871 
1872 
1873 
1874 
1875 
1876 
1877 
1878 
1879 
1880 
1881 
1882 
1883 
1884 
1885 
1886 
1887 unsigned long wait_task_inactive(struct task_struct *p, long match_state)
1888 {
1889         int running, queued;
1890         struct rq_flags rf;
1891         unsigned long ncsw;
1892         struct rq *rq;
1893 
1894         for (;;) {
1895                 
1896 
1897 
1898 
1899 
1900 
1901                 rq = task_rq(p);
1902 
1903                 
1904 
1905 
1906 
1907 
1908 
1909 
1910 
1911 
1912 
1913 
1914                 while (task_running(rq, p)) {
1915                         if (match_state && unlikely(p->state != match_state))
1916                                 return 0;
1917                         cpu_relax();
1918                 }
1919 
1920                 
1921 
1922 
1923 
1924 
1925                 rq = task_rq_lock(p, &rf);
1926                 trace_sched_wait_task(p);
1927                 running = task_running(rq, p);
1928                 queued = task_on_rq_queued(p);
1929                 ncsw = 0;
1930                 if (!match_state || p->state == match_state)
1931                         ncsw = p->nvcsw | LONG_MIN; 
1932                 task_rq_unlock(rq, p, &rf);
1933 
1934                 
1935 
1936 
1937                 if (unlikely(!ncsw))
1938                         break;
1939 
1940                 
1941 
1942 
1943 
1944 
1945 
1946                 if (unlikely(running)) {
1947                         cpu_relax();
1948                         continue;
1949                 }
1950 
1951                 
1952 
1953 
1954 
1955 
1956 
1957 
1958 
1959 
1960                 if (unlikely(queued)) {
1961                         ktime_t to = NSEC_PER_SEC / HZ;
1962 
1963                         set_current_state(TASK_UNINTERRUPTIBLE);
1964                         schedule_hrtimeout(&to, HRTIMER_MODE_REL);
1965                         continue;
1966                 }
1967 
1968                 
1969 
1970 
1971 
1972 
1973                 break;
1974         }
1975 
1976         return ncsw;
1977 }
1978 
1979 /*
1980  * kick_process - kick a running thread to enter/exit the kernel
1981  * @p: the to-be-kicked thread
1982  *
1983  * Cause a process which is running on another CPU to enter
1984  * kernel-mode, without any delay (to get signals handled).
1985  *
1986  * NOTE: this function doesn't have to take the runqueue lock,
1987  * because all it wants to ensure is that the remote task enters
1988  * the kernel.  If the IPI races and the task has been migrated
1989  * to another CPU then no harm is done and the purpose has been
1990  * achieved as well.
1991  */
1992 void kick_process(struct task_struct *p)
1993 {
1994         int cpu;
1995 
1996         preempt_disable();
1997         cpu = task_cpu(p);
1998         if ((cpu != smp_processor_id()) && task_curr(p))
1999                 smp_send_reschedule(cpu);
2000         preempt_enable();
2001 }
2002 EXPORT_SYMBOL_GPL(kick_process);
2003 
2004 
2005 
2006 
2007 
2008 
2009 
2010 
2011 
2012 
2013 
2014 
2015 
2016 
2017 
2018 /*
2019  * select_fallback_rq() picks a usable CPU when task_cpu(p) is no longer
2020  * allowed or active.  It tries, in order: an allowed, active CPU on the
2021  * same NUMA node; any CPU that passes is_cpu_allowed(); the cpuset
2022  * fallback mask; and finally the full cpu_possible_mask.  If even that
2023  * fails it BUGs.  ->cpus_ptr is protected by both rq->lock and
2024  * p->pi_lock.
2025  */
2026 static int select_fallback_rq(int cpu, struct task_struct *p)
2027 {
2028         int nid = cpu_to_node(cpu);
2029         const struct cpumask *nodemask = NULL;
2030         enum { cpuset, possible, fail } state = cpuset;
2031         int dest_cpu;
2032 
2033         /*
2034          * If the node the CPU was on has been offlined, cpu_to_node()
2035          * returns -1; there is then no CPU on that node, so skip the
2036          * node-local pass and fall through to the generic search.
2037          */
2038         if (nid != -1) {
2039                 nodemask = cpumask_of_node(nid);
2040 
2041                 
2042                 for_each_cpu(dest_cpu, nodemask) {
2043                         if (!cpu_active(dest_cpu))
2044                                 continue;
2045                         if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
2046                                 return dest_cpu;
2047                 }
2048         }
2049 
2050         for (;;) {
2051                 
2052                 for_each_cpu(dest_cpu, p->cpus_ptr) {
2053                         if (!is_cpu_allowed(p, dest_cpu))
2054                                 continue;
2055 
2056                         goto out;
2057                 }
2058 
2059                 
2060                 switch (state) {
2061                 case cpuset:
2062                         if (IS_ENABLED(CONFIG_CPUSETS)) {
2063                                 cpuset_cpus_allowed_fallback(p);
2064                                 state = possible;
2065                                 break;
2066                         }
2067                         /* Fall through when cpusets are not enabled. */
2068                 case possible:
2069                         do_set_cpus_allowed(p, cpu_possible_mask);
2070                         state = fail;
2071                         break;
2072 
2073                 case fail:
2074                         BUG();
2075                         break;
2076                 }
2077         }
2078 
2079 out:
2080         if (state != cpuset) {
2081                 
2082 
2083 
2084 
2085 
2086                 if (p->mm && printk_ratelimit()) {
2087                         printk_deferred("process %d (%s) no longer affine to cpu%d\n",
2088                                         task_pid_nr(p), p->comm, cpu);
2089                 }
2090         }
2091 
2092         return dest_cpu;
2093 }
2094 
2095 
2096 
2097 
2098 static inline
2099 int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
2100 {
2101         lockdep_assert_held(&p->pi_lock);
2102 
2103         if (p->nr_cpus_allowed > 1)
2104                 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
2105         else
2106                 cpu = cpumask_any(p->cpus_ptr);
2107 
2108         /*
2109          * In order not to call set_task_cpu() on a blocking task we need
2110          * to rely on ttwu() to place the task on a valid ->cpus_ptr CPU.
2111          *
2112          * Since this is common to all placement strategies, this lives
2113          * here.
2114          *
2115          * [ this allows ->select_task_rq() to simply return task_cpu(p)
2116          *   and not worry about this generic constraint ]
2117          */
2118         if (unlikely(!is_cpu_allowed(p, cpu)))
2119                 cpu = select_fallback_rq(task_cpu(p), p);
2120 
2121         return cpu;
2122 }
2123 
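     /*
      * update_avg() below keeps an exponentially weighted moving average
      * with a weight of 1/8: avg += (sample - avg) / 8 (the >> 3).  Worked
      * example: with *avg == 800 and sample == 1600, diff is 800, so the
      * new average becomes 800 + (800 >> 3) = 900.
      */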
2124 static void update_avg(u64 *avg, u64 sample)
2125 {
2126         s64 diff = sample - *avg;
2127         *avg += diff >> 3;
2128 }
2129 
2130 void sched_set_stop_task(int cpu, struct task_struct *stop)
2131 {
2132         struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
2133         struct task_struct *old_stop = cpu_rq(cpu)->stop;
2134 
2135         if (stop) {
2136                 /*
2137                  * Make it appear like a SCHED_FIFO task, its something
2138                  * userspace knows about and won't get confused about.
2139                  * Also, it will make PI more or less work without too
2140                  * much confusion -- but then, stop work should not
2141                  * rely on PI working anyway.
2142                  */
2143 
2144                 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
2145 
2146                 stop->sched_class = &stop_sched_class;
2147         }
2148 
2149         cpu_rq(cpu)->stop = stop;
2150 
2151         if (old_stop) {
2152                 
2153 
2154 
2155 
2156                 old_stop->sched_class = &rt_sched_class;
2157         }
2158 }
2159 
2160 #else
2161 
2162 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
2163                                          const struct cpumask *new_mask, bool check)
2164 {
2165         return set_cpus_allowed_ptr(p, new_mask);
2166 }
2167 
2168 #endif 
2169 
2170 static void
2171 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
2172 {
2173         struct rq *rq;
2174 
2175         if (!schedstat_enabled())
2176                 return;
2177 
2178         rq = this_rq();
2179 
2180 #ifdef CONFIG_SMP
2181         if (cpu == rq->cpu) {
2182                 __schedstat_inc(rq->ttwu_local);
2183                 __schedstat_inc(p->se.statistics.nr_wakeups_local);
2184         } else {
2185                 struct sched_domain *sd;
2186 
2187                 __schedstat_inc(p->se.statistics.nr_wakeups_remote);
2188                 rcu_read_lock();
2189                 for_each_domain(rq->cpu, sd) {
2190                         if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2191                                 __schedstat_inc(sd->ttwu_wake_remote);
2192                                 break;
2193                         }
2194                 }
2195                 rcu_read_unlock();
2196         }
2197 
2198         if (wake_flags & WF_MIGRATED)
2199                 __schedstat_inc(p->se.statistics.nr_wakeups_migrate);
2200 #endif 
2201 
2202         __schedstat_inc(rq->ttwu_count);
2203         __schedstat_inc(p->se.statistics.nr_wakeups);
2204 
2205         if (wake_flags & WF_SYNC)
2206                 __schedstat_inc(p->se.statistics.nr_wakeups_sync);
2207 }
2208 
2209 
2210 
2211 
2212 static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
2213                            struct rq_flags *rf)
2214 {
2215         check_preempt_curr(rq, p, wake_flags);
2216         p->state = TASK_RUNNING;
2217         trace_sched_wakeup(p);
2218 
2219 #ifdef CONFIG_SMP
2220         if (p->sched_class->task_woken) {
2221                 
2222 
2223 
2224 
2225                 rq_unpin_lock(rq, rf);
2226                 p->sched_class->task_woken(rq, p);
2227                 rq_repin_lock(rq, rf);
2228         }
2229 
2230         if (rq->idle_stamp) {
2231                 u64 delta = rq_clock(rq) - rq->idle_stamp;
2232                 u64 max = 2*rq->max_idle_balance_cost;
2233 
2234                 update_avg(&rq->avg_idle, delta);
2235 
2236                 if (rq->avg_idle > max)
2237                         rq->avg_idle = max;
2238 
2239                 rq->idle_stamp = 0;
2240         }
2241 #endif
2242 }
2243 
2244 static void
2245 ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
2246                  struct rq_flags *rf)
2247 {
2248         int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
2249 
2250         lockdep_assert_held(&rq->lock);
2251 
2252 #ifdef CONFIG_SMP
2253         if (p->sched_contributes_to_load)
2254                 rq->nr_uninterruptible--;
2255 
2256         if (wake_flags & WF_MIGRATED)
2257                 en_flags |= ENQUEUE_MIGRATED;
2258 #endif
2259 
2260         activate_task(rq, p, en_flags);
2261         ttwu_do_wakeup(rq, p, wake_flags, rf);
2262 }
2263 
2264 /*
2265  * Called in case the task @p isn't fully descheduled from its runqueue,
2266  * in this case we must do a remote wakeup. Its a 'light' wakeup though,
2267  * since all we need to do is flip p->state to TASK_RUNNING, since
2268  * the task is still ->on_rq.
2269  */
2270 static int ttwu_remote(struct task_struct *p, int wake_flags)
2271 {
2272         struct rq_flags rf;
2273         struct rq *rq;
2274         int ret = 0;
2275 
2276         rq = __task_rq_lock(p, &rf);
2277         if (task_on_rq_queued(p)) {
2278                 
2279                 update_rq_clock(rq);
2280                 ttwu_do_wakeup(rq, p, wake_flags, &rf);
2281                 ret = 1;
2282         }
2283         __task_rq_unlock(rq, &rf);
2284 
2285         return ret;
2286 }
2287 
2288 #ifdef CONFIG_SMP
2289 void sched_ttwu_pending(void)
2290 {
2291         struct rq *rq = this_rq();
2292         struct llist_node *llist = llist_del_all(&rq->wake_list);
2293         struct task_struct *p, *t;
2294         struct rq_flags rf;
2295 
2296         if (!llist)
2297                 return;
2298 
2299         rq_lock_irqsave(rq, &rf);
2300         update_rq_clock(rq);
2301 
2302         llist_for_each_entry_safe(p, t, llist, wake_entry)
2303                 ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
2304 
2305         rq_unlock_irqrestore(rq, &rf);
2306 }
2307 
2308 void scheduler_ipi(void)
2309 {
2310         
2311 
2312 
2313 
2314 
2315         preempt_fold_need_resched();
2316 
2317         if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
2318                 return;
2319 
2320         
2321 
2322 
2323 
2324 
2325 
2326 
2327 
2328 
2329 
2330 
2331 
2332 
2333         irq_enter();
2334         sched_ttwu_pending();
2335 
2336         
2337 
2338 
2339         if (unlikely(got_nohz_idle_kick())) {
2340                 this_rq()->idle_balance = 1;
2341                 raise_softirq_irqoff(SCHED_SOFTIRQ);
2342         }
2343         irq_exit();
2344 }
2345 
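     /*
      * ttwu_queue_remote() below queues @p on the remote CPU's wake_list and
      * kicks that CPU with an IPI, unless its idle task is already polling
      * need_resched (set_nr_if_polling()), in which case no IPI is needed.
      * This lets the waking CPU avoid taking the remote runqueue lock; the
      * remote CPU completes the wakeup from sched_ttwu_pending().
      */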
2346 static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
2347 {
2348         struct rq *rq = cpu_rq(cpu);
2349 
2350         p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
2351 
2352         if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
2353                 if (!set_nr_if_polling(rq->idle))
2354                         smp_send_reschedule(cpu);
2355                 else
2356                         trace_sched_wake_idle_without_ipi(cpu);
2357         }
2358 }
2359 
2360 void wake_up_if_idle(int cpu)
2361 {
2362         struct rq *rq = cpu_rq(cpu);
2363         struct rq_flags rf;
2364 
2365         rcu_read_lock();
2366 
2367         if (!is_idle_task(rcu_dereference(rq->curr)))
2368                 goto out;
2369 
2370         if (set_nr_if_polling(rq->idle)) {
2371                 trace_sched_wake_idle_without_ipi(cpu);
2372         } else {
2373                 rq_lock_irqsave(rq, &rf);
2374                 if (is_idle_task(rq->curr))
2375                         smp_send_reschedule(cpu);
2376                 
2377                 rq_unlock_irqrestore(rq, &rf);
2378         }
2379 
2380 out:
2381         rcu_read_unlock();
2382 }
2383 
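     /*
      * cpus_share_cache() below reports whether two CPUs sit under the same
      * last-level-cache sched domain, by comparing the per-CPU sd_llc_id
      * values assigned when the sched domains are built.
      */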
2384 bool cpus_share_cache(int this_cpu, int that_cpu)
2385 {
2386         return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
2387 }
2388 #endif 
2389 
2390 static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
2391 {
2392         struct rq *rq = cpu_rq(cpu);
2393         struct rq_flags rf;
2394 
2395 #if defined(CONFIG_SMP)
2396         if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
2397                 sched_clock_cpu(cpu); /* Sync clocks across CPUs */
2398                 ttwu_queue_remote(p, cpu, wake_flags);
2399                 return;
2400         }
2401 #endif
2402 
2403         rq_lock(rq, &rf);
2404         update_rq_clock(rq);
2405         ttwu_do_activate(rq, p, wake_flags, &rf);
2406         rq_unlock(rq, &rf);
2407 }
2408 
2409 
2410 
2411 
2412 
2413 
2414 
2415 
2416 
2417 
2418 
2419 
2420 
2421 
2422 
2423 
2424 
2425 
2426 
2427 
2428 
2429 
2430 
2431 
2432 
2433 
2434 
2435 
2436 
2437 
2438 
2439 
2440 
2441 
2442 
2443 
2444 
2445 
2446 
2447 
2448 
2449 
2450 
2451 
2452 
2453 
2454 
2455 
2456 
2457 
2458 
2459 
2460 
2461 
2462 
2463 
2464 
2465 
2466 
2467 
2468 
2469 
2470 
2471 
2472 
2473 
2474 
2475 
2476 
2477 
2478 
2479 
2480 
2481 
2482 
2483 
2484 
2485 
2486 
2487 
2488 
2489 
2490 
2491 
2492 
2493 
2494 /*
2495  * try_to_wake_up - wake up a thread
2496  * @p: the thread to be awakened
2497  * @state: the mask of task states that can be woken
2498  * @wake_flags: wake modifier flags (WF_*)
2499  *
2500  * If (@state & @p->state) @p->state = TASK_RUNNING.
2501  *
2502  * If the task was not queued/runnable, also place it back on a runqueue.
2503  *
2504  * Atomic against schedule() which would dequeue a task, also see
2505  * set_current_state().  This function executes a full memory barrier
2506  * before accessing @p->state.
2507  *
2508  * Return: %true if @p->state changes (an actual wakeup was done),
2509  *         %false otherwise.
2510  */
2511 static int
2512 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2513 {
2514         unsigned long flags;
2515         int cpu, success = 0;
2516 
2517         preempt_disable();
2518         if (p == current) {
2519                 
2520 
2521 
2522 
2523 
2524 
2525 
2526 
2527 
2528 
2529 
2530                 if (!(p->state & state))
2531                         goto out;
2532 
2533                 success = 1;
2534                 cpu = task_cpu(p);
2535                 trace_sched_waking(p);
2536                 p->state = TASK_RUNNING;
2537                 trace_sched_wakeup(p);
2538                 goto out;
2539         }
2540 
2541         
2542 
2543 
2544 
2545 
2546 
2547         raw_spin_lock_irqsave(&p->pi_lock, flags);
2548         smp_mb__after_spinlock();
2549         if (!(p->state & state))
2550                 goto unlock;
2551 
2552         trace_sched_waking(p);
2553 
2554         
2555         success = 1;
2556         cpu = task_cpu(p);
2557 
2558         
2559 
2560 
2561 
2562 
2563 
2564 
2565 
2566 
2567 
2568 
2569 
2570 
2571 
2572 
2573 
2574 
2575 
2576 
2577 
2578         smp_rmb();
2579         if (p->on_rq && ttwu_remote(p, wake_flags))
2580                 goto unlock;
2581 
2582 #ifdef CONFIG_SMP
2583         
2584 
2585 
2586 
2587 
2588 
2589 
2590 
2591 
2592 
2593 
2594 
2595 
2596 
2597 
2598 
2599 
2600 
2601 
2602         smp_rmb();
2603 
2604         
2605 
2606 
2607 
2608 
2609 
2610 
2611 
2612 
2613         smp_cond_load_acquire(&p->on_cpu, !VAL);
2614 
2615         p->sched_contributes_to_load = !!task_contributes_to_load(p);
2616         p->state = TASK_WAKING;
2617 
2618         if (p->in_iowait) {
2619                 delayacct_blkio_end(p);
2620                 atomic_dec(&task_rq(p)->nr_iowait);
2621         }
2622 
2623         cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
2624         if (task_cpu(p) != cpu) {
2625                 wake_flags |= WF_MIGRATED;
2626                 psi_ttwu_dequeue(p);
2627                 set_task_cpu(p, cpu);
2628         }
2629 
2630 #else 
2631 
2632         if (p->in_iowait) {
2633                 delayacct_blkio_end(p);
2634                 atomic_dec(&task_rq(p)->nr_iowait);
2635         }
2636 
2637 #endif 
2638 
2639         ttwu_queue(p, cpu, wake_flags);
2640 unlock:
2641         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2642 out:
2643         if (success)
2644                 ttwu_stat(p, cpu, wake_flags);
2645         preempt_enable();
2646 
2647         return success;
2648 }
2649 
2650 
2651 
2652 
2653 
2654 
2655 
2656 
2657 
2658 
2659 
2660 
2661 int wake_up_process(struct task_struct *p)
2662 {
2663         return try_to_wake_up(p, TASK_NORMAL, 0);
2664 }
2665 EXPORT_SYMBOL(wake_up_process);
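     /*
      * Minimal usage sketch (hypothetical caller, not part of this file):
      * a waiter parks itself and a producer wakes it with wake_up_process().
      * data_ready and consumer_task are illustrative names only.
      *
      *     waiter:
      *             set_current_state(TASK_UNINTERRUPTIBLE);
      *             if (!data_ready)
      *                     schedule();
      *             __set_current_state(TASK_RUNNING);
      *
      *     producer (after publishing data_ready):
      *             wake_up_process(consumer_task);
      */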
2666 
2667 int wake_up_state(struct task_struct *p, unsigned int state)
2668 {
2669         return try_to_wake_up(p, state, 0);
2670 }
2671 
2672 
2673 
2674 
2675 
2676 
2677 
2678 static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
2679 {
2680         p->on_rq                        = 0;
2681 
2682         p->se.on_rq                     = 0;
2683         p->se.exec_start                = 0;
2684         p->se.sum_exec_runtime          = 0;
2685         p->se.prev_sum_exec_runtime     = 0;
2686         p->se.nr_migrations             = 0;
2687         p->se.vruntime                  = 0;
2688         INIT_LIST_HEAD(&p->se.group_node);
2689 
2690 #ifdef CONFIG_FAIR_GROUP_SCHED
2691         p->se.cfs_rq                    = NULL;
2692 #endif
2693 
2694 #ifdef CONFIG_SCHEDSTATS
2695         
2696         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
2697 #endif
2698 
2699         RB_CLEAR_NODE(&p->dl.rb_node);
2700         init_dl_task_timer(&p->dl);
2701         init_dl_inactive_task_timer(&p->dl);
2702         __dl_clear_params(p);
2703 
2704         INIT_LIST_HEAD(&p->rt.run_list);
2705         p->rt.timeout           = 0;
2706         p->rt.time_slice        = sched_rr_timeslice;
2707         p->rt.on_rq             = 0;
2708         p->rt.on_list           = 0;
2709 
2710 #ifdef CONFIG_PREEMPT_NOTIFIERS
2711         INIT_HLIST_HEAD(&p->preempt_notifiers);
2712 #endif
2713 
2714 #ifdef CONFIG_COMPACTION
2715         p->capture_control = NULL;
2716 #endif
2717         init_numa_balancing(clone_flags, p);
2718 }
2719 
2720 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
2721 
2722 #ifdef CONFIG_NUMA_BALANCING
2723 
2724 void set_numabalancing_state(bool enabled)
2725 {
2726         if (enabled)
2727                 static_branch_enable(&sched_numa_balancing);
2728         else
2729                 static_branch_disable(&sched_numa_balancing);
2730 }
2731 
2732 #ifdef CONFIG_PROC_SYSCTL
2733 int sysctl_numa_balancing(struct ctl_table *table, int write,
2734                          void __user *buffer, size_t *lenp, loff_t *ppos)
2735 {
2736         struct ctl_table t;
2737         int err;
2738         int state = static_branch_likely(&sched_numa_balancing);
2739 
2740         if (write && !capable(CAP_SYS_ADMIN))
2741                 return -EPERM;
2742 
2743         t = *table;
2744         t.data = &state;
2745         err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
2746         if (err < 0)
2747                 return err;
2748         if (write)
2749                 set_numabalancing_state(state);
2750         return err;
2751 }
2752 #endif
2753 #endif
2754 
2755 #ifdef CONFIG_SCHEDSTATS
2756 
2757 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
2758 static bool __initdata __sched_schedstats = false;
2759 
2760 static void set_schedstats(bool enabled)
2761 {
2762         if (enabled)
2763                 static_branch_enable(&sched_schedstats);
2764         else
2765                 static_branch_disable(&sched_schedstats);
2766 }
2767 
2768 void force_schedstat_enabled(void)
2769 {
2770         if (!schedstat_enabled()) {
2771                 pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
2772                 static_branch_enable(&sched_schedstats);
2773         }
2774 }
2775 
2776 static int __init setup_schedstats(char *str)
2777 {
2778         int ret = 0;
2779         if (!str)
2780                 goto out;
2781 
2782         
2783 
2784 
2785 
2786 
2787         if (!strcmp(str, "enable")) {
2788                 __sched_schedstats = true;
2789                 ret = 1;
2790         } else if (!strcmp(str, "disable")) {
2791                 __sched_schedstats = false;
2792                 ret = 1;
2793         }
2794 out:
2795         if (!ret)
2796                 pr_warn("Unable to parse schedstats=\n");
2797 
2798         return ret;
2799 }
2800 __setup("schedstats=", setup_schedstats);
2801 
2802 static void __init init_schedstats(void)
2803 {
2804         set_schedstats(__sched_schedstats);
2805 }
2806 
2807 #ifdef CONFIG_PROC_SYSCTL
2808 int sysctl_schedstats(struct ctl_table *table, int write,
2809                          void __user *buffer, size_t *lenp, loff_t *ppos)
2810 {
2811         struct ctl_table t;
2812         int err;
2813         int state = static_branch_likely(&sched_schedstats);
2814 
2815         if (write && !capable(CAP_SYS_ADMIN))
2816                 return -EPERM;
2817 
2818         t = *table;
2819         t.data = &state;
2820         err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
2821         if (err < 0)
2822                 return err;
2823         if (write)
2824                 set_schedstats(state);
2825         return err;
2826 }
2827 #endif 
2828 #else  
2829 static inline void init_schedstats(void) {}
2830 #endif 
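     /*
      * Note on the schedstats knobs above: statistics can be enabled at boot
      * with "schedstats=enable" (parsed by setup_schedstats()) or flipped at
      * run time via the kernel.sched_schedstats sysctl (sysctl_schedstats());
      * both paths toggle the sched_schedstats static key in set_schedstats().
      */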
2831 
2832 
2833 
2834 
2835 int sched_fork(unsigned long clone_flags, struct task_struct *p)
2836 {
2837         unsigned long flags;
2838 
2839         __sched_fork(clone_flags, p);
2840         
2841 
2842 
2843 
2844 
2845         p->state = TASK_NEW;
2846 
2847         
2848 
2849 
2850         p->prio = current->normal_prio;
2851 
2852         uclamp_fork(p);
2853 
2854         
2855 
2856 
2857         if (unlikely(p->sched_reset_on_fork)) {
2858                 if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
2859                         p->policy = SCHED_NORMAL;
2860                         p->static_prio = NICE_TO_PRIO(0);
2861                         p->rt_priority = 0;
2862                 } else if (PRIO_TO_NICE(p->static_prio) < 0)
2863                         p->static_prio = NICE_TO_PRIO(0);
2864 
2865                 p->prio = p->normal_prio = __normal_prio(p);
2866                 set_load_weight(p, false);
2867 
2868                 
2869 
2870 
2871 
2872                 p->sched_reset_on_fork = 0;
2873         }
2874 
2875         if (dl_prio(p->prio))
2876                 return -EAGAIN;
2877         else if (rt_prio(p->prio))
2878                 p->sched_class = &rt_sched_class;
2879         else
2880                 p->sched_class = &fair_sched_class;
2881 
2882         init_entity_runnable_average(&p->se);
2883 
2884         
2885 
2886 
2887 
2888 
2889 
2890 
2891         raw_spin_lock_irqsave(&p->pi_lock, flags);
2892         
2893 
2894 
2895 
2896         __set_task_cpu(p, smp_processor_id());
2897         if (p->sched_class->task_fork)
2898                 p->sched_class->task_fork(p);
2899         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2900 
2901 #ifdef CONFIG_SCHED_INFO
2902         if (likely(sched_info_on()))
2903                 memset(&p->sched_info, 0, sizeof(p->sched_info));
2904 #endif
2905 #if defined(CONFIG_SMP)
2906         p->on_cpu = 0;
2907 #endif
2908         init_task_preempt_count(p);
2909 #ifdef CONFIG_SMP
2910         plist_node_init(&p->pushable_tasks, MAX_PRIO);
2911         RB_CLEAR_NODE(&p->pushable_dl_tasks);
2912 #endif
2913         return 0;
2914 }
2915 
2916 unsigned long to_ratio(u64 period, u64 runtime)
2917 {
2918         if (runtime == RUNTIME_INF)
2919                 return BW_UNIT;
2920 
2921         
2922 
2923 
2924 
2925 
2926         if (period == 0)
2927                 return 0;
2928 
2929         return div64_u64(runtime << BW_SHIFT, period);
2930 }
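     /*
      * to_ratio() above returns runtime/period as a fixed-point fraction
      * scaled by BW_UNIT (1 << BW_SHIFT).  Worked example, assuming
      * BW_SHIFT == 20 as defined in sched.h: to_ratio(100 * NSEC_PER_MSEC,
      * 25 * NSEC_PER_MSEC) is roughly BW_UNIT / 4, i.e. a 25% bandwidth
      * ratio.
      */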
2931 
2932 
2933 
2934 
2935 
2936 
2937 
2938 
2939 void wake_up_new_task(struct task_struct *p)
2940 {
2941         struct rq_flags rf;
2942         struct rq *rq;
2943 
2944         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
2945         p->state = TASK_RUNNING;
2946 #ifdef CONFIG_SMP
2947         
2948 
2949 
2950 
2951 
2952 
2953 
2954 
2955         p->recent_used_cpu = task_cpu(p);
2956         __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
2957 #endif
2958         rq = __task_rq_lock(p, &rf);
2959         update_rq_clock(rq);
2960         post_init_entity_util_avg(p);
2961 
2962         activate_task(rq, p, ENQUEUE_NOCLOCK);
2963         trace_sched_wakeup_new(p);
2964         check_preempt_curr(rq, p, WF_FORK);
2965 #ifdef CONFIG_SMP
2966         if (p->sched_class->task_woken) {
2967                 
2968 
2969 
2970 
2971                 rq_unpin_lock(rq, &rf);
2972                 p->sched_class->task_woken(rq, p);
2973                 rq_repin_lock(rq, &rf);
2974         }
2975 #endif
2976         task_rq_unlock(rq, p, &rf);
2977 }
2978 
2979 #ifdef CONFIG_PREEMPT_NOTIFIERS
2980 
2981 static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key);
2982 
2983 void preempt_notifier_inc(void)
2984 {
2985         static_branch_inc(&preempt_notifier_key);
2986 }
2987 EXPORT_SYMBOL_GPL(preempt_notifier_inc);
2988 
2989 void preempt_notifier_dec(void)
2990 {
2991         static_branch_dec(&preempt_notifier_key);
2992 }
2993 EXPORT_SYMBOL_GPL(preempt_notifier_dec);
2994 
2995 
2996 
2997 
2998 
2999 void preempt_notifier_register(struct preempt_notifier *notifier)
3000 {
3001         if (!static_branch_unlikely(&preempt_notifier_key))
3002                 WARN(1, "registering preempt_notifier while notifiers disabled\n");
3003 
3004         hlist_add_head(&notifier->link, &current->preempt_notifiers);
3005 }
3006 EXPORT_SYMBOL_GPL(preempt_notifier_register);
3007 
3008 
3009 
3010 
3011 
3012 
3013 
3014 void preempt_notifier_unregister(struct preempt_notifier *notifier)
3015 {
3016         hlist_del(&notifier->link);
3017 }
3018 EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
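     /*
      * Registration sketch (hypothetical caller such as a hypervisor;
      * assumes the preempt_notifier_init() helper and struct preempt_ops
      * from <linux/preempt.h>).  The notifier fires for the registering
      * task only:
      *
      *     static struct preempt_ops my_ops = {
      *             .sched_in  = my_sched_in,
      *             .sched_out = my_sched_out,
      *     };
      *
      *     preempt_notifier_inc();
      *     preempt_notifier_init(&my_notifier, &my_ops);
      *     preempt_notifier_register(&my_notifier);
      */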
3019 
3020 static void __fire_sched_in_preempt_notifiers(struct task_struct *curr)
3021 {
3022         struct preempt_notifier *notifier;
3023 
3024         hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
3025                 notifier->ops->sched_in(notifier, raw_smp_processor_id());
3026 }
3027 
3028 static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
3029 {
3030         if (static_branch_unlikely(&preempt_notifier_key))
3031                 __fire_sched_in_preempt_notifiers(curr);
3032 }
3033 
3034 static void
3035 __fire_sched_out_preempt_notifiers(struct task_struct *curr,
3036                                    struct task_struct *next)
3037 {
3038         struct preempt_notifier *notifier;
3039 
3040         hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
3041                 notifier->ops->sched_out(notifier, next);
3042 }
3043 
3044 static __always_inline void
3045 fire_sched_out_preempt_notifiers(struct task_struct *curr,
3046                                  struct task_struct *next)
3047 {
3048         if (static_branch_unlikely(&preempt_notifier_key))
3049                 __fire_sched_out_preempt_notifiers(curr, next);
3050 }
3051 
3052 #else 
3053 
3054 static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
3055 {
3056 }
3057 
3058 static inline void
3059 fire_sched_out_preempt_notifiers(struct task_struct *curr,
3060                                  struct task_struct *next)
3061 {
3062 }
3063 
3064 #endif 
3065 
3066 static inline void prepare_task(struct task_struct *next)
3067 {
3068 #ifdef CONFIG_SMP
3069         /*
3070          * Claim the task as running, we do this before switching to it
3071          * such that any running task will have this set.
3072          */
3073         next->on_cpu = 1;
3074 #endif
3075 }
3076 
3077 static inline void finish_task(struct task_struct *prev)
3078 {
3079 #ifdef CONFIG_SMP
3080         /*
3081          * This must be the very last reference to @prev from this CPU.
3082          * After p->on_cpu is cleared, the task can be moved to a
3083          * different CPU.  We must ensure this doesn't happen until the
3084          * switch is completely finished.
3085          *
3086          * The load of prev->state in finish_task_switch() must happen
3087          * before this store; pairs with the smp_cond_load_acquire() of
3088          * p->on_cpu in try_to_wake_up().
3089          */
3090         smp_store_release(&prev->on_cpu, 0);
3091 #endif
3092 }
3093 
3094 static inline void
3095 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
3096 {
3097         
3098 
3099 
3100 
3101 
3102 
3103         rq_unpin_lock(rq, rf);
3104         spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
3105 #ifdef CONFIG_DEBUG_SPINLOCK
3106         
3107         rq->lock.owner = next;
3108 #endif
3109 }
3110 
3111 static inline void finish_lock_switch(struct rq *rq)
3112 {
3113         
3114 
3115 
3116 
3117 
3118         spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
3119         raw_spin_unlock_irq(&rq->lock);
3120 }
3121 
3122 
3123 
3124 
3125 
3126 #ifndef prepare_arch_switch
3127 # define prepare_arch_switch(next)      do { } while (0)
3128 #endif
3129 
3130 #ifndef finish_arch_post_lock_switch
3131 # define finish_arch_post_lock_switch() do { } while (0)
3132 #endif
3133 
3134 
3135 
3136 
3137 
3138 
3139 
3140 
3141 
3142 
3143 
3144 
3145 
3146 
3147 static inline void
3148 prepare_task_switch(struct rq *rq, struct task_struct *prev,
3149                     struct task_struct *next)
3150 {
3151         kcov_prepare_switch(prev);
3152         sched_info_switch(rq, prev, next);
3153         perf_event_task_sched_out(prev, next);
3154         rseq_preempt(prev);
3155         fire_sched_out_preempt_notifiers(prev, next);
3156         prepare_task(next);
3157         prepare_arch_switch(next);
3158 }
3159 
3160 
3161 
3162 
3163 
3164 
3165 
3166 
3167 
3168 
3169 
3170 
3171 
3172 
3173 
3174 
3175 
3176 
3177 
3178 
3179 static struct rq *finish_task_switch(struct task_struct *prev)
3180         __releases(rq->lock)
3181 {
3182         struct rq *rq = this_rq();
3183         struct mm_struct *mm = rq->prev_mm;
3184         long prev_state;
3185 
3186         
3187 
3188 
3189 
3190 
3191 
3192 
3193 
3194 
3195 
3196 
3197         if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
3198                       "corrupted preempt_count: %s/%d/0x%x\n",
3199                       current->comm, current->pid, preempt_count()))
3200                 preempt_count_set(FORK_PREEMPT_COUNT);
3201 
3202         rq->prev_mm = NULL;
3203 
3204         
3205 
3206 
3207 
3208 
3209 
3210 
3211 
3212 
3213 
3214 
3215         prev_state = prev->state;
3216         vtime_task_switch(prev);
3217         perf_event_task_sched_in(prev, current);
3218         finish_task(prev);
3219         finish_lock_switch(rq);
3220         finish_arch_post_lock_switch();
3221         kcov_finish_switch(current);
3222 
3223         fire_sched_in_preempt_notifiers(current);
3224         
3225 
3226 
3227 
3228 
3229 
3230 
3231 
3232 
3233 
3234 
3235 
3236         if (mm) {
3237                 membarrier_mm_sync_core_before_usermode(mm);
3238                 mmdrop(mm);
3239         }
3240         if (unlikely(prev_state == TASK_DEAD)) {
3241                 if (prev->sched_class->task_dead)
3242                         prev->sched_class->task_dead(prev);
3243 
3244                 
3245 
3246 
3247 
3248                 kprobe_flush_task(prev);
3249 
3250                 
3251                 put_task_stack(prev);
3252 
3253                 put_task_struct_rcu_user(prev);
3254         }
3255 
3256         tick_nohz_task_switch();
3257         return rq;
3258 }
3259 
3260 #ifdef CONFIG_SMP
3261 
3262 
3263 static void __balance_callback(struct rq *rq)
3264 {
3265         struct callback_head *head, *next;
3266         void (*func)(struct rq *rq);
3267         unsigned long flags;
3268 
3269         raw_spin_lock_irqsave(&rq->lock, flags);
3270         head = rq->balance_callback;
3271         rq->balance_callback = NULL;
3272         while (head) {
3273                 func = (void (*)(struct rq *))head->func;
3274                 next = head->next;
3275                 head->next = NULL;
3276                 head = next;
3277 
3278                 func(rq);
3279         }
3280         raw_spin_unlock_irqrestore(&rq->lock, flags);
3281 }
3282 
3283 static inline void balance_callback(struct rq *rq)
3284 {
3285         if (unlikely(rq->balance_callback))
3286                 __balance_callback(rq);
3287 }
3288 
3289 #else
3290 
3291 static inline void balance_callback(struct rq *rq)
3292 {
3293 }
3294 
3295 #endif
3296 
3297 
3298 
3299 
3300 
3301 asmlinkage __visible void schedule_tail(struct task_struct *prev)
3302         __releases(rq->lock)
3303 {
3304         struct rq *rq;
3305 
3306         
3307 
3308 
3309 
3310 
3311 
3312 
3313 
3314 
3315         rq = finish_task_switch(prev);
3316         balance_callback(rq);
3317         preempt_enable();
3318 
3319         if (current->set_child_tid)
3320                 put_user(task_pid_vnr(current), current->set_child_tid);
3321 
3322         calculate_sigpending();
3323 }
3324 
3325 
3326 
3327 
3328 static __always_inline struct rq *
3329 context_switch(struct rq *rq, struct task_struct *prev,
3330                struct task_struct *next, struct rq_flags *rf)
3331 {
3332         prepare_task_switch(rq, prev, next);
3333 
3334         
3335 
3336 
3337 
3338 
3339         arch_start_context_switch(prev);
3340 
3341         
3342 
3343 
3344 
3345 
3346 
3347 
3348         if (!next->mm) {                                
3349                 enter_lazy_tlb(prev->active_mm, next);
3350 
3351                 next->active_mm = prev->active_mm;
3352                 if (prev->mm)                           
3353                         mmgrab(prev->active_mm);
3354                 else
3355                         prev->active_mm = NULL;
3356         } else {                                        
3357                 membarrier_switch_mm(rq, prev->active_mm, next->mm);
3358                 
3359 
3360 
3361 
3362 
3363 
3364 
3365 
3366                 switch_mm_irqs_off(prev->active_mm, next->mm, next);
3367 
3368                 if (!prev->mm) {                        
3369                         
3370                         rq->prev_mm = prev->active_mm;
3371                         prev->active_mm = NULL;
3372                 }
3373         }
3374 
3375         rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
3376 
3377         prepare_lock_switch(rq, next, rf);
3378 
3379         
3380         switch_to(prev, next, prev);
3381         barrier();
3382 
3383         return finish_task_switch(prev);
3384 }
3385 
3386 
3387 
3388 
3389 
3390 
3391 
3392 unsigned long nr_running(void)
3393 {
3394         unsigned long i, sum = 0;
3395 
3396         for_each_online_cpu(i)
3397                 sum += cpu_rq(i)->nr_running;
3398 
3399         return sum;
3400 }
3401 
3402 
3403 
3404 
3405 
3406 
3407 
3408 
3409 
3410 
3411 
3412 
3413 
3414 
3415 bool single_task_running(void)
3416 {
3417         return raw_rq()->nr_running == 1;
3418 }
3419 EXPORT_SYMBOL(single_task_running);
3420 
3421 unsigned long long nr_context_switches(void)
3422 {
3423         int i;
3424         unsigned long long sum = 0;
3425 
3426         for_each_possible_cpu(i)
3427                 sum += cpu_rq(i)->nr_switches;
3428 
3429         return sum;
3430 }
3431 
3432 
3433 
3434 
3435 
3436 
3437 
3438 
3439 unsigned long nr_iowait_cpu(int cpu)
3440 {
3441         return atomic_read(&cpu_rq(cpu)->nr_iowait);
3442 }
3443 
3444 
3445 
3446 
3447 
3448 
3449 
3450 
3451 
3452 
3453 
3454 
3455 
3456 
3457 
3458 
3459 
3460 
3461 
3462 
3463 
3464 
3465 
3466 
3467 
3468 
3469 
3470 
3471 
3472 
3473 
3474 unsigned long nr_iowait(void)
3475 {
3476         unsigned long i, sum = 0;
3477 
3478         for_each_possible_cpu(i)
3479                 sum += nr_iowait_cpu(i);
3480 
3481         return sum;
3482 }
3483 
3484 #ifdef CONFIG_SMP
3485 
3486 
3487 
3488 
3489 
3490 void sched_exec(void)
3491 {
3492         struct task_struct *p = current;
3493         unsigned long flags;
3494         int dest_cpu;
3495 
3496         raw_spin_lock_irqsave(&p->pi_lock, flags);
3497         dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
3498         if (dest_cpu == smp_processor_id())
3499                 goto unlock;
3500 
3501         if (likely(cpu_active(dest_cpu))) {
3502                 struct migration_arg arg = { p, dest_cpu };
3503 
3504                 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3505                 stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
3506                 return;
3507         }
3508 unlock:
3509         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3510 }
3511 
3512 #endif
3513 
3514 DEFINE_PER_CPU(struct kernel_stat, kstat);
3515 DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
3516 
3517 EXPORT_PER_CPU_SYMBOL(kstat);
3518 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
3519 
3520 
3521 
3522 
3523 
3524 
3525 
3526 static inline void prefetch_curr_exec_start(struct task_struct *p)
3527 {
3528 #ifdef CONFIG_FAIR_GROUP_SCHED
3529         struct sched_entity *curr = (&p->se)->cfs_rq->curr;
3530 #else
3531         struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
3532 #endif
3533         prefetch(curr);
3534         prefetch(&curr->exec_start);
3535 }
3536 
3537 
3538 
3539 
3540 
3541 
3542 unsigned long long task_sched_runtime(struct task_struct *p)
3543 {
3544         struct rq_flags rf;
3545         struct rq *rq;
3546         u64 ns;
3547 
3548 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
3549         
3550 
3551 
3552 
3553 
3554 
3555 
3556 
3557 
3558 
3559 
3560         if (!p->on_cpu || !task_on_rq_queued(p))
3561                 return p->se.sum_exec_runtime;
3562 #endif
3563 
3564         rq = task_rq_lock(p, &rf);
3565         
3566 
3567 
3568 
3569 
3570         if (task_current(rq, p) && task_on_rq_queued(p)) {
3571                 prefetch_curr_exec_start(p);
3572                 update_rq_clock(rq);
3573                 p->sched_class->update_curr(rq);
3574         }
3575         ns = p->se.sum_exec_runtime;
3576         task_rq_unlock(rq, p, &rf);
3577 
3578         return ns;
3579 }
3580 
3581 
3582 
3583 
3584 
3585 void scheduler_tick(void)
3586 {
3587         int cpu = smp_processor_id();
3588         struct rq *rq = cpu_rq(cpu);
3589         struct task_struct *curr = rq->curr;
3590         struct rq_flags rf;
3591 
3592         sched_clock_tick();
3593 
3594         rq_lock(rq, &rf);
3595 
3596         update_rq_clock(rq);
3597         curr->sched_class->task_tick(rq, curr, 0);
3598         calc_global_load_tick(rq);
3599         psi_task_tick(rq);
3600 
3601         rq_unlock(rq, &rf);
3602 
3603         perf_event_task_tick();
3604 
3605 #ifdef CONFIG_SMP
3606         rq->idle_balance = idle_cpu(cpu);
3607         trigger_load_balance(rq);
3608 #endif
3609 }
3610 
3611 #ifdef CONFIG_NO_HZ_FULL
3612 
3613 struct tick_work {
3614         int                     cpu;
3615         atomic_t                state;
3616         struct delayed_work     work;
3617 };
3618 
3619 #define TICK_SCHED_REMOTE_OFFLINE       0
3620 #define TICK_SCHED_REMOTE_OFFLINING     1
3621 #define TICK_SCHED_REMOTE_RUNNING       2
3622 
3623 
3624 
3625 
3626 
3627 
3628 
3629 
3630 
3631 
3632 
3633 
3634 
3635 
3636 
3637 
3638 
3639 
3640 
3641 
3642 
3643 
3644 
3645 
3646 static struct tick_work __percpu *tick_work_cpu;
3647 
3648 static void sched_tick_remote(struct work_struct *work)
3649 {
3650         struct delayed_work *dwork = to_delayed_work(work);
3651         struct tick_work *twork = container_of(dwork, struct tick_work, work);
3652         int cpu = twork->cpu;
3653         struct rq *rq = cpu_rq(cpu);
3654         struct task_struct *curr;
3655         struct rq_flags rf;
3656         u64 delta;
3657         int os;
3658 
3659         
3660 
3661 
3662 
3663 
3664 
3665 
3666         if (!tick_nohz_tick_stopped_cpu(cpu))
3667                 goto out_requeue;
3668 
3669         rq_lock_irq(rq, &rf);
3670         curr = rq->curr;
3671         if (cpu_is_offline(cpu))
3672                 goto out_unlock;
3673 
3674         update_rq_clock(rq);
3675 
3676         if (!is_idle_task(curr)) {
3677                 
3678 
3679 
3680 
3681                 delta = rq_clock_task(rq) - curr->se.exec_start;
3682                 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
3683         }
3684         curr->sched_class->task_tick(rq, curr, 0);
3685 
3686         calc_load_nohz_remote(rq);
3687 out_unlock:
3688         rq_unlock_irq(rq, &rf);
3689 out_requeue:
3690 
3691         
3692 
3693 
3694 
3695 
3696 
3697         os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING);
3698         WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE);
3699         if (os == TICK_SCHED_REMOTE_RUNNING)
3700                 queue_delayed_work(system_unbound_wq, dwork, HZ);
3701 }
3702 
3703 static void sched_tick_start(int cpu)
3704 {
3705         int os;
3706         struct tick_work *twork;
3707 
3708         if (housekeeping_cpu(cpu, HK_FLAG_TICK))
3709                 return;
3710 
3711         WARN_ON_ONCE(!tick_work_cpu);
3712 
3713         twork = per_cpu_ptr(tick_work_cpu, cpu);
3714         os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING);
3715         WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING);
3716         if (os == TICK_SCHED_REMOTE_OFFLINE) {
3717                 twork->cpu = cpu;
3718                 INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
3719                 queue_delayed_work(system_unbound_wq, &twork->work, HZ);
3720         }
3721 }
3722 
3723 #ifdef CONFIG_HOTPLUG_CPU
3724 static void sched_tick_stop(int cpu)
3725 {
3726         struct tick_work *twork;
3727         int os;
3728 
3729         if (housekeeping_cpu(cpu, HK_FLAG_TICK))
3730                 return;
3731 
3732         WARN_ON_ONCE(!tick_work_cpu);
3733 
3734         twork = per_cpu_ptr(tick_work_cpu, cpu);
3735         
3736         os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING);
3737         WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING);
3738         
3739 }
3740 #endif 
3741 
3742 int __init sched_tick_offload_init(void)
3743 {
3744         tick_work_cpu = alloc_percpu(struct tick_work);
3745         BUG_ON(!tick_work_cpu);
3746         return 0;
3747 }
3748 
3749 #else 
3750 static inline void sched_tick_start(int cpu) { }
3751 static inline void sched_tick_stop(int cpu) { }
3752 #endif
3753 
3754 #if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
3755                                 defined(CONFIG_TRACE_PREEMPT_TOGGLE))
3756 
3757 
3758 
3759 
3760 static inline void preempt_latency_start(int val)
3761 {
3762         if (preempt_count() == val) {
3763                 unsigned long ip = get_lock_parent_ip();
3764 #ifdef CONFIG_DEBUG_PREEMPT
3765                 current->preempt_disable_ip = ip;
3766 #endif
3767                 trace_preempt_off(CALLER_ADDR0, ip);
3768         }
3769 }
3770 
3771 void preempt_count_add(int val)
3772 {
3773 #ifdef CONFIG_DEBUG_PREEMPT
3774         
3775 
3776 
3777         if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
3778                 return;
3779 #endif
3780         __preempt_count_add(val);
3781 #ifdef CONFIG_DEBUG_PREEMPT
3782         
3783 
3784 
3785         DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
3786                                 PREEMPT_MASK - 10);
3787 #endif
3788         preempt_latency_start(val);
3789 }
3790 EXPORT_SYMBOL(preempt_count_add);
3791 NOKPROBE_SYMBOL(preempt_count_add);
3792 
3793 
3794 
3795 
3796 
3797 static inline void preempt_latency_stop(int val)
3798 {
3799         if (preempt_count() == val)
3800                 trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
3801 }
3802 
3803 void preempt_count_sub(int val)
3804 {
3805 #ifdef CONFIG_DEBUG_PREEMPT
3806         
3807 
3808 
3809         if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
3810                 return;
3811         
3812 
3813 
3814         if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
3815                         !(preempt_count() & PREEMPT_MASK)))
3816                 return;
3817 #endif
3818 
3819         preempt_latency_stop(val);
3820         __preempt_count_sub(val);
3821 }
3822 EXPORT_SYMBOL(preempt_count_sub);
3823 NOKPROBE_SYMBOL(preempt_count_sub);
3824 
3825 #else
3826 static inline void preempt_latency_start(int val) { }
3827 static inline void preempt_latency_stop(int val) { }
3828 #endif
3829 
3830 static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
3831 {
3832 #ifdef CONFIG_DEBUG_PREEMPT
3833         return p->preempt_disable_ip;
3834 #else
3835         return 0;
3836 #endif
3837 }
3838 
3839 
3840 
3841 
3842 static noinline void __schedule_bug(struct task_struct *prev)
3843 {
3844         
3845         unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
3846 
3847         if (oops_in_progress)
3848                 return;
3849 
3850         printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
3851                 prev->comm, prev->pid, preempt_count());
3852 
3853         debug_show_held_locks(prev);
3854         print_modules();
3855         if (irqs_disabled())
3856                 print_irqtrace_events(prev);
3857         if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
3858             && in_atomic_preempt_off()) {
3859                 pr_err("Preemption disabled at:");
3860                 print_ip_sym(preempt_disable_ip);
3861                 pr_cont("\n");
3862         }
3863         if (panic_on_warn)
3864                 panic("scheduling while atomic\n");
3865 
3866         dump_stack();
3867         add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
3868 }
3869 
3870 
3871 
3872 
3873 static inline void schedule_debug(struct task_struct *prev, bool preempt)
3874 {
3875 #ifdef CONFIG_SCHED_STACK_END_CHECK
3876         if (task_stack_end_corrupted(prev))
3877                 panic("corrupted stack end detected inside scheduler\n");
3878 #endif
3879 
3880 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
3881         if (!preempt && prev->state && prev->non_block_count) {
3882                 printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
3883                         prev->comm, prev->pid, prev->non_block_count);
3884                 dump_stack();
3885                 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
3886         }
3887 #endif
3888 
3889         if (unlikely(in_atomic_preempt_off())) {
3890                 __schedule_bug(prev);
3891                 preempt_count_set(PREEMPT_DISABLED);
3892         }
3893         rcu_sleep_check();
3894 
3895         profile_hit(SCHED_PROFILING, __builtin_return_address(0));
3896 
3897         schedstat_inc(this_rq()->sched_count);
3898 }
3899 
3900 
3901 
3902 
3903 static inline struct task_struct *
3904 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
3905 {
3906         const struct sched_class *class;
3907         struct task_struct *p;
3908 
3909         /*
3910          * Optimization: we know that if all tasks are in the fair class
3911          * we can call that function directly, but only if the @prev task
3912          * wasn't of a higher scheduling class, because otherwise those
3913          * lose the opportunity to pull in more work from other CPUs.
3914          */
3915         if (likely((prev->sched_class == &idle_sched_class ||
3916                     prev->sched_class == &fair_sched_class) &&
3917                    rq->nr_running == rq->cfs.h_nr_running)) {
3918 
3919                 p = fair_sched_class.pick_next_task(rq, prev, rf);
3920                 if (unlikely(p == RETRY_TASK))
3921                         goto restart;
3922 
3923                 /* Assumes fair_sched_class->next == idle_sched_class */
3924                 if (unlikely(!p))
3925                         p = idle_sched_class.pick_next_task(rq, prev, rf);
3926 
3927                 return p;
3928         }
3929 
3930 restart:
3931 #ifdef CONFIG_SMP
3932         
3933 
3934 
3935 
3936 
3937 
3938 
3939 
3940         for_class_range(class, prev->sched_class, &idle_sched_class) {
3941                 if (class->balance(rq, prev, rf))
3942                         break;
3943         }
3944 #endif
3945 
3946         put_prev_task(rq, prev);
3947 
3948         for_each_class(class) {
3949                 p = class->pick_next_task(rq, NULL, NULL);
3950                 if (p)
3951                         return p;
3952         }
3953 
3954         
3955         BUG();
3956 }
3957 
3958 
3959 
3960 
3961 
3962 
3963 
3964 
3965 
3966 
3967 
3968 
3969 
3970 
3971 
3972 
3973 
3974 
3975 
3976 
3977 
3978 
3979 
3980 
3981 
3982 
3983 
3984 
3985 /*
3986  * __schedule() is the main scheduler function.
3987  *
3988  * The main means of driving the scheduler and thus entering this function:
3989  *   1. Explicit blocking: mutex, semaphore, waitqueue, etc.
3990  *   2. TIF_NEED_RESCHED, checked on interrupt and userspace return paths
3991  *      and on preempt_enable() when kernel preemption is enabled.
3992  *   3. Wakeups don't directly cause entry into schedule(); they merely
3993  *      add a task to the run-queue, which may then lead to preemption.
3994  *
3995  * WARNING: must be called with preemption disabled!
3996  */
3997 static void __sched notrace __schedule(bool preempt)
3998 {
3999         struct task_struct *prev, *next;
4000         unsigned long *switch_count;
4001         struct rq_flags rf;
4002         struct rq *rq;
4003         int cpu;
4004 
4005         cpu = smp_processor_id();
4006         rq = cpu_rq(cpu);
4007         prev = rq->curr;
4008 
4009         schedule_debug(prev, preempt);
4010 
4011         if (sched_feat(HRTICK))
4012                 hrtick_clear(rq);
4013 
4014         local_irq_disable();
4015         rcu_note_context_switch(preempt);
4016 
4017         
4018 
4019 
4020 
4021 
4022 
4023 
4024 
4025         rq_lock(rq, &rf);
4026         smp_mb__after_spinlock();
4027 
4028         
4029         rq->clock_update_flags <<= 1;
4030         update_rq_clock(rq);
4031 
4032         switch_count = &prev->nivcsw;
4033         if (!preempt && prev->state) {
4034                 if (signal_pending_state(prev->state, prev)) {
4035                         prev->state = TASK_RUNNING;
4036                 } else {
4037                         deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
4038 
4039                         if (prev->in_iowait) {
4040                                 atomic_inc(&rq->nr_iowait);
4041                                 delayacct_blkio_start();
4042                         }
4043                 }
4044                 switch_count = &prev->nvcsw;
4045         }
4046 
4047         next = pick_next_task(rq, prev, &rf);
4048         clear_tsk_need_resched(prev);
4049         clear_preempt_need_resched();
4050 
4051         if (likely(prev != next)) {
4052                 rq->nr_switches++;
4053                 
4054 
4055 
4056 
4057                 RCU_INIT_POINTER(rq->curr, next);
4058                 
4059 
4060 
4061 
4062 
4063 
4064 
4065 
4066 
4067 
4068 
4069 
4070 
4071 
4072                 ++*switch_count;
4073 
4074                 trace_sched_switch(preempt, prev, next);
4075 
4076                 
4077                 rq = context_switch(rq, prev, next, &rf);
4078         } else {
4079                 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
4080                 rq_unlock_irq(rq, &rf);
4081         }
4082 
4083         balance_callback(rq);
4084 }
4085 
4086 void __noreturn do_task_dead(void)
4087 {
4088         
4089         set_special_state(TASK_DEAD);
4090 
4091         
4092         current->flags |= PF_NOFREEZE;
4093 
4094         __schedule(false);
4095         BUG();
4096 
4097         
4098         for (;;)
4099                 cpu_relax();
4100 }
4101 
4102 static inline void sched_submit_work(struct task_struct *tsk)
4103 {
4104         if (!tsk->state)
4105                 return;
4106 
4107         
4108 
4109 
4110 
4111 
4112 
4113 
4114         if (tsk->flags & PF_WQ_WORKER) {
4115                 preempt_disable();
4116                 wq_worker_sleeping(tsk);
4117                 preempt_enable_no_resched();
4118         }
4119 
4120         if (tsk_is_pi_blocked(tsk))
4121                 return;
4122 
4123         
4124 
4125 
4126 
4127         if (blk_needs_flush_plug(tsk))
4128                 blk_schedule_flush_plug(tsk);
4129 }
4130 
4131 static void sched_update_worker(struct task_struct *tsk)
4132 {
4133         if (tsk->flags & PF_WQ_WORKER)
4134                 wq_worker_running(tsk);
4135 }
4136 
4137 asmlinkage __visible void __sched schedule(void)
4138 {
4139         struct task_struct *tsk = current;
4140 
4141         sched_submit_work(tsk);
4142         do {
4143                 preempt_disable();
4144                 __schedule(false);
4145                 sched_preempt_enable_no_resched();
4146         } while (need_resched());
4147         sched_update_worker(tsk);
4148 }
4149 EXPORT_SYMBOL(schedule);
4150 
4151 
4152 
4153 
4154 
4155 
4156 
4157 
4158 
4159 
4160 
4161 void __sched schedule_idle(void)
4162 {
4163         
4164 
4165 
4166 
4167 
4168 
4169 
4170         WARN_ON_ONCE(current->state);
4171         do {
4172                 __schedule(false);
4173         } while (need_resched());
4174 }
4175 
4176 #ifdef CONFIG_CONTEXT_TRACKING
4177 asmlinkage __visible void __sched schedule_user(void)
4178 {
4179         
4180 
4181 
4182 
4183 
4184 
4185 
4186 
4187 
4188 
4189         enum ctx_state prev_state = exception_enter();
4190         schedule();
4191         exception_exit(prev_state);
4192 }
4193 #endif
4194 
4195 
4196 
4197 
4198 
4199 
4200 void __sched schedule_preempt_disabled(void)
4201 {
4202         sched_preempt_enable_no_resched();
4203         schedule();
4204         preempt_disable();
4205 }
4206 
4207 static void __sched notrace preempt_schedule_common(void)
4208 {
4209         do {
4210                 
4211 
4212 
4213 
4214 
4215 
4216 
4217 
4218 
4219 
4220 
4221 
4222 
4223                 preempt_disable_notrace();
4224                 preempt_latency_start(1);
4225                 __schedule(true);
4226                 preempt_latency_stop(1);
4227                 preempt_enable_no_resched_notrace();
4228 
4229                 
4230 
4231 
4232 
4233         } while (need_resched());
4234 }
4235 
4236 #ifdef CONFIG_PREEMPTION
4237 
4238 
4239 
4240 
4241 asmlinkage __visible void __sched notrace preempt_schedule(void)
4242 {
4243         
4244 
4245 
4246 
4247         if (likely(!preemptible()))
4248                 return;
4249 
4250         preempt_schedule_common();
4251 }
4252 NOKPROBE_SYMBOL(preempt_schedule);
4253 EXPORT_SYMBOL(preempt_schedule);
4254 
4255 
4256 
4257 
4258 
4259 
4260 
4261 
4262 
4263 
4264 
4265 
4266 
4267 
4268 
4269 asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
4270 {
4271         enum ctx_state prev_ctx;
4272 
4273         if (likely(!preemptible()))
4274                 return;
4275 
4276         do {
4277                 
4278 
4279 
4280 
4281 
4282 
4283 
4284 
4285 
4286 
4287 
4288 
4289 
4290                 preempt_disable_notrace();
4291                 preempt_latency_start(1);
4292                 
4293 
4294 
4295 
4296 
4297                 prev_ctx = exception_enter();
4298                 __schedule(true);
4299                 exception_exit(prev_ctx);
4300 
4301                 preempt_latency_stop(1);
4302                 preempt_enable_no_resched_notrace();
4303         } while (need_resched());
4304 }
4305 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
4306 
4307 #endif 
4308 
4309 
4310 
4311 
4312 
4313 
4314 
4315 asmlinkage __visible void __sched preempt_schedule_irq(void)
4316 {
4317         enum ctx_state prev_state;
4318 
4319         
4320         BUG_ON(preempt_count() || !irqs_disabled());
4321 
4322         prev_state = exception_enter();
4323 
4324         do {
4325                 preempt_disable();
4326                 local_irq_enable();
4327                 __schedule(true);
4328                 local_irq_disable();
4329                 sched_preempt_enable_no_resched();
4330         } while (need_resched());
4331 
4332         exception_exit(prev_state);
4333 }
4334 
4335 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
4336                           void *key)
4337 {
4338         return try_to_wake_up(curr->private, mode, wake_flags);
4339 }
4340 EXPORT_SYMBOL(default_wake_function);
4341 
4342 #ifdef CONFIG_RT_MUTEXES
4343 
4344 static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
4345 {
4346         if (pi_task)
4347                 prio = min(prio, pi_task->prio);
4348 
4349         return prio;
4350 }
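     /*
      * __rt_effective_prio() above implements priority inheritance as a
      * simple min(): lower numeric prio means higher priority.  Worked
      * example: a CFS task at prio 120 (nice 0) whose top pi waiter sits at
      * prio 40 is boosted to prio 40, i.e. into the RT range.
      */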
4351 
4352 static inline int rt_effective_prio(struct task_struct *p, int prio)
4353 {
4354         struct task_struct *pi_task = rt_mutex_get_top_task(p);
4355 
4356         return __rt_effective_prio(pi_task, prio);
4357 }
4358 
4359 
4360 
4361 
4362 
4363 
4364 
4365 
4366 
4367 
4368 
4369 
4370 void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
4371 {
4372         int prio, oldprio, queued, running, queue_flag =
4373                 DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
4374         const struct sched_class *prev_class;
4375         struct rq_flags rf;
4376         struct rq *rq;
4377 
4378         
4379         prio = __rt_effective_prio(pi_task, p->normal_prio);
4380 
4381         
4382 
4383 
4384         if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
4385                 return;
4386 
4387         rq = __task_rq_lock(p, &rf);
4388         update_rq_clock(rq);
4389         
4390 
4391 
4392 
4393 
4394 
4395 
4396 
4397 
4398 
4399         p->pi_top_task = pi_task;
4400 
4401         
4402 
4403 
4404         if (prio == p->prio && !dl_prio(prio))
4405                 goto out_unlock;
4406 
4407         
4408 
4409 
4410 
4411 
4412 
4413 
4414 
4415 
4416 
4417 
4418 
4419         if (unlikely(p == rq->idle)) {
4420                 WARN_ON(p != rq->curr);
4421                 WARN_ON(p->pi_blocked_on);
4422                 goto out_unlock;
4423         }
4424 
4425         trace_sched_pi_setprio(p, pi_task);
4426         oldprio = p->prio;
4427 
4428         if (oldprio == prio)
4429                 queue_flag &= ~DEQUEUE_MOVE;
4430 
4431         prev_class = p->sched_class;
4432         queued = task_on_rq_queued(p);
4433         running = task_current(rq, p);
4434         if (queued)
4435                 dequeue_task(rq, p, queue_flag);
4436         if (running)
4437                 put_prev_task(rq, p);
4438 
4439         
4440 
4441 
4442 
4443 
4444 
4445 
4446 
4447 
4448         if (dl_prio(prio)) {
4449                 if (!dl_prio(p->normal_prio) ||
4450                     (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
4451                         p->dl.dl_boosted = 1;
4452                         queue_flag |= ENQUEUE_REPLENISH;
4453                 } else
4454                         p->dl.dl_boosted = 0;
4455                 p->sched_class = &dl_sched_class;
4456         } else if (rt_prio(prio)) {
4457                 if (dl_prio(oldprio))
4458                         p->dl.dl_boosted = 0;
4459                 if (oldprio < prio)
4460                         queue_flag |= ENQUEUE_HEAD;
4461                 p->sched_class = &rt_sched_class;
4462         } else {
4463                 if (dl_prio(oldprio))
4464                         p->dl.dl_boosted = 0;
4465                 if (rt_prio(oldprio))
4466                         p->rt.timeout = 0;
4467                 p->sched_class = &fair_sched_class;
4468         }
4469 
4470         p->prio = prio;
4471 
4472         if (queued)
4473                 enqueue_task(rq, p, queue_flag);
4474         if (running)
4475                 set_next_task(rq, p);
4476 
4477         check_class_changed(rq, p, prev_class, oldprio);
4478 out_unlock:
4479         /* Prevent rq from going away on us: */
4480         preempt_disable();
4481         __task_rq_unlock(rq, &rf);
4482 
4483         balance_callback(rq);
4484         preempt_enable();
4485 }
4486 #else
4487 static inline int rt_effective_prio(struct task_struct *p, int prio)
4488 {
4489         return prio;
4490 }
4491 #endif
4492 
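/*
 * set_user_nice - set the nice value (and hence static priority and load
 * weight) of a task. For RT and deadline tasks only the static priority is
 * recorded; the nice value takes effect once the task returns to a fair
 * policy.
 */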
4493 void set_user_nice(struct task_struct *p, long nice)
4494 {
4495         bool queued, running;
4496         int old_prio, delta;
4497         struct rq_flags rf;
4498         struct rq *rq;
4499 
4500         if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
4501                 return;
4502         /*
4503          * We have to be careful, if called from sys_setpriority(),
4504          * the task might be in the middle of scheduling on another CPU.
4505          */
4506         rq = task_rq_lock(p, &rf);
4507         update_rq_clock(rq);
4508 
4509         /*
4510          * The RT priorities are set via sched_setscheduler(), but we still
4511          * allow the 'normal' nice value to be set - but as expected
4512          * it won't have any effect on scheduling while the task keeps a
4513          * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR policy:
4514          */
4515         if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
4516                 p->static_prio = NICE_TO_PRIO(nice);
4517                 goto out_unlock;
4518         }
4519         queued = task_on_rq_queued(p);
4520         running = task_current(rq, p);
4521         if (queued)
4522                 dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
4523         if (running)
4524                 put_prev_task(rq, p);
4525 
4526         p->static_prio = NICE_TO_PRIO(nice);
4527         set_load_weight(p, true);
4528         old_prio = p->prio;
4529         p->prio = effective_prio(p);
4530         delta = p->prio - old_prio;
4531 
4532         if (queued) {
4533                 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
4534                 /*
4535                  * If the task increased its priority or is running and
4536                  * lowered its priority, then reschedule its CPU:
4537                  */
4538                 if (delta < 0 || (delta > 0 && task_running(rq, p)))
4539                         resched_curr(rq);
4540         }
4541         if (running)
4542                 set_next_task(rq, p);
4543 out_unlock:
4544         task_rq_unlock(rq, p, &rf);
4545 }
4546 EXPORT_SYMBOL(set_user_nice);
4547 
4548 /*
4549  * can_nice - check if a task can reduce its nice value
4550  * @p: task
4551  * @nice: nice value
4552  */
4553 int can_nice(const struct task_struct *p, const int nice)
4554 {
4555         /* Convert nice value [19,-20] to rlimit style value [1,40]: */
4556         int nice_rlim = nice_to_rlimit(nice);
4557 
4558         return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
4559                 capable(CAP_SYS_NICE));
4560 }
4561 
4562 #ifdef __ARCH_WANT_SYS_NICE
4563 
4564 /*
4565  * sys_nice - change the priority of the current process.
4566  * @increment: priority increment
4567  *
4568  * sys_setpriority is a more generic, but much slower function that
4569  * does similar things.
4570  */
4571 SYSCALL_DEFINE1(nice, int, increment)
4572 {
4573         long nice, retval;
4574 
4575         /*
4576          * Setpriority might change our priority at the same moment.
4577          * We don't have to worry. Conceptually one call occurs first
4578          * and we have a single winner.
4579          */
4580         increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH);
4581         nice = task_nice(current) + increment;
4582 
4583         nice = clamp_val(nice, MIN_NICE, MAX_NICE);
4584         if (increment < 0 && !can_nice(current, nice))
4585                 return -EPERM;
4586 
4587         retval = security_task_setnice(current, nice);
4588         if (retval)
4589                 return retval;
4590 
4591         set_user_nice(current, nice);
4592         return 0;
4593 }
4594 
4595 #endif
4596 
4597 /**
4598  * task_prio - return the priority value of a given task.
4599  * @p: the task in question.
4600  *
4601  * Return: The priority value as seen by users in /proc.
4602  * RT tasks are offset by -200. Normal tasks are centered
4603  * around 0, value goes from -16 to +15.
4604  */
4605 int task_prio(const struct task_struct *p)
4606 {
4607         return p->prio - MAX_RT_PRIO;
4608 }
4609 
4610 /**
4611  * idle_cpu - is a given CPU idle currently?
4612  * @cpu: the processor in question.
4613  *
4614  * Return: 1 if the CPU is currently idle. 0 otherwise.
4615  */
4616 int idle_cpu(int cpu)
4617 {
4618         struct rq *rq = cpu_rq(cpu);
4619 
4620         if (rq->curr != rq->idle)
4621                 return 0;
4622 
4623         if (rq->nr_running)
4624                 return 0;
4625 
4626 #ifdef CONFIG_SMP
4627         if (!llist_empty(&rq->wake_list))
4628                 return 0;
4629 #endif
4630 
4631         return 1;
4632 }
4633 
4634 /**
4635  * available_idle_cpu - is a given CPU idle for enqueuing work.
4636  * @cpu: the CPU in question.
4637  *
4638  * Return: 1 if the CPU is currently idle. 0 otherwise.
4639  */
4640 int available_idle_cpu(int cpu)
4641 {
4642         if (!idle_cpu(cpu))
4643                 return 0;
4644 
4645         if (vcpu_is_preempted(cpu))
4646                 return 0;
4647 
4648         return 1;
4649 }
4650 
4651 /**
4652  * idle_task - return the idle task for a given CPU.
4653  * @cpu: the processor in question.
4654  *
4655  * Return: The idle task for the CPU @cpu.
4656  */
4657 struct task_struct *idle_task(int cpu)
4658 {
4659         return cpu_rq(cpu)->idle;
4660 }
4661 
4662 /**
4663  * find_process_by_pid - find a process with a matching PID value.
4664  * @pid: the pid in question.
4665  *
4666  * Return: the task of @pid if found, current when @pid is 0, NULL otherwise.
4667  */
4668 static struct task_struct *find_process_by_pid(pid_t pid)
4669 {
4670         return pid ? find_task_by_vpid(pid) : current;
4671 }
4672 
4673 /*
4674  * sched_setparam() passes in -1 for its policy, to let the functions
4675  * it calls know not to change it.
4676  */
4677 #define SETPARAM_POLICY -1
4678 
4679 static void __setscheduler_params(struct task_struct *p,
4680                 const struct sched_attr *attr)
4681 {
4682         int policy = attr->sched_policy;
4683 
4684         if (policy == SETPARAM_POLICY)
4685                 policy = p->policy;
4686 
4687         p->policy = policy;
4688 
4689         if (dl_policy(policy))
4690                 __setparam_dl(p, attr);
4691         else if (fair_policy(policy))
4692                 p->static_prio = NICE_TO_PRIO(attr->sched_nice);
4693 
4694         /*
4695          * __sched_setscheduler() ensures attr->sched_priority == 0 when
4696          * !rt_policy. Always setting this ensures that things like
4697          * getparam()/getattr() don't report silly values for !rt tasks.
4698          */
4699         p->rt_priority = attr->sched_priority;
4700         p->normal_prio = normal_prio(p);
4701         set_load_weight(p, true);
4702 }
4703 
4704 /* Actually do priority change: must hold pi & rq lock. */
4705 static void __setscheduler(struct rq *rq, struct task_struct *p,
4706                            const struct sched_attr *attr, bool keep_boost)
4707 {
4708         /*
4709          * If params can't change, scheduling class changes aren't allowed
4710          * either.
4711          */
4712         if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
4713                 return;
4714 
4715         __setscheduler_params(p, attr);
4716 
4717         /*
4718          * Keep a potential priority boosting if called from
4719          * sched_setscheduler().
4720          */
4721         p->prio = normal_prio(p);
4722         if (keep_boost)
4723                 p->prio = rt_effective_prio(p, p->prio);
4724 
4725         if (dl_prio(p->prio))
4726                 p->sched_class = &dl_sched_class;
4727         else if (rt_prio(p->prio))
4728                 p->sched_class = &rt_sched_class;
4729         else
4730                 p->sched_class = &fair_sched_class;
4731 }
4732 
4733 /*
4734  * Check the target process has a UID that matches the current process's:
4735  */
4736 static bool check_same_owner(struct task_struct *p)
4737 {
4738         const struct cred *cred = current_cred(), *pcred;
4739         bool match;
4740 
4741         rcu_read_lock();
4742         pcred = __task_cred(p);
4743         match = (uid_eq(cred->euid, pcred->euid) ||
4744                  uid_eq(cred->euid, pcred->uid));
4745         rcu_read_unlock();
4746         return match;
4747 }
4748 
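/*
 * __sched_setscheduler - core of all the sched_setscheduler()/sched_setattr()
 * variants.
 *
 * @user selects whether permission and security checks are applied (false for
 * kernel-internal callers); @pi selects whether the priority-inheritance chain
 * is taken into account and readjusted afterwards.
 */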
4749 static int __sched_setscheduler(struct task_struct *p,
4750                                 const struct sched_attr *attr,
4751                                 bool user, bool pi)
4752 {
4753         int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
4754                       MAX_RT_PRIO - 1 - attr->sched_priority;
4755         int retval, oldprio, oldpolicy = -1, queued, running;
4756         int new_effective_prio, policy = attr->sched_policy;
4757         const struct sched_class *prev_class;
4758         struct rq_flags rf;
4759         int reset_on_fork;
4760         int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
4761         struct rq *rq;
4762 
4763         
4764         BUG_ON(pi && in_interrupt());
4765 recheck:
4766         
4767         if (policy < 0) {
4768                 reset_on_fork = p->sched_reset_on_fork;
4769                 policy = oldpolicy = p->policy;
4770         } else {
4771                 reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK);
4772 
4773                 if (!valid_policy(policy))
4774                         return -EINVAL;
4775         }
4776 
4777         if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
4778                 return -EINVAL;
4779 
4780         /*
4781          * Valid priorities for SCHED_FIFO and SCHED_RR are
4782          * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
4783          * SCHED_BATCH and SCHED_IDLE is 0.
4784          */
4785         if ((p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
4786             (!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
4787                 return -EINVAL;
4788         if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
4789             (rt_policy(policy) != (attr->sched_priority != 0)))
4790                 return -EINVAL;
4791 
4792         
4793 
4794 
4795         if (user && !capable(CAP_SYS_NICE)) {
4796                 if (fair_policy(policy)) {
4797                         if (attr->sched_nice < task_nice(p) &&
4798                             !can_nice(p, attr->sched_nice))
4799                                 return -EPERM;
4800                 }
4801 
4802                 if (rt_policy(policy)) {
4803                         unsigned long rlim_rtprio =
4804                                         task_rlimit(p, RLIMIT_RTPRIO);
4805 
4806                         
4807                         if (policy != p->policy && !rlim_rtprio)
4808                                 return -EPERM;
4809 
4810                         
4811                         if (attr->sched_priority > p->rt_priority &&
4812                             attr->sched_priority > rlim_rtprio)
4813                                 return -EPERM;
4814                 }
4815 
4816                 /*
4817                  * Can't set/change SCHED_DEADLINE policy at all for now
4818                  * (safest behavior); in the future we would like to allow
4819                  * unprivileged DL tasks to increase their relative deadline
4820                  * or reduce their runtime (both ways reducing utilization).
4821                  */
4822                 if (dl_policy(policy))
4823                         return -EPERM;
4824 
4825                 
4826 
4827 
4828 
4829                 if (task_has_idle_policy(p) && !idle_policy(policy)) {
4830                         if (!can_nice(p, task_nice(p)))
4831                                 return -EPERM;
4832                 }
4833 
4834                 
4835                 if (!check_same_owner(p))
4836                         return -EPERM;
4837 
4838                 
4839                 if (p->sched_reset_on_fork && !reset_on_fork)
4840                         return -EPERM;
4841         }
4842 
4843         if (user) {
4844                 if (attr->sched_flags & SCHED_FLAG_SUGOV)
4845                         return -EINVAL;
4846 
4847                 retval = security_task_setscheduler(p);
4848                 if (retval)
4849                         return retval;
4850         }
4851 
4852         
4853         if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
4854                 retval = uclamp_validate(p, attr);
4855                 if (retval)
4856                         return retval;
4857         }
4858 
4859         if (pi)
4860                 cpuset_read_lock();
4861 
4862         /*
4863          * Make sure no PI-waiters arrive (or leave) while we are
4864          * changing the priority of the task:
4865          *
4866          * To be able to change p->policy safely, the appropriate
4867          * runqueue lock must be held.
4868          */
4869         rq = task_rq_lock(p, &rf);
4870         update_rq_clock(rq);
4871 
4872         
4873 
4874 
4875         if (p == rq->stop) {
4876                 retval = -EINVAL;
4877                 goto unlock;
4878         }
4879 
4880         
4881 
4882 
4883 
4884         if (unlikely(policy == p->policy)) {
4885                 if (fair_policy(policy) && attr->sched_nice != task_nice(p))
4886                         goto change;
4887                 if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
4888                         goto change;
4889                 if (dl_policy(policy) && dl_param_changed(p, attr))
4890                         goto change;
4891                 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
4892                         goto change;
4893 
4894                 p->sched_reset_on_fork = reset_on_fork;
4895                 retval = 0;
4896                 goto unlock;
4897         }
4898 change:
4899 
4900         if (user) {
4901 #ifdef CONFIG_RT_GROUP_SCHED
4902                 
4903 
4904 
4905 
4906                 if (rt_bandwidth_enabled() && rt_policy(policy) &&
4907                                 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
4908                                 !task_group_is_autogroup(task_group(p))) {
4909                         retval = -EPERM;
4910                         goto unlock;
4911                 }
4912 #endif
4913 #ifdef CONFIG_SMP
4914                 if (dl_bandwidth_enabled() && dl_policy(policy) &&
4915                                 !(attr->sched_flags & SCHED_FLAG_SUGOV)) {
4916                         cpumask_t *span = rq->rd->span;
4917 
4918                         /*
4919                          * Don't allow tasks with an affinity mask smaller
4920                          * than the entire root_domain to become
4921                          * SCHED_DEADLINE. We will also fail if there's no
4922                          * bandwidth available.
4923                          */
4923                         if (!cpumask_subset(span, p->cpus_ptr) ||
4924                             rq->rd->dl_bw.bw == 0) {
4925                                 retval = -EPERM;
4926                                 goto unlock;
4927                         }
4928                 }
4929 #endif
4930         }
4931 
4932         
4933         if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
4934                 policy = oldpolicy = -1;
4935                 task_rq_unlock(rq, p, &rf);
4936                 if (pi)
4937                         cpuset_read_unlock();
4938                 goto recheck;
4939         }
4940 
4941         /*
4942          * If setscheduling to SCHED_DEADLINE (or changing the parameters
4943          * of a SCHED_DEADLINE task) we need to check if enough bandwidth
4944          * is available.
4945          */
4946         if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
4947                 retval = -EBUSY;
4948                 goto unlock;
4949         }
4950 
4951         p->sched_reset_on_fork = reset_on_fork;
4952         oldprio = p->prio;
4953 
4954         if (pi) {
4955                 /*
4956                  * Take priority boosted tasks into account. If the new
4957                  * effective priority is unchanged, we just store the new
4958                  * normal parameters and do not touch the scheduler class
4959                  * and the runqueue. This will be done when the task
4960                  * deboosts itself.
4961                  */
4962                 new_effective_prio = rt_effective_prio(p, newprio);
4963                 if (new_effective_prio == oldprio)
4964                         queue_flags &= ~DEQUEUE_MOVE;
4965         }
4966 
4967         queued = task_on_rq_queued(p);
4968         running = task_current(rq, p);
4969         if (queued)
4970                 dequeue_task(rq, p, queue_flags);
4971         if (running)
4972                 put_prev_task(rq, p);
4973 
4974         prev_class = p->sched_class;
4975 
4976         __setscheduler(rq, p, attr, pi);
4977         __setscheduler_uclamp(p, attr);
4978 
4979         if (queued) {
4980                 
4981 
4982 
4983 
4984                 if (oldprio < p->prio)
4985                         queue_flags |= ENQUEUE_HEAD;
4986 
4987                 enqueue_task(rq, p, queue_flags);
4988         }
4989         if (running)
4990                 set_next_task(rq, p);
4991 
4992         check_class_changed(rq, p, prev_class, oldprio);
4993 
4994         
4995         preempt_disable();
4996         task_rq_unlock(rq, p, &rf);
4997 
4998         if (pi) {
4999                 cpuset_read_unlock();
5000                 rt_mutex_adjust_pi(p);
5001         }
5002 
5003         
5004         balance_callback(rq);
5005         preempt_enable();
5006 
5007         return 0;
5008 
5009 unlock:
5010         task_rq_unlock(rq, p, &rf);
5011         if (pi)
5012                 cpuset_read_unlock();
5013         return retval;
5014 }
5015 
5016 static int _sched_setscheduler(struct task_struct *p, int policy,
5017                                const struct sched_param *param, bool check)
5018 {
5019         struct sched_attr attr = {
5020                 .sched_policy   = policy,
5021                 .sched_priority = param->sched_priority,
5022                 .sched_nice     = PRIO_TO_NICE(p->static_prio),
5023         };
5024 
5025         
5026         if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) {
5027                 attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
5028                 policy &= ~SCHED_RESET_ON_FORK;
5029                 attr.sched_policy = policy;
5030         }
5031 
5032         return __sched_setscheduler(p, &attr, check, true);
5033 }
5034 
5035 
5036 
5037 
5038 
5039 
5040 
5041 
5042 
5043 
5044 int sched_setscheduler(struct task_struct *p, int policy,
5045                        const struct sched_param *param)
5046 {
5047         return _sched_setscheduler(p, policy, param, true);
5048 }
5049 EXPORT_SYMBOL_GPL(sched_setscheduler);
5050 
5051 int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
5052 {
5053         return __sched_setscheduler(p, attr, true, true);
5054 }
5055 EXPORT_SYMBOL_GPL(sched_setattr);
5056 
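/*
 * sched_setattr_nocheck - same as sched_setattr(), but skips the permission
 * and security checks; intended for kernel-internal callers only.
 */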
5057 int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
5058 {
5059         return __sched_setscheduler(p, attr, false, true);
5060 }
5061 
5062 
5063 /**
5064  * sched_setscheduler_nocheck - change the scheduling policy and/or RT
5065  * priority of a thread from kernelspace.
5066  * @p: the task in question.
5067  * @policy: new policy.
5068  * @param: structure containing the new RT priority.
5069  *
5070  * Just like sched_setscheduler(), only without checking whether the current
5071  * context has permission; needed e.g. by stop_machine(), which creates
5072  * temporary high-priority worker threads for callers that may lack that
5073  * capability. Return: 0 on success, an error code otherwise.
5074  */
5075 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
5076                                const struct sched_param *param)
5077 {
5078         return _sched_setscheduler(p, policy, param, false);
5079 }
5080 EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck);
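/*
 * Illustrative use (not taken from this file): a subsystem that owns a
 * kthread, here called 'worker', could make it a mid-priority FIFO task
 * without any capability checks:
 *
 *	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
 *
 *	sched_setscheduler_nocheck(worker, SCHED_FIFO, &param);
 */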
5081 
5082 static int
5083 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5084 {
5085         struct sched_param lparam;
5086         struct task_struct *p;
5087         int retval;
5088 
5089         if (!param || pid < 0)
5090                 return -EINVAL;
5091         if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
5092                 return -EFAULT;
5093 
5094         rcu_read_lock();
5095         retval = -ESRCH;
5096         p = find_process_by_pid(pid);
5097         if (likely(p))
5098                 get_task_struct(p);
5099         rcu_read_unlock();
5100 
5101         if (likely(p)) {
5102                 retval = sched_setscheduler(p, policy, &lparam);
5103                 put_task_struct(p);
5104         }
5105 
5106         return retval;
5107 }
5108 
5109 
5110 
5111 
5112 static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr)
5113 {
5114         u32 size;
5115         int ret;
5116 
5117         
5118         memset(attr, 0, sizeof(*attr));
5119 
5120         ret = get_user(size, &uattr->size);
5121         if (ret)
5122                 return ret;
5123 
5124         
5125         if (!size)
5126                 size = SCHED_ATTR_SIZE_VER0;
5127         if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
5128                 goto err_size;
5129 
5130         ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
5131         if (ret) {
5132                 if (ret == -E2BIG)
5133                         goto err_size;
5134                 return ret;
5135         }
5136 
5137         if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
5138             size < SCHED_ATTR_SIZE_VER1)
5139                 return -EINVAL;
5140 
5141         
5142 
5143 
5144 
5145         attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE);
5146 
5147         return 0;
5148 
5149 err_size:
5150         put_user(sizeof(*attr), &uattr->size);
5151         return -E2BIG;
5152 }
5153 
5154 
5155 
5156 
5157 
5158 
5159 
5160 
5161 
5162 SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param)
5163 {
5164         if (policy < 0)
5165                 return -EINVAL;
5166 
5167         return do_sched_setscheduler(pid, policy, param);
5168 }
5169 
5170 
5171 
5172 
5173 
5174 
5175 
5176 
5177 SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
5178 {
5179         return do_sched_setscheduler(pid, SETPARAM_POLICY, param);
5180 }
5181 
5182 
5183 
5184 
5185 
5186 
5187 
5188 SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
5189                                unsigned int, flags)
5190 {
5191         struct sched_attr attr;
5192         struct task_struct *p;
5193         int retval;
5194 
5195         if (!uattr || pid < 0 || flags)
5196                 return -EINVAL;
5197 
5198         retval = sched_copy_attr(uattr, &attr);
5199         if (retval)
5200                 return retval;
5201 
5202         if ((int)attr.sched_policy < 0)
5203                 return -EINVAL;
5204         if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY)
5205                 attr.sched_policy = SETPARAM_POLICY;
5206 
5207         rcu_read_lock();
5208         retval = -ESRCH;
5209         p = find_process_by_pid(pid);
5210         if (likely(p))
5211                 get_task_struct(p);
5212         rcu_read_unlock();
5213 
5214         if (likely(p)) {
5215                 retval = sched_setattr(p, &attr);
5216                 put_task_struct(p);
5217         }
5218 
5219         return retval;
5220 }
5221 
5222 
5223 
5224 
5225 
5226 
5227 
5228 
5229 SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
5230 {
5231         struct task_struct *p;
5232         int retval;
5233 
5234         if (pid < 0)
5235                 return -EINVAL;
5236 
5237         retval = -ESRCH;
5238         rcu_read_lock();
5239         p = find_process_by_pid(pid);
5240         if (p) {
5241                 retval = security_task_getscheduler(p);
5242                 if (!retval)
5243                         retval = p->policy
5244                                 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
5245         }
5246         rcu_read_unlock();
5247         return retval;
5248 }
5249 
5250 
5251 
5252 
5253 
5254 
5255 
5256 
5257 
5258 SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
5259 {
5260         struct sched_param lp = { .sched_priority = 0 };
5261         struct task_struct *p;
5262         int retval;
5263 
5264         if (!param || pid < 0)
5265                 return -EINVAL;
5266 
5267         rcu_read_lock();
5268         p = find_process_by_pid(pid);
5269         retval = -ESRCH;
5270         if (!p)
5271                 goto out_unlock;
5272 
5273         retval = security_task_getscheduler(p);
5274         if (retval)
5275                 goto out_unlock;
5276 
5277         if (task_has_rt_policy(p))
5278                 lp.sched_priority = p->rt_priority;
5279         rcu_read_unlock();
5280 
5281         
5282 
5283 
5284         retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
5285 
5286         return retval;
5287 
5288 out_unlock:
5289         rcu_read_unlock();
5290         return retval;
5291 }
5292 
5293 
5294 
5295 
5296 
5297 
5298 
5299 
5300 
5301 static int
5302 sched_attr_copy_to_user(struct sched_attr __user *uattr,
5303                         struct sched_attr *kattr,
5304                         unsigned int usize)
5305 {
5306         unsigned int ksize = sizeof(*kattr);
5307 
5308         if (!access_ok(uattr, usize))
5309                 return -EFAULT;
5310 
5311         
5312 
5313 
5314 
5315 
5316 
5317 
5318 
5319 
5320 
5321 
5322 
5323 
5324         kattr->size = min(usize, ksize);
5325 
5326         if (copy_to_user(uattr, kattr, kattr->size))
5327                 return -EFAULT;
5328 
5329         return 0;
5330 }
5331 
5332 
5333 
5334 
5335 
5336 
5337 
5338 
5339 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
5340                 unsigned int, usize, unsigned int, flags)
5341 {
5342         struct sched_attr kattr = { };
5343         struct task_struct *p;
5344         int retval;
5345 
5346         if (!uattr || pid < 0 || usize > PAGE_SIZE ||
5347             usize < SCHED_ATTR_SIZE_VER0 || flags)
5348                 return -EINVAL;
5349 
5350         rcu_read_lock();
5351         p = find_process_by_pid(pid);
5352         retval = -ESRCH;
5353         if (!p)
5354                 goto out_unlock;
5355 
5356         retval = security_task_getscheduler(p);
5357         if (retval)
5358                 goto out_unlock;
5359 
5360         kattr.sched_policy = p->policy;
5361         if (p->sched_reset_on_fork)
5362                 kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
5363         if (task_has_dl_policy(p))
5364                 __getparam_dl(p, &kattr);
5365         else if (task_has_rt_policy(p))
5366                 kattr.sched_priority = p->rt_priority;
5367         else
5368                 kattr.sched_nice = task_nice(p);
5369 
5370 #ifdef CONFIG_UCLAMP_TASK
5371         kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
5372         kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
5373 #endif
5374 
5375         rcu_read_unlock();
5376 
5377         return sched_attr_copy_to_user(uattr, &kattr, usize);
5378 
5379 out_unlock:
5380         rcu_read_unlock();
5381         return retval;
5382 }
5383 
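/*
 * sched_setaffinity - change the CPU affinity of the task identified by @pid
 * (or of current when @pid is 0). The requested mask is intersected with the
 * task's cpuset; if the cpuset changes concurrently, the update is retried
 * with the narrowed mask.
 */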
5384 long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5385 {
5386         cpumask_var_t cpus_allowed, new_mask;
5387         struct task_struct *p;
5388         int retval;
5389 
5390         rcu_read_lock();
5391 
5392         p = find_process_by_pid(pid);
5393         if (!p) {
5394                 rcu_read_unlock();
5395                 return -ESRCH;
5396         }
5397 
5398         
5399         get_task_struct(p);
5400         rcu_read_unlock();
5401 
5402         if (p->flags & PF_NO_SETAFFINITY) {
5403                 retval = -EINVAL;
5404                 goto out_put_task;
5405         }
5406         if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
5407                 retval = -ENOMEM;
5408                 goto out_put_task;
5409         }
5410         if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
5411                 retval = -ENOMEM;
5412                 goto out_free_cpus_allowed;
5413         }
5414         retval = -EPERM;
5415         if (!check_same_owner(p)) {
5416                 rcu_read_lock();
5417                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
5418                         rcu_read_unlock();
5419                         goto out_free_new_mask;
5420                 }
5421                 rcu_read_unlock();
5422         }
5423 
5424         retval = security_task_setscheduler(p);
5425         if (retval)
5426                 goto out_free_new_mask;
5427 
5428 
5429         cpuset_cpus_allowed(p, cpus_allowed);
5430         cpumask_and(new_mask, in_mask, cpus_allowed);
5431 
5432         
5433 
5434 
5435 
5436 
5437 
5438 #ifdef CONFIG_SMP
5439         if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
5440                 rcu_read_lock();
5441                 if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) {
5442                         retval = -EBUSY;
5443                         rcu_read_unlock();
5444                         goto out_free_new_mask;
5445                 }
5446                 rcu_read_unlock();
5447         }
5448 #endif
5449 again:
5450         retval = __set_cpus_allowed_ptr(p, new_mask, true);
5451 
5452         if (!retval) {
5453                 cpuset_cpus_allowed(p, cpus_allowed);
5454                 if (!cpumask_subset(new_mask, cpus_allowed)) {
5455                         
5456 
5457 
5458 
5459 
5460                         cpumask_copy(new_mask, cpus_allowed);
5461                         goto again;
5462                 }
5463         }
5464 out_free_new_mask:
5465         free_cpumask_var(new_mask);
5466 out_free_cpus_allowed:
5467         free_cpumask_var(cpus_allowed);
5468 out_put_task:
5469         put_task_struct(p);
5470         return retval;
5471 }
5472 
5473 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5474                              struct cpumask *new_mask)
5475 {
5476         if (len < cpumask_size())
5477                 cpumask_clear(new_mask);
5478         else if (len > cpumask_size())
5479                 len = cpumask_size();
5480 
5481         return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
5482 }
5483 
5484 
5485 
5486 
5487 
5488 
5489 
5490 
5491 
5492 SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
5493                 unsigned long __user *, user_mask_ptr)
5494 {
5495         cpumask_var_t new_mask;
5496         int retval;
5497 
5498         if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
5499                 return -ENOMEM;
5500 
5501         retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
5502         if (retval == 0)
5503                 retval = sched_setaffinity(pid, new_mask);
5504         free_cpumask_var(new_mask);
5505         return retval;
5506 }
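/*
 * Illustrative (hypothetical) userspace counterpart of the syscall above,
 * using the glibc wrapper to pin the calling thread to CPU 2:
 *
 *	cpu_set_t set;
 *
 *	CPU_ZERO(&set);
 *	CPU_SET(2, &set);
 *	sched_setaffinity(0, sizeof(set), &set);
 *
 * A pid of 0 means "the calling thread"; the kernel still clamps the mask
 * against the thread's cpuset, as done in sched_setaffinity() above.
 */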
5507 
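/*
 * sched_getaffinity - fetch the affinity mask of the task identified by @pid,
 * restricted to the CPUs that are currently active.
 */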
5508 long sched_getaffinity(pid_t pid, struct cpumask *mask)
5509 {
5510         struct task_struct *p;
5511         unsigned long flags;
5512         int retval;
5513 
5514         rcu_read_lock();
5515 
5516         retval = -ESRCH;
5517         p = find_process_by_pid(pid);
5518         if (!p)
5519                 goto out_unlock;
5520 
5521         retval = security_task_getscheduler(p);
5522         if (retval)
5523                 goto out_unlock;
5524 
5525         raw_spin_lock_irqsave(&p->pi_lock, flags);
5526         cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
5527         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5528 
5529 out_unlock:
5530         rcu_read_unlock();
5531 
5532         return retval;
5533 }
5534 
5535 
5536 
5537 
5538 
5539 
5540 
5541 
5542 
5543 
5544 SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
5545                 unsigned long __user *, user_mask_ptr)
5546 {
5547         int ret;
5548         cpumask_var_t mask;
5549 
5550         if ((len * BITS_PER_BYTE) < nr_cpu_ids)
5551                 return -EINVAL;
5552         if (len & (sizeof(unsigned long)-1))
5553                 return -EINVAL;
5554 
5555         if (!alloc_cpumask_var(&mask, GFP_KERNEL))
5556                 return -ENOMEM;
5557 
5558         ret = sched_getaffinity(pid, mask);
5559         if (ret == 0) {
5560                 unsigned int retlen = min(len, cpumask_size());
5561 
5562                 if (copy_to_user(user_mask_ptr, mask, retlen))
5563                         ret = -EFAULT;
5564                 else
5565                         ret = retlen;
5566         }
5567         free_cpumask_var(mask);
5568 
5569         return ret;
5570 }
5571 
5572 /**
5573  * sys_sched_yield - yield the current processor to other threads.
5574  *
5575  * This function yields the current CPU to other tasks. If there are no
5576  * other threads running on this CPU then this function will return.
5577  *
5578  * Return: 0.
5579  */
5580 static void do_sched_yield(void)
5581 {
5582         struct rq_flags rf;
5583         struct rq *rq;
5584 
5585         rq = this_rq_lock_irq(&rf);
5586 
5587         schedstat_inc(rq->yld_count);
5588         current->sched_class->yield_task(rq);
5589 
5590         
5591 
5592 
5593 
5594         preempt_disable();
5595         rq_unlock(rq, &rf);
5596         sched_preempt_enable_no_resched();
5597 
5598         schedule();
5599 }
5600 
5601 SYSCALL_DEFINE0(sched_yield)
5602 {
5603         do_sched_yield();
5604         return 0;
5605 }
5606 
5607 #ifndef CONFIG_PREEMPTION
5608 int __sched _cond_resched(void)
5609 {
5610         if (should_resched(0)) {
5611                 preempt_schedule_common();
5612                 return 1;
5613         }
5614         rcu_all_qs();
5615         return 0;
5616 }
5617 EXPORT_SYMBOL(_cond_resched);
5618 #endif
5619 
5620 
5621 
5622 
5623 
5624 
5625 
5626 
5627 
5628 int __cond_resched_lock(spinlock_t *lock)
5629 {
5630         int resched = should_resched(PREEMPT_LOCK_OFFSET);
5631         int ret = 0;
5632 
5633         lockdep_assert_held(lock);
5634 
5635         if (spin_needbreak(lock) || resched) {
5636                 spin_unlock(lock);
5637                 if (resched)
5638                         preempt_schedule_common();
5639                 else
5640                         cpu_relax();
5641                 ret = 1;
5642                 spin_lock(lock);
5643         }
5644         return ret;
5645 }
5646 EXPORT_SYMBOL(__cond_resched_lock);
5647 
5648 /**
5649  * yield - yield the current processor to other threads.
5650  *
5651  * Do not ever use this function, there's a 99% chance you're doing it wrong.
5652  *
5653  * The scheduler is at all times free to pick the calling task as the most
5654  * eligible task to run; if removing the yield() call from your code breaks
5655  * it, it's already broken.
5656  *
5657  * Typical broken usage is:
5658  *
5659  * wait_for_completion(&event);
5660  * yield();
5661  *
5662  * where one assumes that yield() will let 'the other' process run that will
5663  * make event true. If the current task is a SCHED_FIFO task that will never
5664  * happen. Never use yield() as a progress guarantee!
5665  *
5666  * If you want to use yield() to wait for something, use wait_event().
5667  * If you want to use yield() to be 'nice' for others, use cond_resched().
5668  * If you want to use yield() to exit, use exit().
5669  */
5670 void __sched yield(void)
5671 {
5672         set_current_state(TASK_RUNNING);
5673         do_sched_yield();
5674 }
5675 EXPORT_SYMBOL(yield);
5676 
5677 /**
5678  * yield_to - yield the current processor to another thread in
5679  * your thread group, or accelerate that thread toward the
5680  * processor it's on.
5681  * @p: target task
5682  * @preempt: whether task preemption is allowed or not
5683  *
5684  * It's the caller's job to ensure that the target task struct
5685  * can't go away on us before we can do any checks.
5686  *
5687  * Return:
5688  *	true (>0) if we indeed boosted the target task.
5689  *	false (0) if we failed to boost the target.
5690  *	-ESRCH if there's no task to yield to.
5691  */
5692 int __sched yield_to(struct task_struct *p, bool preempt)
5693 {
5694         struct task_struct *curr = current;
5695         struct rq *rq, *p_rq;
5696         unsigned long flags;
5697         int yielded = 0;
5698 
5699         local_irq_save(flags);
5700         rq = this_rq();
5701 
5702 again:
5703         p_rq = task_rq(p);
5704         
5705 
5706 
5707 
5708         if (rq->nr_running == 1 && p_rq->nr_running == 1) {
5709                 yielded = -ESRCH;
5710                 goto out_irq;
5711         }
5712 
5713         double_rq_lock(rq, p_rq);
5714         if (task_rq(p) != p_rq) {
5715                 double_rq_unlock(rq, p_rq);
5716                 goto again;
5717         }
5718 
5719         if (!curr->sched_class->yield_to_task)
5720                 goto out_unlock;
5721 
5722         if (curr->sched_class != p->sched_class)
5723                 goto out_unlock;
5724 
5725         if (task_running(p_rq, p) || p->state)
5726                 goto out_unlock;
5727 
5728         yielded = curr->sched_class->yield_to_task(rq, p, preempt);
5729         if (yielded) {
5730                 schedstat_inc(rq->yld_count);
5731                 
5732 
5733 
5734 
5735                 if (preempt && rq != p_rq)
5736                         resched_curr(p_rq);
5737         }
5738 
5739 out_unlock:
5740         double_rq_unlock(rq, p_rq);
5741 out_irq:
5742         local_irq_restore(flags);
5743 
5744         if (yielded > 0)
5745                 schedule();
5746 
5747         return yielded;
5748 }
5749 EXPORT_SYMBOL_GPL(yield_to);
5750 
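/*
 * io_schedule_prepare()/io_schedule_finish() - bracket a schedule that waits
 * on block I/O: mark the task as in_iowait (so the time is accounted as
 * iowait) and flush any plugged block requests before sleeping. The token
 * returned by the prepare side restores the previous in_iowait state.
 */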
5751 int io_schedule_prepare(void)
5752 {
5753         int old_iowait = current->in_iowait;
5754 
5755         current->in_iowait = 1;
5756         blk_schedule_flush_plug(current);
5757 
5758         return old_iowait;
5759 }
5760 
5761 void io_schedule_finish(int token)
5762 {
5763         current->in_iowait = token;
5764 }
5765 
5766 
5767 
5768 
5769 
5770 long __sched io_schedule_timeout(long timeout)
5771 {
5772         int token;
5773         long ret;
5774 
5775         token = io_schedule_prepare();
5776         ret = schedule_timeout(timeout);
5777         io_schedule_finish(token);
5778 
5779         return ret;
5780 }
5781 EXPORT_SYMBOL(io_schedule_timeout);
5782 
5783 void __sched io_schedule(void)
5784 {
5785         int token;
5786 
5787         token = io_schedule_prepare();
5788         schedule();
5789         io_schedule_finish(token);
5790 }
5791 EXPORT_SYMBOL(io_schedule);
5792 
5793 
5794 
5795 
5796 
5797 
5798 
5799 
5800 
5801 SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
5802 {
5803         int ret = -EINVAL;
5804 
5805         switch (policy) {
5806         case SCHED_FIFO:
5807         case SCHED_RR:
5808                 ret = MAX_USER_RT_PRIO-1;
5809                 break;
5810         case SCHED_DEADLINE:
5811         case SCHED_NORMAL:
5812         case SCHED_BATCH:
5813         case SCHED_IDLE:
5814                 ret = 0;
5815                 break;
5816         }
5817         return ret;
5818 }
5819 
5820 
5821 
5822 
5823 
5824 
5825 
5826 
5827 
5828 SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5829 {
5830         int ret = -EINVAL;
5831 
5832         switch (policy) {
5833         case SCHED_FIFO:
5834         case SCHED_RR:
5835                 ret = 1;
5836                 break;
5837         case SCHED_DEADLINE:
5838         case SCHED_NORMAL:
5839         case SCHED_BATCH:
5840         case SCHED_IDLE:
5841                 ret = 0;
5842         }
5843         return ret;
5844 }
5845 
5846 static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
5847 {
5848         struct task_struct *p;
5849         unsigned int time_slice;
5850         struct rq_flags rf;
5851         struct rq *rq;
5852         int retval;
5853 
5854         if (pid < 0)
5855                 return -EINVAL;
5856 
5857         retval = -ESRCH;
5858         rcu_read_lock();
5859         p = find_process_by_pid(pid);
5860         if (!p)
5861                 goto out_unlock;
5862 
5863         retval = security_task_getscheduler(p);
5864         if (retval)
5865                 goto out_unlock;
5866 
5867         rq = task_rq_lock(p, &rf);
5868         time_slice = 0;
5869         if (p->sched_class->get_rr_interval)
5870                 time_slice = p->sched_class->get_rr_interval(rq, p);
5871         task_rq_unlock(rq, p, &rf);
5872 
5873         rcu_read_unlock();
5874         jiffies_to_timespec64(time_slice, t);
5875         return 0;
5876 
5877 out_unlock:
5878         rcu_read_unlock();
5879         return retval;
5880 }
5881 
5882 
5883 
5884 
5885 
5886 
5887 
5888 
5889 
5890 
5891 
5892 
5893 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5894                 struct __kernel_timespec __user *, interval)
5895 {
5896         struct timespec64 t;
5897         int retval = sched_rr_get_interval(pid, &t);
5898 
5899         if (retval == 0)
5900                 retval = put_timespec64(&t, interval);
5901 
5902         return retval;
5903 }
5904 
5905 #ifdef CONFIG_COMPAT_32BIT_TIME
5906 SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid,
5907                 struct old_timespec32 __user *, interval)
5908 {
5909         struct timespec64 t;
5910         int retval = sched_rr_get_interval(pid, &t);
5911 
5912         if (retval == 0)
5913                 retval = put_old_timespec32(&t, interval);
5914         return retval;
5915 }
5916 #endif
5917 
5918 void sched_show_task(struct task_struct *p)
5919 {
5920         unsigned long free = 0;
5921         int ppid;
5922 
5923         if (!try_get_task_stack(p))
5924                 return;
5925 
5926         printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p));
5927 
5928         if (p->state == TASK_RUNNING)
5929                 printk(KERN_CONT "  running task    ");
5930 #ifdef CONFIG_DEBUG_STACK_USAGE
5931         free = stack_not_used(p);
5932 #endif
5933         ppid = 0;
5934         rcu_read_lock();
5935         if (pid_alive(p))
5936                 ppid = task_pid_nr(rcu_dereference(p->real_parent));
5937         rcu_read_unlock();
5938         printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
5939                 task_pid_nr(p), ppid,
5940                 (unsigned long)task_thread_info(p)->flags);
5941 
5942         print_worker_info(KERN_INFO, p);
5943         show_stack(p, NULL);
5944         put_task_stack(p);
5945 }
5946 EXPORT_SYMBOL_GPL(sched_show_task);
5947 
5948 static inline bool
5949 state_filter_match(unsigned long state_filter, struct task_struct *p)
5950 {
5951         
5952         if (!state_filter)
5953                 return true;
5954 
5955         
5956         if (!(p->state & state_filter))
5957                 return false;
5958 
5959         
5960 
5961 
5962 
5963         if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
5964                 return false;
5965 
5966         return true;
5967 }
5968 
5969 
5970 void show_state_filter(unsigned long state_filter)
5971 {
5972         struct task_struct *g, *p;
5973 
5974 #if BITS_PER_LONG == 32
5975         printk(KERN_INFO
5976                 "  task                PC stack   pid father\n");
5977 #else
5978         printk(KERN_INFO
5979                 "  task                        PC stack   pid father\n");
5980 #endif
5981         rcu_read_lock();
5982         for_each_process_thread(g, p) {
5983                 
5984 
5985 
5986 
5987 
5988 
5989 
5990                 touch_nmi_watchdog();
5991                 touch_all_softlockup_watchdogs();
5992                 if (state_filter_match(state_filter, p))
5993                         sched_show_task(p);
5994         }
5995 
5996 #ifdef CONFIG_SCHED_DEBUG
5997         if (!state_filter)
5998                 sysrq_sched_debug_show();
5999 #endif
6000         rcu_read_unlock();
6001         
6002 
6003 
6004         if (!state_filter)
6005                 debug_show_all_locks();
6006 }
6007 
6008 /**
6009  * init_idle - set up an idle thread for a given CPU
6010  * @idle: task in question
6011  * @cpu: CPU the idle task belongs to
6012  *
6013  * NOTE: this function does not set the idle thread's NEED_RESCHED
6014  * flag, to make booting more robust.
6015  */
6016 void init_idle(struct task_struct *idle, int cpu)
6017 {
6018         struct rq *rq = cpu_rq(cpu);
6019         unsigned long flags;
6020 
6021         __sched_fork(0, idle);
6022 
6023         raw_spin_lock_irqsave(&idle->pi_lock, flags);
6024         raw_spin_lock(&rq->lock);
6025 
6026         idle->state = TASK_RUNNING;
6027         idle->se.exec_start = sched_clock();
6028         idle->flags |= PF_IDLE;
6029 
6030         kasan_unpoison_task_stack(idle);
6031 
6032 #ifdef CONFIG_SMP
6033         
6034 
6035 
6036 
6037 
6038 
6039         set_cpus_allowed_common(idle, cpumask_of(cpu));
6040 #endif
6041         
6042 
6043 
6044 
6045 
6046 
6047 
6048 
6049 
6050 
6051         rcu_read_lock();
6052         __set_task_cpu(idle, cpu);
6053         rcu_read_unlock();
6054 
6055         rq->idle = idle;
6056         rcu_assign_pointer(rq->curr, idle);
6057         idle->on_rq = TASK_ON_RQ_QUEUED;
6058 #ifdef CONFIG_SMP
6059         idle->on_cpu = 1;
6060 #endif
6061         raw_spin_unlock(&rq->lock);
6062         raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
6063 
6064         
6065         init_idle_preempt_count(idle, cpu);
6066 
6067         
6068 
6069 
6070         idle->sched_class = &idle_sched_class;
6071         ftrace_graph_init_idle_task(idle, cpu);
6072         vtime_init_idle(idle, cpu);
6073 #ifdef CONFIG_SMP
6074         sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
6075 #endif
6076 }
6077 
6078 #ifdef CONFIG_SMP
6079 
6080 int cpuset_cpumask_can_shrink(const struct cpumask *cur,
6081                               const struct cpumask *trial)
6082 {
6083         int ret = 1;
6084 
6085         if (!cpumask_weight(cur))
6086                 return ret;
6087 
6088         ret = dl_cpuset_cpumask_can_shrink(cur, trial);
6089 
6090         return ret;
6091 }
6092 
6093 int task_can_attach(struct task_struct *p,
6094                     const struct cpumask *cs_cpus_allowed)
6095 {
6096         int ret = 0;
6097 
6098         
6099 
6100 
6101 
6102 
6103 
6104 
6105 
6106 
6107         if (p->flags & PF_NO_SETAFFINITY) {
6108                 ret = -EINVAL;
6109                 goto out;
6110         }
6111 
6112         if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
6113                                               cs_cpus_allowed))
6114                 ret = dl_task_can_attach(p, cs_cpus_allowed);
6115 
6116 out:
6117         return ret;
6118 }
6119 
6120 bool sched_smp_initialized __read_mostly;
6121 
6122 #ifdef CONFIG_NUMA_BALANCING
6123 
6124 int migrate_task_to(struct task_struct *p, int target_cpu)
6125 {
6126         struct migration_arg arg = { p, target_cpu };
6127         int curr_cpu = task_cpu(p);
6128 
6129         if (curr_cpu == target_cpu)
6130                 return 0;
6131 
6132         if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
6133                 return -EINVAL;
6134 
6135         
6136 
6137         trace_sched_move_numa(p, curr_cpu, target_cpu);
6138         return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
6139 }
6140 
6141 
6142 
6143 
6144 
6145 void sched_setnuma(struct task_struct *p, int nid)
6146 {
6147         bool queued, running;
6148         struct rq_flags rf;
6149         struct rq *rq;
6150 
6151         rq = task_rq_lock(p, &rf);
6152         queued = task_on_rq_queued(p);
6153         running = task_current(rq, p);
6154 
6155         if (queued)
6156                 dequeue_task(rq, p, DEQUEUE_SAVE);
6157         if (running)
6158                 put_prev_task(rq, p);
6159 
6160         p->numa_preferred_nid = nid;
6161 
6162         if (queued)
6163                 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
6164         if (running)
6165                 set_next_task(rq, p);
6166         task_rq_unlock(rq, p, &rf);
6167 }
6168 #endif /* CONFIG_NUMA_BALANCING */
6169 
6170 #ifdef CONFIG_HOTPLUG_CPU
6171 
6172 
6173 
6174 
6175 void idle_task_exit(void)
6176 {
6177         struct mm_struct *mm = current->active_mm;
6178 
6179         BUG_ON(cpu_online(smp_processor_id()));
6180 
6181         if (mm != &init_mm) {
6182                 switch_mm(mm, &init_mm, current);
6183                 current->active_mm = &init_mm;
6184                 finish_arch_post_lock_switch();
6185         }
6186         mmdrop(mm);
6187 }
6188 
6189 
6190 
6191 
6192 
6193 
6194 
6195 
6196 
6197 
6198 static void calc_load_migrate(struct rq *rq)
6199 {
6200         long delta = calc_load_fold_active(rq, 1);
6201         if (delta)
6202                 atomic_long_add(delta, &calc_load_tasks);
6203 }
6204 
6205 static struct task_struct *__pick_migrate_task(struct rq *rq)
6206 {
6207         const struct sched_class *class;
6208         struct task_struct *next;
6209 
6210         for_each_class(class) {
6211                 next = class->pick_next_task(rq, NULL, NULL);
6212                 if (next) {
6213                         next->sched_class->put_prev_task(rq, next);
6214                         return next;
6215                 }
6216         }
6217 
6218         
6219         BUG();
6220 }
6221 
6222 /*
6223  * Migrate all tasks from the rq, sleeping tasks will be migrated by
6224  * try_to_wake_up()->select_task_rq().
6225  *
6226  * Called with rq->lock held even though we're in stop_machine() and
6227  * there's no concurrency possible; we hold the required locks anyway
6228  * because of lock validation efforts.
6229  */
6230 static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
6231 {
6232         struct rq *rq = dead_rq;
6233         struct task_struct *next, *stop = rq->stop;
6234         struct rq_flags orf = *rf;
6235         int dest_cpu;
6236 
6237         
6238 
6239 
6240 
6241 
6242 
6243 
6244 
6245 
6246         rq->stop = NULL;
6247 
6248         
6249 
6250 
6251 
6252 
6253         update_rq_clock(rq);
6254 
6255         for (;;) {
6256                 
6257 
6258 
6259 
6260                 if (rq->nr_running == 1)
6261                         break;
6262 
6263                 next = __pick_migrate_task(rq);
6264 
6265                 
6266 
6267 
6268 
6269 
6270 
6271 
6272 
6273 
6274                 rq_unlock(rq, rf);
6275                 raw_spin_lock(&next->pi_lock);
6276                 rq_relock(rq, rf);
6277 
6278                 
6279 
6280 
6281 
6282 
6283                 if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
6284                         raw_spin_unlock(&next->pi_lock);
6285                         continue;
6286                 }
6287 
6288                 
6289                 dest_cpu = select_fallback_rq(dead_rq->cpu, next);
6290                 rq = __migrate_task(rq, rf, next, dest_cpu);
6291                 if (rq != dead_rq) {
6292                         rq_unlock(rq, rf);
6293                         rq = dead_rq;
6294                         *rf = orf;
6295                         rq_relock(rq, rf);
6296                 }
6297                 raw_spin_unlock(&next->pi_lock);
6298         }
6299 
6300         rq->stop = stop;
6301 }
6302 #endif /* CONFIG_HOTPLUG_CPU */
6303 
6304 void set_rq_online(struct rq *rq)
6305 {
6306         if (!rq->online) {
6307                 const struct sched_class *class;
6308 
6309                 cpumask_set_cpu(rq->cpu, rq->rd->online);
6310                 rq->online = 1;
6311 
6312                 for_each_class(class) {
6313                         if (class->rq_online)
6314                                 class->rq_online(rq);
6315                 }
6316         }
6317 }
6318 
6319 void set_rq_offline(struct rq *rq)
6320 {
6321         if (rq->online) {
6322                 const struct sched_class *class;
6323 
6324                 for_each_class(class) {
6325                         if (class->rq_offline)
6326                                 class->rq_offline(rq);
6327                 }
6328 
6329                 cpumask_clear_cpu(rq->cpu, rq->rd->online);
6330                 rq->online = 0;
6331         }
6332 }
6333 
6334 
6335 
6336 
6337 static int num_cpus_frozen;
6338 
6339 
6340 
6341 
6342 
6343 
6344 
6345 
6346 
6347 static void cpuset_cpu_active(void)
6348 {
6349         if (cpuhp_tasks_frozen) {
6350                 
6351 
6352 
6353 
6354 
6355 
6356                 partition_sched_domains(1, NULL, NULL);
6357                 if (--num_cpus_frozen)
6358                         return;
6359                 
6360 
6361 
6362 
6363 
6364                 cpuset_force_rebuild();
6365         }
6366         cpuset_update_active_cpus();
6367 }
6368 
6369 static int cpuset_cpu_inactive(unsigned int cpu)
6370 {
6371         if (!cpuhp_tasks_frozen) {
6372                 if (dl_cpu_busy(cpu))
6373                         return -EBUSY;
6374                 cpuset_update_active_cpus();
6375         } else {
6376                 num_cpus_frozen++;
6377                 partition_sched_domains(1, NULL, NULL);
6378         }
6379         return 0;
6380 }
6381 
6382 int sched_cpu_activate(unsigned int cpu)
6383 {
6384         struct rq *rq = cpu_rq(cpu);
6385         struct rq_flags rf;
6386 
6387 #ifdef CONFIG_SCHED_SMT
6388         
6389 
6390 
6391         if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
6392                 static_branch_inc_cpuslocked(&sched_smt_present);
6393 #endif
6394         set_cpu_active(cpu, true);
6395 
6396         if (sched_smp_initialized) {
6397                 sched_domains_numa_masks_set(cpu);
6398                 cpuset_cpu_active();
6399         }
6400 
6401         
6402 
6403 
6404 
6405 
6406 
6407 
6408 
6409 
6410         rq_lock_irqsave(rq, &rf);
6411         if (rq->rd) {
6412                 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6413                 set_rq_online(rq);
6414         }
6415         rq_unlock_irqrestore(rq, &rf);
6416 
6417         return 0;
6418 }
6419 
6420 int sched_cpu_deactivate(unsigned int cpu)
6421 {
6422         int ret;
6423 
6424         set_cpu_active(cpu, false);
6425         
6426 
6427 
6428 
6429 
6430 
6431 
6432         synchronize_rcu();
6433 
6434 #ifdef CONFIG_SCHED_SMT
6435         
6436 
6437 
6438         if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
6439                 static_branch_dec_cpuslocked(&sched_smt_present);
6440 #endif
6441 
6442         if (!sched_smp_initialized)
6443                 return 0;
6444 
6445         ret = cpuset_cpu_inactive(cpu);
6446         if (ret) {
6447                 set_cpu_active(cpu, true);
6448                 return ret;
6449         }
6450         sched_domains_numa_masks_clear(cpu);
6451         return 0;
6452 }
6453 
6454 static void sched_rq_cpu_starting(unsigned int cpu)
6455 {
6456         struct rq *rq = cpu_rq(cpu);
6457 
6458         rq->calc_load_update = calc_load_update;
6459         update_max_interval();
6460 }
6461 
6462 int sched_cpu_starting(unsigned int cpu)
6463 {
6464         sched_rq_cpu_starting(cpu);
6465         sched_tick_start(cpu);
6466         return 0;
6467 }
6468 
6469 #ifdef CONFIG_HOTPLUG_CPU
6470 int sched_cpu_dying(unsigned int cpu)
6471 {
6472         struct rq *rq = cpu_rq(cpu);
6473         struct rq_flags rf;
6474 
6475         
6476         sched_ttwu_pending();
6477         sched_tick_stop(cpu);
6478 
6479         rq_lock_irqsave(rq, &rf);
6480         if (rq->rd) {
6481                 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6482                 set_rq_offline(rq);
6483         }
6484         migrate_tasks(rq, &rf);
6485         BUG_ON(rq->nr_running != 1);
6486         rq_unlock_irqrestore(rq, &rf);
6487 
6488         calc_load_migrate(rq);
6489         update_max_interval();
6490         nohz_balance_exit_idle(rq);
6491         hrtick_clear(rq);
6492         return 0;
6493 }
6494 #endif
6495 
6496 void __init sched_init_smp(void)
6497 {
6498         sched_init_numa();
6499 
6500         
6501 
6502 
6503 
6504 
6505         mutex_lock(&sched_domains_mutex);
6506         sched_init_domains(cpu_active_mask);
6507         mutex_unlock(&sched_domains_mutex);
6508 
6509         
6510         if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
6511                 BUG();
6512         sched_init_granularity();
6513 
6514         init_sched_rt_class();
6515         init_sched_dl_class();
6516 
6517         sched_smp_initialized = true;
6518 }
6519 
6520 static int __init migration_init(void)
6521 {
6522         sched_cpu_starting(smp_processor_id());
6523         return 0;
6524 }
6525 early_initcall(migration_init);
6526 
6527 #else
6528 void __init sched_init_smp(void)
6529 {
6530         sched_init_granularity();
6531 }
6532 #endif /* CONFIG_SMP */
6533 
6534 int in_sched_functions(unsigned long addr)
6535 {
6536         return in_lock_functions(addr) ||
6537                 (addr >= (unsigned long)__sched_text_start
6538                 && addr < (unsigned long)__sched_text_end);
6539 }
6540 
6541 #ifdef CONFIG_CGROUP_SCHED
6542 
6543 
6544 
6545 
6546 struct task_group root_task_group;
6547 LIST_HEAD(task_groups);
6548 
6549 
6550 static struct kmem_cache *task_group_cache __read_mostly;
6551 #endif
6552 
6553 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
6554 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
6555 
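/*
 * sched_init - boot-time scheduler initialisation: allocate the group
 * scheduling arrays, set up bandwidth controls and each CPU's runqueue,
 * and turn the current (boot) thread into that CPU's idle task.
 */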
6556 void __init sched_init(void)
6557 {
6558         unsigned long ptr = 0;
6559         int i;
6560 
6561         wait_bit_init();
6562 
6563 #ifdef CONFIG_FAIR_GROUP_SCHED
6564         ptr += 2 * nr_cpu_ids * sizeof(void **);
6565 #endif
6566 #ifdef CONFIG_RT_GROUP_SCHED
6567         ptr += 2 * nr_cpu_ids * sizeof(void **);
6568 #endif
6569         if (ptr) {
6570                 ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT);
6571 
6572 #ifdef CONFIG_FAIR_GROUP_SCHED
6573                 root_task_group.se = (struct sched_entity **)ptr;
6574                 ptr += nr_cpu_ids * sizeof(void **);
6575 
6576                 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
6577                 ptr += nr_cpu_ids * sizeof(void **);
6578 
6579 #endif 
6580 #ifdef CONFIG_RT_GROUP_SCHED
6581                 root_task_group.rt_se = (struct sched_rt_entity **)ptr;
6582                 ptr += nr_cpu_ids * sizeof(void **);
6583 
6584                 root_task_group.rt_rq = (struct rt_rq **)ptr;
6585                 ptr += nr_cpu_ids * sizeof(void **);
6586 
6587 #endif 
6588         }
6589 #ifdef CONFIG_CPUMASK_OFFSTACK
6590         for_each_possible_cpu(i) {
6591                 per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
6592                         cpumask_size(), GFP_KERNEL, cpu_to_node(i));
6593                 per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
6594                         cpumask_size(), GFP_KERNEL, cpu_to_node(i));
6595         }
6596 #endif 
6597 
6598         init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
6599         init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
6600 
6601 #ifdef CONFIG_SMP
6602         init_defrootdomain();
6603 #endif
6604 
6605 #ifdef CONFIG_RT_GROUP_SCHED
6606         init_rt_bandwidth(&root_task_group.rt_bandwidth,
6607                         global_rt_period(), global_rt_runtime());
6608 #endif 
6609 
6610 #ifdef CONFIG_CGROUP_SCHED
6611         task_group_cache = KMEM_CACHE(task_group, 0);
6612 
6613         list_add(&root_task_group.list, &task_groups);
6614         INIT_LIST_HEAD(&root_task_group.children);
6615         INIT_LIST_HEAD(&root_task_group.siblings);
6616         autogroup_init(&init_task);
6617 #endif /* CONFIG_CGROUP_SCHED */
6618 
6619         for_each_possible_cpu(i) {
6620                 struct rq *rq;
6621 
6622                 rq = cpu_rq(i);
6623                 raw_spin_lock_init(&rq->lock);
6624                 rq->nr_running = 0;
6625                 rq->calc_load_active = 0;
6626                 rq->calc_load_update = jiffies + LOAD_FREQ;
6627                 init_cfs_rq(&rq->cfs);
6628                 init_rt_rq(&rq->rt);
6629                 init_dl_rq(&rq->dl);
6630 #ifdef CONFIG_FAIR_GROUP_SCHED
6631                 root_task_group.shares = ROOT_TASK_GROUP_LOAD;
6632                 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
6633                 rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
6634                 
6635 
6636 
6637 
6638 
6639 
6640 
6641 
6642 
6643 
6644 
6645 
6646 
6647 
6648 
6649 
6650 
6651 
6652 
6653                 init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
6654                 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
6655 #endif /* CONFIG_FAIR_GROUP_SCHED */
6656 
6657                 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
6658 #ifdef CONFIG_RT_GROUP_SCHED
6659                 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
6660 #endif
6661 #ifdef CONFIG_SMP
6662                 rq->sd = NULL;
6663                 rq->rd = NULL;
6664                 rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
6665                 rq->balance_callback = NULL;
6666                 rq->active_balance = 0;
6667                 rq->next_balance = jiffies;
6668                 rq->push_cpu = 0;
6669                 rq->cpu = i;
6670                 rq->online = 0;
6671                 rq->idle_stamp = 0;
6672                 rq->avg_idle = 2*sysctl_sched_migration_cost;
6673                 rq->max_idle_balance_cost = sysctl_sched_migration_cost;
6674 
6675                 INIT_LIST_HEAD(&rq->cfs_tasks);
6676 
6677                 rq_attach_root(rq, &def_root_domain);
6678 #ifdef CONFIG_NO_HZ_COMMON
6679                 rq->last_load_update_tick = jiffies;
6680                 rq->last_blocked_load_update_tick = jiffies;
6681                 atomic_set(&rq->nohz_flags, 0);
6682 #endif
6683 #endif /* CONFIG_SMP */
6684                 hrtick_rq_init(rq);
6685                 atomic_set(&rq->nr_iowait, 0);
6686         }
6687 
6688         set_load_weight(&init_task, false);
6689 
6690         
6691 
6692 
6693         mmgrab(&init_mm);
6694         enter_lazy_tlb(&init_mm, current);
6695 
6696         
6697 
6698 
6699 
6700 
6701 
6702         init_idle(current, smp_processor_id());
6703 
6704         calc_load_update = jiffies + LOAD_FREQ;
6705 
6706 #ifdef CONFIG_SMP
6707         idle_thread_set_boot_cpu();
6708 #endif
6709         init_sched_fair_class();
6710 
6711         init_schedstats();
6712 
6713         psi_init();
6714 
6715         init_uclamp();
6716 
6717         scheduler_running = 1;
6718 }
6719 
6720 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
6721 static inline int preempt_count_equals(int preempt_offset)
6722 {
6723         int nested = preempt_count() + rcu_preempt_depth();
6724 
6725         return (nested == preempt_offset);
6726 }
6727 
6728 void __might_sleep(const char *file, int line, int preempt_offset)
6729 {
6730         /*
6731          * Blocking primitives will set (and therefore destroy) current->state,
6732          * since we will exit with TASK_RUNNING make sure we enter with it,
6733          * otherwise we will destroy state.
6734          */
6735         WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change,
6736                         "do not call blocking ops when !TASK_RUNNING; "
6737                         "state=%lx set at [<%p>] %pS\n",
6738                         current->state,
6739                         (void *)current->task_state_change,
6740                         (void *)current->task_state_change);
6741 
6742         ___might_sleep(file, line, preempt_offset);
6743 }
6744 EXPORT_SYMBOL(__might_sleep);
6745 
6746 void ___might_sleep(const char *file, int line, int preempt_offset)
6747 {
6748         
6749         static unsigned long prev_jiffy;
6750 
6751         unsigned long preempt_disable_ip;
6752 
6753         
6754         rcu_sleep_check();
6755 
6756         if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
6757              !is_idle_task(current) && !current->non_block_count) ||
6758             system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
6759             oops_in_progress)
6760                 return;
6761 
6762         if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
6763                 return;
6764         prev_jiffy = jiffies;
6765 
6766         /* Save this before calling printk(), since that will clobber it: */
6767         preempt_disable_ip = get_preempt_disable_ip(current);
6768 
6769         printk(KERN_ERR
6770                 "BUG: sleeping function called from invalid context at %s:%d\n",
6771                         file, line);
6772         printk(KERN_ERR
6773                 "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
6774                         in_atomic(), irqs_disabled(), current->non_block_count,
6775                         current->pid, current->comm);
6776 
6777         if (task_stack_end_corrupted(current))
6778                 printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
6779 
6780         debug_show_held_locks(current);
6781         if (irqs_disabled())
6782                 print_irqtrace_events(current);
6783         if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
6784             && !preempt_count_equals(preempt_offset)) {
6785                 pr_err("Preemption disabled at:");
6786                 print_ip_sym(preempt_disable_ip);
6787                 pr_cont("\n");
6788         }
6789         dump_stack();
6790         add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
6791 }
6792 EXPORT_SYMBOL(___might_sleep);
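As a hedged illustration of the class of bug ___might_sleep() reports, the sketch below is not part of this file and its identifiers are hypothetical: it acquires a spinlock (entering atomic context on a preemptible kernel) and then calls mutex_lock(), whose might_sleep() check would trigger the "sleeping function called from invalid context" splat above.

/* Hypothetical sketch, for illustration only -- not kernel/sched/core.c code. */
static DEFINE_SPINLOCK(example_lock);
static DEFINE_MUTEX(example_mutex);

static void example_bad_locking(void)
{
        spin_lock(&example_lock);       /* raises preempt_count(): atomic context */
        mutex_lock(&example_mutex);     /* may sleep -> ___might_sleep() complains */
        mutex_unlock(&example_mutex);
        spin_unlock(&example_lock);
}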
6793 
6794 void __cant_sleep(const char *file, int line, int preempt_offset)
6795 {
6796         static unsigned long prev_jiffy;
6797 
6798         if (irqs_disabled())
6799                 return;
6800 
6801         if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
6802                 return;
6803 
6804         if (preempt_count() > preempt_offset)
6805                 return;
6806 
6807         if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
6808                 return;
6809         prev_jiffy = jiffies;
6810 
6811         printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
6812         printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
6813                         in_atomic(), irqs_disabled(),
6814                         current->pid, current->comm);
6815 
6816         debug_show_held_locks(current);
6817         dump_stack();
6818         add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
6819 }
6820 EXPORT_SYMBOL_GPL(__cant_sleep);
6821 #endif
6822 
6823 #ifdef CONFIG_MAGIC_SYSRQ
6824 void normalize_rt_tasks(void)
6825 {
6826         struct task_struct *g, *p;
6827         struct sched_attr attr = {
6828                 .sched_policy = SCHED_NORMAL,
6829         };
6830 
6831         read_lock(&tasklist_lock);
6832         for_each_process_thread(g, p) {
6833                 /*
6834                  * Only normalize user tasks:
6835                  */
6836                 if (p->flags & PF_KTHREAD)
6837                         continue;
6838 
6839                 p->se.exec_start = 0;
6840                 schedstat_set(p->se.statistics.wait_start,  0);
6841                 schedstat_set(p->se.statistics.sleep_start, 0);
6842                 schedstat_set(p->se.statistics.block_start, 0);
6843 
6844                 if (!dl_task(p) && !rt_task(p)) {
6845                         /*
6846                          * Renice negative nice level userspace
6847                          * tasks back to 0:
6848                          */
6849                         if (task_nice(p) < 0)
6850                                 set_user_nice(p, 0);
6851                         continue;
6852                 }
6853 
6854                 __sched_setscheduler(p, &attr, false, false);
6855         }
6856         read_unlock(&tasklist_lock);
6857 }
6858 
6859 #endif 
6860 
6861 #if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
6862 
6863 /*
6864  * These functions are only useful for the IA64 MCA handling, or kdb.
6865  *
6866  * They can only be called when the whole system has been
6867  * stopped - every CPU needs to be quiescent, and no scheduling
6868  * activity can take place. Using them for anything else would
6869  * be a serious bug, and as a result, they aren't even visible
6870  * under any other configuration.
6871  */
6872 
6873 /**
6874  * curr_task - return the current task for a given CPU.
6875  * @cpu: the processor in question.
6876  *
6877  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6878  * Return: The current task for @cpu.
6879  */
6880 struct task_struct *curr_task(int cpu)
6881 {
6882         return cpu_curr(cpu);
6883 }
6884 
6885 #endif 
6886 
6887 #ifdef CONFIG_IA64
6888 
6889 /**
6890  * ia64_set_curr_task - set the current task for a given CPU.
6891  * @cpu: the processor in question.
6892  * @p: the task pointer to set.
6893  *
6894  * Description: This function must only be used when non-maskable interrupts
6895  * are serviced on a separate stack. It allows the architecture to switch the
6896  * notion of the current task on a CPU in a non-blocking manner. It must be
6897  * called with all CPUs synchronized and interrupts disabled; the caller must
6898  * save the original value of the current task (see curr_task() above) and
6899  * restore that value before re-enabling interrupts and restarting the system.
6900  *
6901  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6902  */
6903 void ia64_set_curr_task(int cpu, struct task_struct *p)
6904 {
6905         cpu_curr(cpu) = p;
6906 }
6907 
6908 #endif
6909 
6910 #ifdef CONFIG_CGROUP_SCHED
6911 
6912 static DEFINE_SPINLOCK(task_group_lock);
6913 
6914 static inline void alloc_uclamp_sched_group(struct task_group *tg,
6915                                             struct task_group *parent)
6916 {
6917 #ifdef CONFIG_UCLAMP_TASK_GROUP
6918         enum uclamp_id clamp_id;
6919 
6920         for_each_clamp_id(clamp_id) {
6921                 uclamp_se_set(&tg->uclamp_req[clamp_id],
6922                               uclamp_none(clamp_id), false);
6923                 tg->uclamp[clamp_id] = parent->uclamp[clamp_id];
6924         }
6925 #endif
6926 }
6927 
6928 static void sched_free_group(struct task_group *tg)
6929 {
6930         free_fair_sched_group(tg);
6931         free_rt_sched_group(tg);
6932         autogroup_free(tg);
6933         kmem_cache_free(task_group_cache, tg);
6934 }
6935 
6936 
6937 struct task_group *sched_create_group(struct task_group *parent)
6938 {
6939         struct task_group *tg;
6940 
6941         tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
6942         if (!tg)
6943                 return ERR_PTR(-ENOMEM);
6944 
6945         if (!alloc_fair_sched_group(tg, parent))
6946                 goto err;
6947 
6948         if (!alloc_rt_sched_group(tg, parent))
6949                 goto err;
6950 
6951         alloc_uclamp_sched_group(tg, parent);
6952 
6953         return tg;
6954 
6955 err:
6956         sched_free_group(tg);
6957         return ERR_PTR(-ENOMEM);
6958 }
6959 
6960 void sched_online_group(struct task_group *tg, struct task_group *parent)
6961 {
6962         unsigned long flags;
6963 
6964         spin_lock_irqsave(&task_group_lock, flags);
6965         list_add_rcu(&tg->list, &task_groups);
6966 
6967         /* Root should already exist: */
6968         WARN_ON(!parent);
6969 
6970         tg->parent = parent;
6971         INIT_LIST_HEAD(&tg->children);
6972         list_add_rcu(&tg->siblings, &parent->children);
6973         spin_unlock_irqrestore(&task_group_lock, flags);
6974 
6975         online_fair_sched_group(tg);
6976 }
6977 
6978 
6979 static void sched_free_group_rcu(struct rcu_head *rhp)
6980 {
6981         /* Now it should be safe to free those cfs_rqs: */
6982         sched_free_group(container_of(rhp, struct task_group, rcu));
6983 }
6984 
6985 void sched_destroy_group(struct task_group *tg)
6986 {
6987         /* Wait for possible concurrent references to cfs_rqs complete: */
6988         call_rcu(&tg->rcu, sched_free_group_rcu);
6989 }
6990 
6991 void sched_offline_group(struct task_group *tg)
6992 {
6993         unsigned long flags;
6994 
6995         /* End participation in shares distribution: */
6996         unregister_fair_sched_group(tg);
6997 
6998         spin_lock_irqsave(&task_group_lock, flags);
6999         list_del_rcu(&tg->list);
7000         list_del_rcu(&tg->siblings);
7001         spin_unlock_irqrestore(&task_group_lock, flags);
7002 }
7003 
7004 static void sched_change_group(struct task_struct *tsk, int type)
7005 {
7006         struct task_group *tg;
7007 
7008         /*
7009          * All callers are synchronized by task_rq_lock(); we do not use RCU
7010          * which is pointless here. Thus, we pass "true" to task_css_check()
7011          * to prevent lockdep warnings.
7012          */
7013         tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
7014                           struct task_group, css);
7015         tg = autogroup_task_group(tsk, tg);
7016         tsk->sched_task_group = tg;
7017 
7018 #ifdef CONFIG_FAIR_GROUP_SCHED
7019         if (tsk->sched_class->task_change_group)
7020                 tsk->sched_class->task_change_group(tsk, type);
7021         else
7022 #endif
7023                 set_task_rq(tsk, task_cpu(tsk));
7024 }
7025 
7026 
7027 /*
7028  * Change task's runqueue when it moves between groups.
7029  *
7030  * The caller should have put the task in its new group by now; this
7031  * function just updates tsk->se.cfs_rq and tsk->se.parent to reflect it.
7032  */
7033 void sched_move_task(struct task_struct *tsk)
7034 {
7035         int queued, running, queue_flags =
7036                 DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
7037         struct rq_flags rf;
7038         struct rq *rq;
7039 
7040         rq = task_rq_lock(tsk, &rf);
7041         update_rq_clock(rq);
7042 
7043         running = task_current(rq, tsk);
7044         queued = task_on_rq_queued(tsk);
7045 
7046         if (queued)
7047                 dequeue_task(rq, tsk, queue_flags);
7048         if (running)
7049                 put_prev_task(rq, tsk);
7050 
7051         sched_change_group(tsk, TASK_MOVE_GROUP);
7052 
7053         if (queued)
7054                 enqueue_task(rq, tsk, queue_flags);
7055         if (running) {
7056                 set_next_task(rq, tsk);
7057                 /*
7058                  * After changing group, the running task may have joined a
7059                  * throttled one but it's still the running task. Trigger a
7060                  * resched to make sure that task can still run.
7061                  */
7062                 resched_curr(rq);
7063         }
7064 
7065         task_rq_unlock(rq, tsk, &rf);
7066 }
7067 
7068 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
7069 {
7070         return css ? container_of(css, struct task_group, css) : NULL;
7071 }
7072 
7073 static struct cgroup_subsys_state *
7074 cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
7075 {
7076         struct task_group *parent = css_tg(parent_css);
7077         struct task_group *tg;
7078 
7079         if (!parent) {
7080                 /* This is early initialization for the top cgroup */
7081                 return &root_task_group.css;
7082         }
7083 
7084         tg = sched_create_group(parent);
7085         if (IS_ERR(tg))
7086                 return ERR_PTR(-ENOMEM);
7087 
7088         return &tg->css;
7089 }
7090 
7091 
7092 static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
7093 {
7094         struct task_group *tg = css_tg(css);
7095         struct task_group *parent = css_tg(css->parent);
7096 
7097         if (parent)
7098                 sched_online_group(tg, parent);
7099 
7100 #ifdef CONFIG_UCLAMP_TASK_GROUP
7101         /* Propagate the effective uclamp value for the new group */
7102         cpu_util_update_eff(css);
7103 #endif
7104 
7105         return 0;
7106 }
7107 
7108 static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
7109 {
7110         struct task_group *tg = css_tg(css);
7111 
7112         sched_offline_group(tg);
7113 }
7114 
7115 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
7116 {
7117         struct task_group *tg = css_tg(css);
7118 
7119         /*
7120          * Relies on the RCU grace period between css_released() and this.
7121          */
7122         sched_free_group(tg);
7123 }
7124 
7125 
7126 /*
7127  * Called before wake_up_new_task(); only the group bits need to be set here.
7128  */
7129 static void cpu_cgroup_fork(struct task_struct *task)
7130 {
7131         struct rq_flags rf;
7132         struct rq *rq;
7133 
7134         rq = task_rq_lock(task, &rf);
7135 
7136         update_rq_clock(rq);
7137         sched_change_group(task, TASK_SET_GROUP);
7138 
7139         task_rq_unlock(rq, task, &rf);
7140 }
7141 
7142 static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
7143 {
7144         struct task_struct *task;
7145         struct cgroup_subsys_state *css;
7146         int ret = 0;
7147 
7148         cgroup_taskset_for_each(task, css, tset) {
7149 #ifdef CONFIG_RT_GROUP_SCHED
7150                 if (!sched_rt_can_attach(css_tg(css), task))
7151                         return -EINVAL;
7152 #endif
7153                 /*
7154                  * Serialize against wake_up_new_task() such that if its
7155                  * running, we're sure to observe its full state.
7156                  */
7157                 raw_spin_lock_irq(&task->pi_lock);
7158                 /*
7159                  * Avoid calling sched_move_task() before wake_up_new_task()
7160                  * has happened. This would lead to problems with PELT, due to
7161                  * move wanting to detach+attach while we're not attached yet.
7162                  */
7163                 if (task->state == TASK_NEW)
7164                         ret = -EINVAL;
7165                 raw_spin_unlock_irq(&task->pi_lock);
7166 
7167                 if (ret)
7168                         break;
7169         }
7170         return ret;
7171 }
7172 
7173 static void cpu_cgroup_attach(struct cgroup_taskset *tset)
7174 {
7175         struct task_struct *task;
7176         struct cgroup_subsys_state *css;
7177 
7178         cgroup_taskset_for_each(task, css, tset)
7179                 sched_move_task(task);
7180 }
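For orientation, a minimal userspace sketch of how these callbacks are reached, assuming a cgroup v2 hierarchy mounted at /sys/fs/cgroup with the cpu controller enabled and a hypothetical child group named "demo": writing a PID to that group's cgroup.procs ends up in cpu_cgroup_can_attach()/cpu_cgroup_attach() and hence sched_move_task().

#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical path; adjust to the actual mount point and group */
        const char *procs = "/sys/fs/cgroup/demo/cgroup.procs";
        FILE *f = fopen(procs, "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* move the calling process into the "demo" cpu cgroup */
        fprintf(f, "%d\n", getpid());
        fclose(f);
        return 0;
}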
7181 
7182 #ifdef CONFIG_UCLAMP_TASK_GROUP
7183 static void cpu_util_update_eff(struct cgroup_subsys_state *css)
7184 {
7185         struct cgroup_subsys_state *top_css = css;
7186         struct uclamp_se *uc_parent = NULL;
7187         struct uclamp_se *uc_se = NULL;
7188         unsigned int eff[UCLAMP_CNT];
7189         enum uclamp_id clamp_id;
7190         unsigned int clamps;
7191 
7192         css_for_each_descendant_pre(css, top_css) {
7193                 uc_parent = css_tg(css)->parent
7194                         ? css_tg(css)->parent->uclamp : NULL;
7195 
7196                 for_each_clamp_id(clamp_id) {
7197                         /* Assume effective clamps matches requested clamps */
7198                         eff[clamp_id] = css_tg(css)->uclamp_req[clamp_id].value;
7199                         /* Cap effective clamps with parent's effective clamps */
7200                         if (uc_parent &&
7201                             eff[clamp_id] > uc_parent[clamp_id].value) {
7202                                 eff[clamp_id] = uc_parent[clamp_id].value;
7203                         }
7204                 }
7205                 /* Ensure protection is always capped by limit */
7206                 eff[UCLAMP_MIN] = min(eff[UCLAMP_MIN], eff[UCLAMP_MAX]);
7207 
7208                 /* Propagate most restrictive effective clamps */
7209                 clamps = 0x0;
7210                 uc_se = css_tg(css)->uclamp;
7211                 for_each_clamp_id(clamp_id) {
7212                         if (eff[clamp_id] == uc_se[clamp_id].value)
7213                                 continue;
7214                         uc_se[clamp_id].value = eff[clamp_id];
7215                         uc_se[clamp_id].bucket_id = uclamp_bucket_id(eff[clamp_id]);
7216                         clamps |= (0x1 << clamp_id);
7217                 }
7218                 if (!clamps) {
7219                         css = css_rightmost_descendant(css);
7220                         continue;
7221                 }
7222 
7223                 
7224                 uclamp_update_active_tasks(css, clamps);
7225         }
7226 }
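A minimal userspace model of the aggregation rule implemented by cpu_util_update_eff() above, assuming only the core property that a group's effective clamp is its own request capped by its parent's effective value (the kernel function additionally caps UCLAMP_MIN by UCLAMP_MAX and prunes subtrees whose effective values did not change):

#include <stdio.h>

/* effective clamp = min(own request, parent's effective value) */
static unsigned int effective_clamp(unsigned int request, unsigned int parent_eff)
{
        return request < parent_eff ? request : parent_eff;
}

int main(void)
{
        unsigned int root = 1024;                                /* "max" */
        unsigned int child = effective_clamp(800, root);         /* 800 */
        unsigned int grandchild = effective_clamp(900, child);   /* capped to 800 */

        printf("child=%u grandchild=%u\n", child, grandchild);
        return 0;
}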
7227 
7228 
7229 /*
7230  * Integer 10^N helpers, used to parse and print the percentage values of
7231  * the cpu.uclamp.* files with UCLAMP_PERCENT_SHIFT decimal digits.
7232  */
7233 #define _POW10(exp) ((unsigned int)1e##exp)
7234 #define POW10(exp) _POW10(exp)
7235 
7236 struct uclamp_request {
7237 #define UCLAMP_PERCENT_SHIFT    2
7238 #define UCLAMP_PERCENT_SCALE    (100 * POW10(UCLAMP_PERCENT_SHIFT))
7239         s64 percent;
7240         u64 util;
7241         int ret;
7242 };
7243 
7244 static inline struct uclamp_request
7245 capacity_from_percent(char *buf)
7246 {
7247         struct uclamp_request req = {
7248                 .percent = UCLAMP_PERCENT_SCALE,
7249                 .util = SCHED_CAPACITY_SCALE,
7250                 .ret = 0,
7251         };
7252 
7253         buf = strim(buf);
7254         if (strcmp(buf, "max")) {
7255                 req.ret = cgroup_parse_float(buf, UCLAMP_PERCENT_SHIFT,
7256                                              &req.percent);
7257                 if (req.ret)
7258                         return req;
7259                 if ((u64)req.percent > UCLAMP_PERCENT_SCALE) {
7260                         req.ret = -ERANGE;
7261                         return req;
7262                 }
7263 
7264                 req.util = req.percent << SCHED_CAPACITY_SHIFT;
7265                 req.util = DIV_ROUND_CLOSEST_ULL(req.util, UCLAMP_PERCENT_SCALE);
7266         }
7267 
7268         return req;
7269 }
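A worked example of the conversion performed by capacity_from_percent(): writing "50" to one of the cpu.uclamp.* files is parsed with two decimal digits (5000) and then scaled into the SCHED_CAPACITY_SCALE range of 1024. The standalone sketch below reproduces the arithmetic, with the constants assumed to match this kernel's values.

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10      /* SCHED_CAPACITY_SCALE == 1024 */
#define UCLAMP_PERCENT_SCALE    10000   /* 100% with two decimal digits */

int main(void)
{
        unsigned long long percent = 5000;      /* "50" after cgroup_parse_float() */
        unsigned long long util;

        /* same rounding as DIV_ROUND_CLOSEST_ULL() */
        util = ((percent << SCHED_CAPACITY_SHIFT) + UCLAMP_PERCENT_SCALE / 2) /
               UCLAMP_PERCENT_SCALE;

        printf("util = %llu\n", util);          /* 512 */
        return 0;
}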
7270 
7271 static ssize_t cpu_uclamp_write(struct kernfs_open_file *of, char *buf,
7272                                 size_t nbytes, loff_t off,
7273                                 enum uclamp_id clamp_id)
7274 {
7275         struct uclamp_request req;
7276         struct task_group *tg;
7277 
7278         req = capacity_from_percent(buf);
7279         if (req.ret)
7280                 return req.ret;
7281 
7282         mutex_lock(&uclamp_mutex);
7283         rcu_read_lock();
7284 
7285         tg = css_tg(of_css(of));
7286         if (tg->uclamp_req[clamp_id].value != req.util)
7287                 uclamp_se_set(&tg->uclamp_req[clamp_id], req.util, false);
7288 
7289         /*
7290          * The exact requested percentage cannot always be recomputed from
7291          * the stored util value; remember it so it can be reported back.
7292          */
7293         tg->uclamp_pct[clamp_id] = req.percent;
7294 
7295         /* Update effective clamps to track the most restrictive values */
7296         cpu_util_update_eff(of_css(of));
7297 
7298         rcu_read_unlock();
7299         mutex_unlock(&uclamp_mutex);
7300 
7301         return nbytes;
7302 }
7303 
7304 static ssize_t cpu_uclamp_min_write(struct kernfs_open_file *of,
7305                                     char *buf, size_t nbytes,
7306                                     loff_t off)
7307 {
7308         return cpu_uclamp_write(of, buf, nbytes, off, UCLAMP_MIN);
7309 }
7310 
7311 static ssize_t cpu_uclamp_max_write(struct kernfs_open_file *of,
7312                                     char *buf, size_t nbytes,
7313                                     loff_t off)
7314 {
7315         return cpu_uclamp_write(of, buf, nbytes, off, UCLAMP_MAX);
7316 }
7317 
7318 static inline void cpu_uclamp_print(struct seq_file *sf,
7319                                     enum uclamp_id clamp_id)
7320 {
7321         struct task_group *tg;
7322         u64 util_clamp;
7323         u64 percent;
7324         u32 rem;
7325 
7326         rcu_read_lock();
7327         tg = css_tg(seq_css(sf));
7328         util_clamp = tg->uclamp_req[clamp_id].value;
7329         rcu_read_unlock();
7330 
7331         if (util_clamp == SCHED_CAPACITY_SCALE) {
7332                 seq_puts(sf, "max\n");
7333                 return;
7334         }
7335 
7336         percent = tg->uclamp_pct[clamp_id];
7337         percent = div_u64_rem(percent, POW10(UCLAMP_PERCENT_SHIFT), &rem);
7338         seq_printf(sf, "%llu.%0*u\n", percent, UCLAMP_PERCENT_SHIFT, rem);
7339 }
7340 
7341 static int cpu_uclamp_min_show(struct seq_file *sf, void *v)
7342 {
7343         cpu_uclamp_print(sf, UCLAMP_MIN);
7344         return 0;
7345 }
7346 
7347 static int cpu_uclamp_max_show(struct seq_file *sf, void *v)
7348 {
7349         cpu_uclamp_print(sf, UCLAMP_MAX);
7350         return 0;
7351 }
7352 #endif 
7353 
7354 #ifdef CONFIG_FAIR_GROUP_SCHED
7355 static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
7356                                 struct cftype *cftype, u64 shareval)
7357 {
7358         if (shareval > scale_load_down(ULONG_MAX))
7359                 shareval = MAX_SHARES;
7360         return sched_group_set_shares(css_tg(css), scale_load(shareval));
7361 }
7362 
7363 static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
7364                                struct cftype *cft)
7365 {
7366         struct task_group *tg = css_tg(css);
7367 
7368         return (u64) scale_load_down(tg->shares);
7369 }
7370 
7371 #ifdef CONFIG_CFS_BANDWIDTH
7372 static DEFINE_MUTEX(cfs_constraints_mutex);
7373 
7374 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; 
7375 static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; 
7376 
7377 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
7378 
7379 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
7380 {
7381         int i, ret = 0, runtime_enabled, runtime_was_enabled;
7382         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
7383 
7384         if (tg == &root_task_group)
7385                 return -EINVAL;
7386 
7387         /*
7388          * Ensure we have some amount of bandwidth every period. This is to
7389          * prevent reaching a state of large arrears when throttled via
7390          * entity_tick() resulting in prolonged exit starvation.
7391          */
7392         if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
7393                 return -EINVAL;
7394 
7395         /*
7396          * Likewise, bound things on the other side by preventing insane quota
7397          * periods. This also allows us to normalize in computing quota
7398          * feasibility.
7399          */
7400         if (period > max_cfs_quota_period)
7401                 return -EINVAL;
7402 
7403         /*
7404          * Prevent race between setting of cfs_rq->runtime_enabled and
7405          * unthrottle_offline_cfs_rqs().
7406          */
7407         get_online_cpus();
7408         mutex_lock(&cfs_constraints_mutex);
7409         ret = __cfs_schedulable(tg, period, quota);
7410         if (ret)
7411                 goto out_unlock;
7412 
7413         runtime_enabled = quota != RUNTIME_INF;
7414         runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
7415         /*
7416          * If we need to toggle cfs_bandwidth_used, off->on must occur
7417          * before making such a change visible to the rest of the system.
7418          */
7419         if (runtime_enabled && !runtime_was_enabled)
7420                 cfs_bandwidth_usage_inc();
7421         raw_spin_lock_irq(&cfs_b->lock);
7422         cfs_b->period = ns_to_ktime(period);
7423         cfs_b->quota = quota;
7424 
7425         __refill_cfs_bandwidth_runtime(cfs_b);
7426 
7427         /* Restart the period timer (if active) to handle new period expiry: */
7428         if (runtime_enabled)
7429                 start_cfs_bandwidth(cfs_b);
7430 
7431         raw_spin_unlock_irq(&cfs_b->lock);
7432 
7433         for_each_online_cpu(i) {
7434                 struct cfs_rq *cfs_rq = tg->cfs_rq[i];
7435                 struct rq *rq = cfs_rq->rq;
7436                 struct rq_flags rf;
7437 
7438                 rq_lock_irq(rq, &rf);
7439                 cfs_rq->runtime_enabled = runtime_enabled;
7440                 cfs_rq->runtime_remaining = 0;
7441 
7442                 if (cfs_rq->throttled)
7443                         unthrottle_cfs_rq(cfs_rq);
7444                 rq_unlock_irq(rq, &rf);
7445         }
7446         if (runtime_was_enabled && !runtime_enabled)
7447                 cfs_bandwidth_usage_dec();
7448 out_unlock:
7449         mutex_unlock(&cfs_constraints_mutex);
7450         put_online_cpus();
7451 
7452         return ret;
7453 }
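For orientation, a standalone sketch of the semantics tg_set_cfs_bandwidth() enforces: quota and period arrive in microseconds, the period must lie between 1ms and 1s, and quota/period is the fraction of CPU time the group may consume. The values chosen here are only an example.

#include <stdio.h>

#define NSEC_PER_USEC   1000ULL
#define NSEC_PER_MSEC   1000000ULL
#define NSEC_PER_SEC    1000000000ULL

int main(void)
{
        /* e.g. cpu.cfs_quota_us = 50000, cpu.cfs_period_us = 100000 */
        unsigned long long quota  = 50000 * NSEC_PER_USEC;
        unsigned long long period = 100000 * NSEC_PER_USEC;

        /* the same bounds checked by tg_set_cfs_bandwidth() */
        if (quota < NSEC_PER_MSEC || period < NSEC_PER_MSEC ||
            period > NSEC_PER_SEC) {
                fprintf(stderr, "would be rejected with -EINVAL\n");
                return 1;
        }

        /* 0.50: the group may use half a CPU's worth of time per period */
        printf("%.2f CPUs\n", (double)quota / (double)period);
        return 0;
}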
7454 
7455 static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
7456 {
7457         u64 quota, period;
7458 
7459         period = ktime_to_ns(tg->cfs_bandwidth.period);
7460         if (cfs_quota_us < 0)
7461                 quota = RUNTIME_INF;
7462         else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
7463                 quota = (u64)cfs_quota_us * NSEC_PER_USEC;
7464         else
7465                 return -EINVAL;
7466 
7467         return tg_set_cfs_bandwidth(tg, period, quota);
7468 }
7469 
7470 static long tg_get_cfs_quota(struct task_group *tg)
7471 {
7472         u64 quota_us;
7473 
7474         if (tg->cfs_bandwidth.quota == RUNTIME_INF)
7475                 return -1;
7476 
7477         quota_us = tg->cfs_bandwidth.quota;
7478         do_div(quota_us, NSEC_PER_USEC);
7479 
7480         return quota_us;
7481 }
7482 
7483 static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
7484 {
7485         u64 quota, period;
7486 
7487         if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
7488                 return -EINVAL;
7489 
7490         period = (u64)cfs_period_us * NSEC_PER_USEC;
7491         quota = tg->cfs_bandwidth.quota;
7492 
7493         return tg_set_cfs_bandwidth(tg, period, quota);
7494 }
7495 
7496 static long tg_get_cfs_period(struct task_group *tg)
7497 {
7498         u64 cfs_period_us;
7499 
7500         cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period);
7501         do_div(cfs_period_us, NSEC_PER_USEC);
7502 
7503         return cfs_period_us;
7504 }
7505 
7506 static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
7507                                   struct cftype *cft)
7508 {
7509         return tg_get_cfs_quota(css_tg(css));
7510 }
7511 
7512 static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
7513                                    struct cftype *cftype, s64 cfs_quota_us)
7514 {
7515         return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
7516 }
7517 
7518 static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css,
7519                                    struct cftype *cft)
7520 {
7521         return tg_get_cfs_period(css_tg(css));
7522 }
7523 
7524 static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
7525                                     struct cftype *cftype, u64 cfs_period_us)
7526 {
7527         return tg_set_cfs_period(css_tg(css), cfs_period_us);
7528 }
7529 
7530 struct cfs_schedulable_data {
7531         struct task_group *tg;
7532         u64 period, quota;
7533 };
7534 
7535 
7536 /*
7537  * Normalize group quota/period to be quota/max_period; note: units are usecs.
7538  */
7539 static u64 normalize_cfs_quota(struct task_group *tg,
7540                                struct cfs_schedulable_data *d)
7541 {
7542         u64 quota, period;
7543 
7544         if (tg == d->tg) {
7545                 period = d->period;
7546                 quota = d->quota;
7547         } else {
7548                 period = tg_get_cfs_period(tg);
7549                 quota = tg_get_cfs_quota(tg);
7550         }
7551 
7552         /* Note: these should typically be equivalent */
7553         if (quota == RUNTIME_INF || quota == -1)
7554                 return RUNTIME_INF;
7555 
7556         return to_ratio(period, quota);
7557 }
7558 
7559 static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
7560 {
7561         struct cfs_schedulable_data *d = data;
7562         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
7563         s64 quota = 0, parent_quota = -1;
7564 
7565         if (!tg->parent) {
7566                 quota = RUNTIME_INF;
7567         } else {
7568                 struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth;
7569 
7570                 quota = normalize_cfs_quota(tg, d);
7571                 parent_quota = parent_b->hierarchical_quota;
7572 
7573                 /*
7574                  * Ensure max(child_quota) <= parent_quota. On cgroup2,
7575                  * always take the min. On cgroup1, only inherit when no
7576                  * limit is set:
7577                  */
7578                 if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
7579                         quota = min(quota, parent_quota);
7580                 } else {
7581                         if (quota == RUNTIME_INF)
7582                                 quota = parent_quota;
7583                         else if (parent_quota != RUNTIME_INF && quota > parent_quota)
7584                                 return -EINVAL;
7585                 }
7586         }
7587         cfs_b->hierarchical_quota = quota;
7588 
7589         return 0;
7590 }
7591 
7592 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
7593 {
7594         int ret;
7595         struct cfs_schedulable_data data = {
7596                 .tg = tg,
7597                 .period = period,
7598                 .quota = quota,
7599         };
7600 
7601         if (quota != RUNTIME_INF) {
7602                 do_div(data.period, NSEC_PER_USEC);
7603                 do_div(data.quota, NSEC_PER_USEC);
7604         }
7605 
7606         rcu_read_lock();
7607         ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
7608         rcu_read_unlock();
7609 
7610         return ret;
7611 }
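A rough userspace model of the hierarchical constraint walked by __cfs_schedulable(): each group's quota/period ratio is compared against its parent's, and on the legacy (cgroup v1) hierarchy a child asking for more than its parent is rejected. The BW_SHIFT value below is an assumption intended to mirror the to_ratio() helper defined earlier in this file.

#include <stdio.h>

#define BW_SHIFT        20      /* assumed to match to_ratio() */

static unsigned long long ratio(unsigned long long period_us,
                                unsigned long long quota_us)
{
        return (quota_us << BW_SHIFT) / period_us;
}

int main(void)
{
        /* parent: 50ms quota per 100ms; child asks for 80ms per 100ms */
        unsigned long long parent = ratio(100000, 50000);
        unsigned long long child  = ratio(100000, 80000);

        printf("%s\n", child > parent ? "rejected (-EINVAL)" : "accepted");
        return 0;
}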
7612 
7613 static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
7614 {
7615         struct task_group *tg = css_tg(seq_css(sf));
7616         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
7617 
7618         seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods);
7619         seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled);
7620         seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
7621 
7622         if (schedstat_enabled() && tg != &root_task_group) {
7623                 u64 ws = 0;
7624                 int i;
7625 
7626                 for_each_possible_cpu(i)
7627                         ws += schedstat_val(tg->se[i]->statistics.wait_sum);
7628 
7629                 seq_printf(sf, "wait_sum %llu\n", ws);
7630         }
7631 
7632         return 0;
7633 }
7634 #endif 
7635 #endif 
7636 
7637 #ifdef CONFIG_RT_GROUP_SCHED
7638 static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
7639                                 struct cftype *cft, s64 val)
7640 {
7641         return sched_group_set_rt_runtime(css_tg(css), val);
7642 }
7643 
7644 static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css,
7645                                struct cftype *cft)
7646 {
7647         return sched_group_rt_runtime(css_tg(css));
7648 }
7649 
7650 static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css,
7651                                     struct cftype *cftype, u64 rt_period_us)
7652 {
7653         return sched_group_set_rt_period(css_tg(css), rt_period_us);
7654 }
7655 
7656 static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
7657                                    struct cftype *cft)
7658 {
7659         return sched_group_rt_period(css_tg(css));
7660 }
7661 #endif 
7662 
7663 static struct cftype cpu_legacy_files[] = {
7664 #ifdef CONFIG_FAIR_GROUP_SCHED
7665         {
7666                 .name = "shares",
7667                 .read_u64 = cpu_shares_read_u64,
7668                 .write_u64 = cpu_shares_write_u64,
7669         },
7670 #endif
7671 #ifdef CONFIG_CFS_BANDWIDTH
7672         {
7673                 .name = "cfs_quota_us",
7674                 .read_s64 = cpu_cfs_quota_read_s64,
7675                 .write_s64 = cpu_cfs_quota_write_s64,
7676         },
7677         {
7678                 .name = "cfs_period_us",
7679                 .read_u64 = cpu_cfs_period_read_u64,
7680                 .write_u64 = cpu_cfs_period_write_u64,
7681         },
7682         {
7683                 .name = "stat",
7684                 .seq_show = cpu_cfs_stat_show,
7685         },
7686 #endif
7687 #ifdef CONFIG_RT_GROUP_SCHED
7688         {
7689                 .name = "rt_runtime_us",
7690                 .read_s64 = cpu_rt_runtime_read,
7691                 .write_s64 = cpu_rt_runtime_write,
7692         },
7693         {
7694                 .name = "rt_period_us",
7695                 .read_u64 = cpu_rt_period_read_uint,
7696                 .write_u64 = cpu_rt_period_write_uint,
7697         },
7698 #endif
7699 #ifdef CONFIG_UCLAMP_TASK_GROUP
7700         {
7701                 .name = "uclamp.min",
7702                 .flags = CFTYPE_NOT_ON_ROOT,
7703                 .seq_show = cpu_uclamp_min_show,
7704                 .write = cpu_uclamp_min_write,
7705         },
7706         {
7707                 .name = "uclamp.max",
7708                 .flags = CFTYPE_NOT_ON_ROOT,
7709                 .seq_show = cpu_uclamp_max_show,
7710                 .write = cpu_uclamp_max_write,
7711         },
7712 #endif
7713         { }     
7714 };
7715 
7716 static int cpu_extra_stat_show(struct seq_file *sf,
7717                                struct cgroup_subsys_state *css)
7718 {
7719 #ifdef CONFIG_CFS_BANDWIDTH
7720         {
7721                 struct task_group *tg = css_tg(css);
7722                 struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
7723                 u64 throttled_usec;
7724 
7725                 throttled_usec = cfs_b->throttled_time;
7726                 do_div(throttled_usec, NSEC_PER_USEC);
7727 
7728                 seq_printf(sf, "nr_periods %d\n"
7729                            "nr_throttled %d\n"
7730                            "throttled_usec %llu\n",
7731                            cfs_b->nr_periods, cfs_b->nr_throttled,
7732                            throttled_usec);
7733         }
7734 #endif
7735         return 0;
7736 }
7737 
7738 #ifdef CONFIG_FAIR_GROUP_SCHED
7739 static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
7740                                struct cftype *cft)
7741 {
7742         struct task_group *tg = css_tg(css);
7743         u64 weight = scale_load_down(tg->shares);
7744 
7745         return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
7746 }
7747 
7748 static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
7749                                 struct cftype *cft, u64 weight)
7750 {
7751         /*
7752          * cgroup weight knobs should use the common MIN, DFL and MAX
7753          * values which are 1, 100 and 10000 respectively. While it loses
7754          * a bit of range on both ends, it maps pretty well onto the shares
7755          * value used by the scheduler and the round-trip conversions
7756          * preserve the original value over the entire range.
7757          */
7758         if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
7759                 return -ERANGE;
7760 
7761         weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
7762 
7763         return sched_group_set_shares(css_tg(css), scale_load(weight));
7764 }
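A worked round-trip of the cgroup v2 weight mapping used by cpu_weight_read_u64() and cpu_weight_write_u64(): a weight in [1, 10000] with default 100 is converted to shares around the scheduler's 1024 baseline and back without losing the original value.

#include <stdio.h>

#define CGROUP_WEIGHT_DFL       100ULL

/* round-to-nearest division, like DIV_ROUND_CLOSEST_ULL() */
static unsigned long long div_closest(unsigned long long n, unsigned long long d)
{
        return (n + d / 2) / d;
}

int main(void)
{
        unsigned long long weight = 200;        /* value written to cpu.weight */
        unsigned long long shares, back;

        shares = div_closest(weight * 1024, CGROUP_WEIGHT_DFL); /* 2048 */
        back   = div_closest(shares * CGROUP_WEIGHT_DFL, 1024); /* 200 again */

        printf("shares=%llu weight=%llu\n", shares, back);
        return 0;
}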
7765 
7766 static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css,
7767                                     struct cftype *cft)
7768 {
7769         unsigned long weight = scale_load_down(css_tg(css)->shares);
7770         int last_delta = INT_MAX;
7771         int prio, delta;
7772 
7773         /* Find the closest nice value to the current weight: */
7774         for (prio = 0; prio < ARRAY_SIZE(sched_prio_to_weight); prio++) {
7775                 delta = abs(sched_prio_to_weight[prio] - weight);
7776                 if (delta >= last_delta)
7777                         break;
7778                 last_delta = delta;
7779         }
7780 
7781         return PRIO_TO_NICE(prio - 1 + MAX_RT_PRIO);
7782 }
7783 
7784 static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
7785                                      struct cftype *cft, s64 nice)
7786 {
7787         unsigned long weight;
7788         int idx;
7789 
7790         if (nice < MIN_NICE || nice > MAX_NICE)
7791                 return -ERANGE;
7792 
7793         idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
7794         idx = array_index_nospec(idx, 40);
7795         weight = sched_prio_to_weight[idx];
7796 
7797         return sched_group_set_shares(css_tg(css), scale_load(weight));
7798 }
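And a small sketch of the cpu.weight.nice direction handled just above: the nice value becomes an index into sched_prio_to_weight[] (nice + 20), so for example nice -5 maps to weight 3121 and nice 0 to 1024. Only an excerpt of the table is reproduced here.

#include <stdio.h>

/* excerpt of sched_prio_to_weight[], indexed by nice + 20 */
static const int nice_to_weight[26] = {
        [15] = 3121,    /* nice -5 */
        [20] = 1024,    /* nice  0 */
        [25] =  335,    /* nice +5 */
};

int main(void)
{
        int nice = -5;
        int idx = nice + 20;    /* NICE_TO_PRIO(nice) - MAX_RT_PRIO */

        printf("nice %d -> weight %d\n", nice, nice_to_weight[idx]);
        return 0;
}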
7799 #endif
7800 
7801 static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
7802                                                   long period, long quota)
7803 {
7804         if (quota < 0)
7805                 seq_puts(sf, "max");
7806         else
7807                 seq_printf(sf, "%ld", quota);
7808 
7809         seq_printf(sf, " %ld\n", period);
7810 }
7811 
7812 
7813 static int __maybe_unused cpu_period_quota_parse(char *buf,
7814                                                  u64 *periodp, u64 *quotap)
7815 {
7816         char tok[21];   
7817 
7818         if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
7819                 return -EINVAL;
7820 
7821         *periodp *= NSEC_PER_USEC;
7822 
7823         if (sscanf(tok, "%llu", quotap))
7824                 *quotap *= NSEC_PER_USEC;
7825         else if (!strcmp(tok, "max"))
7826                 *quotap = RUNTIME_INF;
7827         else
7828                 return -EINVAL;
7829 
7830         return 0;
7831 }
7832 
7833 #ifdef CONFIG_CFS_BANDWIDTH
7834 static int cpu_max_show(struct seq_file *sf, void *v)
7835 {
7836         struct task_group *tg = css_tg(seq_css(sf));
7837 
7838         cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
7839         return 0;
7840 }
7841 
7842 static ssize_t cpu_max_write(struct kernfs_open_file *of,
7843                              char *buf, size_t nbytes, loff_t off)
7844 {
7845         struct task_group *tg = css_tg(of_css(of));
7846         u64 period = tg_get_cfs_period(tg);
7847         u64 quota;
7848         int ret;
7849 
7850         ret = cpu_period_quota_parse(buf, &period, "a);
7851         if (!ret)
7852                 ret = tg_set_cfs_bandwidth(tg, period, quota);
7853         return ret ?: nbytes;
7854 }
7855 #endif
7856 
7857 static struct cftype cpu_files[] = {
7858 #ifdef CONFIG_FAIR_GROUP_SCHED
7859         {
7860                 .name = "weight",
7861                 .flags = CFTYPE_NOT_ON_ROOT,
7862                 .read_u64 = cpu_weight_read_u64,
7863                 .write_u64 = cpu_weight_write_u64,
7864         },
7865         {
7866                 .name = "weight.nice",
7867                 .flags = CFTYPE_NOT_ON_ROOT,
7868                 .read_s64 = cpu_weight_nice_read_s64,
7869                 .write_s64 = cpu_weight_nice_write_s64,
7870         },
7871 #endif
7872 #ifdef CONFIG_CFS_BANDWIDTH
7873         {
7874                 .name = "max",
7875                 .flags = CFTYPE_NOT_ON_ROOT,
7876                 .seq_show = cpu_max_show,
7877                 .write = cpu_max_write,
7878         },
7879 #endif
7880 #ifdef CONFIG_UCLAMP_TASK_GROUP
7881         {
7882                 .name = "uclamp.min",
7883                 .flags = CFTYPE_NOT_ON_ROOT,
7884                 .seq_show = cpu_uclamp_min_show,
7885                 .write = cpu_uclamp_min_write,
7886         },
7887         {
7888                 .name = "uclamp.max",
7889                 .flags = CFTYPE_NOT_ON_ROOT,
7890                 .seq_show = cpu_uclamp_max_show,
7891                 .write = cpu_uclamp_max_write,
7892         },
7893 #endif
7894         { }     
7895 };
7896 
7897 struct cgroup_subsys cpu_cgrp_subsys = {
7898         .css_alloc      = cpu_cgroup_css_alloc,
7899         .css_online     = cpu_cgroup_css_online,
7900         .css_released   = cpu_cgroup_css_released,
7901         .css_free       = cpu_cgroup_css_free,
7902         .css_extra_stat_show = cpu_extra_stat_show,
7903         .fork           = cpu_cgroup_fork,
7904         .can_attach     = cpu_cgroup_can_attach,
7905         .attach         = cpu_cgroup_attach,
7906         .legacy_cftypes = cpu_legacy_files,
7907         .dfl_cftypes    = cpu_files,
7908         .early_init     = true,
7909         .threaded       = true,
7910 };
7911 
7912 #endif  
7913 
7914 void dump_cpu_task(int cpu)
7915 {
7916         pr_info("Task dump for CPU %d:\n", cpu);
7917         sched_show_task(cpu_curr(cpu));
7918 }
7919 
7920 
7921 /*
7922  * Nice levels are multiplicative, with a gentle 10% change for every
7923  * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
7924  * nice 1, it will get ~10% less CPU time than another CPU-bound task
7925  * that remained on nice 0.
7926  *
7927  * The "10% effect" is relative and cumulative: from _any_ nice level,
7928  * going up one level means ~10% less CPU usage and going down one level
7929  * means ~10% more. To achieve that, each step uses a multiplier of ~1.25,
7930  * so two tasks one nice level apart differ in weight by ~25%.
7931  */
7932 const int sched_prio_to_weight[40] = {
7933       88761,     71755,     56483,     46273,     36291,
7934       29154,     23254,     18705,     14949,     11916,
7935        9548,      7620,      6100,      4904,      3906,
7936        3121,      2501,      1991,      1586,      1277,
7937        1024,       820,       655,       526,       423,
7938         335,       272,       215,       172,       137,
7939         110,        87,        70,        56,        45,
7940          36,        29,        23,        18,        15,
7941 };
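To make the comment above concrete, the sketch below computes the CPU shares of two CPU-bound tasks one nice level apart; since CFS distributes time proportionally to weight, the nice 1 task ends up roughly ten percentage points behind the nice 0 task.

#include <stdio.h>

int main(void)
{
        double w0 = 1024.0;     /* nice 0 */
        double w1 = 820.0;      /* nice 1 */

        printf("nice 0: %.1f%%  nice 1: %.1f%%\n",
               100.0 * w0 / (w0 + w1),          /* ~55.5% */
               100.0 * w1 / (w0 + w1));         /* ~44.5% */
        return 0;
}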
7942 
7943 /*
7944  * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated.
7945  *
7946  * In cases where the weight does not change often, we can use the
7947  * precalculated inverse to speed up arithmetics by turning divisions
7948  * into multiplications:
7949  */
7950 const u32 sched_prio_to_wmult[40] = {
7951       48388,     59856,     76040,     92818,    118348,
7952      147320,    184698,    229616,    287308,    360437,
7953      449829,    563644,    704093,    875809,   1099582,
7954     1376151,   1717300,   2157191,   2708050,   3363326,
7955     4194304,   5237765,   6557202,   8165337,  10153587,
7956    12820798,  15790321,  19976592,  24970740,  31350126,
7957    39045157,  49367440,  61356676,  76695844,  95443717,
7958   119304647, 148102320, 186737708, 238609294, 286331153,
7959 };
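As a hedged illustration of why the inverse table exists: elsewhere in the scheduler (fair.c's __calc_delta(), not shown in this file) a division by a task's weight is replaced with a multiplication by the precomputed 2^32 inverse followed by a shift. The standalone comparison below uses the nice 1 entries of both tables; the two results agree to within rounding.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t delta = 6000000;               /* ns of runtime */
        uint64_t weight = 820;                  /* sched_prio_to_weight[21] */
        uint64_t inv_weight = 5237765;          /* sched_prio_to_wmult[21] */

        uint64_t by_div = delta * 1024 / weight;
        uint64_t by_mul = (delta * 1024 * inv_weight) >> 32;

        printf("div=%llu mul=%llu\n",
               (unsigned long long)by_div, (unsigned long long)by_mul);
        return 0;
}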
7960 
7961 #undef CREATE_TRACE_POINTS