/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		http://lse.sourceforge.net/locking/rcupdate.html
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/tick.h>

#define CREATE_TRACE_POINTS

#include "rcu.h"

MODULE_ALIAS("rcupdate");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcupdate."

module_param(rcu_expedited, int, 0);

#ifndef CONFIG_TINY_RCU

static atomic_t rcu_expedited_nesting =
	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);

/*
 * Should normal grace-period primitives be expedited?  Intended for
 * use within RCU.  Note that this function takes the rcu_expedited
 * sysfs/boot variable into account as well as the rcu_expedite_gp()
 * nesting.  So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited()
 * returns false is a -really- bad idea.
 */
bool rcu_gp_is_expedited(void)
{
	return rcu_expedited || atomic_read(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);

/**
 * rcu_expedite_gp - Expedite future RCU grace periods
 *
 * After a call to this function, future calls to synchronize_rcu() and
 * friends act as if the corresponding synchronize_rcu_expedited() function
 * had instead been called.
 */
void rcu_expedite_gp(void)
{
	atomic_inc(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_expedite_gp);

/**
 * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation
 *
 * Undo a prior call to rcu_expedite_gp().  If all prior calls to
 * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(),
 * and if the rcu_expedited sysfs/boot parameter is not set, then all
 * subsequent calls to synchronize_rcu() and friends will return to
 * their normal non-expedited behavior.
 */
void rcu_unexpedite_gp(void)
{
	atomic_dec(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
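
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * caller that temporarily forces expedited grace periods around a
 * latency-sensitive update.  The function name is invented for
 * illustration only; the key point is that every rcu_expedite_gp()
 * must be balanced by a matching rcu_unexpedite_gp().
 */
static void __maybe_unused example_expedited_update(void)
{
	rcu_expedite_gp();	/* Subsequent synchronize_rcu() calls are expedited. */
	synchronize_rcu();	/* Behaves like synchronize_rcu_expedited(). */
	rcu_unexpedite_gp();	/* Drop the nesting; normal behavior may resume. */
}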

#endif /* #ifndef CONFIG_TINY_RCU */

/*
 * Inform RCU of the end of the in-kernel boot sequence.
 */
void rcu_end_inkernel_boot(void)
{
	if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
		rcu_unexpedite_gp();
}

#ifdef CONFIG_PREEMPT_RCU

/*
 * Preemptible RCU implementation for rcu_read_lock().
 * Just increment ->rcu_read_lock_nesting, shared state will be updated
 * if we block.
 */
void __rcu_read_lock(void)
{
	current->rcu_read_lock_nesting++;
	barrier();  /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

/*
 * Preemptible RCU implementation for rcu_read_unlock().
 * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section and other special cases.
 */
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting != 1) {
		--t->rcu_read_lock_nesting;
	} else {
		barrier();  /* critical section before exit code. */
		t->rcu_read_lock_nesting = INT_MIN;
		barrier();  /* assign before ->rcu_read_unlock_special load */
		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
			rcu_read_unlock_special(t);
		barrier();  /* ->rcu_read_unlock_special load before assign */
		t->rcu_read_lock_nesting = 0;
	}
#ifdef CONFIG_PROVE_LOCKING
	{
		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);

		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
	}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

#endif /* #ifdef CONFIG_PREEMPT_RCU */

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);

static struct lock_class_key rcu_bh_lock_key;
struct lockdep_map rcu_bh_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
EXPORT_SYMBOL_GPL(rcu_bh_lock_map);

static struct lock_class_key rcu_sched_lock_key;
struct lockdep_map rcu_sched_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);

static struct lock_class_key rcu_callback_key;
struct lockdep_map rcu_callback_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);

int notrace debug_lockdep_rcu_enabled(void)
{
	return rcu_scheduler_active && debug_locks &&
	       current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);

/**
 * rcu_read_lock_held() - might we be in RCU read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 * read-side critical section.  In the absence of CONFIG_DEBUG_LOCK_ALLOC,
 * this assumes we are in an RCU read-side critical section unless it can
 * prove otherwise.  This is useful for debug checks in functions that
 * require that they be called within an RCU read-side critical section.
 *
 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
 * occur in the same context, for example, it is illegal to invoke
 * rcu_read_unlock() in process context if the matching rcu_read_lock()
 * was invoked from within an irq handler.
 *
 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
int rcu_read_lock_held(void)
{
	if (!debug_lockdep_rcu_enabled())
		return 1;
	if (!rcu_is_watching())
		return 0;
	if (!rcu_lockdep_current_cpu_online())
		return 0;
	return lock_is_held(&rcu_lock_map);
}
EXPORT_SYMBOL_GPL(rcu_read_lock_held);

/**
 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
 *
 * Check for bottom half being disabled, which covers both the
 * CONFIG_PROVE_RCU and the !CONFIG_PROVE_RCU cases.  Note that if someone
 * uses rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
 * will show the situation.  This is useful for debug checks in functions
 * that require that they be called within an RCU read-side critical
 * section.
 *
 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
 *
 * Note that rcu_read_lock_bh() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
int rcu_read_lock_bh_held(void)
{
	if (!debug_lockdep_rcu_enabled())
		return 1;
	if (!rcu_is_watching())
		return 0;
	if (!rcu_lockdep_current_cpu_online())
		return 0;
	return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);

#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
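
/*
 * Illustrative sketch, not part of the original file: the lockdep
 * predicates above are normally consumed via rcu_dereference_check()
 * rather than called directly.  The example_conf pointer, its spinlock,
 * and the accessor below are hypothetical names used only to show the
 * pattern.
 */
struct example_conf {
	int setting;
};
static struct example_conf __rcu *example_conf_ptr;
static DEFINE_SPINLOCK(example_conf_lock);

static int __maybe_unused example_conf_get_setting(void)
{
	int ret;

	rcu_read_lock();
	/*
	 * Splats under CONFIG_PROVE_RCU unless called within an RCU
	 * read-side critical section or with example_conf_lock held.
	 */
	ret = rcu_dereference_check(example_conf_ptr,
				    rcu_read_lock_held() ||
				    lockdep_is_held(&example_conf_lock))->setting;
	rcu_read_unlock();
	return ret;
}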

/**
 * wakeme_after_rcu() - Callback function to awaken a task after grace period
 * @head: Pointer to rcu_head member within rcu_synchronize structure
 *
 * Awaken the corresponding task now that a grace period has elapsed.
 */
void wakeme_after_rcu(struct rcu_head *head)
{
	struct rcu_synchronize *rcu;

	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);
}

void wait_rcu_gp(call_rcu_func_t crf)
{
	struct rcu_synchronize rcu;

	init_rcu_head_on_stack(&rcu.head);
	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	crf(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
	destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(wait_rcu_gp);
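
/*
 * Illustrative note, not part of the original file: wait_rcu_gp() turns
 * any call_rcu()-style primitive into a synchronous wait by queueing an
 * on-stack rcu_head whose callback completes a completion.  For example,
 * synchronize_rcu_tasks() later in this file is simply
 * wait_rcu_gp(call_rcu_tasks), and an equivalent wrapper for the sched
 * flavor could look like this (sketch only):
 */
static void __maybe_unused example_synchronize_sched(void)
{
	wait_rcu_gp(call_rcu_sched);	/* Block until an RCU-sched GP elapses. */
}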

#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
void init_rcu_head(struct rcu_head *head)
{
	debug_object_init(head, &rcuhead_debug_descr);
}

void destroy_rcu_head(struct rcu_head *head)
{
	debug_object_free(head, &rcuhead_debug_descr);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 * Activation is performed internally by call_rcu().
 */
static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct rcu_head *head = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup.  We just make sure that it is
		 * tracked in the object tracker.
		 */
		debug_object_init(head, &rcuhead_debug_descr);
		debug_object_activate(head, &rcuhead_debug_descr);
		return 0;
	default:
		return 1;
	}
}

/**
 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
 * @head: pointer to rcu_head structure to be initialized
 *
 * This function informs debugobjects of a new rcu_head structure that
 * has been allocated as an auto variable on the stack.  This function
 * is not required for rcu_head structures that are statically defined or
 * that are dynamically allocated on the heap.  This function has no
 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
 */
void init_rcu_head_on_stack(struct rcu_head *head)
{
	debug_object_init_on_stack(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);

/**
 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
 * @head: pointer to rcu_head structure that was previously initialized
 *
 * This function informs debugobjects that an on-stack rcu_head structure
 * is about to go out of scope.  As with init_rcu_head_on_stack(), this
 * function is not required for rcu_head structures that are statically
 * defined or that are dynamically allocated on the heap.  Also as with
 * init_rcu_head_on_stack(), this function has no effect for
 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
 */
void destroy_rcu_head_on_stack(struct rcu_head *head)
{
	debug_object_free(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);

struct debug_obj_descr rcuhead_debug_descr = {
	.name = "rcu_head",
	.fixup_activate = rcuhead_fixup_activate,
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
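
/*
 * Illustrative note, not part of the original file: the
 * ODEBUG_STATE_NOTAVAILABLE case above is what allows a statically
 * allocated rcu_head to be handed to call_rcu() without any explicit
 * init_rcu_head() call, as the early-boot self tests at the end of this
 * file do with their "static struct rcu_head head" variables;
 * debugobjects simply starts tracking the object on first activation.
 */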

#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp,
			       unsigned long secs,
			       unsigned long c_old, unsigned long c)
{
	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
}
EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
	do { } while (0)
#endif

#ifdef CONFIG_RCU_STALL_COMMON

#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA	(5 * HZ)
#else
#define RCU_STALL_DELAY_DELTA	0
#endif

int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;

module_param(rcu_cpu_stall_suppress, int, 0644);
module_param(rcu_cpu_stall_timeout, int, 0644);

int rcu_jiffies_till_stall_check(void)
{
	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);

	/*
	 * Limit check must be consistent with the Kconfig limits
	 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
	 */
	if (till_stall_check < 3) {
		ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
		till_stall_check = 3;
	} else if (till_stall_check > 300) {
		ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
		till_stall_check = 300;
	}
	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
}
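
/*
 * Illustrative note, not part of the original file: with, say,
 * rcu_cpu_stall_timeout set to 21 seconds and HZ=1000, the function
 * above returns 21 * 1000 = 21000 jiffies, plus an extra 5 * HZ of
 * slack when CONFIG_PROVE_RCU is enabled.  Out-of-range writes to the
 * module parameter are clamped to the [3, 300] second range and
 * written back.
 */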

void rcu_sysrq_start(void)
{
	if (!rcu_cpu_stall_suppress)
		rcu_cpu_stall_suppress = 2;
}

void rcu_sysrq_end(void)
{
	if (rcu_cpu_stall_suppress == 2)
		rcu_cpu_stall_suppress = 0;
}

static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
{
	rcu_cpu_stall_suppress = 1;
	return NOTIFY_DONE;
}

static struct notifier_block rcu_panic_block = {
	.notifier_call = rcu_panic,
};

static int __init check_cpu_stall_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
	return 0;
}
early_initcall(check_cpu_stall_init);

#endif /* #ifdef CONFIG_RCU_STALL_COMMON */

#ifdef CONFIG_TASKS_RCU

/*
 * Simple variant of RCU whose quiescent states are voluntary context switch,
 * user-space execution, and idle.  As such, grace periods can take one good
 * long time.  There are no read-side primitives similar to rcu_read_lock()
 * and rcu_read_unlock() because this implementation is intended to get
 * the system into a safe state for some of the manipulations involved in
 * tracing and the like.  Finally, this implementation does not support
 * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
 * per-CPU callback lists will be needed.
 */

/* Global list of callbacks and associated lock. */
static struct rcu_head *rcu_tasks_cbs_head;
static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);

/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_SRCU(tasks_rcu_exit_srcu);

/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
module_param(rcu_task_stall_timeout, int, 0644);

static void rcu_spawn_tasks_kthread(void);

/*
 * Post an RCU-tasks callback.  First call must be from process context
 * after the scheduler is fully operational.
 */
void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
{
	unsigned long flags;
	bool needwake;

	rhp->next = NULL;
	rhp->func = func;
	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
	needwake = !rcu_tasks_cbs_head;
	*rcu_tasks_cbs_tail = rhp;
	rcu_tasks_cbs_tail = &rhp->next;
	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
	if (needwake) {
		rcu_spawn_tasks_kthread();
		wake_up(&rcu_tasks_cbs_wq);
	}
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);

/**
 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_rcu_qs(), idle execution, userspace execution, calls
 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function
 * preambles and profiling hooks.  The synchronize_rcu_tasks() function
 * is not (yet) intended for heavy use from multiple CPUs.
 *
 * Note that this guarantee implies further memory-ordering guarantees.
 * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
 * each CPU is guaranteed to have executed a full memory barrier since the
 * end of its last RCU-tasks read-side critical section whose beginning
 * preceded the call to synchronize_rcu_tasks().  In addition, each CPU
 * having an RCU-tasks read-side critical section that extends beyond
 * the return from synchronize_rcu_tasks() is guaranteed to have executed
 * a full memory barrier after the beginning of synchronize_rcu_tasks()
 * and before the beginning of that RCU-tasks read-side critical section.
 * Note that these guarantees include CPUs that are offline, idle, or
 * executing in user mode, as well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
 * to have executed a full memory barrier during the execution of
 * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
 * (but again only if the system has more than one CPU).
 */
void synchronize_rcu_tasks(void)
{
	/* Complain if the scheduler has not started.  */
	rcu_lockdep_assert(rcu_scheduler_active,
			   "synchronize_rcu_tasks called too soon");

	/* Wait for the grace period. */
	wait_rcu_gp(call_rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);

/**
 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to do so; for example, it need not wait if there are no
 * pending callbacks.
 */
void rcu_barrier_tasks(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
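
/*
 * Illustrative sketch, not part of the original file: the intended user
 * of the primitives above is code such as tracing that must not free an
 * instruction sequence (for example a trampoline) while some preempted
 * task might still be executing it.  The function and parameter names
 * below are hypothetical.
 */
static void __maybe_unused example_retire_trampoline(void *trampoline)
{
	/* Step 1: unhook @trampoline so that no new tasks can enter it. */

	/*
	 * Step 2: wait for every task to pass through a voluntary context
	 * switch, idle, or userspace execution.
	 */
	synchronize_rcu_tasks();

	/* Step 3: it is now safe to free or overwrite @trampoline. */
}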

/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
			       bool needreport, bool *firstreport)
{
	int cpu;

	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
	    !ACCESS_ONCE(t->on_rq) ||
	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
		list_del_init(&t->rcu_tasks_holdout_list);
		put_task_struct(t);
		return;
	}
	if (!needreport)
		return;
	if (*firstreport) {
		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
		*firstreport = false;
	}
	cpu = task_cpu(t);
	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
		 t, ".I"[is_idle_task(t)],
		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
		 t->rcu_tasks_idle_cpu, cpu);
	sched_show_task(t);
}

/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
	unsigned long flags;
	struct task_struct *g, *t;
	unsigned long lastreport;
	struct rcu_head *list;
	struct rcu_head *next;
	LIST_HEAD(rcu_tasks_holdouts);

	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
	housekeeping_affine(current);

	/*
	 * Each pass through the following loop makes one check for
	 * newly arrived callbacks, and, if there are some, waits for
	 * one RCU-tasks grace period and then invokes the callbacks.
	 * This loop is terminated by the system going down.  ;-)
	 */
	for (;;) {

		/* Pick up any new callbacks. */
		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
		list = rcu_tasks_cbs_head;
		rcu_tasks_cbs_head = NULL;
		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);

		/* If there were none, wait a bit and start over. */
		if (!list) {
			wait_event_interruptible(rcu_tasks_cbs_wq,
						 rcu_tasks_cbs_head);
			if (!rcu_tasks_cbs_head) {
				WARN_ON(signal_pending(current));
				schedule_timeout_interruptible(HZ/10);
			}
			continue;
		}

		/*
		 * Wait for all pre-existing t->on_rq and t->nvcsw
		 * transitions to complete.  Invoking synchronize_sched()
		 * suffices because all these transitions occur with
		 * interrupts disabled.  Without this synchronize_sched(),
		 * a read-side critical section that started before the
		 * grace period might be incorrectly seen as having started
		 * after the grace period.
		 *
		 * This synchronize_sched() also dispenses with the
		 * need for a memory barrier on the first store to
		 * ->rcu_tasks_holdout, as it forces the store to happen
		 * after the beginning of the grace period.
		 */
		synchronize_sched();

		/*
		 * There were callbacks, so we need to wait for an
		 * RCU-tasks grace period.  Start off by scanning
		 * the task list for tasks that are not already
		 * voluntarily blocked.  Mark these tasks and make
		 * a list of them in rcu_tasks_holdouts.
		 */
		rcu_read_lock();
		for_each_process_thread(g, t) {
			if (t != current && ACCESS_ONCE(t->on_rq) &&
			    !is_idle_task(t)) {
				get_task_struct(t);
				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
				list_add(&t->rcu_tasks_holdout_list,
					 &rcu_tasks_holdouts);
			}
		}
		rcu_read_unlock();

		/*
		 * Wait for tasks that are in the process of exiting.
		 * This does only part of the job, ensuring that all
		 * tasks that were previously exiting reach the point
		 * where they have disabled preemption, allowing the
		 * later synchronize_sched() to finish the job.
		 */
		synchronize_srcu(&tasks_rcu_exit_srcu);

		/*
		 * Each pass through the following loop scans the list
		 * of holdout tasks, removing any that are no longer
		 * holdouts.  When the list is empty, we are done.
		 */
		lastreport = jiffies;
		while (!list_empty(&rcu_tasks_holdouts)) {
			bool firstreport;
			bool needreport;
			int rtst;
			struct task_struct *t1;

			schedule_timeout_interruptible(HZ);
			rtst = ACCESS_ONCE(rcu_task_stall_timeout);
			needreport = rtst > 0 &&
				     time_after(jiffies, lastreport + rtst);
			if (needreport)
				lastreport = jiffies;
			firstreport = true;
			WARN_ON(signal_pending(current));
			list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
						 rcu_tasks_holdout_list) {
				check_holdout_task(t, needreport, &firstreport);
				cond_resched();
			}
		}

		/*
		 * Because ->on_rq and ->nvcsw are not guaranteed
		 * to have full memory barriers prior to them in the
		 * schedule() path, memory reordering on other CPUs could
		 * cause their RCU-tasks read-side critical sections to
		 * extend past the end of the grace period.  However,
		 * because these ->nvcsw updates are carried out with
		 * interrupts disabled, we can use synchronize_sched()
		 * to force the needed ordering on all such CPUs.
		 *
		 * This synchronize_sched() also confines all
		 * ->rcu_tasks_holdout accesses to be within the grace
		 * period, avoiding the need for memory barriers for
		 * ->rcu_tasks_holdout accesses.
		 *
		 * In addition, this synchronize_sched() waits for exiting
		 * tasks to complete their final preempt_disable() region
		 * of execution, cleaning up after the synchronize_srcu()
		 * above.
		 */
		synchronize_sched();

		/* Invoke the callbacks. */
		while (list) {
			next = list->next;
			local_bh_disable();
			list->func(list);
			local_bh_enable();
			list = next;
			cond_resched();
		}
		schedule_timeout_uninterruptible(HZ/10);
	}
}

/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
static void rcu_spawn_tasks_kthread(void)
{
	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
	static struct task_struct *rcu_tasks_kthread_ptr;
	struct task_struct *t;

	if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
		smp_mb(); /* Ensure caller sees full kthread. */
		return;
	}
	mutex_lock(&rcu_tasks_kthread_mutex);
	if (rcu_tasks_kthread_ptr) {
		mutex_unlock(&rcu_tasks_kthread_mutex);
		return;
	}
	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
	BUG_ON(IS_ERR(t));
	smp_mb(); /* Ensure others see full kthread. */
	ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
	mutex_unlock(&rcu_tasks_kthread_mutex);
}

#endif /* #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_PROVE_RCU

/*
 * Early boot self test parameters, one for each flavor
 */
static bool rcu_self_test;
static bool rcu_self_test_bh;
static bool rcu_self_test_sched;

module_param(rcu_self_test, bool, 0444);
module_param(rcu_self_test_bh, bool, 0444);
module_param(rcu_self_test_sched, bool, 0444);

static int rcu_self_test_counter;

static void test_callback(struct rcu_head *r)
{
	rcu_self_test_counter++;
	pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
}

static void early_boot_test_call_rcu(void)
{
	static struct rcu_head head;

	call_rcu(&head, test_callback);
}

static void early_boot_test_call_rcu_bh(void)
{
	static struct rcu_head head;

	call_rcu_bh(&head, test_callback);
}

static void early_boot_test_call_rcu_sched(void)
{
	static struct rcu_head head;

	call_rcu_sched(&head, test_callback);
}

void rcu_early_boot_tests(void)
{
	pr_info("Running RCU self tests\n");

	if (rcu_self_test)
		early_boot_test_call_rcu();
	if (rcu_self_test_bh)
		early_boot_test_call_rcu_bh();
	if (rcu_self_test_sched)
		early_boot_test_call_rcu_sched();
}

static int rcu_verify_early_boot_tests(void)
{
	int ret = 0;
	int early_boot_test_counter = 0;

	if (rcu_self_test) {
		early_boot_test_counter++;
		rcu_barrier();
	}
	if (rcu_self_test_bh) {
		early_boot_test_counter++;
		rcu_barrier_bh();
	}
	if (rcu_self_test_sched) {
		early_boot_test_counter++;
		rcu_barrier_sched();
	}

	if (rcu_self_test_counter != early_boot_test_counter) {
		WARN_ON(1);
		ret = -1;
	}

	return ret;
}
late_initcall(rcu_verify_early_boot_tests);
#else
void rcu_early_boot_tests(void) {}
#endif /* CONFIG_PROVE_RCU */
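
/*
 * Illustrative note, not part of the original file: because
 * MODULE_PARAM_PREFIX is "rcupdate.", the self tests above are enabled
 * from the kernel command line with, for example:
 *
 *	rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1
 *
 * Each enabled flavor should print its "RCU test callback executed"
 * message; otherwise the WARN_ON() in rcu_verify_early_boot_tests()
 * fires at late_initcall() time.
 */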