This source file includes the following definitions.
- hardlockup_detector_disable
- hardlockup_panic_setup
- hardlockup_all_cpu_backtrace_setup
- watchdog_nmi_enable
- watchdog_nmi_disable
- watchdog_nmi_probe
- watchdog_nmi_stop
- watchdog_nmi_start
- lockup_detector_update_enable
- softlockup_panic_setup
- nowatchdog_setup
- nosoftlockup_setup
- watchdog_thresh_setup
- softlockup_all_cpu_backtrace_setup
- get_softlockup_thresh
- get_timestamp
- set_sample_period
- __touch_watchdog
- touch_softlockup_watchdog_sched
- touch_softlockup_watchdog
- touch_all_softlockup_watchdogs
- touch_softlockup_watchdog_sync
- is_softlockup
- is_hardlockup
- watchdog_interrupt_count
- softlockup_fn
- watchdog_timer_fn
- watchdog_enable
- watchdog_disable
- softlockup_stop_fn
- softlockup_stop_all
- softlockup_start_fn
- softlockup_start_all
- lockup_detector_online_cpu
- lockup_detector_offline_cpu
- lockup_detector_reconfigure
- lockup_detector_setup
- lockup_detector_reconfigure
- lockup_detector_setup
- __lockup_detector_cleanup
- lockup_detector_cleanup
- lockup_detector_soft_poweroff
- proc_watchdog_update
- proc_watchdog_common
- proc_watchdog
- proc_nmi_watchdog
- proc_soft_watchdog
- proc_watchdog_thresh
- proc_watchdog_cpumask
- lockup_detector_init
1
2
3
4
5
6
7
8
9
10
11
12
13 #define pr_fmt(fmt) "watchdog: " fmt
14
15 #include <linux/mm.h>
16 #include <linux/cpu.h>
17 #include <linux/nmi.h>
18 #include <linux/init.h>
19 #include <linux/module.h>
20 #include <linux/sysctl.h>
21 #include <linux/tick.h>
22 #include <linux/sched/clock.h>
23 #include <linux/sched/debug.h>
24 #include <linux/sched/isolation.h>
25 #include <linux/stop_machine.h>
26
27 #include <asm/irq_regs.h>
28 #include <linux/kvm_para.h>
29
30 static DEFINE_MUTEX(watchdog_mutex);
31
32 #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
33 # define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
34 # define NMI_WATCHDOG_DEFAULT 1
35 #else
36 # define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
37 # define NMI_WATCHDOG_DEFAULT 0
38 #endif
39
40 unsigned long __read_mostly watchdog_enabled;
41 int __read_mostly watchdog_user_enabled = 1;
42 int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
43 int __read_mostly soft_watchdog_user_enabled = 1;
44 int __read_mostly watchdog_thresh = 10;
45 static int __read_mostly nmi_watchdog_available;
46
47 static struct cpumask watchdog_allowed_mask __read_mostly;
48
49 struct cpumask watchdog_cpumask __read_mostly;
50 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
51
52 #ifdef CONFIG_HARDLOCKUP_DETECTOR
53
54
55
56 unsigned int __read_mostly hardlockup_panic =
57 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
58
59
60
61
62
63
64
65
66 void __init hardlockup_detector_disable(void)
67 {
68 nmi_watchdog_user_enabled = 0;
69 }
70
71 static int __init hardlockup_panic_setup(char *str)
72 {
73 if (!strncmp(str, "panic", 5))
74 hardlockup_panic = 1;
75 else if (!strncmp(str, "nopanic", 7))
76 hardlockup_panic = 0;
77 else if (!strncmp(str, "0", 1))
78 nmi_watchdog_user_enabled = 0;
79 else if (!strncmp(str, "1", 1))
80 nmi_watchdog_user_enabled = 1;
81 return 1;
82 }
83 __setup("nmi_watchdog=", hardlockup_panic_setup);
84
# ifdef CONFIG_SMP
/* Non-zero when "hardlockup_all_cpu_backtrace=" was given a non-zero value. */
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;

/*
 * Parse "hardlockup_all_cpu_backtrace=": any non-zero number stores 1,
 * zero stores 0. Always returns 1 (parameter handled).
 */
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_hardlockup_all_cpu_backtrace = simple_strtol(str, NULL, 0) != 0;

	return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
# endif
95 #endif
96
97
98
99
100
101
102
103
104
105 int __weak watchdog_nmi_enable(unsigned int cpu)
106 {
107 hardlockup_detector_perf_enable();
108 return 0;
109 }
110
111 void __weak watchdog_nmi_disable(unsigned int cpu)
112 {
113 hardlockup_detector_perf_disable();
114 }
115
116
117 int __weak __init watchdog_nmi_probe(void)
118 {
119 return hardlockup_detector_perf_init();
120 }
121
122
123
124
125
126
127
128
129
130 void __weak watchdog_nmi_stop(void) { }
131
132
133
134
135
136
137
138
139
140
141
142
143 void __weak watchdog_nmi_start(void) { }
144
145
146
147
148
149
150
151 static void lockup_detector_update_enable(void)
152 {
153 watchdog_enabled = 0;
154 if (!watchdog_user_enabled)
155 return;
156 if (nmi_watchdog_available && nmi_watchdog_user_enabled)
157 watchdog_enabled |= NMI_WATCHDOG_ENABLED;
158 if (soft_watchdog_user_enabled)
159 watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
160 }
161
162 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
163
164 #define SOFTLOCKUP_RESET ULONG_MAX
165
166
167 unsigned int __read_mostly softlockup_panic =
168 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
169
170 static bool softlockup_initialized __read_mostly;
171 static u64 __read_mostly sample_period;
172
173 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
174 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
175 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
176 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
177 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
178 static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
179 static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
180 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
181 static unsigned long soft_lockup_nmi_warn;
182
183 static int __init softlockup_panic_setup(char *str)
184 {
185 softlockup_panic = simple_strtoul(str, NULL, 0);
186 return 1;
187 }
188 __setup("softlockup_panic=", softlockup_panic_setup);
189
190 static int __init nowatchdog_setup(char *str)
191 {
192 watchdog_user_enabled = 0;
193 return 1;
194 }
195 __setup("nowatchdog", nowatchdog_setup);
196
197 static int __init nosoftlockup_setup(char *str)
198 {
199 soft_watchdog_user_enabled = 0;
200 return 1;
201 }
202 __setup("nosoftlockup", nosoftlockup_setup);
203
204 static int __init watchdog_thresh_setup(char *str)
205 {
206 get_option(&str, &watchdog_thresh);
207 return 1;
208 }
209 __setup("watchdog_thresh=", watchdog_thresh_setup);
210
#ifdef CONFIG_SMP
/* Non-zero asks watchdog_timer_fn() to dump backtraces of the other CPUs. */
int __read_mostly sysctl_softlockup_all_cpu_backtrace;

/*
 * Parse "softlockup_all_cpu_backtrace=": any non-zero number stores 1,
 * zero stores 0. Always returns 1 (parameter handled).
 */
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace = simple_strtol(str, NULL, 0) != 0;

	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif
221
222 static void __lockup_detector_cleanup(void);
223
224
225
226
227
228
229
230
231 static int get_softlockup_thresh(void)
232 {
233 return watchdog_thresh * 2;
234 }
235
236
237
238
239
240
/*
 * Coarse timestamp: running_clock() divided by 2^30.
 * NOTE(review): presumably running_clock() is in nanoseconds, which makes
 * this roughly seconds - confirm against its definition.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;
}
245
246 static void set_sample_period(void)
247 {
248
249
250
251
252
253
254
255 sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
256 watchdog_update_hrtimer_threshold(sample_period);
257 }
258
259
260 static void __touch_watchdog(void)
261 {
262 __this_cpu_write(watchdog_touch_ts, get_timestamp());
263 }
264
265
266
267
268
269
270
271
272
273 notrace void touch_softlockup_watchdog_sched(void)
274 {
275
276
277
278
279 raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
280 }
281
282 notrace void touch_softlockup_watchdog(void)
283 {
284 touch_softlockup_watchdog_sched();
285 wq_watchdog_touch(raw_smp_processor_id());
286 }
287 EXPORT_SYMBOL(touch_softlockup_watchdog);
288
289 void touch_all_softlockup_watchdogs(void)
290 {
291 int cpu;
292
293
294
295
296
297
298
299
300
301
302 for_each_cpu(cpu, &watchdog_allowed_mask)
303 per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
304 wq_watchdog_touch(-1);
305 }
306
307 void touch_softlockup_watchdog_sync(void)
308 {
309 __this_cpu_write(softlockup_touch_sync, true);
310 __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
311 }
312
313 static int is_softlockup(unsigned long touch_ts)
314 {
315 unsigned long now = get_timestamp();
316
317 if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
318
319 if (time_after(now, touch_ts + get_softlockup_thresh()))
320 return now - touch_ts;
321 }
322 return 0;
323 }
324
325
326 bool is_hardlockup(void)
327 {
328 unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
329
330 if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
331 return true;
332
333 __this_cpu_write(hrtimer_interrupts_saved, hrint);
334 return false;
335 }
336
337 static void watchdog_interrupt_count(void)
338 {
339 __this_cpu_inc(hrtimer_interrupts);
340 }
341
342 static DEFINE_PER_CPU(struct completion, softlockup_completion);
343 static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
344
345
346
347
348
349
350
351
352
353 static int softlockup_fn(void *data)
354 {
355 __this_cpu_write(soft_lockup_hrtimer_cnt,
356 __this_cpu_read(hrtimer_interrupts));
357 __touch_watchdog();
358 complete(this_cpu_ptr(&softlockup_completion));
359
360 return 0;
361 }
362
363
364 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
365 {
366 unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
367 struct pt_regs *regs = get_irq_regs();
368 int duration;
369 int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
370
371 if (!watchdog_enabled)
372 return HRTIMER_NORESTART;
373
374
375 watchdog_interrupt_count();
376
377
378 if (completion_done(this_cpu_ptr(&softlockup_completion))) {
379 reinit_completion(this_cpu_ptr(&softlockup_completion));
380 stop_one_cpu_nowait(smp_processor_id(),
381 softlockup_fn, NULL,
382 this_cpu_ptr(&softlockup_stop_work));
383 }
384
385
386 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
387
388 if (touch_ts == SOFTLOCKUP_RESET) {
389 if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
390
391
392
393
394 __this_cpu_write(softlockup_touch_sync, false);
395 sched_clock_tick();
396 }
397
398
399 kvm_check_and_clear_guest_paused();
400 __touch_watchdog();
401 return HRTIMER_RESTART;
402 }
403
404
405
406
407
408
409
410 duration = is_softlockup(touch_ts);
411 if (unlikely(duration)) {
412
413
414
415
416
417 if (kvm_check_and_clear_guest_paused())
418 return HRTIMER_RESTART;
419
420
421 if (__this_cpu_read(soft_watchdog_warn) == true) {
422
423
424
425
426
427
428
429
430 if (__this_cpu_read(softlockup_task_ptr_saved) !=
431 current) {
432 __this_cpu_write(soft_watchdog_warn, false);
433 __touch_watchdog();
434 }
435 return HRTIMER_RESTART;
436 }
437
438 if (softlockup_all_cpu_backtrace) {
439
440
441
442 if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
443
444 __this_cpu_write(soft_watchdog_warn, true);
445 return HRTIMER_RESTART;
446 }
447 }
448
449 pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
450 smp_processor_id(), duration,
451 current->comm, task_pid_nr(current));
452 __this_cpu_write(softlockup_task_ptr_saved, current);
453 print_modules();
454 print_irqtrace_events(current);
455 if (regs)
456 show_regs(regs);
457 else
458 dump_stack();
459
460 if (softlockup_all_cpu_backtrace) {
461
462
463
464 trigger_allbutself_cpu_backtrace();
465
466 clear_bit(0, &soft_lockup_nmi_warn);
467
468 smp_mb__after_atomic();
469 }
470
471 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
472 if (softlockup_panic)
473 panic("softlockup: hung tasks");
474 __this_cpu_write(soft_watchdog_warn, true);
475 } else
476 __this_cpu_write(soft_watchdog_warn, false);
477
478 return HRTIMER_RESTART;
479 }
480
481 static void watchdog_enable(unsigned int cpu)
482 {
483 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
484 struct completion *done = this_cpu_ptr(&softlockup_completion);
485
486 WARN_ON_ONCE(cpu != smp_processor_id());
487
488 init_completion(done);
489 complete(done);
490
491
492
493
494
495 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
496 hrtimer->function = watchdog_timer_fn;
497 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
498 HRTIMER_MODE_REL_PINNED_HARD);
499
500
501 __touch_watchdog();
502
503 if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
504 watchdog_nmi_enable(cpu);
505 }
506
507 static void watchdog_disable(unsigned int cpu)
508 {
509 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
510
511 WARN_ON_ONCE(cpu != smp_processor_id());
512
513
514
515
516
517
518 watchdog_nmi_disable(cpu);
519 hrtimer_cancel(hrtimer);
520 wait_for_completion(this_cpu_ptr(&softlockup_completion));
521 }
522
/*
 * Callback for smp_call_on_cpu(): disable the watchdogs on the CPU it runs
 * on. @data is unused. Always returns 0.
 */
static int softlockup_stop_fn(void *data)
{
	unsigned int this_cpu = smp_processor_id();

	watchdog_disable(this_cpu);
	return 0;
}
528
529 static void softlockup_stop_all(void)
530 {
531 int cpu;
532
533 if (!softlockup_initialized)
534 return;
535
536 for_each_cpu(cpu, &watchdog_allowed_mask)
537 smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
538
539 cpumask_clear(&watchdog_allowed_mask);
540 }
541
/*
 * Callback for smp_call_on_cpu(): enable the watchdogs on the CPU it runs
 * on. @data is unused. Always returns 0.
 */
static int softlockup_start_fn(void *data)
{
	unsigned int this_cpu = smp_processor_id();

	watchdog_enable(this_cpu);
	return 0;
}
547
548 static void softlockup_start_all(void)
549 {
550 int cpu;
551
552 cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
553 for_each_cpu(cpu, &watchdog_allowed_mask)
554 smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
555 }
556
557 int lockup_detector_online_cpu(unsigned int cpu)
558 {
559 if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
560 watchdog_enable(cpu);
561 return 0;
562 }
563
564 int lockup_detector_offline_cpu(unsigned int cpu)
565 {
566 if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
567 watchdog_disable(cpu);
568 return 0;
569 }
570
571 static void lockup_detector_reconfigure(void)
572 {
573 cpus_read_lock();
574 watchdog_nmi_stop();
575
576 softlockup_stop_all();
577 set_sample_period();
578 lockup_detector_update_enable();
579 if (watchdog_enabled && watchdog_thresh)
580 softlockup_start_all();
581
582 watchdog_nmi_start();
583 cpus_read_unlock();
584
585
586
587
588 __lockup_detector_cleanup();
589 }
590
591
592
593
594
595
596
597
598 static __init void lockup_detector_setup(void)
599 {
600
601
602
603
604 lockup_detector_update_enable();
605
606 if (!IS_ENABLED(CONFIG_SYSCTL) &&
607 !(watchdog_enabled && watchdog_thresh))
608 return;
609
610 mutex_lock(&watchdog_mutex);
611 lockup_detector_reconfigure();
612 softlockup_initialized = true;
613 mutex_unlock(&watchdog_mutex);
614 }
615
616 #else
/*
 * Reconfiguration for builds without the soft lockup detector: only the
 * NMI hooks are cycled around recomputing the enable mask, all under the
 * CPU hotplug read lock.
 */
static void lockup_detector_reconfigure(void)
{
	cpus_read_lock();

	watchdog_nmi_stop();
	lockup_detector_update_enable();
	watchdog_nmi_start();

	cpus_read_unlock();
}
/* Setup for builds without the soft lockup detector: just reconfigure. */
static inline void lockup_detector_setup(void)
{
	lockup_detector_reconfigure();
}
629 #endif
630
631 static void __lockup_detector_cleanup(void)
632 {
633 lockdep_assert_held(&watchdog_mutex);
634 hardlockup_detector_perf_cleanup();
635 }
636
637
638
639
640
641
642 void lockup_detector_cleanup(void)
643 {
644 mutex_lock(&watchdog_mutex);
645 __lockup_detector_cleanup();
646 mutex_unlock(&watchdog_mutex);
647 }
648
649
650
651
652
653
654
655 void lockup_detector_soft_poweroff(void)
656 {
657 watchdog_enabled = 0;
658 }
659
660 #ifdef CONFIG_SYSCTL
661
662
663 static void proc_watchdog_update(void)
664 {
665
666 cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
667 lockup_detector_reconfigure();
668 }
669
670
671
672
673
674
675
676
677
678
679
680
681
682 static int proc_watchdog_common(int which, struct ctl_table *table, int write,
683 void __user *buffer, size_t *lenp, loff_t *ppos)
684 {
685 int err, old, *param = table->data;
686
687 mutex_lock(&watchdog_mutex);
688
689 if (!write) {
690
691
692
693
694 *param = (watchdog_enabled & which) != 0;
695 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
696 } else {
697 old = READ_ONCE(*param);
698 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
699 if (!err && old != READ_ONCE(*param))
700 proc_watchdog_update();
701 }
702 mutex_unlock(&watchdog_mutex);
703 return err;
704 }
705
706
707
708
709 int proc_watchdog(struct ctl_table *table, int write,
710 void __user *buffer, size_t *lenp, loff_t *ppos)
711 {
712 return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
713 table, write, buffer, lenp, ppos);
714 }
715
716
717
718
719 int proc_nmi_watchdog(struct ctl_table *table, int write,
720 void __user *buffer, size_t *lenp, loff_t *ppos)
721 {
722 if (!nmi_watchdog_available && write)
723 return -ENOTSUPP;
724 return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
725 table, write, buffer, lenp, ppos);
726 }
727
728
729
730
731 int proc_soft_watchdog(struct ctl_table *table, int write,
732 void __user *buffer, size_t *lenp, loff_t *ppos)
733 {
734 return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
735 table, write, buffer, lenp, ppos);
736 }
737
738
739
740
741 int proc_watchdog_thresh(struct ctl_table *table, int write,
742 void __user *buffer, size_t *lenp, loff_t *ppos)
743 {
744 int err, old;
745
746 mutex_lock(&watchdog_mutex);
747
748 old = READ_ONCE(watchdog_thresh);
749 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
750
751 if (!err && write && old != READ_ONCE(watchdog_thresh))
752 proc_watchdog_update();
753
754 mutex_unlock(&watchdog_mutex);
755 return err;
756 }
757
758
759
760
761
762
763
764 int proc_watchdog_cpumask(struct ctl_table *table, int write,
765 void __user *buffer, size_t *lenp, loff_t *ppos)
766 {
767 int err;
768
769 mutex_lock(&watchdog_mutex);
770
771 err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
772 if (!err && write)
773 proc_watchdog_update();
774
775 mutex_unlock(&watchdog_mutex);
776 return err;
777 }
778 #endif
779
780 void __init lockup_detector_init(void)
781 {
782 if (tick_nohz_full_enabled())
783 pr_info("Disabling watchdog on nohz_full cores by default\n");
784
785 cpumask_copy(&watchdog_cpumask,
786 housekeeping_cpumask(HK_FLAG_TIMER));
787
788 if (!watchdog_nmi_probe())
789 nmi_watchdog_available = true;
790 lockup_detector_setup();
791 }