1/*
2 * Blackfin nmi_watchdog Driver
3 *
4 * Originally based on bfin_wdt.c
5 * Copyright 2010-2010 Analog Devices Inc.
6 *		Graff Yang <graf.yang@analog.com>
7 *
8 * Enter bugs at http://blackfin.uclinux.org/
9 *
10 * Licensed under the GPL-2 or later.
11 */
12
13#include <linux/bitops.h>
14#include <linux/hardirq.h>
15#include <linux/syscore_ops.h>
16#include <linux/pm.h>
17#include <linux/nmi.h>
18#include <linux/smp.h>
19#include <linux/timer.h>
20#include <asm/blackfin.h>
21#include <linux/atomic.h>
22#include <asm/cacheflush.h>
23#include <asm/bfin_watchdog.h>
24
/* Driver name string. */
#define DRV_NAME "nmi-wdt"

#define NMI_WDT_TIMEOUT 5          /* 5 seconds */
#define NMI_CHECK_TIMEOUT (4 * HZ) /* 4 seconds in jiffies */
/*
 * CPU id that do_nmi() treats as the watchdog core: that CPU handles the
 * NMI first and then raises an NMI on the other core.
 */
static int nmi_wdt_cpu = 1;

/* Watchdog timeout handed to nmi_wdt_set_timeout(); reported as seconds. */
static unsigned int timeout = NMI_WDT_TIMEOUT;
/* Set once init_nmi_wdt() has started the watchdog; read by the PM hooks. */
static int nmi_active;

/* WDOGA control/count values saved by send_corea_nmi() for later restore. */
static unsigned short wdoga_ctl;
static unsigned int wdoga_cnt;
/* Copy of the global corelock taken by save_corelock(). */
static struct corelock_slot saved_corelock;
/* Per-CPU "touched" flags: set by touch_nmi_watchdog(), consumed by
 * check_nmi_wdt_touched(). */
static atomic_t nmi_touched[NR_CPUS];
/* Periodic timer that runs nmi_wdt_timer() every NMI_CHECK_TIMEOUT jiffies. */
static struct timer_list ntimer;
39
/*
 * Bit numbers within nmi_event, used by do_nmi() as a handshake between
 * the two cores while both are inside the NMI handler.
 */
enum {
	COREA_ENTER_NMI = 0,	/* set by the non-watchdog core on NMI entry */
	COREA_EXIT_NMI,		/* set by the non-watchdog core after its dump */
	COREB_EXIT_NMI,		/* set by the watchdog core after its dump */

	NMI_EVENT_NR,		/* number of event bits defined above */
};
/*
 * Event bitmask, explicitly placed in the ".l2.bss" section.
 * NOTE(review): presumably so that both cores see the same storage --
 * confirm against the linker script.
 */
static unsigned long nmi_event __attribute__ ((__section__(".l2.bss")));
48
/*
 * Set bit @event (one of the *_NMI enum values) in nmi_event.
 *
 * we are in nmi, non-atomic bit ops is safe
 */
static inline void set_nmi_event(int event)
{
	__set_bit(event, &nmi_event);
}
54
55static inline void wait_nmi_event(int event)
56{
57	while (!test_bit(event, &nmi_event))
58		barrier();
59	__clear_bit(event, &nmi_event);
60}
61
/*
 * Raise an NMI on the other core through watchdog A.
 *
 * The current WDOGA control and count registers are first saved in
 * wdoga_ctl/wdoga_cnt so that restore_corea_nmi() can put them back.
 */
static inline void send_corea_nmi(void)
{
	/* remember the live WDOGA configuration */
	wdoga_ctl = bfin_read_WDOGA_CTL();
	wdoga_cnt = bfin_read_WDOGA_CNT();

	/*
	 * Disable, load a zero count, then enable with NMI as the event.
	 * NOTE(review): presumably a zero count expires immediately --
	 * confirm against the hardware reference.
	 */
	bfin_write_WDOGA_CTL(WDEN_DISABLE);
	bfin_write_WDOGA_CNT(0);
	bfin_write_WDOGA_CTL(WDEN_ENABLE | ICTL_NMI);
}
71
/*
 * Undo send_corea_nmi(): stop watchdog A, write the expired/disabled/
 * no-event control value, then restore the count and control values that
 * were saved in wdoga_cnt/wdoga_ctl.
 */
static inline void restore_corea_nmi(void)
{
	bfin_write_WDOGA_CTL(WDEN_DISABLE);
	/* same value nmi_wdt_clear() writes to WDOGB to clear the expiry */
	bfin_write_WDOGA_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);

	bfin_write_WDOGA_CNT(wdoga_cnt);
	bfin_write_WDOGA_CTL(wdoga_ctl);
}
80
/*
 * Snapshot the global corelock into saved_corelock and then zero its
 * lock field, so the lock reads as released until restore_corelock().
 */
static inline void save_corelock(void)
{
	saved_corelock = corelock;
	corelock.lock = 0;
}
86
/* Put back the corelock contents captured by save_corelock(). */
static inline void restore_corelock(void)
{
	corelock = saved_corelock;
}
91
92
/*
 * Kick watchdog B by writing to its status register.
 * NOTE(review): presumably any write to WDOGB_STAT reloads the counter --
 * confirm against the hardware reference.
 */
static inline void nmi_wdt_keepalive(void)
{
	bfin_write_WDOGB_STAT(0);
}
97
/* Disable watchdog B by writing WDEN_DISABLE to its control register. */
static inline void nmi_wdt_stop(void)
{
	bfin_write_WDOGB_CTL(WDEN_DISABLE);
}
102
/*
 * Acknowledge an expired watchdog B and leave it disabled with no event
 * selected.
 *
 * before calling this function, you must stop the WDT
 */
static inline void nmi_wdt_clear(void)
{
	/* clear TRO bit, disable event generation */
	bfin_write_WDOGB_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
}
109
/* Enable watchdog B with NMI selected as its expiry event. */
static inline void nmi_wdt_start(void)
{
	bfin_write_WDOGB_CTL(WDEN_ENABLE | ICTL_NMI);
}
114
115static inline int nmi_wdt_running(void)
116{
117	return ((bfin_read_WDOGB_CTL() & WDEN_MASK) != WDEN_DISABLE);
118}
119
120static inline int nmi_wdt_set_timeout(unsigned long t)
121{
122	u32 cnt, max_t, sclk;
123	int run;
124
125	sclk = get_sclk();
126	max_t = -1 / sclk;
127	cnt = t * sclk;
128	if (t > max_t) {
129		pr_warning("NMI: timeout value is too large\n");
130		return -EINVAL;
131	}
132
133	run = nmi_wdt_running();
134	nmi_wdt_stop();
135	bfin_write_WDOGB_CNT(cnt);
136	if (run)
137		nmi_wdt_start();
138
139	timeout = t;
140
141	return 0;
142}
143
144int check_nmi_wdt_touched(void)
145{
146	unsigned int this_cpu = smp_processor_id();
147	unsigned int cpu;
148	cpumask_t mask;
149
150	cpumask_copy(&mask, cpu_online_mask);
151	if (!atomic_read(&nmi_touched[this_cpu]))
152		return 0;
153
154	atomic_set(&nmi_touched[this_cpu], 0);
155
156	cpumask_clear_cpu(this_cpu, &mask);
157	for_each_cpu(cpu, &mask) {
158		invalidate_dcache_range((unsigned long)(&nmi_touched[cpu]),
159				(unsigned long)(&nmi_touched[cpu]));
160		if (!atomic_read(&nmi_touched[cpu]))
161			return 0;
162		atomic_set(&nmi_touched[cpu], 0);
163	}
164
165	return 1;
166}
167
168static void nmi_wdt_timer(unsigned long data)
169{
170	if (check_nmi_wdt_touched())
171		nmi_wdt_keepalive();
172
173	mod_timer(&ntimer, jiffies + NMI_CHECK_TIMEOUT);
174}
175
176static int __init init_nmi_wdt(void)
177{
178	nmi_wdt_set_timeout(timeout);
179	nmi_wdt_start();
180	nmi_active = true;
181
182	init_timer(&ntimer);
183	ntimer.function = nmi_wdt_timer;
184	ntimer.expires = jiffies + NMI_CHECK_TIMEOUT;
185	add_timer(&ntimer);
186
187	pr_info("nmi_wdt: initialized: timeout=%d sec\n", timeout);
188	return 0;
189}
190device_initcall(init_nmi_wdt);
191
192void touch_nmi_watchdog(void)
193{
194	atomic_set(&nmi_touched[smp_processor_id()], 1);
195}
196
197/* Suspend/resume support */
198#ifdef CONFIG_PM
/*
 * syscore suspend hook: stop the watchdog unconditionally.
 * Always returns 0.
 */
static int nmi_wdt_suspend(void)
{
	nmi_wdt_stop();
	return 0;
}
204
205static void nmi_wdt_resume(void)
206{
207	if (nmi_active)
208		nmi_wdt_start();
209}
210
211static struct syscore_ops nmi_syscore_ops = {
212	.resume		= nmi_wdt_resume,
213	.suspend	= nmi_wdt_suspend,
214};
215
216static int __init init_nmi_wdt_syscore(void)
217{
218	if (nmi_active)
219		register_syscore_ops(&nmi_syscore_ops);
220
221	return 0;
222}
223late_initcall(init_nmi_wdt_syscore);
224
225#endif	/* CONFIG_PM */
226
227
228asmlinkage notrace void do_nmi(struct pt_regs *fp)
229{
230	unsigned int cpu = smp_processor_id();
231	nmi_enter();
232
233	cpu_pda[cpu].__nmi_count += 1;
234
235	if (cpu == nmi_wdt_cpu) {
236		/* CoreB goes here first */
237
238		/* reload the WDOG_STAT */
239		nmi_wdt_keepalive();
240
241		/* clear nmi interrupt for CoreB */
242		nmi_wdt_stop();
243		nmi_wdt_clear();
244
245		/* trigger NMI interrupt of CoreA */
246		send_corea_nmi();
247
248		/* waiting CoreB to enter NMI */
249		wait_nmi_event(COREA_ENTER_NMI);
250
251		/* recover WDOGA's settings */
252		restore_corea_nmi();
253
254		save_corelock();
255
256		/* corelock is save/cleared, CoreA is dummping messages */
257
258		wait_nmi_event(COREA_EXIT_NMI);
259	} else {
260		/* OK, CoreA entered NMI */
261		set_nmi_event(COREA_ENTER_NMI);
262	}
263
264	pr_emerg("\nNMI Watchdog detected LOCKUP, dump for CPU %d\n", cpu);
265	dump_bfin_process(fp);
266	dump_bfin_mem(fp);
267	show_regs(fp);
268	dump_bfin_trace_buffer();
269	show_stack(current, (unsigned long *)fp);
270
271	if (cpu == nmi_wdt_cpu) {
272		pr_emerg("This fault is not recoverable, sorry!\n");
273
274		/* CoreA dump finished, restore the corelock */
275		restore_corelock();
276
277		set_nmi_event(COREB_EXIT_NMI);
278	} else {
279		/* CoreB dump finished, notice the CoreA we are done */
280		set_nmi_event(COREA_EXIT_NMI);
281
282		/* synchronize with CoreA */
283		wait_nmi_event(COREB_EXIT_NMI);
284	}
285
286	nmi_exit();
287}
288