/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check event
 * occurred. Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL whenever
 * we've encountered an error. If not, we decrement it by one. We signal the
 * end of the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

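/*
 * Per-CPU storm tracking state: the start of the current CMCI_STORM_INTERVAL
 * observation window, the number of CMCIs seen within that window, and the
 * per-CPU storm state machine (see the enum below).
 */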
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

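/* Number of CPUs currently in CMCI storm (poll) mode. */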
static atomic_t cmci_storm_on_cpus;

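/*
 * Check whether this CPU can use CMCI: it must not be disabled on the
 * command line, the CPU must be Intel with a local APIC providing the CMCI
 * LVT entry, and MCG_CAP must advertise MCG_CMCI_P. The number of MCA banks
 * is reported back through @banks.
 */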
static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * The vendor check is not strictly needed, but the rest of the
	 * initialization is vendor keyed and this makes sure none of the
	 * backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

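/*
 * Poll the banks owned by this CPU while a CMCI storm is in progress and
 * maintain the backoff counter. Returns false when no storm is active so
 * that the caller falls back to the normal poll path.
 */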
bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

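/*
 * A CPU that goes offline while it is in storm mode must not stay counted
 * in cmci_storm_on_cpus, otherwise the remaining CPUs could never switch
 * back to interrupt mode. Also reset its storm state.
 */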
void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

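/*
 * Enable or disable CMCI delivery for all banks owned by this CPU by
 * toggling MCI_CTL2_CMCI_EN, without giving up ownership of the banks.
 */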
static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

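/*
 * Adjust the poll timer interval while a CMCI storm is being handled:
 * keep polling at CMCI_STORM_INTERVAL until the backoff counter runs out,
 * then step the per-CPU state machine from ACTIVE through SUBSIDED back to
 * NONE and re-enable interrupt mode once every CPU has left the storm.
 */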
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events have been recorded
		 * and the timer interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		/* FALLTHROUGH */

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}

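/*
 * Count CMCIs within a CMCI_STORM_INTERVAL window. Once more than
 * CMCI_STORM_THRESHOLD interrupts are seen in a single window, declare a
 * storm: switch this CPU's owned banks to poll mode and kick the poll timer.
 * Returns true while a storm is in progress so the caller can skip the
 * per-interrupt handling.
 */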
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	mce_notify_irq();
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If the bios_cmci_threshold boot option was
			 * specified but the threshold is zero, we'll try to
			 * initialize it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We were able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note this down now and report it
			 * later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same set */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

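/*
 * Give up CMCI ownership of a single bank on this CPU so that it is no
 * longer handled via the CMCI interrupt path.
 */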
void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

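/*
 * Set up CMCI on this CPU: install the threshold interrupt handler, claim
 * the banks this CPU should own, program the CMCI LVT entry and recheck the
 * owned banks for any events that arrived in the meantime.
 */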
static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs while the APIC is still disabled, but that's
	 * OK because only the vector is set up. We still do another check
	 * for the banks later for CPU #0 just to make sure we don't miss
	 * any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

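/* Set up the Intel specific MCE features: thermal monitoring and CMCI. */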
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}