/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL if the
 * last poll logged an error; otherwise we decrement it by one. We signal
 * the end of the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * The vendor check is not strictly needed, but the early
	 * initialization is vendor keyed and this makes sure none
	 * of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}
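
/*
 * Illustrative example (hypothetical MSR value, not taken from any
 * particular part): if rdmsrl(MSR_IA32_MCG_CAP, cap) returned
 * cap == 0x0c09, the low byte (0x09) reports nine MCA banks and bit 10
 * (MCG_CMCI_P) is set, so cmci_supported() stores 9 in *banks and
 * returns 1. With MCG_CMCI_P clear we stay with the polling timer only.
 */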

bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		/* FALLTHROUGH */

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}

static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}
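
/*
 * Storm handling in a nutshell (summary of the code above, not a spec):
 * more than CMCI_STORM_THRESHOLD (15) CMCIs within CMCI_STORM_INTERVAL
 * (one second) on a CPU disables CMCI on its owned banks and moves it to
 * CMCI_STORM_ACTIVE, polling every CMCI_STORM_INTERVAL instead. Each poll
 * that logs an error reloads cmci_backoff_cnt with INITIAL_CHECK_INTERVAL;
 * quiet polls drain it. Once it hits zero the CPU moves to
 * CMCI_STORM_SUBSIDED, and when the last stormy CPU subsides
 * (cmci_storm_on_cpus reaches zero) interrupt mode is re-enabled via
 * cmci_toggle_interrupt_mode(true).
 */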

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	mce_notify_irq();
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
			    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}
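
/*
 * Example of the discovery handshake (illustrative, two CPUs sharing
 * bank 3): CPU0 runs cmci_discover() first, finds MCI_CTL2_CMCI_EN
 * clear, sets it and reads it back still set, so CPU0 owns bank 3 and
 * drops it from its poll set. When CPU1 later discovers the same bank
 * it sees MCI_CTL2_CMCI_EN already set, leaves the bank to CPU0 and
 * drops it from its own poll set as well. If the enable bit does not
 * stick, the bank has no CMCI support and stays in the poll set.
 */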

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same set of banks */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * we don't miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}