/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple CPUs that share a machine check bank,
 * so we need to designate a single CPU to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a CPU is offlined or brought online, since
 * some MCA banks are shared across CPUs. When a CPU is offlined, cmci_clear()
 * disables CMCI on all banks owned by the CPU and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different CPU may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined CPU.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL in case
 * we've encountered an error. If not, we decrement it by one. We signal the
 * end of the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initialization
	 * is vendor keyed and this makes sure none of the backdoors
	 * are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) != (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * The BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEATURE_CONTROL, without which touching MCG_EXT_CTL will
	 * generate a #GP fault.
	 */
	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
	if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
		   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
		return true;

	return false;
}
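/*
 * Illustrative sketch (not part of this file's logic): the mask-and-compare
 * in lmce_supported() above can be read bit by bit. Assuming only the MSR
 * bits already named in this file, an equivalent open-coded form would be:
 *
 *	u64 fc;
 *
 *	rdmsrl(MSR_IA32_FEATURE_CONTROL, fc);
 *	if (!(fc & FEATURE_CONTROL_LOCKED))
 *		return false;	// BIOS never locked the MSR down
 *	if (!(fc & FEATURE_CONTROL_LMCE))
 *		return false;	// bit 20: BIOS did not opt in to LMCE
 *	return true;
 *
 * Both bits are required: a set LMCE bit alone, without the lock bit,
 * does not count as BIOS support here.
 */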
bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the
		 * timer interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		/* FALLTHROUGH */

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}

static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}
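/*
 * Worked example of the storm arithmetic above (timings illustrative):
 * with CMCI_STORM_INTERVAL == HZ and CMCI_STORM_THRESHOLD == 15, a storm
 * is declared once more than 15 CMCIs land on this CPU within one second:
 *
 *	t=0ms    CMCI  #1: cnt = 1, cmci_time_stamp = now
 *	t=100ms  CMCI  #2: within the window, cnt = 2
 *	...
 *	t=900ms  CMCI #16: cnt = 16 > CMCI_STORM_THRESHOLD -> storm
 *
 * An event arriving after cmci_time_stamp + CMCI_STORM_INTERVAL instead
 * restarts the window with cnt = 1, so occasional corrected errors never
 * trip the threshold.
 */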
/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If the bios_cmci_threshold boot option was
			 * specified but the threshold is zero, we'll try
			 * to initialize it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We were able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note this down now and report
			 * it later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
			    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}
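/*
 * Minimal sketch of the per-bank ownership handshake cmci_discover()
 * performs above (locking, threshold handling and the poll-bank
 * bookkeeping are omitted; illustrative only):
 *
 *	u64 val;
 *
 *	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
 *	if (val & MCI_CTL2_CMCI_EN)
 *		return;			// another CPU claimed this bank
 *	val |= MCI_CTL2_CMCI_EN;
 *	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
 *	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
 *	if (val & MCI_CTL2_CMCI_EN)	// the enable bit stuck
 *		set_bit(bank, owned);	// bank supports CMCI and is ours
 *
 * The read-back serves double duty: on banks without CMCI the bit reads
 * back as 0, and on shared banks the first claimant's write keeps later
 * CPUs from taking ownership.
 */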
/*
 * Disable CMCI on this CPU for all the banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same number */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the other CPUs and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Re-enable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;

	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * we don't miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

static void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

static void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
	intel_init_lmce();
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
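/*
 * Hotplug usage sketch: the callers of the cmci_* entry points above live
 * in the core MCE code, not in this file; the sequence below illustrates
 * the intended ordering rather than a literal call site:
 *
 *	cmci_clear();		// CPU going down: release every owned bank
 *	...			// CPU is offlined
 *	cmci_rediscover();	// survivors re-run discovery and pick up
 *				// the shared banks the dead CPU held
 *
 *	cmci_reenable();	// or: the down attempt failed, so this CPU
 *				// re-runs discovery for itself
 */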