/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

/*
 * Current thermal event state:
 */
struct _thermal_state {
        bool new_event;
        int event;
        u64 next_check;
        unsigned long count;
        unsigned long last_count;
};

struct thermal_state {
        struct _thermal_state core_throttle;
        struct _thermal_state core_power_limit;
        struct _thermal_state package_throttle;
        struct _thermal_state package_power_limit;
        struct _thermal_state core_thresh0;
        struct _thermal_state core_thresh1;
        struct _thermal_state pkg_thresh0;
        struct _thermal_state pkg_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle package threshold interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/*
 * Callback support for rate control: returns true if the registered
 * callback implements its own rate control.
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name)                         \
        static DEVICE_ATTR(_name, 0444,                                 \
                           therm_throt_device_show_##_name,             \
                           NULL)

#define define_therm_throt_device_show_func(event, name)                \
                                                                        \
static ssize_t therm_throt_device_show_##event##_##name(                \
                        struct device *dev,                             \
                        struct device_attribute *attr,                  \
                        char *buf)                                      \
{                                                                       \
        unsigned int cpu = dev->id;                                     \
        ssize_t ret;                                                    \
                                                                        \
        preempt_disable();      /* CPU hotplug */                       \
        if (cpu_online(cpu)) {                                          \
                ret = sprintf(buf, "%lu\n",                             \
                              per_cpu(thermal_state, cpu).event.name);  \
        } else                                                          \
                ret = 0;                                                \
        preempt_enable();                                               \
                                                                        \
        return ret;                                                     \
}

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);
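
/*
 * Each macro pair above expands to a read-only sysfs attribute backed by
 * the matching per-CPU _thermal_state counter. Since the attribute group
 * (named "thermal_throttle" below) is created on the CPU device, the
 * counters surface for CPU N as, e.g.:
 *
 *   /sys/devices/system/cpu/cpuN/thermal_throttle/core_throttle_count
 *
 * so a quick check from userspace is simply (illustrative only):
 *
 *   cat /sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count
 */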

static struct attribute *thermal_throttle_attrs[] = {
        &dev_attr_core_throttle_count.attr,
        NULL
};

static struct attribute_group thermal_attr_group = {
        .attrs  = thermal_throttle_attrs,
        .name   = "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL      0
#define PACKAGE_LEVEL   1

/**
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is currently active (boolean), since
 *	the thermal interrupt normally gets called both when the thermal
 *	event begins and once the event has ended.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool new_event, int event, int level)
{
        struct _thermal_state *state;
        unsigned int this_cpu = smp_processor_id();
        bool old_event;
        u64 now;
        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

        now = get_jiffies_64();
        if (level == CORE_LEVEL) {
                if (event == THERMAL_THROTTLING_EVENT)
                        state = &pstate->core_throttle;
                else if (event == POWER_LIMIT_EVENT)
                        state = &pstate->core_power_limit;
                else
                        return 0;
        } else if (level == PACKAGE_LEVEL) {
                if (event == THERMAL_THROTTLING_EVENT)
                        state = &pstate->package_throttle;
                else if (event == POWER_LIMIT_EVENT)
                        state = &pstate->package_power_limit;
                else
                        return 0;
        } else
                return 0;

        old_event = state->new_event;
        state->new_event = new_event;

        if (new_event)
                state->count++;

        if (time_before64(now, state->next_check) &&
                        state->count != state->last_count)
                return 0;

        state->next_check = now + CHECK_INTERVAL;
        state->last_count = state->count;

        /* if we just entered the thermal event */
        if (new_event) {
                if (event == THERMAL_THROTTLING_EVENT)
                        printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
                                this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package",
                                state->count);
                return 1;
        }
        if (old_event) {
                if (event == THERMAL_THROTTLING_EVENT)
                        printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
                                this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package");
                return 1;
        }

        return 0;
}
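
/*
 * Note on the rate limiting above, as the check reads: events are always
 * counted, but while new events keep arriving a syslog message is emitted
 * at most once per CHECK_INTERVAL (5 minutes), carrying the running total.
 * A begin/end transition with no intervening events since the last report
 * (count == last_count) is logged immediately, so the "temperature/speed
 * normal" message is not needlessly delayed.
 */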

static int thresh_event_valid(int level, int event)
{
        struct _thermal_state *state;
        unsigned int this_cpu = smp_processor_id();
        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
        u64 now = get_jiffies_64();

        if (level == PACKAGE_LEVEL)
                state = (event == 0) ? &pstate->pkg_thresh0 :
                                       &pstate->pkg_thresh1;
        else
                state = (event == 0) ? &pstate->core_thresh0 :
                                       &pstate->core_thresh1;

        if (time_before64(now, state->next_check))
                return 0;

        state->next_check = now + CHECK_INTERVAL;

        return 1;
}

static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
        int_pln_enable = true;

        return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
        int err;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
        if (err)
                return err;

        if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                err = sysfs_add_file_to_group(&dev->kobj,
                                &dev_attr_core_power_limit_count.attr,
                                thermal_attr_group.name);
        if (cpu_has(c, X86_FEATURE_PTS)) {
                err = sysfs_add_file_to_group(&dev->kobj,
                                &dev_attr_package_throttle_count.attr,
                                thermal_attr_group.name);
                if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                        err = sysfs_add_file_to_group(&dev->kobj,
                                &dev_attr_package_power_limit_count.attr,
                                thermal_attr_group.name);
        }

        return err;
}

static void thermal_throttle_remove_dev(struct device *dev)
{
        sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
                              unsigned long action,
                              void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct device *dev;
        int err = 0;

        dev = get_cpu_device(cpu);

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                err = thermal_throttle_add_dev(dev, cpu);
                WARN_ON(err);
                break;
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                thermal_throttle_remove_dev(dev);
                break;
        }
        return notifier_from_errno(err);
}

static struct notifier_block thermal_throttle_cpu_notifier =
{
        .notifier_call = thermal_throttle_cpu_callback,
};

static __init int thermal_throttle_init_device(void)
{
        unsigned int cpu = 0;
        int err;

        if (!atomic_read(&therm_throt_en))
                return 0;

        cpu_notifier_register_begin();

        /* connect live CPUs to sysfs */
        for_each_online_cpu(cpu) {
                err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
                WARN_ON(err);
        }

        __register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
        cpu_notifier_register_done();

        return 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */
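
/*
 * The notify_*() helpers below hand programmable-threshold crossings to
 * whichever platform driver registered the callbacks declared near the
 * top of this file (the package-level ones are used by e.g. the
 * x86_pkg_temp_thermal driver). A minimal driver-side registration
 * sketch, where my_pkg_thresh_handler() is a hypothetical handler of
 * type int (*)(__u64):
 *
 *	platform_thermal_package_notify = my_pkg_thresh_handler;
 *	platform_thermal_package_rate_control = my_rate_control;
 *
 * Returning true from the rate-control hook tells this code to skip its
 * own thresh_event_valid() rate limiting.
 */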

static void notify_package_thresholds(__u64 msr_val)
{
        bool notify_thres_0 = false;
        bool notify_thres_1 = false;

        if (!platform_thermal_package_notify)
                return;

        /* lower threshold check */
        if (msr_val & THERM_LOG_THRESHOLD0)
                notify_thres_0 = true;
        /* higher threshold check */
        if (msr_val & THERM_LOG_THRESHOLD1)
                notify_thres_1 = true;

        if (!notify_thres_0 && !notify_thres_1)
                return;

        if (platform_thermal_package_rate_control &&
                platform_thermal_package_rate_control()) {
                /* Rate control is implemented in callback */
                platform_thermal_package_notify(msr_val);
                return;
        }

        /* lower threshold reached */
        if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
                platform_thermal_package_notify(msr_val);
        /* higher threshold reached */
        if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
                platform_thermal_package_notify(msr_val);
}

static void notify_thresholds(__u64 msr_val)
{
        /* check whether the interrupt handler is defined;
         * otherwise simply return
         */
        if (!platform_thermal_notify)
                return;

        /* lower threshold reached */
        if ((msr_val & THERM_LOG_THRESHOLD0) &&
                        thresh_event_valid(CORE_LEVEL, 0))
                platform_thermal_notify(msr_val);
        /* higher threshold reached */
        if ((msr_val & THERM_LOG_THRESHOLD1) &&
                        thresh_event_valid(CORE_LEVEL, 1))
                platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
        __u64 msr_val;

        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

        /* Check for violation of core thermal thresholds */
        notify_thresholds(msr_val);

        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
                mce_log_therm_throt_event(msr_val);

        if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
                therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
                                        CORE_LEVEL);

        if (this_cpu_has(X86_FEATURE_PTS)) {
                rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
                /* check violations of package thermal thresholds */
                notify_package_thresholds(msr_val);
                therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
                                        THERMAL_THROTTLING_EVENT,
                                        PACKAGE_LEVEL);
                if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
                        therm_throt_process(msr_val &
                                        PACKAGE_THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
                                        PACKAGE_LEVEL);
        }
}

static void unexpected_thermal_interrupt(void)
{
        printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
                smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

static inline void __smp_thermal_interrupt(void)
{
        inc_irq_stat(irq_thermal_count);
        smp_thermal_vector();
}

asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
{
        entering_irq();
        __smp_thermal_interrupt();
        exiting_ack_irq();
}

asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
{
        entering_irq();
        trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
        __smp_thermal_interrupt();
        trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
        exiting_ack_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
        if (!cpu_has_apic)
                return 0;
        if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
                return 0;
        return 1;
}
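
/*
 * Wiring recap: smp_thermal_interrupt() above is the C entry point for
 * THERMAL_APIC_VECTOR. It bumps irq_thermal_count and indirects through
 * smp_thermal_vector, which keeps pointing at
 * unexpected_thermal_interrupt() until intel_init_thermal() below
 * installs intel_thermal_interrupt() on a supported CPU.
 */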

void __init mcheck_intel_therm_init(void)
{
        /*
         * This function is only called on the boot CPU. Save the initial
         * thermal LVT value on the BSP and use it later to restore each
         * AP's thermal LVT entry as programmed by the BIOS.
         */
        if (intel_thermal_supported(&boot_cpu_data))
                lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
        unsigned int cpu = smp_processor_id();
        int tm2 = 0;
        u32 l, h;

        if (!intel_thermal_supported(c))
                return;

        /*
         * First check if it's enabled already, in which case there might
         * be some SMM goo which handles it, so we can't even put a handler
         * since it might be delivered via SMI already:
         */
        rdmsr(MSR_IA32_MISC_ENABLE, l, h);

        h = lvtthmr_init;
        /*
         * The initial value of thermal LVT entries on all APs always reads
         * 0x10000 because APs are woken up by the BSP issuing an
         * INIT-SIPI-SIPI sequence to them and LVT registers are reset to 0s
         * except for the mask bits, which are set to 1s when APs receive the
         * INIT IPI. If BIOS takes over the thermal interrupt and sets its
         * interrupt delivery mode to SMI (not fixed), this restores the
         * value BIOS programmed on the AP, based on the BSP's info we saved,
         * since BIOS always sets the same value for all threads/cores.
         */
        if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
                apic_write(APIC_LVTTHMR, lvtthmr_init);


        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                if (system_state == SYSTEM_BOOTING)
                        printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
                return;
        }

        /* Check whether a vector already exists */
        if (h & APIC_VECTOR_MASK) {
                printk(KERN_DEBUG
                       "CPU%d: Thermal LVT vector (%#x) already installed\n",
                       cpu, (h & APIC_VECTOR_MASK));
                return;
        }

        /* early Pentium M models use a different method for enabling TM2 */
        if (cpu_has(c, X86_FEATURE_TM2)) {
                if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
                        rdmsr(MSR_THERM2_CTL, l, h);
                        if (l & MSR_THERM2_CTL_TM_SELECT)
                                tm2 = 1;
                } else if (l & MSR_IA32_MISC_ENABLE_TM2)
                        tm2 = 1;
        }

        /* We'll mask the thermal vector in the lapic till we're ready: */
        h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
        apic_write(APIC_LVTTHMR, h);

        rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
        if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                        (l | (THERM_INT_LOW_ENABLE
                        | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
        else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                        l | (THERM_INT_LOW_ENABLE
                        | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
        else
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

        if (cpu_has(c, X86_FEATURE_PTS)) {
                rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
                if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                                (l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE))
                                & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
                else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                                l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE
                                | PACKAGE_THERM_INT_PLN_ENABLE), h);
                else
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                              l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE), h);
        }

        smp_thermal_vector = intel_thermal_interrupt;
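
        /*
         * The handler is installed and the interrupt sources are
         * configured; the read-modify-write below turns on the thermal
         * monitor itself via MSR_IA32_MISC_ENABLE_TM1.
         */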
        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
        wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

        /* Unmask the thermal vector: */
        l = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

        printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
                    tm2 ? "TM2" : "TM1");

        /* enable thermal throttle processing */
        atomic_set(&therm_throt_en, 1);
}
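
/*
 * On a successful bring-up the code above leaves a single line in the
 * kernel log, e.g. "CPU0: Thermal monitoring enabled (TM2)" (via the
 * printk_once()), and sets therm_throt_en, which arms the sysfs counter
 * interface created by thermal_throttle_init_device().
 */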