#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/mm.h>

#include <linux/io.h>
#include <linux/sched.h>
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>

#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif

#include "cpu.h"

static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
	u32 gprs[8] = { 0 };
	int err;

	WARN_ONCE((boot_cpu_data.x86 != 0xf),
		  "%s should only be used on K8!\n", __func__);

	gprs[1] = msr;
	gprs[7] = 0x9c5a203a;

	err = rdmsr_safe_regs(gprs);

	*p = gprs[0] | ((u64)gprs[2] << 32);

	return err;
}

static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
{
	u32 gprs[8] = { 0 };

	WARN_ONCE((boot_cpu_data.x86 != 0xf),
		  "%s should only be used on K8!\n", __func__);

	gprs[0] = (u32)val;
	gprs[1] = msr;
	gprs[2] = val >> 32;
	gprs[7] = 0x9c5a203a;

	return wrmsr_safe_regs(gprs);
}

/*
 * B-step AMD K6 CPUs before B 9730xxxx have hardware bugs that can cause
 * misexecution of code under Linux. Owners of such processors should
 * contact AMD for precise details and a CPU swap.
 *
 * See http://www.multimania.com/poulot/k6bug.html
 * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
 * (Publication # 21266 Issue Date: August 1998)
 *
 * The following test is erm.. interesting. AMD neglected to bump the chip
 * revision when fixing the bug, but they also tweaked some performance at
 * the same time..
 */

extern __visible void vide(void);
__asm__(".globl vide\n\t.align 4\nvide: ret");

static void init_amd_k5(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
/*
 * General Systems BIOSen alias the cpu frequency registers
 * of the Elan at 0x000df000. Unfortunately, one of the Linux
 * drivers subsequently pokes it, and changes the CPU speed.
 * Workaround : Remove the unneeded alias.
 */
#define CBAR		(0xfffc) /* Configuration Base Address (32-bit) */
#define CBAR_ENB	(0x80000000)
#define CBAR_KEY	(0X000000CB)
	if (c->x86_model == 9 || c->x86_model == 10) {
		if (inl(CBAR) & CBAR_ENB)
			outl(0 | CBAR_KEY, CBAR);
	}
#endif
}

static void init_amd_k6(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
	u32 l, h;
	int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);

	if (c->x86_model < 6) {
		/* Based on AMD doc 20734R - June 2000 */
		if (c->x86_model == 0) {
			clear_cpu_cap(c, X86_FEATURE_APIC);
			set_cpu_cap(c, X86_FEATURE_PGE);
		}
		return;
	}

	if (c->x86_model == 6 && c->x86_mask == 1) {
		const int K6_BUG_LOOP = 1000000;
		int n;
		void (*f_vide)(void);
		unsigned long d, d2;

		printk(KERN_INFO "AMD K6 stepping B detected - ");

		/*
		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
		 * calls at the same time.
		 */
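		/*
		 * Calibration sketch: time K6_BUG_LOOP indirect calls to the
		 * single-RET vide() stub above; if the TSC delta works out
		 * to more than roughly 20 cycles per call, assume a buggy
		 * pre-B9730xxxx part (hence the 20*K6_BUG_LOOP check below).
		 */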
		n = K6_BUG_LOOP;
		f_vide = vide;
		rdtscl(d);
		while (n--)
			f_vide();
		rdtscl(d2);
		d = d2-d;

		if (d > 20*K6_BUG_LOOP)
			printk(KERN_CONT
				"system stability may be impaired when more than 32 MB are used.\n");
		else
			printk(KERN_CONT "probably OK (after B9730xxxx).\n");
	}

	/* K6 with old style WHCR */
	if (c->x86_model < 8 ||
	   (c->x86_model == 8 && c->x86_mask < 8)) {
		/* We can only write allocate on the low 508Mb */
		if (mbytes > 508)
			mbytes = 508;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0x0000FFFF) == 0) {
			unsigned long flags;
			l = (1<<0)|((mbytes/4)<<1);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
				mbytes);
		}
		return;
	}

	if ((c->x86_model == 8 && c->x86_mask > 7) ||
	     c->x86_model == 9 || c->x86_model == 13) {
		/* The more serious chips .. */

		if (mbytes > 4092)
			mbytes = 4092;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0xFFFF0000) == 0) {
			unsigned long flags;
			l = ((mbytes>>2)<<22)|(1<<16);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
				mbytes);
		}

		return;
	}

	if (c->x86_model == 10) {
		/* AMD Geode LX is model 10 */
		/* placeholder for any needed mods */
		return;
	}
#endif
}

static void init_amd_k7(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
	u32 l, h;

	/*
	 * Bit 15 of the Athlon specific MSR 15 needs to be 0
	 * to enable SSE on Palomino/Morgan/Barton CPUs.
	 * If the BIOS didn't enable it already, enable it here.
	 */
	if (c->x86_model >= 6 && c->x86_model <= 10) {
		if (!cpu_has(c, X86_FEATURE_XMM)) {
			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
			msr_clear_bit(MSR_K7_HWCR, 15);
			set_cpu_cap(c, X86_FEATURE_XMM);
		}
	}

	/*
	 * It's been determined by AMD that Athlons since model 8 stepping 1
	 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx,
	 * as per AMD technical note 27212 0.2.
	 */
	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
		rdmsr(MSR_K7_CLK_CTL, l, h);
		if ((l & 0xfff00000) != 0x20000000) {
			printk(KERN_INFO
			    "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
				l, ((l & 0x000fffff)|0x20000000));
			wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
		}
	}

	set_cpu_cap(c, X86_FEATURE_K7);

	/* calling is from identify_secondary_cpu() ? */
	if (!c->cpu_index)
		return;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	/* Athlon 660/661 is valid. */
	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
	    (c->x86_mask == 1)))
		return;

	/* Duron 670 is valid */
	if ((c->x86_model == 7) && (c->x86_mask == 0))
		return;

	/*
	 * Athlon 662, Duron 671, and Athlon >model 7 have the capability
	 * bit. It's worth noting that the A5 stepping (662) of some
	 * Athlon XPs have the MP bit set.
	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
	 * more.
	 */
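	/*
	 * On these models/steppings the MP capability bit is taken at face
	 * value; parts without it (and anything not matched above) fall
	 * through to the SMP-unsuitable warning and taint below.
	 */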
	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
	     (c->x86_model > 7))
		if (cpu_has(c, X86_FEATURE_MP))
			return;

	/* If we get here, not a certified SMP capable AMD system. */

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	WARN_ONCE(1, "WARNING: This combination of AMD"
		" processors is not suitable for SMP.\n");
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
#endif
}

#ifdef CONFIG_NUMA
/*
 * To work around a broken NUMA config. Read the comment in
 * srat_detect_node().
 */
static int nearby_node(int apicid)
{
	int i, node;

	for (i = apicid - 1; i >= 0; i--) {
		node = __apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
		node = __apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	return first_node(node_online_map); /* Shouldn't happen */
}
#endif

/*
 * Fixup core topology information for
 *  (1) AMD multi-node processors
 *      Assumption: Number of cores in each internal node is the same.
 *  (2) AMD processors supporting compute units
 */
#ifdef CONFIG_X86_HT
static void amd_get_topology(struct cpuinfo_x86 *c)
{
	u32 nodes, cores_per_cu = 1;
	u8 node_id;
	int cpu = smp_processor_id();

	/* get information required for multi-node processors */
	if (cpu_has_topoext) {
		u32 eax, ebx, ecx, edx;

		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
		nodes = ((ecx >> 8) & 7) + 1;
		node_id = ecx & 7;

		/* get compute unit information */
		smp_num_siblings = ((ebx >> 8) & 3) + 1;
		c->compute_unit_id = ebx & 0xff;
		cores_per_cu += ((ebx >> 8) & 3);
	} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
		u64 value;

		rdmsrl(MSR_FAM10H_NODE_ID, value);
		nodes = ((value >> 3) & 7) + 1;
		node_id = value & 7;
	} else
		return;

	/* fixup multi-node processor information */
	if (nodes > 1) {
		u32 cores_per_node;
		u32 cus_per_node;

		set_cpu_cap(c, X86_FEATURE_AMD_DCM);
		cores_per_node = c->x86_max_cores / nodes;
		cus_per_node = cores_per_node / cores_per_cu;

		/* store NodeID, use llc_shared_map to store sibling info */
		per_cpu(cpu_llc_id, cpu) = node_id;

		/* core id has to be in the [0 .. cores_per_node - 1] range */
		c->cpu_core_id %= cores_per_node;
		c->compute_unit_id %= cus_per_node;
	}
}
#endif

/*
 * On an AMD dual-core setup the lower bits of the APIC id distinguish the
 * cores. Assumes number of cores is a power of two.
 */
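/*
 * Worked example (illustrative): with x86_coreid_bits == 2, an initial
 * APIC ID of 6 (0b110) gives cpu_core_id = 6 & 3 = 2 and
 * phys_proc_id = 6 >> 2 = 1.
 */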
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits;
	int cpu = smp_processor_id();

	bits = c->x86_coreid_bits;
	/* Low order bits define the core id (index of core in socket) */
	c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
	/* Convert the initial APIC ID into the socket ID */
	c->phys_proc_id = c->initial_apicid >> bits;
	/* use socket ID also for last level cache */
	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
	amd_get_topology(c);
#endif
}

u16 amd_get_nb_id(int cpu)
{
	u16 id = 0;
#ifdef CONFIG_SMP
	id = per_cpu(cpu_llc_id, cpu);
#endif
	return id;
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);

static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	int node;
	unsigned apicid = c->apicid;

	node = numa_cpu_node(cpu);
	if (node == NUMA_NO_NODE)
		node = per_cpu(cpu_llc_id, cpu);

	/*
	 * On a multi-fabric platform (e.g. Numascale NumaChip) a
	 * platform-specific handler needs to be called to fix up some
	 * IDs of the CPU.
	 */
	if (x86_cpuinit.fixup_cpu_id)
		x86_cpuinit.fixup_cpu_id(c, node);

	if (!node_online(node)) {
		/*
		 * Two possibilities here:
		 *
		 * - The CPU is missing memory and no node was created. In
		 *   that case try picking one from a nearby CPU.
		 *
		 * - The APIC IDs differ from the HyperTransport node IDs
		 *   which the K8 northbridge parsing fills in. Assume
		 *   they are all increased by a constant offset, but in
		 *   the same order as the HT nodeids. If that doesn't
		 *   result in a usable node fall back to the path for the
		 *   previous case.
		 *
		 * This workaround operates directly on the mapping between
		 * APIC ID and NUMA node, assuming a certain relationship
		 * between APIC ID, HT node ID and NUMA topology. As going
		 * through CPU mapping may alter the outcome, directly
		 * access __apicid_to_node[].
		 */
		int ht_nodeid = c->initial_apicid;

		if (ht_nodeid >= 0 &&
		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
			node = __apicid_to_node[ht_nodeid];
		/* Pick a nearby node */
		if (!node_online(node))
			node = nearby_node(apicid);
	}
	numa_set_node(cpu, node);
#endif
}

static void early_init_amd_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits, ecx;

	/* Multi core CPU? */
	if (c->extended_cpuid_level < 0x80000008)
		return;

	ecx = cpuid_ecx(0x80000008);

	c->x86_max_cores = (ecx & 0xff) + 1;

	/* CPU telling us the core id bits shift? */
	bits = (ecx >> 12) & 0xF;

	/* Otherwise recompute */
	if (bits == 0) {
		while ((1 << bits) < c->x86_max_cores)
			bits++;
	}

	c->x86_coreid_bits = bits;
#endif
}

static void bsp_init_amd(struct cpuinfo_x86 *c)
{

#ifdef CONFIG_X86_64
	if (c->x86 >= 0xf) {
		unsigned long long tseg;

		/*
		 * Split up direct mapping around the TSEG SMM area.
		 * Don't do it for gbpages because there seems very little
		 * benefit in doing so.
		 */
		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
			unsigned long pfn = tseg >> PAGE_SHIFT;

			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
			if (pfn_range_is_mapped(pfn, pfn + 1))
				set_memory_4k((unsigned long)__va(tseg), 1);
		}
	}
#endif

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {

		if (c->x86 > 0x10 ||
		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
			u64 val;

			rdmsrl(MSR_K7_HWCR, val);
			if (!(val & BIT(24)))
				printk(KERN_WARNING FW_BUG "TSC doesn't count "
					"with P0 frequency!\n");
		}
	}

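	/*
	 * Fam 15h mmap alignment sketch: CPUID 8000_0005 EDX reports the
	 * L1I size in KB (bits 31:24) and its associativity (bits 23:16),
	 * so 'upperbit' below is the cache way size in bytes. Randomizing
	 * the address bit slice [12:upper_bit) once per boot avoids having
	 * every process alias in the same L1I ways of the icache shared by
	 * the two cores of a compute unit.
	 */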
	if (c->x86 == 0x15) {
		unsigned long upperbit;
		u32 cpuid, assoc;

		cpuid = cpuid_edx(0x80000005);
		assoc = cpuid >> 16 & 0xff;
		upperbit = ((cpuid >> 24) << 10) / assoc;

		va_align.mask = (upperbit - 1) & PAGE_MASK;
		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;

		/* A random value per boot for bit slice [12:upper_bit) */
		va_align.bits = get_random_int() & va_align.mask;
	}
}

static void early_init_amd(struct cpuinfo_x86 *c)
{
	early_init_amd_mc(c);

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
		if (!check_tsc_unstable())
			set_sched_clock_stable();
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
	/* Set MTRR capability flag if appropriate */
	if (c->x86 == 5)
		if (c->x86_model == 13 || c->x86_model == 9 ||
		    (c->x86_model == 8 && c->x86_mask >= 8))
			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
	/* check CPU config space for extended APIC ID */
	if (cpu_has_apic && c->x86 >= 0xf) {
		unsigned int val;
		val = read_pci_config(0, 24, 0, 0x68);
		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
	}
#endif

	/*
	 * This is only needed to tell the kernel whether to use VMCALL
	 * or VMMCALL. VMMCALL is never executed except under virt, so
	 * we can set it unconditionally.
	 */
	set_cpu_cap(c, X86_FEATURE_VMMCALL);

	/* F16h erratum 793, CVE-2013-6885 */
	if (c->x86 == 0x16 && c->x86_model <= 0xf)
		msr_set_bit(MSR_AMD64_LS_CFG, 15);
}

static const int amd_erratum_383[];
static const int amd_erratum_400[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);

static void init_amd_k8(struct cpuinfo_x86 *c)
{
	u32 level;
	u64 value;

	/* On C+ stepping K8 rep microcode works well for copy/memset */
	level = cpuid_eax(1);
	if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);

	/*
	 * Some BIOSes incorrectly force this feature, but only K8 revision D
	 * (model = 0x14) and later actually support it.
	 * (AMD Erratum #110, docId: 25759).
	 */
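	/*
	 * The BIOS-set override is assumed to live in bit 32 of MSR
	 * 0xc001100d, which is only reachable through the passworded
	 * rdmsrl_amd_safe()/wrmsrl_amd_safe() helpers above; clear it so
	 * the bogus LAHF_LM capability does not come back.
	 */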
	if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
		clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
		if (!rdmsrl_amd_safe(0xc001100d, &value)) {
			value &= ~BIT_64(32);
			wrmsrl_amd_safe(0xc001100d, value);
		}
	}

	if (!c->x86_model_id[0])
		strcpy(c->x86_model_id, "Hammer");

#ifdef CONFIG_SMP
	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * (bit 6 of MSR C001_0015).
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	msr_set_bit(MSR_K7_HWCR, 6);
#endif
}

static void init_amd_gh(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
	/* do this for boot cpu */
	if (c == &boot_cpu_data)
		check_enable_amd_mmconf_dmi();

	fam10h_check_enable_mmcfg();
#endif

	/*
	 * Disable GART TLB Walk Errors on Fam10h. We do this here because this
	 * is always needed when GART is enabled, even in a kernel which has no
	 * MCE support built in. BIOS should disable GartTlbWlk Errors already.
	 * If it doesn't, we do it here as suggested by the BKDG.
	 *
	 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
	 */
	msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);

	/*
	 * On family 10h, the BIOS may not have properly enabled WC+ support,
	 * causing it to be converted to CD memtype. This may result in
	 * performance degradation for certain nested-paging guests. Prevent
	 * this conversion by clearing bit 24 in MSR_AMD64_BU_CFG2.
	 *
	 * NOTE: we want to use the _safe accessors so as not to #GP kvm
	 * guests on older kvm hosts.
	 */
	msr_clear_bit(MSR_AMD64_BU_CFG2, 24);

	if (cpu_has_amd_erratum(c, amd_erratum_383))
		set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}

static void init_amd_bd(struct cpuinfo_x86 *c)
{
	u64 value;

	/* re-enable TopologyExtensions if switched off by BIOS */
	if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {

		if (msr_set_bit(0xc0011005, 54) > 0) {
			rdmsrl(0xc0011005, value);
			if (value & BIT_64(54)) {
				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
				pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
			}
		}
	}

	/*
	 * The way access filter has a performance penalty on some workloads.
	 * Disable it on the affected CPUs.
	 */
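	/*
	 * Setting bits [4:1] of MSR 0xc0011021 (the 0x1E below) is what
	 * turns the filter off; use the _safe accessors so a missing MSR
	 * cannot fault the boot.
	 */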
	if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
		if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
			value |= 0x1E;
			wrmsrl_safe(0xc0011021, value);
		}
	}
}

static void init_amd(struct cpuinfo_x86 *c)
{
	u32 dummy;

	early_init_amd(c);

	/*
	 * Bit 31 in normal CPUID is used for nonstandard 3DNow ID;
	 * 3DNow is IDed by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

	if (c->x86 >= 0x10)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);

	/* get apicid instead of initial apic id from cpuid */
	c->apicid = hard_smp_processor_id();

	/* K6s report MCEs but don't actually have all the MSRs */
	if (c->x86 < 6)
		clear_cpu_cap(c, X86_FEATURE_MCE);

	switch (c->x86) {
	case 4:    init_amd_k5(c); break;
	case 5:    init_amd_k6(c); break;
	case 6:    init_amd_k7(c); break;
	case 0xf:  init_amd_k8(c); break;
	case 0x10: init_amd_gh(c); break;
	case 0x15: init_amd_bd(c); break;
	}

	/* Enable workaround for FXSAVE leak */
	if (c->x86 >= 6)
		set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);

	cpu_detect_cache_sizes(c);

	/* Multi core CPU? */
	if (c->extended_cpuid_level >= 0x80000008) {
		amd_detect_cmp(c);
		srat_detect_node(c);
	}

#ifdef CONFIG_X86_32
	detect_ht(c);
#endif

	init_amd_cacheinfo(c);

	if (c->x86 >= 0xf)
		set_cpu_cap(c, X86_FEATURE_K8);

	if (cpu_has_xmm2) {
		/* MFENCE stops RDTSC speculation */
		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
	}

	/*
	 * Family 0x12 and above processors have the APIC timer
	 * running in deep C-states.
	 */
	if (c->x86 > 0x11)
		set_cpu_cap(c, X86_FEATURE_ARAT);

	if (cpu_has_amd_erratum(c, amd_erratum_400))
		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);

	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);

	/* 3DNow or LM implies PREFETCHW */
	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);

	/* AMD CPUs don't reset SS attributes on SYSRET */
	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
}

#ifdef CONFIG_X86_32
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/* AMD errata T13 (order #21922) */
	if (c->x86 == 6) {
		/* Duron Rev A0 */
		if (c->x86_model == 3 && c->x86_mask == 0)
			size = 64;
		/* Tbird rev A1/A2 */
		if (c->x86_model == 4 &&
		    (c->x86_mask == 0 || c->x86_mask == 1))
			size = 256;
	}
	return size;
}
#endif

static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
{
	u32 ebx, eax, ecx, edx;
	u16 mask = 0xfff;

	if (c->x86 < 0xf)
		return;

	if (c->extended_cpuid_level < 0x80000006)
		return;

	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
	tlb_lli_4k[ENTRIES] = ebx & mask;

	/*
	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
	 * characteristics from the CPUID function 0x80000005 instead.
	 */
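	/*
	 * CPUID 8000_0006 layout as used here: EAX carries the L2 TLB
	 * 2M/4M entry counts (instruction in bits [11:0], data in bits
	 * [27:16]) and EBX the same split for 4K pages; leaf 8000_0005,
	 * used for the K8 fallback below, packs the L1 numbers into
	 * 8-bit fields instead.
	 */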
	if (c->x86 == 0xf) {
		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
		mask = 0xff;
	}

	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
	if (!((eax >> 16) & mask))
		tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
	else
		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;

	/* a 4M entry uses two 2M entries */
	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;

	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
	if (!(eax & mask)) {
		/* Erratum 658 */
		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
			tlb_lli_2m[ENTRIES] = 1024;
		} else {
			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
			tlb_lli_2m[ENTRIES] = eax & 0xff;
		}
	} else
		tlb_lli_2m[ENTRIES] = eax & mask;

	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
}

static const struct cpu_dev amd_cpu_dev = {
	.c_vendor	= "AMD",
	.c_ident	= { "AuthenticAMD" },
#ifdef CONFIG_X86_32
	.legacy_models = {
		{ .family = 4, .model_names =
		  {
			  [3] = "486 DX/2",
			  [7] = "486 DX/2-WB",
			  [8] = "486 DX/4",
			  [9] = "486 DX/4-WB",
			  [14] = "Am5x86-WT",
			  [15] = "Am5x86-WB"
		  }
		},
	},
	.legacy_cache_size = amd_size_cache,
#endif
	.c_early_init	= early_init_amd,
	.c_detect_tlb	= cpu_detect_tlb_amd,
	.c_bsp_init	= bsp_init_amd,
	.c_init		= init_amd,
	.c_x86_vendor	= X86_VENDOR_AMD,
};

cpu_dev_register(amd_cpu_dev);

/*
 * AMD errata checking
 *
 * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
 * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
 * have an OSVW id assigned, which it takes as first argument. Both take a
 * variable number of family-specific model-stepping ranges created by
 * AMD_MODEL_RANGE().
 *
 * Example:
 *
 * const int amd_erratum_319[] =
 *	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
 *			   AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
 *			   AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
 */

#define AMD_LEGACY_ERRATUM(...)		{ -1, __VA_ARGS__, 0 }
#define AMD_OSVW_ERRATUM(osvw_id, ...)	{ osvw_id, __VA_ARGS__, 0 }
#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
#define AMD_MODEL_RANGE_FAMILY(range)	(((range) >> 24) & 0xff)
#define AMD_MODEL_RANGE_START(range)	(((range) >> 12) & 0xfff)
#define AMD_MODEL_RANGE_END(range)	((range) & 0xfff)
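/*
 * Worked example: AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf) packs to
 * 0x0f412fff -- family 0xf in bits [31:24], start (model 0x41, stepping
 * 0x2) in bits [23:12] and end (model 0xff, stepping 0xf) in bits [11:0];
 * this is the first range of amd_erratum_400 below.
 */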
static const int amd_erratum_400[] =
	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));

static const int amd_erratum_383[] =
	AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));


static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
	int osvw_id = *erratum++;
	u32 range;
	u32 ms;

	if (osvw_id >= 0 && osvw_id < 65536 &&
	    cpu_has(cpu, X86_FEATURE_OSVW)) {
		u64 osvw_len;

		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
		if (osvw_id < osvw_len) {
			u64 osvw_bits;

			rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
			       osvw_bits);
			return osvw_bits & (1ULL << (osvw_id & 0x3f));
		}
	}

	/* OSVW unavailable or ID unknown, match family-model-stepping range */
	ms = (cpu->x86_model << 4) | cpu->x86_mask;
	while ((range = *erratum++))
		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
		    (ms >= AMD_MODEL_RANGE_START(range)) &&
		    (ms <= AMD_MODEL_RANGE_END(range)))
			return true;

	return false;
}

void set_dr_addr_mask(unsigned long mask, int dr)
{
	if (!cpu_has_bpext)
		return;

	switch (dr) {
	case 0:
		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
		break;
	case 1:
	case 2:
	case 3:
		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
		break;
	default:
		break;
	}
}