arch/x86/include/asm/processor.h

DEFINITIONS

This source file includes the following definitions:
  1. l1tf_pfn_limit
  2. have_cpuid_p
  3. native_cpuid
  4. native_cpuid_reg
  5. native_read_cr3_pa
  6. load_cr3
  7. DECLARE_PER_CPU_FIRST
  8. arch_thread_struct_whitelist
  9. native_set_iopl_mask
  10. native_load_sp0
  11. native_swapgs
  12. current_top_of_stack
  13. on_thread_stack
  14. load_sp0
  15. cpuid
  16. cpuid_count
  17. cpuid_eax
  18. cpuid_ebx
  19. cpuid_ecx
  20. cpuid_edx
  21. rep_nop
  22. cpu_relax
  23. sync_core
  24. get_debugctlmsr
  25. update_debugctlmsr
  26. prefetch
  27. prefetchw
  28. spin_lock_prefetch
  29. mpx_enable_management
  30. mpx_disable_management
  31. amd_get_nb_id
  32. amd_get_nodes_per_socket
  33. hypervisor_cpuid_base

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 #ifndef _ASM_X86_PROCESSOR_H
   3 #define _ASM_X86_PROCESSOR_H
   4 
   5 #include <asm/processor-flags.h>
   6 
   7 /* Forward declaration, a strange C thing */
   8 struct task_struct;
   9 struct mm_struct;
  10 struct vm86;
  11 
  12 #include <asm/math_emu.h>
  13 #include <asm/segment.h>
  14 #include <asm/types.h>
  15 #include <uapi/asm/sigcontext.h>
  16 #include <asm/current.h>
  17 #include <asm/cpufeatures.h>
  18 #include <asm/page.h>
  19 #include <asm/pgtable_types.h>
  20 #include <asm/percpu.h>
  21 #include <asm/msr.h>
  22 #include <asm/desc_defs.h>
  23 #include <asm/nops.h>
  24 #include <asm/special_insns.h>
  25 #include <asm/fpu/types.h>
  26 #include <asm/unwind_hints.h>
  27 
  28 #include <linux/personality.h>
  29 #include <linux/cache.h>
  30 #include <linux/threads.h>
  31 #include <linux/math64.h>
  32 #include <linux/err.h>
  33 #include <linux/irqflags.h>
  34 #include <linux/mem_encrypt.h>
  35 
  36 /*
  37  * We handle most unaligned accesses in hardware.  On the other hand
  38  * unaligned DMA can be quite expensive on some Nehalem processors.
  39  *
  40  * Based on this we disable the IP header alignment in network drivers.
  41  */
  42 #define NET_IP_ALIGN    0
  43 
  44 #define HBP_NUM 4
  45 
  46 /*
  47  * These alignment constraints are for performance in the vSMP case,
  48  * but in the task_struct case we must also meet hardware imposed
  49  * alignment requirements of the FPU state:
  50  */
  51 #ifdef CONFIG_X86_VSMP
  52 # define ARCH_MIN_TASKALIGN             (1 << INTERNODE_CACHE_SHIFT)
  53 # define ARCH_MIN_MMSTRUCT_ALIGN        (1 << INTERNODE_CACHE_SHIFT)
  54 #else
  55 # define ARCH_MIN_TASKALIGN             __alignof__(union fpregs_state)
  56 # define ARCH_MIN_MMSTRUCT_ALIGN        0
  57 #endif
  58 
  59 enum tlb_infos {
  60         ENTRIES,
  61         NR_INFO
  62 };
  63 
  64 extern u16 __read_mostly tlb_lli_4k[NR_INFO];
  65 extern u16 __read_mostly tlb_lli_2m[NR_INFO];
  66 extern u16 __read_mostly tlb_lli_4m[NR_INFO];
  67 extern u16 __read_mostly tlb_lld_4k[NR_INFO];
  68 extern u16 __read_mostly tlb_lld_2m[NR_INFO];
  69 extern u16 __read_mostly tlb_lld_4m[NR_INFO];
  70 extern u16 __read_mostly tlb_lld_1g[NR_INFO];
  71 
  72 /*
  73  *  CPU type and hardware bug flags. Kept separately for each CPU.
  74  *  Members of this structure are referenced in head_32.S, so think twice
  75  *  before touching them. [mj]
  76  */
  77 
  78 struct cpuinfo_x86 {
  79         __u8                    x86;            /* CPU family */
  80         __u8                    x86_vendor;     /* CPU vendor */
  81         __u8                    x86_model;
  82         __u8                    x86_stepping;
  83 #ifdef CONFIG_X86_64
   84         /* Number of 4K pages in DTLB/ITLB combined: */
  85         int                     x86_tlbsize;
  86 #endif
  87         __u8                    x86_virt_bits;
  88         __u8                    x86_phys_bits;
  89         /* CPUID returned core id bits: */
  90         __u8                    x86_coreid_bits;
  91         __u8                    cu_id;
  92         /* Max extended CPUID function supported: */
  93         __u32                   extended_cpuid_level;
  94         /* Maximum supported CPUID level, -1=no CPUID: */
  95         int                     cpuid_level;
  96         __u32                   x86_capability[NCAPINTS + NBUGINTS];
  97         char                    x86_vendor_id[16];
  98         char                    x86_model_id[64];
   99         /* in KB - valid for CPUs which support this call: */
 100         unsigned int            x86_cache_size;
 101         int                     x86_cache_alignment;    /* In bytes */
 102         /* Cache QoS architectural values: */
 103         int                     x86_cache_max_rmid;     /* max index */
 104         int                     x86_cache_occ_scale;    /* scale to bytes */
 105         int                     x86_power;
 106         unsigned long           loops_per_jiffy;
 107         /* cpuid returned max cores value: */
 108         u16                     x86_max_cores;
 109         u16                     apicid;
 110         u16                     initial_apicid;
 111         u16                     x86_clflush_size;
 112         /* number of cores as seen by the OS: */
 113         u16                     booted_cores;
 114         /* Physical processor id: */
 115         u16                     phys_proc_id;
 116         /* Logical processor id: */
 117         u16                     logical_proc_id;
 118         /* Core id: */
 119         u16                     cpu_core_id;
 120         u16                     cpu_die_id;
 121         u16                     logical_die_id;
 122         /* Index into per_cpu list: */
 123         u16                     cpu_index;
 124         u32                     microcode;
 125         /* Address space bits used by the cache internally */
 126         u8                      x86_cache_bits;
 127         unsigned                initialized : 1;
 128 } __randomize_layout;
 129 
 130 struct cpuid_regs {
 131         u32 eax, ebx, ecx, edx;
 132 };
 133 
 134 enum cpuid_regs_idx {
 135         CPUID_EAX = 0,
 136         CPUID_EBX,
 137         CPUID_ECX,
 138         CPUID_EDX,
 139 };
 140 
 141 #define X86_VENDOR_INTEL        0
 142 #define X86_VENDOR_CYRIX        1
 143 #define X86_VENDOR_AMD          2
 144 #define X86_VENDOR_UMC          3
 145 #define X86_VENDOR_CENTAUR      5
 146 #define X86_VENDOR_TRANSMETA    7
 147 #define X86_VENDOR_NSC          8
 148 #define X86_VENDOR_HYGON        9
 149 #define X86_VENDOR_ZHAOXIN      10
 150 #define X86_VENDOR_NUM          11
 151 
 152 #define X86_VENDOR_UNKNOWN      0xff
 153 
 154 /*
 155  * capabilities of CPUs
 156  */
 157 extern struct cpuinfo_x86       boot_cpu_data;
 158 extern struct cpuinfo_x86       new_cpu_data;
 159 
 160 extern struct x86_hw_tss        doublefault_tss;
 161 extern __u32                    cpu_caps_cleared[NCAPINTS + NBUGINTS];
 162 extern __u32                    cpu_caps_set[NCAPINTS + NBUGINTS];
 163 
 164 #ifdef CONFIG_SMP
 165 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 166 #define cpu_data(cpu)           per_cpu(cpu_info, cpu)
 167 #else
 168 #define cpu_info                boot_cpu_data
 169 #define cpu_data(cpu)           boot_cpu_data
 170 #endif
 171 
 172 extern const struct seq_operations cpuinfo_op;
 173 
 174 #define cache_line_size()       (boot_cpu_data.x86_cache_alignment)
 175 
 176 extern void cpu_detect(struct cpuinfo_x86 *c);
 177 
 178 static inline unsigned long long l1tf_pfn_limit(void)
 179 {
 180         return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
 181 }
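      /*
       * Worked example (numbers are illustrative): with x86_cache_bits == 46
       * and PAGE_SHIFT == 12 this is BIT_ULL(33), i.e. 2^33 page frames,
       * which corresponds to physical addresses below 2^45.  The "- 1" keeps
       * the limit at half of the cache-addressable range, so the inverted
       * (non-present) PTEs used by the L1TF mitigation never alias real
       * memory.
       */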
 182 
 183 extern void early_cpu_init(void);
 184 extern void identify_boot_cpu(void);
 185 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 186 extern void print_cpu_info(struct cpuinfo_x86 *);
 187 void print_cpu_msr(struct cpuinfo_x86 *);
 188 
 189 #ifdef CONFIG_X86_32
 190 extern int have_cpuid_p(void);
 191 #else
 192 static inline int have_cpuid_p(void)
 193 {
 194         return 1;
 195 }
 196 #endif
 197 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 198                                 unsigned int *ecx, unsigned int *edx)
 199 {
 200         /* ecx is often an input as well as an output. */
 201         asm volatile("cpuid"
 202             : "=a" (*eax),
 203               "=b" (*ebx),
 204               "=c" (*ecx),
 205               "=d" (*edx)
 206             : "0" (*eax), "2" (*ecx)
 207             : "memory");
 208 }
 209 
 210 #define native_cpuid_reg(reg)                                   \
 211 static inline unsigned int native_cpuid_##reg(unsigned int op)  \
 212 {                                                               \
 213         unsigned int eax = op, ebx, ecx = 0, edx;               \
 214                                                                 \
 215         native_cpuid(&eax, &ebx, &ecx, &edx);                   \
 216                                                                 \
 217         return reg;                                             \
 218 }
 219 
 220 /*
 221  * Native CPUID functions returning a single datum.
 222  */
 223 native_cpuid_reg(eax)
 224 native_cpuid_reg(ebx)
 225 native_cpuid_reg(ecx)
 226 native_cpuid_reg(edx)
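      /*
       * For illustration, native_cpuid_reg(eax) above expands to the
       * equivalent of:
       *
       *      static inline unsigned int native_cpuid_eax(unsigned int op)
       *      {
       *              unsigned int eax = op, ebx, ecx = 0, edx;
       *
       *              native_cpuid(&eax, &ebx, &ecx, &edx);
       *
       *              return eax;
       *      }
       *
       * so e.g. native_cpuid_eax(0x80000000) returns the highest supported
       * extended CPUID leaf.
       */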
 227 
 228 /*
 229  * Friendlier CR3 helpers.
 230  */
 231 static inline unsigned long read_cr3_pa(void)
 232 {
 233         return __read_cr3() & CR3_ADDR_MASK;
 234 }
 235 
 236 static inline unsigned long native_read_cr3_pa(void)
 237 {
 238         return __native_read_cr3() & CR3_ADDR_MASK;
 239 }
 240 
 241 static inline void load_cr3(pgd_t *pgdir)
 242 {
 243         write_cr3(__sme_pa(pgdir));
 244 }
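      /*
       * load_cr3() takes the kernel-virtual address of a PGD and writes its
       * physical address to CR3; __sme_pa() additionally sets the SME
       * encryption bit when memory encryption is active.  An illustrative
       * caller switching to the initial kernel page tables would simply do:
       *
       *      load_cr3(swapper_pg_dir);
       */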
 245 
 246 /*
 247  * Note that while the legacy 'TSS' name comes from 'Task State Segment',
 248  * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
 249  * unrelated to the task-switch mechanism:
 250  */
 251 #ifdef CONFIG_X86_32
 252 /* This is the TSS defined by the hardware. */
 253 struct x86_hw_tss {
 254         unsigned short          back_link, __blh;
 255         unsigned long           sp0;
 256         unsigned short          ss0, __ss0h;
 257         unsigned long           sp1;
 258 
 259         /*
 260          * We don't use ring 1, so ss1 is a convenient scratch space in
 261          * the same cacheline as sp0.  We use ss1 to cache the value in
 262          * MSR_IA32_SYSENTER_CS.  When we context switch
  263          * MSR_IA32_SYSENTER_CS, we first check whether the new value
  264          * being written matches ss1 and, if it does not, we wrmsr the
  265          * new value and update ss1.
 266          *
 267          * The only reason we context switch MSR_IA32_SYSENTER_CS is
 268          * that we set it to zero in vm86 tasks to avoid corrupting the
 269          * stack if we were to go through the sysenter path from vm86
 270          * mode.
 271          */
 272         unsigned short          ss1;    /* MSR_IA32_SYSENTER_CS */
 273 
 274         unsigned short          __ss1h;
 275         unsigned long           sp2;
 276         unsigned short          ss2, __ss2h;
 277         unsigned long           __cr3;
 278         unsigned long           ip;
 279         unsigned long           flags;
 280         unsigned long           ax;
 281         unsigned long           cx;
 282         unsigned long           dx;
 283         unsigned long           bx;
 284         unsigned long           sp;
 285         unsigned long           bp;
 286         unsigned long           si;
 287         unsigned long           di;
 288         unsigned short          es, __esh;
 289         unsigned short          cs, __csh;
 290         unsigned short          ss, __ssh;
 291         unsigned short          ds, __dsh;
 292         unsigned short          fs, __fsh;
 293         unsigned short          gs, __gsh;
 294         unsigned short          ldt, __ldth;
 295         unsigned short          trace;
 296         unsigned short          io_bitmap_base;
 297 
 298 } __attribute__((packed));
 299 #else
 300 struct x86_hw_tss {
 301         u32                     reserved1;
 302         u64                     sp0;
 303 
 304         /*
 305          * We store cpu_current_top_of_stack in sp1 so it's always accessible.
 306          * Linux does not use ring 1, so sp1 is not otherwise needed.
 307          */
 308         u64                     sp1;
 309 
 310         /*
 311          * Since Linux does not use ring 2, the 'sp2' slot is unused by
 312          * hardware.  entry_SYSCALL_64 uses it as scratch space to stash
 313          * the user RSP value.
 314          */
 315         u64                     sp2;
 316 
 317         u64                     reserved2;
 318         u64                     ist[7];
 319         u32                     reserved3;
 320         u32                     reserved4;
 321         u16                     reserved5;
 322         u16                     io_bitmap_base;
 323 
 324 } __attribute__((packed));
 325 #endif
 326 
 327 /*
 328  * IO-bitmap sizes:
 329  */
 330 #define IO_BITMAP_BITS                  65536
 331 #define IO_BITMAP_BYTES                 (IO_BITMAP_BITS/8)
 332 #define IO_BITMAP_LONGS                 (IO_BITMAP_BYTES/sizeof(long))
 333 #define IO_BITMAP_OFFSET                (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 334 #define INVALID_IO_BITMAP_OFFSET        0x8000
 335 
 336 struct entry_stack {
 337         unsigned long           words[64];
 338 };
 339 
 340 struct entry_stack_page {
 341         struct entry_stack stack;
 342 } __aligned(PAGE_SIZE);
 343 
 344 struct tss_struct {
 345         /*
 346          * The fixed hardware portion.  This must not cross a page boundary
 347          * at risk of violating the SDM's advice and potentially triggering
 348          * errata.
 349          */
 350         struct x86_hw_tss       x86_tss;
 351 
 352         /*
 353          * The extra 1 is there because the CPU will access an
 354          * additional byte beyond the end of the IO permission
 355          * bitmap. The extra byte must be all 1 bits, and must
 356          * be within the limit.
 357          */
 358         unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
 359 } __aligned(PAGE_SIZE);
 360 
 361 DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 362 
 363 /*
  364  * The sizeof(unsigned long) comes from the extra "long" at the end
  365  * of the io_bitmap.
  366  *
  367  * The -1 is there because the segment base + limit must point to the
  368  * address of the last valid byte, not one past it.
 369  */
 370 #define __KERNEL_TSS_LIMIT      \
 371         (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
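      /*
       * Plugging in the numbers for a 64-bit build: IO_BITMAP_BYTES is
       * 65536 / 8 == 8192 and sizeof(unsigned long) is 8, so the limit is
       * IO_BITMAP_OFFSET + 8192 + 8 - 1 and the TSS segment covers exactly
       * up to the last valid byte of the bitmap padding.
       */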
 372 
 373 /* Per CPU interrupt stacks */
 374 struct irq_stack {
 375         char            stack[IRQ_STACK_SIZE];
 376 } __aligned(IRQ_STACK_SIZE);
 377 
 378 DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 379 
 380 #ifdef CONFIG_X86_32
 381 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 382 #else
 383 /* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
 384 #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 385 #endif
 386 
 387 #ifdef CONFIG_X86_64
 388 struct fixed_percpu_data {
 389         /*
 390          * GCC hardcodes the stack canary as %gs:40.  Since the
 391          * irq_stack is the object at %gs:0, we reserve the bottom
 392          * 48 bytes of the irq stack for the canary.
 393          */
 394         char            gs_base[40];
 395         unsigned long   stack_canary;
 396 };
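      /*
       * The 40-byte gs_base pad exists only so that stack_canary lands at
       * the %gs:40 slot GCC expects; a hypothetical sanity check would be:
       *
       *      BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
       */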
 397 
 398 DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
 399 DECLARE_INIT_PER_CPU(fixed_percpu_data);
 400 
 401 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 402 {
 403         return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
 404 }
 405 
 406 DECLARE_PER_CPU(unsigned int, irq_count);
 407 extern asmlinkage void ignore_sysret(void);
 408 
 409 #if IS_ENABLED(CONFIG_KVM)
 410 /* Save actual FS/GS selectors and bases to current->thread */
 411 void save_fsgs_for_kvm(void);
 412 #endif
 413 #else   /* X86_64 */
 414 #ifdef CONFIG_STACKPROTECTOR
 415 /*
  416  * Make sure the stack canary segment base is cache-line aligned:
 417  *   "For Intel Atom processors, avoid non zero segment base address
 418  *    that is not aligned to cache line boundary at all cost."
 419  * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
 420  */
 421 struct stack_canary {
 422         char __pad[20];         /* canary at %gs:20 */
 423         unsigned long canary;
 424 };
 425 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 426 #endif
 427 /* Per CPU softirq stack pointer */
 428 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 429 #endif  /* X86_64 */
 430 
 431 extern unsigned int fpu_kernel_xstate_size;
 432 extern unsigned int fpu_user_xstate_size;
 433 
 434 struct perf_event;
 435 
 436 typedef struct {
 437         unsigned long           seg;
 438 } mm_segment_t;
 439 
 440 struct thread_struct {
 441         /* Cached TLS descriptors: */
 442         struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
 443 #ifdef CONFIG_X86_32
 444         unsigned long           sp0;
 445 #endif
 446         unsigned long           sp;
 447 #ifdef CONFIG_X86_32
 448         unsigned long           sysenter_cs;
 449 #else
 450         unsigned short          es;
 451         unsigned short          ds;
 452         unsigned short          fsindex;
 453         unsigned short          gsindex;
 454 #endif
 455 
 456 #ifdef CONFIG_X86_64
 457         unsigned long           fsbase;
 458         unsigned long           gsbase;
 459 #else
 460         /*
 461          * XXX: this could presumably be unsigned short.  Alternatively,
 462          * 32-bit kernels could be taught to use fsindex instead.
 463          */
 464         unsigned long fs;
 465         unsigned long gs;
 466 #endif
 467 
  468         /* Saved state of the ptrace hardware breakpoints */
 469         struct perf_event       *ptrace_bps[HBP_NUM];
 470         /* Debug status used for traps, single steps, etc... */
 471         unsigned long           debugreg6;
 472         /* Keep track of the exact dr7 value set by the user */
 473         unsigned long           ptrace_dr7;
 474         /* Fault info: */
 475         unsigned long           cr2;
 476         unsigned long           trap_nr;
 477         unsigned long           error_code;
 478 #ifdef CONFIG_VM86
 479         /* Virtual 86 mode info */
 480         struct vm86             *vm86;
 481 #endif
 482         /* IO permissions: */
 483         unsigned long           *io_bitmap_ptr;
 484         unsigned long           iopl;
 485         /* Max allowed port in the bitmap, in bytes: */
 486         unsigned                io_bitmap_max;
 487 
 488         mm_segment_t            addr_limit;
 489 
 490         unsigned int            sig_on_uaccess_err:1;
 491         unsigned int            uaccess_err:1;  /* uaccess failed */
 492 
 493         /* Floating point and extended processor state */
 494         struct fpu              fpu;
 495         /*
 496          * WARNING: 'fpu' is dynamically-sized.  It *MUST* be at
 497          * the end.
 498          */
 499 };
 500 
 501 /* Whitelist the FPU state from the task_struct for hardened usercopy. */
 502 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 503                                                 unsigned long *size)
 504 {
 505         *offset = offsetof(struct thread_struct, fpu.state);
 506         *size = fpu_kernel_xstate_size;
 507 }
 508 
 509 /*
 510  * Thread-synchronous status.
 511  *
 512  * This is different from the flags in that nobody else
 513  * ever touches our thread-synchronous status, so we don't
 514  * have to worry about atomic accesses.
 515  */
 516 #define TS_COMPAT               0x0002  /* 32bit syscall active (64BIT)*/
 517 
 518 /*
 519  * Set IOPL bits in EFLAGS from given mask
 520  */
 521 static inline void native_set_iopl_mask(unsigned mask)
 522 {
 523 #ifdef CONFIG_X86_32
 524         unsigned int reg;
 525 
 526         asm volatile ("pushfl;"
 527                       "popl %0;"
 528                       "andl %1, %0;"
 529                       "orl %2, %0;"
 530                       "pushl %0;"
 531                       "popfl"
 532                       : "=&r" (reg)
 533                       : "i" (~X86_EFLAGS_IOPL), "r" (mask));
 534 #endif
 535 }
 536 
 537 static inline void
 538 native_load_sp0(unsigned long sp0)
 539 {
 540         this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 541 }
 542 
 543 static inline void native_swapgs(void)
 544 {
 545 #ifdef CONFIG_X86_64
 546         asm volatile("swapgs" ::: "memory");
 547 #endif
 548 }
 549 
 550 static inline unsigned long current_top_of_stack(void)
 551 {
 552         /*
 553          *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
  554          *  and around vm86 mode, and sp0 on x86_64 is special because of the
 555          *  entry trampoline.
 556          */
 557         return this_cpu_read_stable(cpu_current_top_of_stack);
 558 }
 559 
 560 static inline bool on_thread_stack(void)
 561 {
 562         return (unsigned long)(current_top_of_stack() -
 563                                current_stack_pointer) < THREAD_SIZE;
 564 }
 565 
 566 #ifdef CONFIG_PARAVIRT_XXL
 567 #include <asm/paravirt.h>
 568 #else
 569 #define __cpuid                 native_cpuid
 570 
 571 static inline void load_sp0(unsigned long sp0)
 572 {
 573         native_load_sp0(sp0);
 574 }
 575 
 576 #define set_iopl_mask native_set_iopl_mask
 577 #endif /* CONFIG_PARAVIRT_XXL */
 578 
 579 /* Free all resources held by a thread. */
 580 extern void release_thread(struct task_struct *);
 581 
 582 unsigned long get_wchan(struct task_struct *p);
 583 
 584 /*
  585  * Generic CPUID function.
  586  * Clear %ecx, since some CPUs (Cyrix MII) do not set or clear %ecx,
  587  * resulting in stale register contents being returned.
 588  */
 589 static inline void cpuid(unsigned int op,
 590                          unsigned int *eax, unsigned int *ebx,
 591                          unsigned int *ecx, unsigned int *edx)
 592 {
 593         *eax = op;
 594         *ecx = 0;
 595         __cpuid(eax, ebx, ecx, edx);
 596 }
 597 
 598 /* Some CPUID calls want 'count' to be placed in ecx */
 599 static inline void cpuid_count(unsigned int op, int count,
 600                                unsigned int *eax, unsigned int *ebx,
 601                                unsigned int *ecx, unsigned int *edx)
 602 {
 603         *eax = op;
 604         *ecx = count;
 605         __cpuid(eax, ebx, ecx, edx);
 606 }
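      /*
       * Illustrative use of the sub-leaf form: CPUID leaf 0xb (extended
       * topology) takes the topology level in ECX, so querying level 1
       * (usually the core level) looks like:
       *
       *      unsigned int eax, ebx, ecx, edx;
       *
       *      cpuid_count(0xb, 1, &eax, &ebx, &ecx, &edx);
       *      // eax[4:0] is the x2APIC ID shift width for this level
       */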
 607 
 608 /*
 609  * CPUID functions returning a single datum
 610  */
 611 static inline unsigned int cpuid_eax(unsigned int op)
 612 {
 613         unsigned int eax, ebx, ecx, edx;
 614 
 615         cpuid(op, &eax, &ebx, &ecx, &edx);
 616 
 617         return eax;
 618 }
 619 
 620 static inline unsigned int cpuid_ebx(unsigned int op)
 621 {
 622         unsigned int eax, ebx, ecx, edx;
 623 
 624         cpuid(op, &eax, &ebx, &ecx, &edx);
 625 
 626         return ebx;
 627 }
 628 
 629 static inline unsigned int cpuid_ecx(unsigned int op)
 630 {
 631         unsigned int eax, ebx, ecx, edx;
 632 
 633         cpuid(op, &eax, &ebx, &ecx, &edx);
 634 
 635         return ecx;
 636 }
 637 
 638 static inline unsigned int cpuid_edx(unsigned int op)
 639 {
 640         unsigned int eax, ebx, ecx, edx;
 641 
 642         cpuid(op, &eax, &ebx, &ecx, &edx);
 643 
 644         return edx;
 645 }
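      /*
       * The single-datum helpers suit one-off probes, e.g. the "running
       * under a hypervisor" bit, CPUID.01H:ECX[31].  Kernel code normally
       * tests cached flags via boot_cpu_has(); the raw form is shown here
       * only as a sketch:
       *
       *      if (cpuid_ecx(1) & (1U << 31))
       *              // a hypervisor is (or claims to be) present
       */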
 646 
 647 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 648 static __always_inline void rep_nop(void)
 649 {
 650         asm volatile("rep; nop" ::: "memory");
 651 }
 652 
 653 static __always_inline void cpu_relax(void)
 654 {
 655         rep_nop();
 656 }
 657 
 658 /*
 659  * This function forces the icache and prefetched instruction stream to
 660  * catch up with reality in two very specific cases:
 661  *
 662  *  a) Text was modified using one virtual address and is about to be executed
 663  *     from the same physical page at a different virtual address.
 664  *
 665  *  b) Text was modified on a different CPU, may subsequently be
 666  *     executed on this CPU, and you want to make sure the new version
  667  *     gets executed.  This generally means you're calling this in an IPI.
 668  *
 669  * If you're calling this for a different reason, you're probably doing
 670  * it wrong.
 671  */
 672 static inline void sync_core(void)
 673 {
 674         /*
 675          * There are quite a few ways to do this.  IRET-to-self is nice
 676          * because it works on every CPU, at any CPL (so it's compatible
 677          * with paravirtualization), and it never exits to a hypervisor.
 678          * The only down sides are that it's a bit slow (it seems to be
 679          * a bit more than 2x slower than the fastest options) and that
 680          * it unmasks NMIs.  The "push %cs" is needed because, in
 681          * paravirtual environments, __KERNEL_CS may not be a valid CS
 682          * value when we do IRET directly.
 683          *
 684          * In case NMI unmasking or performance ever becomes a problem,
 685          * the next best option appears to be MOV-to-CR2 and an
 686          * unconditional jump.  That sequence also works on all CPUs,
 687          * but it will fault at CPL3 (i.e. Xen PV).
 688          *
 689          * CPUID is the conventional way, but it's nasty: it doesn't
 690          * exist on some 486-like CPUs, and it usually exits to a
 691          * hypervisor.
 692          *
 693          * Like all of Linux's memory ordering operations, this is a
 694          * compiler barrier as well.
 695          */
 696 #ifdef CONFIG_X86_32
 697         asm volatile (
 698                 "pushfl\n\t"
 699                 "pushl %%cs\n\t"
 700                 "pushl $1f\n\t"
 701                 "iret\n\t"
 702                 "1:"
 703                 : ASM_CALL_CONSTRAINT : : "memory");
 704 #else
 705         unsigned int tmp;
 706 
 707         asm volatile (
 708                 UNWIND_HINT_SAVE
 709                 "mov %%ss, %0\n\t"
 710                 "pushq %q0\n\t"
 711                 "pushq %%rsp\n\t"
 712                 "addq $8, (%%rsp)\n\t"
 713                 "pushfq\n\t"
 714                 "mov %%cs, %0\n\t"
 715                 "pushq %q0\n\t"
 716                 "pushq $1f\n\t"
 717                 "iretq\n\t"
 718                 UNWIND_HINT_RESTORE
 719                 "1:"
 720                 : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 721 #endif
 722 }
 723 
 724 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 725 extern void amd_e400_c1e_apic_setup(void);
 726 
 727 extern unsigned long            boot_option_idle_override;
 728 
 729 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
 730                          IDLE_POLL};
 731 
 732 extern void enable_sep_cpu(void);
 733 extern int sysenter_setup(void);
 734 
 735 
 736 /* Defined in head.S */
 737 extern struct desc_ptr          early_gdt_descr;
 738 
 739 extern void switch_to_new_gdt(int);
 740 extern void load_direct_gdt(int);
 741 extern void load_fixmap_gdt(int);
 742 extern void load_percpu_segment(int);
 743 extern void cpu_init(void);
 744 extern void cr4_init(void);
 745 
 746 static inline unsigned long get_debugctlmsr(void)
 747 {
 748         unsigned long debugctlmsr = 0;
 749 
 750 #ifndef CONFIG_X86_DEBUGCTLMSR
 751         if (boot_cpu_data.x86 < 6)
 752                 return 0;
 753 #endif
 754         rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 755 
 756         return debugctlmsr;
 757 }
 758 
 759 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 760 {
 761 #ifndef CONFIG_X86_DEBUGCTLMSR
 762         if (boot_cpu_data.x86 < 6)
 763                 return;
 764 #endif
 765         wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 766 }
 767 
 768 extern void set_task_blockstep(struct task_struct *task, bool on);
 769 
 770 /* Boot loader type from the setup header: */
 771 extern int                      bootloader_type;
 772 extern int                      bootloader_version;
 773 
 774 extern char                     ignore_fpu_irq;
 775 
 776 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 777 #define ARCH_HAS_PREFETCHW
 778 #define ARCH_HAS_SPINLOCK_PREFETCH
 779 
 780 #ifdef CONFIG_X86_32
 781 # define BASE_PREFETCH          ""
 782 # define ARCH_HAS_PREFETCH
 783 #else
 784 # define BASE_PREFETCH          "prefetcht0 %P1"
 785 #endif
 786 
 787 /*
 788  * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
 789  *
  790  * It's not worth caring about 3dnow prefetches for the K6
 791  * because they are microcoded there and very slow.
 792  */
 793 static inline void prefetch(const void *x)
 794 {
 795         alternative_input(BASE_PREFETCH, "prefetchnta %P1",
 796                           X86_FEATURE_XMM,
 797                           "m" (*(const char *)x));
 798 }
 799 
 800 /*
 801  * 3dnow prefetch to get an exclusive cache line.
 802  * Useful for spinlocks to avoid one state transition in the
 803  * cache coherency protocol:
 804  */
 805 static inline void prefetchw(const void *x)
 806 {
 807         alternative_input(BASE_PREFETCH, "prefetchw %P1",
 808                           X86_FEATURE_3DNOWPREFETCH,
 809                           "m" (*(const char *)x));
 810 }
 811 
 812 static inline void spin_lock_prefetch(const void *x)
 813 {
 814         prefetchw(x);
 815 }
 816 
 817 #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
 818                            TOP_OF_KERNEL_STACK_PADDING)
 819 
 820 #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
 821 
 822 #define task_pt_regs(task) \
 823 ({                                                                      \
 824         unsigned long __ptr = (unsigned long)task_stack_page(task);     \
 825         __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
 826         ((struct pt_regs *)__ptr) - 1;                                  \
 827 })
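      /*
       * In other words, the user-mode register frame sits at the very top
       * of the kernel stack, just below TOP_OF_KERNEL_STACK_PADDING.  For a
       * task that entered the kernel from user mode, an illustrative read of
       * its saved user stack pointer is:
       *
       *      unsigned long usp = task_pt_regs(task)->sp;
       */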
 828 
 829 #ifdef CONFIG_X86_32
 830 /*
 831  * User space process size: 3GB (default).
 832  */
 833 #define IA32_PAGE_OFFSET        PAGE_OFFSET
 834 #define TASK_SIZE               PAGE_OFFSET
 835 #define TASK_SIZE_LOW           TASK_SIZE
 836 #define TASK_SIZE_MAX           TASK_SIZE
 837 #define DEFAULT_MAP_WINDOW      TASK_SIZE
 838 #define STACK_TOP               TASK_SIZE
 839 #define STACK_TOP_MAX           STACK_TOP
 840 
 841 #define INIT_THREAD  {                                                    \
 842         .sp0                    = TOP_OF_INIT_STACK,                      \
 843         .sysenter_cs            = __KERNEL_CS,                            \
 844         .io_bitmap_ptr          = NULL,                                   \
 845         .addr_limit             = KERNEL_DS,                              \
 846 }
 847 
 848 #define KSTK_ESP(task)          (task_pt_regs(task)->sp)
 849 
 850 #else
 851 /*
 852  * User space process size.  This is the first address outside the user range.
 853  * There are a few constraints that determine this:
 854  *
 855  * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 856  * address, then that syscall will enter the kernel with a
 857  * non-canonical return address, and SYSRET will explode dangerously.
 858  * We avoid this particular problem by preventing anything executable
 859  * from being mapped at the maximum canonical address.
 860  *
 861  * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 862  * CPUs malfunction if they execute code from the highest canonical page.
 863  * They'll speculate right off the end of the canonical space, and
 864  * bad things happen.  This is worked around in the same way as the
 865  * Intel problem.
 866  *
 867  * With page table isolation enabled, we map the LDT in ... [stay tuned]
 868  */
 869 #define TASK_SIZE_MAX   ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 870 
 871 #define DEFAULT_MAP_WINDOW      ((1UL << 47) - PAGE_SIZE)
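      /*
       * Concretely, with 4-level paging __VIRTUAL_MASK_SHIFT is 47, so
       * TASK_SIZE_MAX and DEFAULT_MAP_WINDOW are both (1UL << 47) - 4096 ==
       * 0x00007ffffffff000.  With 5-level paging TASK_SIZE_MAX grows, but
       * DEFAULT_MAP_WINDOW stays below 2^47 so mmap() only hands out higher
       * addresses to callers that explicitly ask for them.
       */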
 872 
 873 /* This decides where the kernel will search for a free chunk of vm
 874  * space during mmap's.
 875  */
 876 #define IA32_PAGE_OFFSET        ((current->personality & ADDR_LIMIT_3GB) ? \
 877                                         0xc0000000 : 0xFFFFe000)
 878 
 879 #define TASK_SIZE_LOW           (test_thread_flag(TIF_ADDR32) ? \
 880                                         IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
 881 #define TASK_SIZE               (test_thread_flag(TIF_ADDR32) ? \
 882                                         IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 883 #define TASK_SIZE_OF(child)     ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
 884                                         IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 885 
 886 #define STACK_TOP               TASK_SIZE_LOW
 887 #define STACK_TOP_MAX           TASK_SIZE_MAX
 888 
 889 #define INIT_THREAD  {                                          \
 890         .addr_limit             = KERNEL_DS,                    \
 891 }
 892 
 893 extern unsigned long KSTK_ESP(struct task_struct *task);
 894 
 895 #endif /* CONFIG_X86_64 */
 896 
 897 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 898                                                unsigned long new_sp);
 899 
 900 /*
 901  * This decides where the kernel will search for a free chunk of vm
 902  * space during mmap's.
 903  */
 904 #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
 905 #define TASK_UNMAPPED_BASE              __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
 906 
 907 #define KSTK_EIP(task)          (task_pt_regs(task)->ip)
 908 
 909 /* Get/set a process' ability to use the timestamp counter instruction */
 910 #define GET_TSC_CTL(adr)        get_tsc_mode((adr))
 911 #define SET_TSC_CTL(val)        set_tsc_mode((val))
 912 
 913 extern int get_tsc_mode(unsigned long adr);
 914 extern int set_tsc_mode(unsigned int val);
 915 
 916 DECLARE_PER_CPU(u64, msr_misc_features_shadow);
 917 
  918 /* Register/unregister a process' MPX-related resources */
 919 #define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
 920 #define MPX_DISABLE_MANAGEMENT()        mpx_disable_management()
 921 
 922 #ifdef CONFIG_X86_INTEL_MPX
 923 extern int mpx_enable_management(void);
 924 extern int mpx_disable_management(void);
 925 #else
 926 static inline int mpx_enable_management(void)
 927 {
 928         return -EINVAL;
 929 }
 930 static inline int mpx_disable_management(void)
 931 {
 932         return -EINVAL;
 933 }
 934 #endif /* CONFIG_X86_INTEL_MPX */
 935 
 936 #ifdef CONFIG_CPU_SUP_AMD
 937 extern u16 amd_get_nb_id(int cpu);
 938 extern u32 amd_get_nodes_per_socket(void);
 939 #else
 940 static inline u16 amd_get_nb_id(int cpu)                { return 0; }
 941 static inline u32 amd_get_nodes_per_socket(void)        { return 0; }
 942 #endif
 943 
 944 static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 945 {
 946         uint32_t base, eax, signature[3];
 947 
 948         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
 949                 cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
 950 
 951                 if (!memcmp(sig, signature, 12) &&
 952                     (leaves == 0 || ((eax - base) >= leaves)))
 953                         return base;
 954         }
 955 
 956         return 0;
 957 }
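      /*
       * For example, KVM advertises the signature "KVMKVMKVM\0\0\0" in
       * EBX/ECX/EDX of its base leaf, so a simplified guest-side check is:
       *
       *      if (hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0))
       *              // we are running as a KVM guest
       */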
 958 
 959 extern unsigned long arch_align_stack(unsigned long sp);
 960 void free_init_pages(const char *what, unsigned long begin, unsigned long end);
 961 extern void free_kernel_image_pages(void *begin, void *end);
 962 
 963 void default_idle(void);
 964 #ifdef  CONFIG_XEN
 965 bool xen_set_default_idle(void);
 966 #else
 967 #define xen_set_default_idle 0
 968 #endif
 969 
 970 void stop_this_cpu(void *dummy);
 971 void df_debug(struct pt_regs *regs, long error_code);
 972 void microcode_check(void);
 973 
 974 enum l1tf_mitigations {
 975         L1TF_MITIGATION_OFF,
 976         L1TF_MITIGATION_FLUSH_NOWARN,
 977         L1TF_MITIGATION_FLUSH,
 978         L1TF_MITIGATION_FLUSH_NOSMT,
 979         L1TF_MITIGATION_FULL,
 980         L1TF_MITIGATION_FULL_FORCE
 981 };
 982 
 983 extern enum l1tf_mitigations l1tf_mitigation;
 984 
 985 enum mds_mitigations {
 986         MDS_MITIGATION_OFF,
 987         MDS_MITIGATION_FULL,
 988         MDS_MITIGATION_VMWERV,
 989 };
 990 
 991 enum taa_mitigations {
 992         TAA_MITIGATION_OFF,
 993         TAA_MITIGATION_UCODE_NEEDED,
 994         TAA_MITIGATION_VERW,
 995         TAA_MITIGATION_TSX_DISABLED,
 996 };
 997 
 998 #endif /* _ASM_X86_PROCESSOR_H */
