root/arch/x86/power/cpu.c


DEFINITIONS

This source file includes the following definitions.
  1. msr_save_context
  2. msr_restore_context
  3. __save_processor_state
  4. save_processor_state
  5. do_fpu_end
  6. fix_processor_context
  7. __restore_processor_state
  8. restore_processor_state
  9. resume_play_dead
  10. hibernate_resume_nonboot_cpu_disable
  11. bsp_check
  12. bsp_pm_callback
  13. bsp_pm_check_init
  14. msr_build_context
  15. msr_initialize_bdw
  16. msr_save_cpuid_features
  17. pm_cpu_check
  18. pm_check_save_msr

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Suspend support specific for i386/x86-64.
   4  *
   5  * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
   6  * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
   7  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
   8  */
   9 
  10 #include <linux/suspend.h>
  11 #include <linux/export.h>
  12 #include <linux/smp.h>
  13 #include <linux/perf_event.h>
  14 #include <linux/tboot.h>
  15 #include <linux/dmi.h>
  16 
  17 #include <asm/pgtable.h>
  18 #include <asm/proto.h>
  19 #include <asm/mtrr.h>
  20 #include <asm/page.h>
  21 #include <asm/mce.h>
  22 #include <asm/suspend.h>
  23 #include <asm/fpu/internal.h>
  24 #include <asm/debugreg.h>
  25 #include <asm/cpu.h>
  26 #include <asm/mmu_context.h>
  27 #include <asm/cpu_device_id.h>
  28 
  29 #ifdef CONFIG_X86_32
  30 __visible unsigned long saved_context_ebx;
  31 __visible unsigned long saved_context_esp, saved_context_ebp;
  32 __visible unsigned long saved_context_esi, saved_context_edi;
  33 __visible unsigned long saved_context_eflags;
  34 #endif
  35 struct saved_context saved_context;
  36 
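     /*
      * Helpers for the MSR save/restore quirk list below: msr_save_context()
      * reads every MSR in saved_context.saved_msrs and marks entries whose
      * read fails as invalid, and msr_restore_context() writes back only the
      * entries that were read successfully.
      */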
  37 static void msr_save_context(struct saved_context *ctxt)
  38 {
  39         struct saved_msr *msr = ctxt->saved_msrs.array;
  40         struct saved_msr *end = msr + ctxt->saved_msrs.num;
  41 
  42         while (msr < end) {
  43                 msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
  44                 msr++;
  45         }
  46 }
  47 
  48 static void msr_restore_context(struct saved_context *ctxt)
  49 {
  50         struct saved_msr *msr = ctxt->saved_msrs.array;
  51         struct saved_msr *end = msr + ctxt->saved_msrs.num;
  52 
  53         while (msr < end) {
  54                 if (msr->valid)
  55                         wrmsrl(msr->info.msr_no, msr->info.reg.q);
  56                 msr++;
  57         }
  58 }
  59 
  60 /**
  61  *      __save_processor_state - save CPU registers before creating a
  62  *              hibernation image and before restoring the memory state from it
  63  *      @ctxt: structure to store the register contents in
  64  *
  65  *      NOTE: If there is a CPU register the modification of which by the
  66  *      boot kernel (i.e. the kernel used for loading the hibernation image)
  67  *      might affect the operations of the restored target kernel (i.e. the one
  68  *      saved in the hibernation image), then its contents must be saved by this
  69  *      function.  In other words, if kernel A is hibernated and different
  70  *      kernel B is used for loading the hibernation image into memory, the
  71  *      kernel A's __save_processor_state() function must save all registers
  72  *      needed by kernel A, so that it can operate correctly after the resume
  73  *      regardless of what kernel B does in the meantime.
  74  */
  75 static void __save_processor_state(struct saved_context *ctxt)
  76 {
  77 #ifdef CONFIG_X86_32
  78         mtrr_save_fixed_ranges(NULL);
  79 #endif
  80         kernel_fpu_begin();
  81 
  82         /*
  83          * descriptor tables
  84          */
  85         store_idt(&ctxt->idt);
  86 
  87         /*
  88          * We save it here, but restore it only in the hibernate case.
  89          * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
  90          * mode in "secondary_startup_64". In 32-bit mode it is done via
  91          * 'pmode_gdt' in wakeup_start.
  92          */
  93         ctxt->gdt_desc.size = GDT_SIZE - 1;
  94         ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
  95 
  96         store_tr(ctxt->tr);
  97 
  98         /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
  99         /*
 100          * segment registers
 101          */
 102 #ifdef CONFIG_X86_32_LAZY_GS
 103         savesegment(gs, ctxt->gs);
 104 #endif
 105 #ifdef CONFIG_X86_64
 106         savesegment(gs, ctxt->gs);
 107         savesegment(fs, ctxt->fs);
 108         savesegment(ds, ctxt->ds);
 109         savesegment(es, ctxt->es);
 110 
 111         rdmsrl(MSR_FS_BASE, ctxt->fs_base);
 112         rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
 113         rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
 114         mtrr_save_fixed_ranges(NULL);
 115 
 116         rdmsrl(MSR_EFER, ctxt->efer);
 117 #endif
 118 
 119         /*
 120          * control registers
 121          */
 122         ctxt->cr0 = read_cr0();
 123         ctxt->cr2 = read_cr2();
 124         ctxt->cr3 = __read_cr3();
 125         ctxt->cr4 = __read_cr4();
 126         ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
 127                                                &ctxt->misc_enable);
 128         msr_save_context(ctxt);
 129 }
 130 
 131 /* Needed by apm.c */
 132 void save_processor_state(void)
 133 {
 134         __save_processor_state(&saved_context);
 135         x86_platform.save_sched_clock_state();
 136 }
 137 #ifdef CONFIG_X86_32
 138 EXPORT_SYMBOL(save_processor_state);
 139 #endif
 140 
 141 static void do_fpu_end(void)
 142 {
 143         /*
 144          * Restore FPU regs if necessary.
 145          */
 146         kernel_fpu_end();
 147 }
 148 
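     /*
      * Rebuild the per-CPU descriptor state after the saved registers have
      * been restored: mark the TSS descriptor as available again and reload
      * TR, reprogram the syscall MSRs (64-bit) or SYSENTER state (32-bit),
      * reload the LDT, reinitialize the TLB state, resume the FPU and switch
      * back to the read-only fixmap GDT.
      */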
 149 static void fix_processor_context(void)
 150 {
 151         int cpu = smp_processor_id();
 152 #ifdef CONFIG_X86_64
 153         struct desc_struct *desc = get_cpu_gdt_rw(cpu);
 154         tss_desc tss;
 155 #endif
 156 
 157         /*
 158          * We need to reload TR, which requires that we change the
 159          * GDT entry to indicate "available" first.
 160          *
 161          * XXX: This could probably all be replaced by a call to
 162          * force_reload_TR().
 163          */
 164         set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 165 
 166 #ifdef CONFIG_X86_64
 167         memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
 168         tss.type = 0x9; /* The available 64-bit TSS (see AMD vol 2, pg 91) */
 169         write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
 170 
 171         syscall_init();                         /* This sets MSR_*STAR and related */
 172 #else
 173         if (boot_cpu_has(X86_FEATURE_SEP))
 174                 enable_sep_cpu();
 175 #endif
 176         load_TR_desc();                         /* This does ltr */
 177         load_mm_ldt(current->active_mm);        /* This does lldt */
 178         initialize_tlbstate_and_flush();
 179 
 180         fpu__resume_cpu();
 181 
 182         /* The processor is back on the direct GDT, load back the fixmap */
 183         load_fixmap_gdt(cpu);
 184 }
 185 
 186 /**
 187  * __restore_processor_state - restore the contents of CPU registers saved
 188  *                             by __save_processor_state()
 189  * @ctxt: structure to load the register contents from
 190  *
 191  * The asm code that gets us here will have restored a usable GDT, although
 192  * it will be pointing to the wrong alias.
 193  */
 194 static void notrace __restore_processor_state(struct saved_context *ctxt)
 195 {
 196         if (ctxt->misc_enable_saved)
 197                 wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
 198         /*
 199          * control registers
 200          */
 201         /* cr4 was introduced in the Pentium CPU */
 202 #ifdef CONFIG_X86_32
 203         if (ctxt->cr4)
 204                 __write_cr4(ctxt->cr4);
 205 #else
 206 /* CONFIG_X86_64 */
 207         wrmsrl(MSR_EFER, ctxt->efer);
 208         __write_cr4(ctxt->cr4);
 209 #endif
 210         write_cr3(ctxt->cr3);
 211         write_cr2(ctxt->cr2);
 212         write_cr0(ctxt->cr0);
 213 
 214         /* Restore the IDT. */
 215         load_idt(&ctxt->idt);
 216 
 217         /*
 218          * Just in case the asm code got us here with the SS, DS, or ES
 219          * out of sync with the GDT, update them.
 220          */
 221         loadsegment(ss, __KERNEL_DS);
 222         loadsegment(ds, __USER_DS);
 223         loadsegment(es, __USER_DS);
 224 
 225         /*
 226          * Restore percpu access.  Percpu access can happen in exception
 227          * handlers or in complicated helpers like load_gs_index().
 228          */
 229 #ifdef CONFIG_X86_64
 230         wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
 231 #else
 232         loadsegment(fs, __KERNEL_PERCPU);
 233         loadsegment(gs, __KERNEL_STACK_CANARY);
 234 #endif
 235 
 236         /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
 237         fix_processor_context();
 238 
 239         /*
 240          * Now that we have descriptor tables fully restored and working
 241          * exception handling, restore the usermode segments.
 242          */
 243 #ifdef CONFIG_X86_64
 244         loadsegment(ds, ctxt->ds);
 245         loadsegment(es, ctxt->es);
 246         loadsegment(fs, ctxt->fs);
 247         load_gs_index(ctxt->gs);
 248 
 249         /*
 250          * Restore FSBASE and GSBASE after restoring the selectors, since
 251          * restoring the selectors clobbers the bases.  Keep in mind
 252          * that MSR_KERNEL_GS_BASE is horribly misnamed.
 253          */
 254         wrmsrl(MSR_FS_BASE, ctxt->fs_base);
 255         wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
 256 #elif defined(CONFIG_X86_32_LAZY_GS)
 257         loadsegment(gs, ctxt->gs);
 258 #endif
 259 
 260         do_fpu_end();
 261         tsc_verify_tsc_adjust(true);
 262         x86_platform.restore_sched_clock_state();
 263         mtrr_bp_restore();
 264         perf_restore_debug_store();
 265         msr_restore_context(ctxt);
 266 }
 267 
 268 /* Needed by apm.c */
 269 void notrace restore_processor_state(void)
 270 {
 271         __restore_processor_state(&saved_context);
 272 }
 273 #ifdef CONFIG_X86_32
 274 EXPORT_SYMBOL(restore_processor_state);
 275 #endif
 276 
 277 #if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU)
 278 static void resume_play_dead(void)
 279 {
 280         play_dead_common();
 281         tboot_shutdown(TB_SHUTDOWN_WFS);
 282         hlt_play_dead();
 283 }
 284 
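     /*
      * Park the nonboot CPUs for hibernation image restore: first wake any SMT
      * siblings that were left in mwait/cpuidle sleep, then temporarily point
      * smp_ops.play_dead at resume_play_dead() (a plain HLT loop) while taking
      * the nonboot CPUs offline.
      */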
 285 int hibernate_resume_nonboot_cpu_disable(void)
 286 {
 287         void (*play_dead)(void) = smp_ops.play_dead;
 288         int ret;
 289 
 290         /*
 291          * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop
 292          * during hibernate image restoration, because it is likely that the
 293          * monitored address will actually be written to at that time and the
 294          * "dead" CPU will then attempt to execute instructions again, but the
 295          * address in its instruction pointer may no longer be resolvable at
 296          * that point (the page tables it used previously may have been
 297          * overwritten by hibernate image data).
 298          *
 299          * First, make sure that we wake up all the potentially disabled SMT
 300          * threads which have been initially brought up and then put into
 301          * mwait/cpuidle sleep.
 302          * Those will be put into a proper (not interfering with hibernation
 303          * resume) sleep state afterwards, and the resumed kernel will decide
 304          * for itself what to do with them.
 305          */
 306         ret = cpuhp_smt_enable();
 307         if (ret)
 308                 return ret;
 309         smp_ops.play_dead = resume_play_dead;
 310         ret = disable_nonboot_cpus();
 311         smp_ops.play_dead = play_dead;
 312         return ret;
 313 }
 314 #endif
 315 
 316 /*
 317  * When bsp_check() is called in hibernate and suspend, CPU hotplug
 318  * is already disabled, so it's unnecessary to handle the race condition
 319  * between the cpumask query and CPU hotplug.
 320  */
 321 static int bsp_check(void)
 322 {
 323         if (cpumask_first(cpu_online_mask) != 0) {
 324                 pr_warn("CPU0 is offline.\n");
 325                 return -ENODEV;
 326         }
 327 
 328         return 0;
 329 }
 330 
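     /*
      * PM notifier: fail suspend/hibernation preparation when the boot CPU
      * (CPU0) is offline, since bsp_check() requires CPU0 to be the first
      * online CPU.  The CONFIG_DEBUG_HOTPLUG_CPU0 cases below toggle CPU0 on
      * and off around restore for the CPU0 hotplug debug mode.
      */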
 331 static int bsp_pm_callback(struct notifier_block *nb, unsigned long action,
 332                            void *ptr)
 333 {
 334         int ret = 0;
 335 
 336         switch (action) {
 337         case PM_SUSPEND_PREPARE:
 338         case PM_HIBERNATION_PREPARE:
 339                 ret = bsp_check();
 340                 break;
 341 #ifdef CONFIG_DEBUG_HOTPLUG_CPU0
 342         case PM_RESTORE_PREPARE:
 343                 /*
 344                  * When the system resumes from hibernation, online CPU0 because
 345                  * 1. it's required for resume and
 346                  * 2. the CPU was online before hibernation
 347                  */
 348                 if (!cpu_online(0))
 349                         _debug_hotplug_cpu(0, 1);
 350                 break;
 351         case PM_POST_RESTORE:
 352                 /*
 353                  * When a resume really happens, this code won't be called.
 354                  *
 355                  * This code is called only when user space hibernation software
 356                  * prepares the snapshot device during boot time, so we just
 357                  * call _debug_hotplug_cpu() to restore CPU0 to its state prior
 358                  * to the snapshot device being prepared.
 359                  *
 360                  * This works for the normal boot case in our CPU0 hotplug debug
 361                  * mode, i.e. CPU0 is offline and user mode hibernation
 362                  * software initializes during boot time.
 363                  *
 364                  * If CPU0 is online and a user application accesses the snapshot
 365                  * device after boot time, this will offline CPU0 and the user may
 366                  * see a different CPU0 state before and after accessing
 367                  * the snapshot device. But hopefully this is not a case hit while
 368                  * debugging CPU0 hotplug. Even if users hit this case,
 369                  * they can easily bring CPU0 back online.
 370                  *
 371                  * To simplify this debug code, we only consider the normal boot
 372                  * case. Otherwise we would need to remember CPU0's state, restore
 373                  * it to that state, resolve racy conditions, etc.
 374                  */
 375                 _debug_hotplug_cpu(0, 0);
 376                 break;
 377 #endif
 378         default:
 379                 break;
 380         }
 381         return notifier_from_errno(ret);
 382 }
 383 
 384 static int __init bsp_pm_check_init(void)
 385 {
 386         /*
 387          * Set this bsp_pm_callback to a lower priority than
 388          * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback is called
 389          * earlier to disable CPU hotplug before the BSP online check.
 390          */
 391         pm_notifier(bsp_pm_callback, -INT_MAX);
 392         return 0;
 393 }
 394 
 395 core_initcall(bsp_pm_check_init);
 396 
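     /*
      * Append @num MSR ids from @msr_id to saved_context.saved_msrs, growing
      * the array while preserving entries registered by earlier callers.  The
      * listed MSRs are read in msr_save_context() and written back in
      * msr_restore_context() across suspend/resume.
      */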
 397 static int msr_build_context(const u32 *msr_id, const int num)
 398 {
 399         struct saved_msrs *saved_msrs = &saved_context.saved_msrs;
 400         struct saved_msr *msr_array;
 401         int total_num;
 402         int i, j;
 403 
 404         total_num = saved_msrs->num + num;
 405 
 406         msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL);
 407         if (!msr_array) {
 408                 pr_err("x86/pm: Can not allocate memory to save/restore MSRs during suspend.\n");
 409                 return -ENOMEM;
 410         }
 411 
 412         if (saved_msrs->array) {
 413                 /*
 414                  * Multiple callbacks can invoke this function, so copy any
 415                  * MSR save requests from previous invocations.
 416                  */
 417                 memcpy(msr_array, saved_msrs->array,
 418                        sizeof(struct saved_msr) * saved_msrs->num);
 419 
 420                 kfree(saved_msrs->array);
 421         }
 422 
 423         for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
 424                 msr_array[i].info.msr_no        = msr_id[j];
 425                 msr_array[i].valid              = false;
 426                 msr_array[i].info.reg.q         = 0;
 427         }
 428         saved_msrs->num   = total_num;
 429         saved_msrs->array = msr_array;
 430 
 431         return 0;
 432 }
 433 
 434 /*
 435  * The following sections are a quirk framework for problematic BIOSen:
 436  * Sometimes MSRs are modified by the BIOS after the system is suspended
 437  * to RAM, which might cause unexpected behavior after wakeup.
 438  * Thus we save/restore these specified MSRs across suspend/resume
 439  * in order to work around it.
 440  *
 441  * For any further problematic BIOSen/platforms,
 442  * please add your own function similar to msr_initialize_bdw.
 443  */
 444 static int msr_initialize_bdw(const struct dmi_system_id *d)
 445 {
 446         /* Add any extra MSR ids into this array. */
 447         u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL };
 448 
 449         pr_info("x86/pm: %s detected, MSR saving is needed during suspend.\n", d->ident);
 450         return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
 451 }
 452 
 453 static const struct dmi_system_id msr_save_dmi_table[] = {
 454         {
 455          .callback = msr_initialize_bdw,
 456          .ident = "BROADWELL BDX_EP",
 457          .matches = {
 458                 DMI_MATCH(DMI_PRODUCT_NAME, "GRANTLEY"),
 459                 DMI_MATCH(DMI_PRODUCT_VERSION, "E63448-400"),
 460                 },
 461         },
 462         {}
 463 };
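     /*
      * A minimal sketch (not from the original file) of how a further quirk
      * would be added, mirroring msr_initialize_bdw() and msr_save_dmi_table[]
      * above.  The platform name, DMI string and quirk function below are
      * hypothetical placeholders:
      *
      *      static int msr_initialize_foo(const struct dmi_system_id *d)
      *      {
      *              u32 foo_msr_id[] = { MSR_IA32_THERM_CONTROL };
      *
      *              pr_info("x86/pm: %s detected, MSR saving is needed during suspend.\n",
      *                      d->ident);
      *              return msr_build_context(foo_msr_id, ARRAY_SIZE(foo_msr_id));
      *      }
      *
      * plus a matching entry in msr_save_dmi_table[]:
      *
      *      {
      *       .callback = msr_initialize_foo,
      *       .ident = "HYPOTHETICAL FOO PLATFORM",
      *       .matches = {
      *              DMI_MATCH(DMI_PRODUCT_NAME, "FOO"),
      *              },
      *      },
      */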
 464 
 465 static int msr_save_cpuid_features(const struct x86_cpu_id *c)
 466 {
 467         u32 cpuid_msr_id[] = {
 468                 MSR_AMD64_CPUID_FN_1,
 469         };
 470 
 471         pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspend.\n",
 472                 c->family);
 473 
 474         return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id));
 475 }
 476 
 477 static const struct x86_cpu_id msr_save_cpu_table[] = {
 478         {
 479                 .vendor = X86_VENDOR_AMD,
 480                 .family = 0x15,
 481                 .model = X86_MODEL_ANY,
 482                 .feature = X86_FEATURE_ANY,
 483                 .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
 484         },
 485         {
 486                 .vendor = X86_VENDOR_AMD,
 487                 .family = 0x16,
 488                 .model = X86_MODEL_ANY,
 489                 .feature = X86_FEATURE_ANY,
 490                 .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
 491         },
 492         {}
 493 };
 494 
 495 typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *);
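     /*
      * Match the running CPU against msr_save_cpu_table and, on a match, call
      * the quirk function stored in the matching entry's ->driver_data.
      */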
 496 static int pm_cpu_check(const struct x86_cpu_id *c)
 497 {
 498         const struct x86_cpu_id *m;
 499         int ret = 0;
 500 
 501         m = x86_match_cpu(msr_save_cpu_table);
 502         if (m) {
 503                 pm_cpu_match_t fn;
 504 
 505                 fn = (pm_cpu_match_t)m->driver_data;
 506                 ret = fn(m);
 507         }
 508 
 509         return ret;
 510 }
 511 
 512 static int pm_check_save_msr(void)
 513 {
 514         dmi_check_system(msr_save_dmi_table);
 515         pm_cpu_check(msr_save_cpu_table);
 516 
 517         return 0;
 518 }
 519 
 520 device_initcall(pm_check_save_msr);
