arch/powerpc/platforms/powernv/idle.c


DEFINITIONS

This source file includes the following definitions:
  1. pnv_save_sprs_for_deep_states
  2. pnv_get_supported_cpuidle_states
  3. pnv_fastsleep_workaround_apply
  4. show_fastsleep_workaround_applyonce
  5. store_fastsleep_workaround_applyonce
  6. atomic_start_thread_idle
  7. atomic_stop_thread_idle
  8. atomic_lock_thread_idle
  9. atomic_unlock_and_stop_thread_idle
  10. atomic_unlock_thread_idle
  11. power7_idle_insn
  12. power7_offline
  13. power7_idle_type
  14. power7_idle
  15. power9_idle_stop
  16. power9_offline_stop
  17. power9_idle_type
  18. power9_idle
  19. pnv_power9_force_smt4_catch
  20. pnv_power9_force_smt4_release
  21. pnv_program_cpu_hotplug_lpcr
  22. pnv_cpu_offline
  23. validate_psscr_val_mask
  24. pnv_power9_idle_init
  25. pnv_disable_deep_states
  26. pnv_probe_idle_states
  27. pnv_parse_cpuidle_dt
  28. pnv_init_idle_states

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * PowerNV cpuidle code
   4  *
   5  * Copyright 2015 IBM Corp.
   6  */
   7 
   8 #include <linux/types.h>
   9 #include <linux/mm.h>
  10 #include <linux/slab.h>
  11 #include <linux/of.h>
  12 #include <linux/device.h>
  13 #include <linux/cpu.h>
  14 
  15 #include <asm/asm-prototypes.h>
  16 #include <asm/firmware.h>
  17 #include <asm/machdep.h>
  18 #include <asm/opal.h>
  19 #include <asm/cputhreads.h>
  20 #include <asm/cpuidle.h>
  21 #include <asm/code-patching.h>
  22 #include <asm/smp.h>
  23 #include <asm/runlatch.h>
  24 #include <asm/dbell.h>
  25 
  26 #include "powernv.h"
  27 #include "subcore.h"
  28 
  29 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */
  30 #define MAX_STOP_STATE  0xF
  31 
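/*
 * Identifiers passed to the stop-api (opal_slw_set_reg()) when saving
 * the MSR and PSSCR values that firmware restores on wakeup from a deep
 * stop state; see pnv_save_sprs_for_deep_states().
 */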
  32 #define P9_STOP_SPR_MSR 2000
  33 #define P9_STOP_SPR_PSSCR      855
  34 
  35 static u32 supported_cpuidle_states;
  36 struct pnv_idle_states_t *pnv_idle_states;
  37 int nr_pnv_idle_states;
  38 
  39 /*
  40  * The default stop state that will be used by ppc_md.power_save
  41  * function on platforms that support stop instruction.
  42  */
  43 static u64 pnv_default_stop_val;
  44 static u64 pnv_default_stop_mask;
  45 static bool default_stop_found;
  46 
  47 /*
  48  * First stop state levels when SPR and TB loss can occur.
  49  */
  50 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
  51 static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
  52 
  53 /*
  54  * psscr value and mask of the deepest stop idle state.
  55  * Used when a cpu is offlined.
  56  */
  57 static u64 pnv_deepest_stop_psscr_val;
  58 static u64 pnv_deepest_stop_psscr_mask;
  59 static u64 pnv_deepest_stop_flag;
  60 static bool deepest_stop_found;
  61 
  62 static unsigned long power7_offline_type;
  63 
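/*
 * Program, via the OPAL stop-api, the SPR values that firmware must
 * restore on each CPU when it wakes from a deep idle state that loses
 * full hypervisor context (winkle on P8, deep stop on P9). Per-thread
 * registers are programmed for every present CPU; the per-core
 * registers (HMEER and the HIDs) are only programmed from thread 0 of
 * each core.
 */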
  64 static int pnv_save_sprs_for_deep_states(void)
  65 {
  66         int cpu;
  67         int rc;
  68 
  69         /*
   70          * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
   71          * all cpus at boot. Read these register values on the current cpu and
   72          * program the same values on all cpus.
  73          */
  74         uint64_t lpcr_val       = mfspr(SPRN_LPCR);
  75         uint64_t hid0_val       = mfspr(SPRN_HID0);
  76         uint64_t hid1_val       = mfspr(SPRN_HID1);
  77         uint64_t hid4_val       = mfspr(SPRN_HID4);
  78         uint64_t hid5_val       = mfspr(SPRN_HID5);
  79         uint64_t hmeer_val      = mfspr(SPRN_HMEER);
  80         uint64_t msr_val = MSR_IDLE;
  81         uint64_t psscr_val = pnv_deepest_stop_psscr_val;
  82 
  83         for_each_present_cpu(cpu) {
  84                 uint64_t pir = get_hard_smp_processor_id(cpu);
  85                 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
  86 
  87                 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
  88                 if (rc != 0)
  89                         return rc;
  90 
  91                 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
  92                 if (rc != 0)
  93                         return rc;
  94 
  95                 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
  96                         rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
  97                         if (rc)
  98                                 return rc;
  99 
 100                         rc = opal_slw_set_reg(pir,
 101                                               P9_STOP_SPR_PSSCR, psscr_val);
 102 
 103                         if (rc)
 104                                 return rc;
 105                 }
 106 
 107                 /* HIDs are per core registers */
 108                 if (cpu_thread_in_core(cpu) == 0) {
 109 
 110                         rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
 111                         if (rc != 0)
 112                                 return rc;
 113 
 114                         rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
 115                         if (rc != 0)
 116                                 return rc;
 117 
  118                         /* Only p8 needs to set extra HID registers */
 119                         if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
 120 
 121                                 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
 122                                 if (rc != 0)
 123                                         return rc;
 124 
 125                                 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
 126                                 if (rc != 0)
 127                                         return rc;
 128 
 129                                 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
 130                                 if (rc != 0)
 131                                         return rc;
 132                         }
 133                 }
 134         }
 135 
 136         return 0;
 137 }
 138 
 139 u32 pnv_get_supported_cpuidle_states(void)
 140 {
 141         return supported_cpuidle_states;
 142 }
 143 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
 144 
 145 static void pnv_fastsleep_workaround_apply(void *info)
 146 
 147 {
 148         int rc;
 149         int *err = info;
 150 
 151         rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 152                                         OPAL_CONFIG_IDLE_APPLY);
 153         if (rc)
 154                 *err = 1;
 155 }
 156 
 157 static bool power7_fastsleep_workaround_entry = true;
 158 static bool power7_fastsleep_workaround_exit = true;
 159 
 160 /*
 161  * Used to store fastsleep workaround state
 162  * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 163  * 1 - Workaround applied once, never undone.
 164  */
 165 static u8 fastsleep_workaround_applyonce;
 166 
 167 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
 168                 struct device_attribute *attr, char *buf)
 169 {
 170         return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
 171 }
 172 
 173 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
 174                 struct device_attribute *attr, const char *buf,
 175                 size_t count)
 176 {
 177         cpumask_t primary_thread_mask;
 178         int err;
 179         u8 val;
 180 
 181         if (kstrtou8(buf, 0, &val) || val != 1)
 182                 return -EINVAL;
 183 
 184         if (fastsleep_workaround_applyonce == 1)
 185                 return count;
 186 
 187         /*
 188          * fastsleep_workaround_applyonce = 1 implies
 189          * fastsleep workaround needs to be left in 'applied' state on all
  190          * the cores. Do this by:
  191          * 1. Disable the 'undo' workaround in the fastsleep exit path
  192          * 2. Send IPIs to all the cores which have at least one online thread
  193          * 3. Disable the 'apply' workaround in the fastsleep entry path
  194          *
  195          * There is no need to send an IPI to cores which have all threads
  196          * offlined, as the last thread of the core entering fastsleep or a
  197          * deeper state would have applied the workaround.
 198          */
 199         power7_fastsleep_workaround_exit = false;
 200 
 201         get_online_cpus();
 202         primary_thread_mask = cpu_online_cores_map();
 203         on_each_cpu_mask(&primary_thread_mask,
 204                                 pnv_fastsleep_workaround_apply,
 205                                 &err, 1);
 206         put_online_cpus();
 207         if (err) {
  208                 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
 209                 goto fail;
 210         }
 211 
 212         power7_fastsleep_workaround_entry = false;
 213 
 214         fastsleep_workaround_applyonce = 1;
 215 
 216         return count;
 217 fail:
 218         return -EIO;
 219 }
 220 
 221 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
 222                         show_fastsleep_workaround_applyonce,
 223                         store_fastsleep_workaround_applyonce);
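
/*
 * The attribute is created on cpu_subsys.dev_root from
 * pnv_init_idle_states(), so it should appear as
 * /sys/devices/system/cpu/fastsleep_workaround_applyonce. Writing "1"
 * makes the workaround sticky (applied once, never undone); any other
 * value is rejected with -EINVAL.
 */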
 224 
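/*
 * The helpers below operate on the per-core idle_state word stored in
 * the first thread sibling's paca. The low bits hold one flag per
 * thread (set = thread is not in a state-losing idle, clear = thread
 * has entered one), and NR_PNV_CORE_IDLE_LOCK_BIT is a lock bit that
 * serialises updates to the rest of the word.
 */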
 225 static inline void atomic_start_thread_idle(void)
 226 {
 227         int cpu = raw_smp_processor_id();
 228         int first = cpu_first_thread_sibling(cpu);
 229         int thread_nr = cpu_thread_in_core(cpu);
 230         unsigned long *state = &paca_ptrs[first]->idle_state;
 231 
 232         clear_bit(thread_nr, state);
 233 }
 234 
 235 static inline void atomic_stop_thread_idle(void)
 236 {
 237         int cpu = raw_smp_processor_id();
 238         int first = cpu_first_thread_sibling(cpu);
 239         int thread_nr = cpu_thread_in_core(cpu);
 240         unsigned long *state = &paca_ptrs[first]->idle_state;
 241 
 242         set_bit(thread_nr, state);
 243 }
 244 
 245 static inline void atomic_lock_thread_idle(void)
 246 {
 247         int cpu = raw_smp_processor_id();
 248         int first = cpu_first_thread_sibling(cpu);
 249         unsigned long *state = &paca_ptrs[first]->idle_state;
 250 
 251         while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
 252                 barrier();
 253 }
 254 
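/*
 * Release the lock and set this thread's "not idle" bit in a single
 * atomic update, so the lock is never observed as released while the
 * thread bit is still clear.
 */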
 255 static inline void atomic_unlock_and_stop_thread_idle(void)
 256 {
 257         int cpu = raw_smp_processor_id();
 258         int first = cpu_first_thread_sibling(cpu);
 259         unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 260         unsigned long *state = &paca_ptrs[first]->idle_state;
 261         u64 s = READ_ONCE(*state);
 262         u64 new, tmp;
 263 
 264         BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
 265         BUG_ON(s & thread);
 266 
 267 again:
 268         new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
 269         tmp = cmpxchg(state, s, new);
 270         if (unlikely(tmp != s)) {
 271                 s = tmp;
 272                 goto again;
 273         }
 274 }
 275 
 276 static inline void atomic_unlock_thread_idle(void)
 277 {
 278         int cpu = raw_smp_processor_id();
 279         int first = cpu_first_thread_sibling(cpu);
 280         unsigned long *state = &paca_ptrs[first]->idle_state;
 281 
 282         BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
 283         clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
 284 }
 285 
 286 /* P7 and P8 */
 287 struct p7_sprs {
 288         /* per core */
 289         u64 tscr;
 290         u64 worc;
 291 
 292         /* per subcore */
 293         u64 sdr1;
 294         u64 rpr;
 295 
 296         /* per thread */
 297         u64 lpcr;
 298         u64 hfscr;
 299         u64 fscr;
 300         u64 purr;
 301         u64 spurr;
 302         u64 dscr;
 303         u64 wort;
 304 
 305         /* per thread SPRs that get lost in shallow states */
 306         u64 amr;
 307         u64 iamr;
 308         u64 amor;
 309         u64 uamor;
 310 };
 311 
 312 static unsigned long power7_idle_insn(unsigned long type)
 313 {
 314         int cpu = raw_smp_processor_id();
 315         int first = cpu_first_thread_sibling(cpu);
 316         unsigned long *state = &paca_ptrs[first]->idle_state;
 317         unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 318         unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 319         unsigned long srr1;
 320         bool full_winkle;
 321         struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
 322         bool sprs_saved = false;
 323         int rc;
 324 
 325         if (unlikely(type != PNV_THREAD_NAP)) {
 326                 atomic_lock_thread_idle();
 327 
 328                 BUG_ON(!(*state & thread));
 329                 *state &= ~thread;
 330 
 331                 if (power7_fastsleep_workaround_entry) {
 332                         if ((*state & core_thread_mask) == 0) {
 333                                 rc = opal_config_cpu_idle_state(
 334                                                 OPAL_CONFIG_IDLE_FASTSLEEP,
 335                                                 OPAL_CONFIG_IDLE_APPLY);
 336                                 BUG_ON(rc);
 337                         }
 338                 }
 339 
 340                 if (type == PNV_THREAD_WINKLE) {
 341                         sprs.tscr       = mfspr(SPRN_TSCR);
 342                         sprs.worc       = mfspr(SPRN_WORC);
 343 
 344                         sprs.sdr1       = mfspr(SPRN_SDR1);
 345                         sprs.rpr        = mfspr(SPRN_RPR);
 346 
 347                         sprs.lpcr       = mfspr(SPRN_LPCR);
 348                         if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 349                                 sprs.hfscr      = mfspr(SPRN_HFSCR);
 350                                 sprs.fscr       = mfspr(SPRN_FSCR);
 351                         }
 352                         sprs.purr       = mfspr(SPRN_PURR);
 353                         sprs.spurr      = mfspr(SPRN_SPURR);
 354                         sprs.dscr       = mfspr(SPRN_DSCR);
 355                         sprs.wort       = mfspr(SPRN_WORT);
 356 
 357                         sprs_saved = true;
 358 
 359                         /*
 360                          * Increment winkle counter and set all winkle bits if
 361                          * all threads are winkling. This allows wakeup side to
 362                          * distinguish between fast sleep and winkle state
 363                          * loss. Fast sleep still has to resync the timebase so
 364                          * this may not be a really big win.
 365                          */
 366                         *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 367                         if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
 368                                         >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
 369                                         == threads_per_core)
 370                                 *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
 371                         WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 372                 }
 373 
 374                 atomic_unlock_thread_idle();
 375         }
 376 
 377         if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 378                 sprs.amr        = mfspr(SPRN_AMR);
 379                 sprs.iamr       = mfspr(SPRN_IAMR);
 380                 sprs.amor       = mfspr(SPRN_AMOR);
 381                 sprs.uamor      = mfspr(SPRN_UAMOR);
 382         }
 383 
 384         local_paca->thread_idle_state = type;
 385         srr1 = isa206_idle_insn_mayloss(type);          /* go idle */
 386         local_paca->thread_idle_state = PNV_THREAD_RUNNING;
 387 
 388         WARN_ON_ONCE(!srr1);
 389         WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 390 
 391         if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 392                 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
 393                         /*
 394                          * We don't need an isync after the mtsprs here because
 395                          * the upcoming mtmsrd is execution synchronizing.
 396                          */
 397                         mtspr(SPRN_AMR,         sprs.amr);
 398                         mtspr(SPRN_IAMR,        sprs.iamr);
 399                         mtspr(SPRN_AMOR,        sprs.amor);
 400                         mtspr(SPRN_UAMOR,       sprs.uamor);
 401                 }
 402         }
 403 
 404         if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 405                 hmi_exception_realmode(NULL);
 406 
 407         if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
 408                 if (unlikely(type != PNV_THREAD_NAP)) {
 409                         atomic_lock_thread_idle();
 410                         if (type == PNV_THREAD_WINKLE) {
 411                                 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 412                                 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 413                                 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
 414                         }
 415                         atomic_unlock_and_stop_thread_idle();
 416                 }
 417                 return srr1;
 418         }
 419 
 420         /* HV state loss */
 421         BUG_ON(type == PNV_THREAD_NAP);
 422 
 423         atomic_lock_thread_idle();
 424 
 425         full_winkle = false;
 426         if (type == PNV_THREAD_WINKLE) {
 427                 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 428                 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 429                 if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
 430                         *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
 431                         full_winkle = true;
 432                         BUG_ON(!sprs_saved);
 433                 }
 434         }
 435 
 436         WARN_ON(*state & thread);
 437 
 438         if ((*state & core_thread_mask) != 0)
 439                 goto core_woken;
 440 
 441         /* Per-core SPRs */
 442         if (full_winkle) {
 443                 mtspr(SPRN_TSCR,        sprs.tscr);
 444                 mtspr(SPRN_WORC,        sprs.worc);
 445         }
 446 
 447         if (power7_fastsleep_workaround_exit) {
 448                 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 449                                                 OPAL_CONFIG_IDLE_UNDO);
 450                 BUG_ON(rc);
 451         }
 452 
 453         /* TB */
 454         if (opal_resync_timebase() != OPAL_SUCCESS)
 455                 BUG();
 456 
 457 core_woken:
 458         if (!full_winkle)
 459                 goto subcore_woken;
 460 
 461         if ((*state & local_paca->subcore_sibling_mask) != 0)
 462                 goto subcore_woken;
 463 
 464         /* Per-subcore SPRs */
 465         mtspr(SPRN_SDR1,        sprs.sdr1);
 466         mtspr(SPRN_RPR,         sprs.rpr);
 467 
 468 subcore_woken:
 469         /*
 470          * isync after restoring shared SPRs and before unlocking. Unlock
 471          * only contains hwsync which does not necessarily do the right
 472          * thing for SPRs.
 473          */
 474         isync();
 475         atomic_unlock_and_stop_thread_idle();
 476 
 477         /* Fast sleep does not lose SPRs */
 478         if (!full_winkle)
 479                 return srr1;
 480 
 481         /* Per-thread SPRs */
 482         mtspr(SPRN_LPCR,        sprs.lpcr);
 483         if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 484                 mtspr(SPRN_HFSCR,       sprs.hfscr);
 485                 mtspr(SPRN_FSCR,        sprs.fscr);
 486         }
 487         mtspr(SPRN_PURR,        sprs.purr);
 488         mtspr(SPRN_SPURR,       sprs.spurr);
 489         mtspr(SPRN_DSCR,        sprs.dscr);
 490         mtspr(SPRN_WORT,        sprs.wort);
 491 
 492         mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
 493 
 494         /*
 495          * The SLB has to be restored here, but it sometimes still
 496          * contains entries, so the __ variant must be used to prevent
 497          * multi hits.
 498          */
 499         __slb_restore_bolted_realmode();
 500 
 501         return srr1;
 502 }
 503 
 504 extern unsigned long idle_kvm_start_guest(unsigned long srr1);
 505 
 506 #ifdef CONFIG_HOTPLUG_CPU
 507 static unsigned long power7_offline(void)
 508 {
 509         unsigned long srr1;
 510 
 511         mtmsr(MSR_IDLE);
 512 
 513 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 514         /* Tell KVM we're entering idle. */
 515         /******************************************************/
 516         /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
 517         /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
 518         /* MUST occur in real mode, i.e. with the MMU off,    */
 519         /* and the MMU must stay off until we clear this flag */
 520         /* and test HSTATE_HWTHREAD_REQ(r13) in               */
 521         /* pnv_powersave_wakeup in this file.                 */
 522         /* The reason is that another thread can switch the   */
 523         /* MMU to a guest context whenever this flag is set   */
 524         /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
 525         /* that would potentially cause this thread to start  */
 526         /* executing instructions from guest memory in        */
 527         /* hypervisor mode, leading to a host crash or data   */
 528         /* corruption, or worse.                              */
 529         /******************************************************/
 530         local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
 531 #endif
 532 
 533         __ppc64_runlatch_off();
 534         srr1 = power7_idle_insn(power7_offline_type);
 535         __ppc64_runlatch_on();
 536 
 537 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 538         local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
 539         /* Order setting hwthread_state vs. testing hwthread_req */
 540         smp_mb();
 541         if (local_paca->kvm_hstate.hwthread_req)
 542                 srr1 = idle_kvm_start_guest(srr1);
 543 #endif
 544 
 545         mtmsr(MSR_KERNEL);
 546 
 547         return srr1;
 548 }
 549 #endif
 550 
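/*
 * Idle entry used by the cpuidle driver on P7/P8. The caller has soft
 * disabled interrupts; the wakeup reason in SRR1 is converted back into
 * a pending interrupt so it is replayed once interrupts are re-enabled.
 */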
 551 void power7_idle_type(unsigned long type)
 552 {
 553         unsigned long srr1;
 554 
 555         if (!prep_irq_for_idle_irqsoff())
 556                 return;
 557 
 558         mtmsr(MSR_IDLE);
 559         __ppc64_runlatch_off();
 560         srr1 = power7_idle_insn(type);
 561         __ppc64_runlatch_on();
 562         mtmsr(MSR_KERNEL);
 563 
 564         fini_irq_for_idle_irqsoff();
 565         irq_set_pending_from_srr1(srr1);
 566 }
 567 
 568 void power7_idle(void)
 569 {
 570         if (!powersave_nap)
 571                 return;
 572 
 573         power7_idle_type(PNV_THREAD_NAP);
 574 }
 575 
 576 struct p9_sprs {
 577         /* per core */
 578         u64 ptcr;
 579         u64 rpr;
 580         u64 tscr;
 581         u64 ldbar;
 582 
 583         /* per thread */
 584         u64 lpcr;
 585         u64 hfscr;
 586         u64 fscr;
 587         u64 pid;
 588         u64 purr;
 589         u64 spurr;
 590         u64 dscr;
 591         u64 wort;
 592 
 593         u64 mmcra;
 594         u32 mmcr0;
 595         u32 mmcr1;
 596         u64 mmcr2;
 597 
 598         /* per thread SPRs that get lost in shallow states */
 599         u64 amr;
 600         u64 iamr;
 601         u64 amor;
 602         u64 uamor;
 603 };
 604 
 605 static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
 606 {
 607         int cpu = raw_smp_processor_id();
 608         int first = cpu_first_thread_sibling(cpu);
 609         unsigned long *state = &paca_ptrs[first]->idle_state;
 610         unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 611         unsigned long srr1;
 612         unsigned long pls;
 613         unsigned long mmcr0 = 0;
 614         struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
 615         bool sprs_saved = false;
 616 
 617         if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 618                 /* EC=ESL=0 case */
 619 
 620                 BUG_ON(!mmu_on);
 621 
 622                 /*
 623                  * Wake synchronously. SRESET via xscom may still cause
 624                  * a 0x100 powersave wakeup with SRR1 reason!
 625                  */
 626                 srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
 627                 if (likely(!srr1))
 628                         return 0;
 629 
 630                 /*
 631                  * Registers not saved, can't recover!
 632                  * This would be a hardware bug
 633                  */
 634                 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
 635 
 636                 goto out;
 637         }
 638 
 639         /* EC=ESL=1 case */
 640 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 641         if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
 642                 local_paca->requested_psscr = psscr;
 643                 /* order setting requested_psscr vs testing dont_stop */
 644                 smp_mb();
 645                 if (atomic_read(&local_paca->dont_stop)) {
 646                         local_paca->requested_psscr = 0;
 647                         return 0;
 648                 }
 649         }
 650 #endif
 651 
 652         if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
 653                  /*
 654                   * POWER9 DD2 can incorrectly set PMAO when waking up
 655                   * after a state-loss idle. Saving and restoring MMCR0
 656                   * over idle is a workaround.
 657                   */
 658                 mmcr0           = mfspr(SPRN_MMCR0);
 659         }
 660         if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
 661                 sprs.lpcr       = mfspr(SPRN_LPCR);
 662                 sprs.hfscr      = mfspr(SPRN_HFSCR);
 663                 sprs.fscr       = mfspr(SPRN_FSCR);
 664                 sprs.pid        = mfspr(SPRN_PID);
 665                 sprs.purr       = mfspr(SPRN_PURR);
 666                 sprs.spurr      = mfspr(SPRN_SPURR);
 667                 sprs.dscr       = mfspr(SPRN_DSCR);
 668                 sprs.wort       = mfspr(SPRN_WORT);
 669 
 670                 sprs.mmcra      = mfspr(SPRN_MMCRA);
 671                 sprs.mmcr0      = mfspr(SPRN_MMCR0);
 672                 sprs.mmcr1      = mfspr(SPRN_MMCR1);
 673                 sprs.mmcr2      = mfspr(SPRN_MMCR2);
 674 
 675                 sprs.ptcr       = mfspr(SPRN_PTCR);
 676                 sprs.rpr        = mfspr(SPRN_RPR);
 677                 sprs.tscr       = mfspr(SPRN_TSCR);
 678                 if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 679                         sprs.ldbar = mfspr(SPRN_LDBAR);
 680 
 681                 sprs_saved = true;
 682 
 683                 atomic_start_thread_idle();
 684         }
 685 
 686         sprs.amr        = mfspr(SPRN_AMR);
 687         sprs.iamr       = mfspr(SPRN_IAMR);
 688         sprs.amor       = mfspr(SPRN_AMOR);
 689         sprs.uamor      = mfspr(SPRN_UAMOR);
 690 
 691         srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
 692 
 693 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 694         local_paca->requested_psscr = 0;
 695 #endif
 696 
 697         psscr = mfspr(SPRN_PSSCR);
 698 
 699         WARN_ON_ONCE(!srr1);
 700         WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 701 
 702         if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
 703                 unsigned long mmcra;
 704 
 705                 /*
 706                  * We don't need an isync after the mtsprs here because the
 707                  * upcoming mtmsrd is execution synchronizing.
 708                  */
 709                 mtspr(SPRN_AMR,         sprs.amr);
 710                 mtspr(SPRN_IAMR,        sprs.iamr);
 711                 mtspr(SPRN_AMOR,        sprs.amor);
 712                 mtspr(SPRN_UAMOR,       sprs.uamor);
 713 
 714                 /*
 715                  * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
 716                  * might have been corrupted and needs flushing. We also need
 717                  * to reload MMCR0 (see mmcr0 comment above).
 718                  */
 719                 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
 720                         asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
 721                         mtspr(SPRN_MMCR0, mmcr0);
 722                 }
 723 
 724                 /*
 725                  * DD2.2 and earlier need to set then clear bit 60 in MMCRA
 726                  * to ensure the PMU starts running.
 727                  */
 728                 mmcra = mfspr(SPRN_MMCRA);
 729                 mmcra |= PPC_BIT(60);
 730                 mtspr(SPRN_MMCRA, mmcra);
 731                 mmcra &= ~PPC_BIT(60);
 732                 mtspr(SPRN_MMCRA, mmcra);
 733         }
 734 
 735         if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 736                 hmi_exception_realmode(NULL);
 737 
 738         /*
 739          * On POWER9, SRR1 bits do not match exactly as expected.
 740          * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
 741          * just always test PSSCR for SPR/TB state loss.
 742          */
 743         pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
 744         if (likely(pls < pnv_first_spr_loss_level)) {
 745                 if (sprs_saved)
 746                         atomic_stop_thread_idle();
 747                 goto out;
 748         }
 749 
 750         /* HV state loss */
 751         BUG_ON(!sprs_saved);
 752 
 753         atomic_lock_thread_idle();
 754 
 755         if ((*state & core_thread_mask) != 0)
 756                 goto core_woken;
 757 
 758         /* Per-core SPRs */
 759         mtspr(SPRN_PTCR,        sprs.ptcr);
 760         mtspr(SPRN_RPR,         sprs.rpr);
 761         mtspr(SPRN_TSCR,        sprs.tscr);
 762 
 763         if (pls >= pnv_first_tb_loss_level) {
 764                 /* TB loss */
 765                 if (opal_resync_timebase() != OPAL_SUCCESS)
 766                         BUG();
 767         }
 768 
 769         /*
 770          * isync after restoring shared SPRs and before unlocking. Unlock
 771          * only contains hwsync which does not necessarily do the right
 772          * thing for SPRs.
 773          */
 774         isync();
 775 
 776 core_woken:
 777         atomic_unlock_and_stop_thread_idle();
 778 
 779         /* Per-thread SPRs */
 780         mtspr(SPRN_LPCR,        sprs.lpcr);
 781         mtspr(SPRN_HFSCR,       sprs.hfscr);
 782         mtspr(SPRN_FSCR,        sprs.fscr);
 783         mtspr(SPRN_PID,         sprs.pid);
 784         mtspr(SPRN_PURR,        sprs.purr);
 785         mtspr(SPRN_SPURR,       sprs.spurr);
 786         mtspr(SPRN_DSCR,        sprs.dscr);
 787         mtspr(SPRN_WORT,        sprs.wort);
 788 
 789         mtspr(SPRN_MMCRA,       sprs.mmcra);
 790         mtspr(SPRN_MMCR0,       sprs.mmcr0);
 791         mtspr(SPRN_MMCR1,       sprs.mmcr1);
 792         mtspr(SPRN_MMCR2,       sprs.mmcr2);
 793         if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 794                 mtspr(SPRN_LDBAR, sprs.ldbar);
 795 
 796         mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
 797 
 798         if (!radix_enabled())
 799                 __slb_restore_bolted_realmode();
 800 
 801 out:
 802         if (mmu_on)
 803                 mtmsr(MSR_KERNEL);
 804 
 805         return srr1;
 806 }
 807 
 808 #ifdef CONFIG_HOTPLUG_CPU
 809 static unsigned long power9_offline_stop(unsigned long psscr)
 810 {
 811         unsigned long srr1;
 812 
 813 #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 814         __ppc64_runlatch_off();
 815         srr1 = power9_idle_stop(psscr, true);
 816         __ppc64_runlatch_on();
 817 #else
 818         /*
 819          * Tell KVM we're entering idle.
 820          * This does not have to be done in real mode because the P9 MMU
 821          * is independent per-thread. Some steppings share radix/hash mode
 822          * between threads, but in that case KVM has a barrier sync in real
 823          * mode before and after switching between radix and hash.
 824          *
 825          * kvm_start_guest must still be called in real mode though, hence
 826          * the false argument.
 827          */
 828         local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
 829 
 830         __ppc64_runlatch_off();
 831         srr1 = power9_idle_stop(psscr, false);
 832         __ppc64_runlatch_on();
 833 
 834         local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
 835         /* Order setting hwthread_state vs. testing hwthread_req */
 836         smp_mb();
 837         if (local_paca->kvm_hstate.hwthread_req)
 838                 srr1 = idle_kvm_start_guest(srr1);
 839         mtmsr(MSR_KERNEL);
 840 #endif
 841 
 842         return srr1;
 843 }
 844 #endif
 845 
 846 void power9_idle_type(unsigned long stop_psscr_val,
 847                                       unsigned long stop_psscr_mask)
 848 {
 849         unsigned long psscr;
 850         unsigned long srr1;
 851 
 852         if (!prep_irq_for_idle_irqsoff())
 853                 return;
 854 
 855         psscr = mfspr(SPRN_PSSCR);
 856         psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
 857 
 858         __ppc64_runlatch_off();
 859         srr1 = power9_idle_stop(psscr, true);
 860         __ppc64_runlatch_on();
 861 
 862         fini_irq_for_idle_irqsoff();
 863 
 864         irq_set_pending_from_srr1(srr1);
 865 }
 866 
 867 /*
 868  * Used for ppc_md.power_save which needs a function with no parameters
 869  */
 870 void power9_idle(void)
 871 {
 872         power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
 873 }
 874 
 875 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 876 /*
 877  * This is used in working around bugs in thread reconfiguration
 878  * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 879  * memory and the way that XER[SO] is checkpointed.
 880  * This function forces the core into SMT4 in order by asking
 881  * all other threads not to stop, and sending a message to any
 882  * that are in a stop state.
 883  * Must be called with preemption disabled.
 884  */
 885 void pnv_power9_force_smt4_catch(void)
 886 {
 887         int cpu, cpu0, thr;
 888         int awake_threads = 1;          /* this thread is awake */
 889         int poke_threads = 0;
 890         int need_awake = threads_per_core;
 891 
 892         cpu = smp_processor_id();
 893         cpu0 = cpu & ~(threads_per_core - 1);
 894         for (thr = 0; thr < threads_per_core; ++thr) {
 895                 if (cpu != cpu0 + thr)
 896                         atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
 897         }
 898         /* order setting dont_stop vs testing requested_psscr */
 899         smp_mb();
 900         for (thr = 0; thr < threads_per_core; ++thr) {
 901                 if (!paca_ptrs[cpu0+thr]->requested_psscr)
 902                         ++awake_threads;
 903                 else
 904                         poke_threads |= (1 << thr);
 905         }
 906 
 907         /* If at least 3 threads are awake, the core is in SMT4 already */
 908         if (awake_threads < need_awake) {
 909                 /* We have to wake some threads; we'll use msgsnd */
 910                 for (thr = 0; thr < threads_per_core; ++thr) {
 911                         if (poke_threads & (1 << thr)) {
 912                                 ppc_msgsnd_sync();
 913                                 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
 914                                            paca_ptrs[cpu0+thr]->hw_cpu_id);
 915                         }
 916                 }
 917                 /* now spin until at least 3 threads are awake */
 918                 do {
 919                         for (thr = 0; thr < threads_per_core; ++thr) {
 920                                 if ((poke_threads & (1 << thr)) &&
 921                                     !paca_ptrs[cpu0+thr]->requested_psscr) {
 922                                         ++awake_threads;
 923                                         poke_threads &= ~(1 << thr);
 924                                 }
 925                         }
 926                 } while (awake_threads < need_awake);
 927         }
 928 }
 929 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
 930 
 931 void pnv_power9_force_smt4_release(void)
 932 {
 933         int cpu, cpu0, thr;
 934 
 935         cpu = smp_processor_id();
 936         cpu0 = cpu & ~(threads_per_core - 1);
 937 
 938         /* clear all the dont_stop flags */
 939         for (thr = 0; thr < threads_per_core; ++thr) {
 940                 if (cpu != cpu0 + thr)
 941                         atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
 942         }
 943 }
 944 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
 945 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 946 
 947 #ifdef CONFIG_HOTPLUG_CPU
 948 
 949 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
 950 {
 951         u64 pir = get_hard_smp_processor_id(cpu);
 952 
 953         mtspr(SPRN_LPCR, lpcr_val);
 954 
 955         /*
 956          * Program the LPCR via stop-api only if the deepest stop state
 957          * can lose hypervisor context.
 958          */
 959         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
 960                 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
 961 }
 962 
 963 /*
  964  * pnv_cpu_offline: Put the CPU into the deepest available platform
  965  * idle state when it is offlined. Called with interrupts hard
  966  * disabled and no lazy irq pending.
 967  */
 968 unsigned long pnv_cpu_offline(unsigned int cpu)
 969 {
 970         unsigned long srr1;
 971 
 972         __ppc64_runlatch_off();
 973 
 974         if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
 975                 unsigned long psscr;
 976 
 977                 psscr = mfspr(SPRN_PSSCR);
 978                 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
 979                                                 pnv_deepest_stop_psscr_val;
 980                 srr1 = power9_offline_stop(psscr);
 981         } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
 982                 srr1 = power7_offline();
 983         } else {
 984                 /* This is the fallback method. We emulate snooze */
 985                 while (!generic_check_cpu_restart(cpu)) {
 986                         HMT_low();
 987                         HMT_very_low();
 988                 }
 989                 srr1 = 0;
 990                 HMT_medium();
 991         }
 992 
 993         __ppc64_runlatch_on();
 994 
 995         return srr1;
 996 }
 997 #endif
 998 
 999 /*
1000  * Power ISA 3.0 idle initialization.
1001  *
1002  * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
1003  * Register (PSSCR) to control idle behavior.
1004  *
1005  * PSSCR layout:
1006  * ----------------------------------------------------------
1007  * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
1008  * ----------------------------------------------------------
1009  * 0      4     41   42    43   44     48    54   56    60
1010  *
1011  * PSSCR key fields:
1012  *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
1013  *      lowest power-saving state the thread entered since stop instruction was
1014  *      last executed.
1015  *
1016  *      Bit 41 - Status Disable(SD)
1017  *      0 - Shows PLS entries
1018  *      1 - PLS entries are all 0
1019  *
1020  *      Bit 42 - Enable State Loss
1021  *      0 - No state is lost irrespective of other fields
1022  *      1 - Allows state loss
1023  *
1024  *      Bit 43 - Exit Criterion
1025  *      0 - Exit from power-save mode on any interrupt
1026  *      1 - Exit from power-save mode controlled by LPCR's PECE bits
1027  *
1028  *      Bits 44:47 - Power-Saving Level Limit
1029  *      This limits the power-saving level that can be entered into.
1030  *
1031  *      Bits 60:63 - Requested Level
1032  *      Used to specify which power-saving level must be entered on executing
1033  *      stop instruction
1034  */
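
/*
 * A stop state is requested by merging the state's PSSCR value into the
 * current PSSCR under its mask, e.g. in power9_idle_type():
 *
 *	psscr = (mfspr(SPRN_PSSCR) & ~psscr_mask) | psscr_val;
 *
 * so only the fields covered by the mask (typically at least RL) are
 * overwritten. validate_psscr_val_mask() below sanity-checks the
 * val/mask pairs reported by firmware before they are used this way.
 */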
1035 
1036 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
1037 {
1038         int err = 0;
1039 
1040         /*
1041          * psscr_mask == 0xf indicates an older firmware.
1042          * Set remaining fields of psscr to the default values.
1043          * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
1044          */
1045         if (*psscr_mask == 0xf) {
1046                 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
1047                 *psscr_mask = PSSCR_HV_DEFAULT_MASK;
1048                 return err;
1049         }
1050 
1051         /*
1052          * New firmware is expected to set the psscr_val bits correctly.
1053          * Validate that the following invariants are correctly maintained by
1054          * the new firmware.
1055          * - ESL bit value matches the EC bit value.
1056          * - ESL bit is set for all the deep stop states.
1057          */
1058         if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
1059                 err = ERR_EC_ESL_MISMATCH;
1060         } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1061                 GET_PSSCR_ESL(*psscr_val) == 0) {
1062                 err = ERR_DEEP_STATE_ESL_MISMATCH;
1063         }
1064 
1065         return err;
1066 }
1067 
1068 /*
 1069  * pnv_power9_idle_init: Initializes the default idle state, the first
 1070  *                       deep idle state and the deepest idle state on
 1071  *                       ISA 3.0 CPUs.
 1072  *
 1073  * Works on the pnv_idle_states[] array populated from the
 1074  * /ibm,opal/power-mgt device node: it validates each state's PSSCR
 1075  * value/mask, records the default and deepest usable stop states, and
 1076  * computes the first SPR-loss and timebase-loss levels.
1077  */
1078 static void __init pnv_power9_idle_init(void)
1079 {
1080         u64 max_residency_ns = 0;
1081         int i;
1082 
1083         /*
1084          * pnv_deepest_stop_{val,mask} should be set to values corresponding to
1085          * the deepest stop state.
1086          *
1087          * pnv_default_stop_{val,mask} should be set to values corresponding to
1088          * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
1089          */
1090         pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1091         pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
1092         for (i = 0; i < nr_pnv_idle_states; i++) {
1093                 int err;
1094                 struct pnv_idle_states_t *state = &pnv_idle_states[i];
1095                 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
1096 
1097                 if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1098                      (pnv_first_tb_loss_level > psscr_rl))
1099                         pnv_first_tb_loss_level = psscr_rl;
1100 
1101                 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1102                      (pnv_first_spr_loss_level > psscr_rl))
1103                         pnv_first_spr_loss_level = psscr_rl;
1104 
1105                 /*
1106                  * The idle code does not deal with TB loss occurring
1107                  * in a shallower state than SPR loss, so force it to
1108                  * behave like SPRs are lost if TB is lost. POWER9 would
 1109                  * never encounter this, but a POWER8 core would if it
1110                  * implemented the stop instruction. So this is for forward
1111                  * compatibility.
1112                  */
1113                 if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1114                      (pnv_first_spr_loss_level > psscr_rl))
1115                         pnv_first_spr_loss_level = psscr_rl;
1116 
1117                 err = validate_psscr_val_mask(&state->psscr_val,
1118                                               &state->psscr_mask,
1119                                               state->flags);
1120                 if (err) {
1121                         report_invalid_psscr_val(state->psscr_val, err);
1122                         continue;
1123                 }
1124 
1125                 state->valid = true;
1126 
1127                 if (max_residency_ns < state->residency_ns) {
1128                         max_residency_ns = state->residency_ns;
1129                         pnv_deepest_stop_psscr_val = state->psscr_val;
1130                         pnv_deepest_stop_psscr_mask = state->psscr_mask;
1131                         pnv_deepest_stop_flag = state->flags;
1132                         deepest_stop_found = true;
1133                 }
1134 
1135                 if (!default_stop_found &&
1136                     (state->flags & OPAL_PM_STOP_INST_FAST)) {
1137                         pnv_default_stop_val = state->psscr_val;
1138                         pnv_default_stop_mask = state->psscr_mask;
1139                         default_stop_found = true;
1140                         WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
1141                 }
1142         }
1143 
1144         if (unlikely(!default_stop_found)) {
1145                 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
1146         } else {
1147                 ppc_md.power_save = power9_idle;
1148                 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
1149                         pnv_default_stop_val, pnv_default_stop_mask);
1150         }
1151 
1152         if (unlikely(!deepest_stop_found)) {
 1153                 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
1154         } else {
1155                 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
1156                         pnv_deepest_stop_psscr_val,
1157                         pnv_deepest_stop_psscr_mask);
1158         }
1159 
1160         pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
1161                 pnv_first_spr_loss_level);
1162 
1163         pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1164                 pnv_first_tb_loss_level);
1165 }
1166 
1167 static void __init pnv_disable_deep_states(void)
1168 {
1169         /*
1170          * The stop-api is unable to restore hypervisor
1171          * resources on wakeup from platform idle states which
1172          * lose full context. So disable such states.
1173          */
1174         supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1175         pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1176         pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1177 
1178         if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1179             (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1180                 /*
1181                  * Use the default stop state for CPU-Hotplug
1182                  * if available.
1183                  */
1184                 if (default_stop_found) {
1185                         pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1186                         pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1187                         pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1188                                 pnv_deepest_stop_psscr_val);
1189                 } else { /* Fallback to snooze loop for CPU-Hotplug */
1190                         deepest_stop_found = false;
1191                         pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1192                 }
1193         }
1194 }
1195 
1196 /*
1197  * Probe device tree for supported idle states
1198  */
1199 static void __init pnv_probe_idle_states(void)
1200 {
1201         int i;
1202 
1203         if (nr_pnv_idle_states < 0) {
1204                 pr_warn("cpuidle-powernv: no idle states found in the DT\n");
1205                 return;
1206         }
1207 
1208         if (cpu_has_feature(CPU_FTR_ARCH_300))
1209                 pnv_power9_idle_init();
1210 
1211         for (i = 0; i < nr_pnv_idle_states; i++)
1212                 supported_cpuidle_states |= pnv_idle_states[i].flags;
1213 }
1214 
1215 /*
1216  * This function parses device-tree and populates all the information
1217  * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
1218  * which is the number of cpuidle states discovered through device-tree.
1219  */
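/*
 * For reference, the properties consumed here come from the
 * /ibm,opal/power-mgt node and look roughly like the fragment below
 * (values are illustrative only; real values come from firmware):
 *
 *	ibm,cpu-idle-state-names        = "stop0", "stop2";
 *	ibm,cpu-idle-state-flags        = <0x00100000 0x00300000>;
 *	ibm,cpu-idle-state-latencies-ns = <1000 10000>;
 *	ibm,cpu-idle-state-residency-ns = <10000 100000>;
 *	ibm,cpu-idle-state-psscr        = /bits/ 64 <0x330 0x332>;
 *	ibm,cpu-idle-state-psscr-mask   = /bits/ 64 <0x3f 0x3f>;
 */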
1220 
1221 static int pnv_parse_cpuidle_dt(void)
1222 {
1223         struct device_node *np;
1224         int nr_idle_states, i;
1225         int rc = 0;
1226         u32 *temp_u32;
1227         u64 *temp_u64;
1228         const char **temp_string;
1229 
1230         np = of_find_node_by_path("/ibm,opal/power-mgt");
1231         if (!np) {
1232                 pr_warn("opal: PowerMgmt Node not found\n");
1233                 return -ENODEV;
1234         }
1235         nr_idle_states = of_property_count_u32_elems(np,
1236                                                 "ibm,cpu-idle-state-flags");
1237 
1238         pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
1239                                   GFP_KERNEL);
1240         temp_u32 = kcalloc(nr_idle_states, sizeof(u32),  GFP_KERNEL);
1241         temp_u64 = kcalloc(nr_idle_states, sizeof(u64),  GFP_KERNEL);
1242         temp_string = kcalloc(nr_idle_states, sizeof(char *),  GFP_KERNEL);
1243 
1244         if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
1245                 pr_err("Could not allocate memory for dt parsing\n");
1246                 rc = -ENOMEM;
1247                 goto out;
1248         }
1249 
1250         /* Read flags */
1251         if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
1252                                        temp_u32, nr_idle_states)) {
1253                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
1254                 rc = -EINVAL;
1255                 goto out;
1256         }
1257         for (i = 0; i < nr_idle_states; i++)
1258                 pnv_idle_states[i].flags = temp_u32[i];
1259 
1260         /* Read latencies */
1261         if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
1262                                        temp_u32, nr_idle_states)) {
1263                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
1264                 rc = -EINVAL;
1265                 goto out;
1266         }
1267         for (i = 0; i < nr_idle_states; i++)
1268                 pnv_idle_states[i].latency_ns = temp_u32[i];
1269 
1270         /* Read residencies */
1271         if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
1272                                        temp_u32, nr_idle_states)) {
 1273                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
1274                 rc = -EINVAL;
1275                 goto out;
1276         }
1277         for (i = 0; i < nr_idle_states; i++)
1278                 pnv_idle_states[i].residency_ns = temp_u32[i];
1279 
1280         /* For power9 */
1281         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1282                 /* Read pm_crtl_val */
1283                 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
1284                                                temp_u64, nr_idle_states)) {
1285                         pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
1286                         rc = -EINVAL;
1287                         goto out;
1288                 }
1289                 for (i = 0; i < nr_idle_states; i++)
1290                         pnv_idle_states[i].psscr_val = temp_u64[i];
1291 
1292                 /* Read pm_crtl_mask */
1293                 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
1294                                                temp_u64, nr_idle_states)) {
1295                         pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
1296                         rc = -EINVAL;
1297                         goto out;
1298                 }
1299                 for (i = 0; i < nr_idle_states; i++)
1300                         pnv_idle_states[i].psscr_mask = temp_u64[i];
1301         }
1302 
1303         /*
1304          * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
1305          * ibm,cpu-idle-state-pmicr-val were never used and there is no
1306          * plan to use it in near future. Hence, not parsing these properties
1307          */
1308 
1309         if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
1310                                           temp_string, nr_idle_states) < 0) {
1311                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
1312                 rc = -EINVAL;
1313                 goto out;
1314         }
1315         for (i = 0; i < nr_idle_states; i++)
1316                 strlcpy(pnv_idle_states[i].name, temp_string[i],
1317                         PNV_IDLE_NAME_LEN);
1318         nr_pnv_idle_states = nr_idle_states;
1319         rc = 0;
1320 out:
1321         kfree(temp_u32);
1322         kfree(temp_u64);
1323         kfree(temp_string);
1324         return rc;
1325 }
1326 
1327 static int __init pnv_init_idle_states(void)
1328 {
1329         int cpu;
1330         int rc = 0;
1331 
1332         /* Set up PACA fields */
1333         for_each_present_cpu(cpu) {
1334                 struct paca_struct *p = paca_ptrs[cpu];
1335 
1336                 p->idle_state = 0;
1337                 if (cpu == cpu_first_thread_sibling(cpu))
1338                         p->idle_state = (1 << threads_per_core) - 1;
1339 
1340                 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1341                         /* P7/P8 nap */
1342                         p->thread_idle_state = PNV_THREAD_RUNNING;
1343                 } else {
1344                         /* P9 stop */
1345 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1346                         p->requested_psscr = 0;
1347                         atomic_set(&p->dont_stop, 0);
1348 #endif
1349                 }
1350         }
1351 
1352         /* In case we error out nr_pnv_idle_states will be zero */
1353         nr_pnv_idle_states = 0;
1354         supported_cpuidle_states = 0;
1355 
1356         if (cpuidle_disable != IDLE_NO_OVERRIDE)
1357                 goto out;
1358         rc = pnv_parse_cpuidle_dt();
1359         if (rc)
1360                 return rc;
1361         pnv_probe_idle_states();
1362 
1363         if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1364                 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
1365                         power7_fastsleep_workaround_entry = false;
1366                         power7_fastsleep_workaround_exit = false;
1367                 } else {
1368                         /*
1369                          * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
1370                          * workaround is needed to use fastsleep. Provide sysfs
1371                          * control to choose how this workaround has to be
1372                          * applied.
1373                          */
1374                         device_create_file(cpu_subsys.dev_root,
1375                                 &dev_attr_fastsleep_workaround_applyonce);
1376                 }
1377 
1378                 update_subcore_sibling_mask();
1379 
1380                 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1381                         ppc_md.power_save = power7_idle;
1382                         power7_offline_type = PNV_THREAD_NAP;
1383                 }
1384 
1385                 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1386                            (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1387                         power7_offline_type = PNV_THREAD_WINKLE;
1388                 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1389                            (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1390                         power7_offline_type = PNV_THREAD_SLEEP;
1391         }
1392 
1393         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
1394                 if (pnv_save_sprs_for_deep_states())
1395                         pnv_disable_deep_states();
1396         }
1397 
1398 out:
1399         return 0;
1400 }
1401 machine_subsys_initcall(powernv, pnv_init_idle_states);
