root/drivers/idle/intel_idle.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. intel_idle
  2. intel_idle_s2idle
  3. __setup_broadcast_timer
  4. auto_demotion_disable
  5. c1e_promotion_disable
  6. intel_idle_probe
  7. intel_idle_cpuidle_devices_uninit
  8. ivt_idle_state_table_update
  9. irtl_2_usec
  10. bxt_idle_state_table_update
  11. sklh_idle_state_table_update
  12. intel_idle_state_table_update
  13. intel_idle_cpuidle_driver_init
  14. intel_idle_cpu_init
  15. intel_idle_cpu_online
  16. intel_idle_init

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * intel_idle.c - native hardware idle loop for modern Intel processors
   4  *
   5  * Copyright (c) 2013, Intel Corporation.
   6  * Len Brown <len.brown@intel.com>
   7  */
   8 
   9 /*
  10  * intel_idle is a cpuidle driver that loads on specific Intel processors
  11  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
  12  * make Linux more efficient on these processors, as intel_idle knows
  13  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
  14  */
  15 
  16 /*
  17  * Design Assumptions
  18  *
  19  * All CPUs have same idle states as boot CPU
  20  *
  21  * Chipset BM_STS (bus master status) bit is a NOP
  22  *      for preventing entry into deep C-states
  23  */
  24 
  25 /*
  26  * Known limitations
  27  *
  28  * The driver currently initializes for_each_online_cpu() upon modprobe.
  29  * It is unaware of subsequent processors hot-added to the system.
  30  * This means that if you boot with maxcpus=n and later online
  31  * processors above n, those processors will use C1 only.
  32  *
  33  * ACPI has a .suspend hack to turn off deep C-states during suspend
  34  * to avoid complications with the lapic timer workaround.
  35  * Have not seen issues with suspend, but may need same workaround here.
  36  *
  37  */
  38 
  39 /* un-comment DEBUG to enable pr_debug() statements */
  40 #define DEBUG
  41 
  42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  43 
  44 #include <linux/kernel.h>
  45 #include <linux/cpuidle.h>
  46 #include <linux/tick.h>
  47 #include <trace/events/power.h>
  48 #include <linux/sched.h>
  49 #include <linux/notifier.h>
  50 #include <linux/cpu.h>
  51 #include <linux/moduleparam.h>
  52 #include <asm/cpu_device_id.h>
  53 #include <asm/intel-family.h>
  54 #include <asm/mwait.h>
  55 #include <asm/msr.h>
  56 
  57 #define INTEL_IDLE_VERSION "0.4.1"
  58 
  59 static struct cpuidle_driver intel_idle_driver = {
  60         .name = "intel_idle",
  61         .owner = THIS_MODULE,
  62 };
  63 /* intel_idle.max_cstate=0 disables driver */
  64 static int max_cstate = CPUIDLE_STATE_MAX - 1;
  65 
  66 static unsigned int mwait_substates;
  67 
  68 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
  69 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
  70 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
  71 
  72 struct idle_cpu {
  73         struct cpuidle_state *state_table;
  74 
  75         /*
  76          * Hardware C-state auto-demotion may not always be optimal.
  77          * Indicate which enable bits to clear here.
  78          */
  79         unsigned long auto_demotion_disable_flags;
  80         bool byt_auto_demotion_disable_flag;
  81         bool disable_promotion_to_c1e;
  82 };
  83 
  84 static const struct idle_cpu *icpu;
  85 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
  86 static int intel_idle(struct cpuidle_device *dev,
  87                         struct cpuidle_driver *drv, int index);
  88 static void intel_idle_s2idle(struct cpuidle_device *dev,
  89                               struct cpuidle_driver *drv, int index);
  90 static struct cpuidle_state *cpuidle_state_table;
  91 
  92 /*
  93  * Set this flag for states where the HW flushes the TLB for us
  94  * and so we don't need cross-calls to keep it consistent.
  95  * If this flag is set, SW flushes the TLB, so even if the
  96  * HW doesn't do the flushing, this flag is safe to use.
  97  */
  98 #define CPUIDLE_FLAG_TLB_FLUSHED        0x10000
  99 
 100 /*
 101  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 102  * the C-state (top nibble) and sub-state (bottom nibble)
 103  * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 104  *
 105  * We store the hint at the top of our "flags" for each state.
 106  */
 107 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
 108 #define MWAIT2flg(eax) ((eax & 0xFF) << 24)
 109 
 110 /*
 111  * States are indexed by the cstate number,
 112  * which is also the index into the MWAIT hint array.
 113  * Thus C0 is a dummy.
 114  */
 115 static struct cpuidle_state nehalem_cstates[] = {
 116         {
 117                 .name = "C1",
 118                 .desc = "MWAIT 0x00",
 119                 .flags = MWAIT2flg(0x00),
 120                 .exit_latency = 3,
 121                 .target_residency = 6,
 122                 .enter = &intel_idle,
 123                 .enter_s2idle = intel_idle_s2idle, },
 124         {
 125                 .name = "C1E",
 126                 .desc = "MWAIT 0x01",
 127                 .flags = MWAIT2flg(0x01),
 128                 .exit_latency = 10,
 129                 .target_residency = 20,
 130                 .enter = &intel_idle,
 131                 .enter_s2idle = intel_idle_s2idle, },
 132         {
 133                 .name = "C3",
 134                 .desc = "MWAIT 0x10",
 135                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 136                 .exit_latency = 20,
 137                 .target_residency = 80,
 138                 .enter = &intel_idle,
 139                 .enter_s2idle = intel_idle_s2idle, },
 140         {
 141                 .name = "C6",
 142                 .desc = "MWAIT 0x20",
 143                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 144                 .exit_latency = 200,
 145                 .target_residency = 800,
 146                 .enter = &intel_idle,
 147                 .enter_s2idle = intel_idle_s2idle, },
 148         {
 149                 .enter = NULL }
 150 };
 151 
 152 static struct cpuidle_state snb_cstates[] = {
 153         {
 154                 .name = "C1",
 155                 .desc = "MWAIT 0x00",
 156                 .flags = MWAIT2flg(0x00),
 157                 .exit_latency = 2,
 158                 .target_residency = 2,
 159                 .enter = &intel_idle,
 160                 .enter_s2idle = intel_idle_s2idle, },
 161         {
 162                 .name = "C1E",
 163                 .desc = "MWAIT 0x01",
 164                 .flags = MWAIT2flg(0x01),
 165                 .exit_latency = 10,
 166                 .target_residency = 20,
 167                 .enter = &intel_idle,
 168                 .enter_s2idle = intel_idle_s2idle, },
 169         {
 170                 .name = "C3",
 171                 .desc = "MWAIT 0x10",
 172                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 173                 .exit_latency = 80,
 174                 .target_residency = 211,
 175                 .enter = &intel_idle,
 176                 .enter_s2idle = intel_idle_s2idle, },
 177         {
 178                 .name = "C6",
 179                 .desc = "MWAIT 0x20",
 180                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 181                 .exit_latency = 104,
 182                 .target_residency = 345,
 183                 .enter = &intel_idle,
 184                 .enter_s2idle = intel_idle_s2idle, },
 185         {
 186                 .name = "C7",
 187                 .desc = "MWAIT 0x30",
 188                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 189                 .exit_latency = 109,
 190                 .target_residency = 345,
 191                 .enter = &intel_idle,
 192                 .enter_s2idle = intel_idle_s2idle, },
 193         {
 194                 .enter = NULL }
 195 };
 196 
 197 static struct cpuidle_state byt_cstates[] = {
 198         {
 199                 .name = "C1",
 200                 .desc = "MWAIT 0x00",
 201                 .flags = MWAIT2flg(0x00),
 202                 .exit_latency = 1,
 203                 .target_residency = 1,
 204                 .enter = &intel_idle,
 205                 .enter_s2idle = intel_idle_s2idle, },
 206         {
 207                 .name = "C6N",
 208                 .desc = "MWAIT 0x58",
 209                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
 210                 .exit_latency = 300,
 211                 .target_residency = 275,
 212                 .enter = &intel_idle,
 213                 .enter_s2idle = intel_idle_s2idle, },
 214         {
 215                 .name = "C6S",
 216                 .desc = "MWAIT 0x52",
 217                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 218                 .exit_latency = 500,
 219                 .target_residency = 560,
 220                 .enter = &intel_idle,
 221                 .enter_s2idle = intel_idle_s2idle, },
 222         {
 223                 .name = "C7",
 224                 .desc = "MWAIT 0x60",
 225                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 226                 .exit_latency = 1200,
 227                 .target_residency = 4000,
 228                 .enter = &intel_idle,
 229                 .enter_s2idle = intel_idle_s2idle, },
 230         {
 231                 .name = "C7S",
 232                 .desc = "MWAIT 0x64",
 233                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 234                 .exit_latency = 10000,
 235                 .target_residency = 20000,
 236                 .enter = &intel_idle,
 237                 .enter_s2idle = intel_idle_s2idle, },
 238         {
 239                 .enter = NULL }
 240 };
 241 
 242 static struct cpuidle_state cht_cstates[] = {
 243         {
 244                 .name = "C1",
 245                 .desc = "MWAIT 0x00",
 246                 .flags = MWAIT2flg(0x00),
 247                 .exit_latency = 1,
 248                 .target_residency = 1,
 249                 .enter = &intel_idle,
 250                 .enter_s2idle = intel_idle_s2idle, },
 251         {
 252                 .name = "C6N",
 253                 .desc = "MWAIT 0x58",
 254                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
 255                 .exit_latency = 80,
 256                 .target_residency = 275,
 257                 .enter = &intel_idle,
 258                 .enter_s2idle = intel_idle_s2idle, },
 259         {
 260                 .name = "C6S",
 261                 .desc = "MWAIT 0x52",
 262                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 263                 .exit_latency = 200,
 264                 .target_residency = 560,
 265                 .enter = &intel_idle,
 266                 .enter_s2idle = intel_idle_s2idle, },
 267         {
 268                 .name = "C7",
 269                 .desc = "MWAIT 0x60",
 270                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 271                 .exit_latency = 1200,
 272                 .target_residency = 4000,
 273                 .enter = &intel_idle,
 274                 .enter_s2idle = intel_idle_s2idle, },
 275         {
 276                 .name = "C7S",
 277                 .desc = "MWAIT 0x64",
 278                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 279                 .exit_latency = 10000,
 280                 .target_residency = 20000,
 281                 .enter = &intel_idle,
 282                 .enter_s2idle = intel_idle_s2idle, },
 283         {
 284                 .enter = NULL }
 285 };
 286 
 287 static struct cpuidle_state ivb_cstates[] = {
 288         {
 289                 .name = "C1",
 290                 .desc = "MWAIT 0x00",
 291                 .flags = MWAIT2flg(0x00),
 292                 .exit_latency = 1,
 293                 .target_residency = 1,
 294                 .enter = &intel_idle,
 295                 .enter_s2idle = intel_idle_s2idle, },
 296         {
 297                 .name = "C1E",
 298                 .desc = "MWAIT 0x01",
 299                 .flags = MWAIT2flg(0x01),
 300                 .exit_latency = 10,
 301                 .target_residency = 20,
 302                 .enter = &intel_idle,
 303                 .enter_s2idle = intel_idle_s2idle, },
 304         {
 305                 .name = "C3",
 306                 .desc = "MWAIT 0x10",
 307                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 308                 .exit_latency = 59,
 309                 .target_residency = 156,
 310                 .enter = &intel_idle,
 311                 .enter_s2idle = intel_idle_s2idle, },
 312         {
 313                 .name = "C6",
 314                 .desc = "MWAIT 0x20",
 315                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 316                 .exit_latency = 80,
 317                 .target_residency = 300,
 318                 .enter = &intel_idle,
 319                 .enter_s2idle = intel_idle_s2idle, },
 320         {
 321                 .name = "C7",
 322                 .desc = "MWAIT 0x30",
 323                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 324                 .exit_latency = 87,
 325                 .target_residency = 300,
 326                 .enter = &intel_idle,
 327                 .enter_s2idle = intel_idle_s2idle, },
 328         {
 329                 .enter = NULL }
 330 };
 331 
 332 static struct cpuidle_state ivt_cstates[] = {
 333         {
 334                 .name = "C1",
 335                 .desc = "MWAIT 0x00",
 336                 .flags = MWAIT2flg(0x00),
 337                 .exit_latency = 1,
 338                 .target_residency = 1,
 339                 .enter = &intel_idle,
 340                 .enter_s2idle = intel_idle_s2idle, },
 341         {
 342                 .name = "C1E",
 343                 .desc = "MWAIT 0x01",
 344                 .flags = MWAIT2flg(0x01),
 345                 .exit_latency = 10,
 346                 .target_residency = 80,
 347                 .enter = &intel_idle,
 348                 .enter_s2idle = intel_idle_s2idle, },
 349         {
 350                 .name = "C3",
 351                 .desc = "MWAIT 0x10",
 352                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 353                 .exit_latency = 59,
 354                 .target_residency = 156,
 355                 .enter = &intel_idle,
 356                 .enter_s2idle = intel_idle_s2idle, },
 357         {
 358                 .name = "C6",
 359                 .desc = "MWAIT 0x20",
 360                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 361                 .exit_latency = 82,
 362                 .target_residency = 300,
 363                 .enter = &intel_idle,
 364                 .enter_s2idle = intel_idle_s2idle, },
 365         {
 366                 .enter = NULL }
 367 };
 368 
 369 static struct cpuidle_state ivt_cstates_4s[] = {
 370         {
 371                 .name = "C1",
 372                 .desc = "MWAIT 0x00",
 373                 .flags = MWAIT2flg(0x00),
 374                 .exit_latency = 1,
 375                 .target_residency = 1,
 376                 .enter = &intel_idle,
 377                 .enter_s2idle = intel_idle_s2idle, },
 378         {
 379                 .name = "C1E",
 380                 .desc = "MWAIT 0x01",
 381                 .flags = MWAIT2flg(0x01),
 382                 .exit_latency = 10,
 383                 .target_residency = 250,
 384                 .enter = &intel_idle,
 385                 .enter_s2idle = intel_idle_s2idle, },
 386         {
 387                 .name = "C3",
 388                 .desc = "MWAIT 0x10",
 389                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 390                 .exit_latency = 59,
 391                 .target_residency = 300,
 392                 .enter = &intel_idle,
 393                 .enter_s2idle = intel_idle_s2idle, },
 394         {
 395                 .name = "C6",
 396                 .desc = "MWAIT 0x20",
 397                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 398                 .exit_latency = 84,
 399                 .target_residency = 400,
 400                 .enter = &intel_idle,
 401                 .enter_s2idle = intel_idle_s2idle, },
 402         {
 403                 .enter = NULL }
 404 };
 405 
 406 static struct cpuidle_state ivt_cstates_8s[] = {
 407         {
 408                 .name = "C1",
 409                 .desc = "MWAIT 0x00",
 410                 .flags = MWAIT2flg(0x00),
 411                 .exit_latency = 1,
 412                 .target_residency = 1,
 413                 .enter = &intel_idle,
 414                 .enter_s2idle = intel_idle_s2idle, },
 415         {
 416                 .name = "C1E",
 417                 .desc = "MWAIT 0x01",
 418                 .flags = MWAIT2flg(0x01),
 419                 .exit_latency = 10,
 420                 .target_residency = 500,
 421                 .enter = &intel_idle,
 422                 .enter_s2idle = intel_idle_s2idle, },
 423         {
 424                 .name = "C3",
 425                 .desc = "MWAIT 0x10",
 426                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 427                 .exit_latency = 59,
 428                 .target_residency = 600,
 429                 .enter = &intel_idle,
 430                 .enter_s2idle = intel_idle_s2idle, },
 431         {
 432                 .name = "C6",
 433                 .desc = "MWAIT 0x20",
 434                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 435                 .exit_latency = 88,
 436                 .target_residency = 700,
 437                 .enter = &intel_idle,
 438                 .enter_s2idle = intel_idle_s2idle, },
 439         {
 440                 .enter = NULL }
 441 };
 442 
 443 static struct cpuidle_state hsw_cstates[] = {
 444         {
 445                 .name = "C1",
 446                 .desc = "MWAIT 0x00",
 447                 .flags = MWAIT2flg(0x00),
 448                 .exit_latency = 2,
 449                 .target_residency = 2,
 450                 .enter = &intel_idle,
 451                 .enter_s2idle = intel_idle_s2idle, },
 452         {
 453                 .name = "C1E",
 454                 .desc = "MWAIT 0x01",
 455                 .flags = MWAIT2flg(0x01),
 456                 .exit_latency = 10,
 457                 .target_residency = 20,
 458                 .enter = &intel_idle,
 459                 .enter_s2idle = intel_idle_s2idle, },
 460         {
 461                 .name = "C3",
 462                 .desc = "MWAIT 0x10",
 463                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 464                 .exit_latency = 33,
 465                 .target_residency = 100,
 466                 .enter = &intel_idle,
 467                 .enter_s2idle = intel_idle_s2idle, },
 468         {
 469                 .name = "C6",
 470                 .desc = "MWAIT 0x20",
 471                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 472                 .exit_latency = 133,
 473                 .target_residency = 400,
 474                 .enter = &intel_idle,
 475                 .enter_s2idle = intel_idle_s2idle, },
 476         {
 477                 .name = "C7s",
 478                 .desc = "MWAIT 0x32",
 479                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 480                 .exit_latency = 166,
 481                 .target_residency = 500,
 482                 .enter = &intel_idle,
 483                 .enter_s2idle = intel_idle_s2idle, },
 484         {
 485                 .name = "C8",
 486                 .desc = "MWAIT 0x40",
 487                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 488                 .exit_latency = 300,
 489                 .target_residency = 900,
 490                 .enter = &intel_idle,
 491                 .enter_s2idle = intel_idle_s2idle, },
 492         {
 493                 .name = "C9",
 494                 .desc = "MWAIT 0x50",
 495                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 496                 .exit_latency = 600,
 497                 .target_residency = 1800,
 498                 .enter = &intel_idle,
 499                 .enter_s2idle = intel_idle_s2idle, },
 500         {
 501                 .name = "C10",
 502                 .desc = "MWAIT 0x60",
 503                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 504                 .exit_latency = 2600,
 505                 .target_residency = 7700,
 506                 .enter = &intel_idle,
 507                 .enter_s2idle = intel_idle_s2idle, },
 508         {
 509                 .enter = NULL }
 510 };
 511 static struct cpuidle_state bdw_cstates[] = {
 512         {
 513                 .name = "C1",
 514                 .desc = "MWAIT 0x00",
 515                 .flags = MWAIT2flg(0x00),
 516                 .exit_latency = 2,
 517                 .target_residency = 2,
 518                 .enter = &intel_idle,
 519                 .enter_s2idle = intel_idle_s2idle, },
 520         {
 521                 .name = "C1E",
 522                 .desc = "MWAIT 0x01",
 523                 .flags = MWAIT2flg(0x01),
 524                 .exit_latency = 10,
 525                 .target_residency = 20,
 526                 .enter = &intel_idle,
 527                 .enter_s2idle = intel_idle_s2idle, },
 528         {
 529                 .name = "C3",
 530                 .desc = "MWAIT 0x10",
 531                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 532                 .exit_latency = 40,
 533                 .target_residency = 100,
 534                 .enter = &intel_idle,
 535                 .enter_s2idle = intel_idle_s2idle, },
 536         {
 537                 .name = "C6",
 538                 .desc = "MWAIT 0x20",
 539                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 540                 .exit_latency = 133,
 541                 .target_residency = 400,
 542                 .enter = &intel_idle,
 543                 .enter_s2idle = intel_idle_s2idle, },
 544         {
 545                 .name = "C7s",
 546                 .desc = "MWAIT 0x32",
 547                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 548                 .exit_latency = 166,
 549                 .target_residency = 500,
 550                 .enter = &intel_idle,
 551                 .enter_s2idle = intel_idle_s2idle, },
 552         {
 553                 .name = "C8",
 554                 .desc = "MWAIT 0x40",
 555                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 556                 .exit_latency = 300,
 557                 .target_residency = 900,
 558                 .enter = &intel_idle,
 559                 .enter_s2idle = intel_idle_s2idle, },
 560         {
 561                 .name = "C9",
 562                 .desc = "MWAIT 0x50",
 563                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 564                 .exit_latency = 600,
 565                 .target_residency = 1800,
 566                 .enter = &intel_idle,
 567                 .enter_s2idle = intel_idle_s2idle, },
 568         {
 569                 .name = "C10",
 570                 .desc = "MWAIT 0x60",
 571                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 572                 .exit_latency = 2600,
 573                 .target_residency = 7700,
 574                 .enter = &intel_idle,
 575                 .enter_s2idle = intel_idle_s2idle, },
 576         {
 577                 .enter = NULL }
 578 };
 579 
 580 static struct cpuidle_state skl_cstates[] = {
 581         {
 582                 .name = "C1",
 583                 .desc = "MWAIT 0x00",
 584                 .flags = MWAIT2flg(0x00),
 585                 .exit_latency = 2,
 586                 .target_residency = 2,
 587                 .enter = &intel_idle,
 588                 .enter_s2idle = intel_idle_s2idle, },
 589         {
 590                 .name = "C1E",
 591                 .desc = "MWAIT 0x01",
 592                 .flags = MWAIT2flg(0x01),
 593                 .exit_latency = 10,
 594                 .target_residency = 20,
 595                 .enter = &intel_idle,
 596                 .enter_s2idle = intel_idle_s2idle, },
 597         {
 598                 .name = "C3",
 599                 .desc = "MWAIT 0x10",
 600                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 601                 .exit_latency = 70,
 602                 .target_residency = 100,
 603                 .enter = &intel_idle,
 604                 .enter_s2idle = intel_idle_s2idle, },
 605         {
 606                 .name = "C6",
 607                 .desc = "MWAIT 0x20",
 608                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 609                 .exit_latency = 85,
 610                 .target_residency = 200,
 611                 .enter = &intel_idle,
 612                 .enter_s2idle = intel_idle_s2idle, },
 613         {
 614                 .name = "C7s",
 615                 .desc = "MWAIT 0x33",
 616                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
 617                 .exit_latency = 124,
 618                 .target_residency = 800,
 619                 .enter = &intel_idle,
 620                 .enter_s2idle = intel_idle_s2idle, },
 621         {
 622                 .name = "C8",
 623                 .desc = "MWAIT 0x40",
 624                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 625                 .exit_latency = 200,
 626                 .target_residency = 800,
 627                 .enter = &intel_idle,
 628                 .enter_s2idle = intel_idle_s2idle, },
 629         {
 630                 .name = "C9",
 631                 .desc = "MWAIT 0x50",
 632                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 633                 .exit_latency = 480,
 634                 .target_residency = 5000,
 635                 .enter = &intel_idle,
 636                 .enter_s2idle = intel_idle_s2idle, },
 637         {
 638                 .name = "C10",
 639                 .desc = "MWAIT 0x60",
 640                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 641                 .exit_latency = 890,
 642                 .target_residency = 5000,
 643                 .enter = &intel_idle,
 644                 .enter_s2idle = intel_idle_s2idle, },
 645         {
 646                 .enter = NULL }
 647 };
 648 
 649 static struct cpuidle_state skx_cstates[] = {
 650         {
 651                 .name = "C1",
 652                 .desc = "MWAIT 0x00",
 653                 .flags = MWAIT2flg(0x00),
 654                 .exit_latency = 2,
 655                 .target_residency = 2,
 656                 .enter = &intel_idle,
 657                 .enter_s2idle = intel_idle_s2idle, },
 658         {
 659                 .name = "C1E",
 660                 .desc = "MWAIT 0x01",
 661                 .flags = MWAIT2flg(0x01),
 662                 .exit_latency = 10,
 663                 .target_residency = 20,
 664                 .enter = &intel_idle,
 665                 .enter_s2idle = intel_idle_s2idle, },
 666         {
 667                 .name = "C6",
 668                 .desc = "MWAIT 0x20",
 669                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 670                 .exit_latency = 133,
 671                 .target_residency = 600,
 672                 .enter = &intel_idle,
 673                 .enter_s2idle = intel_idle_s2idle, },
 674         {
 675                 .enter = NULL }
 676 };
 677 
 678 static struct cpuidle_state atom_cstates[] = {
 679         {
 680                 .name = "C1E",
 681                 .desc = "MWAIT 0x00",
 682                 .flags = MWAIT2flg(0x00),
 683                 .exit_latency = 10,
 684                 .target_residency = 20,
 685                 .enter = &intel_idle,
 686                 .enter_s2idle = intel_idle_s2idle, },
 687         {
 688                 .name = "C2",
 689                 .desc = "MWAIT 0x10",
 690                 .flags = MWAIT2flg(0x10),
 691                 .exit_latency = 20,
 692                 .target_residency = 80,
 693                 .enter = &intel_idle,
 694                 .enter_s2idle = intel_idle_s2idle, },
 695         {
 696                 .name = "C4",
 697                 .desc = "MWAIT 0x30",
 698                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 699                 .exit_latency = 100,
 700                 .target_residency = 400,
 701                 .enter = &intel_idle,
 702                 .enter_s2idle = intel_idle_s2idle, },
 703         {
 704                 .name = "C6",
 705                 .desc = "MWAIT 0x52",
 706                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 707                 .exit_latency = 140,
 708                 .target_residency = 560,
 709                 .enter = &intel_idle,
 710                 .enter_s2idle = intel_idle_s2idle, },
 711         {
 712                 .enter = NULL }
 713 };
 714 static struct cpuidle_state tangier_cstates[] = {
 715         {
 716                 .name = "C1",
 717                 .desc = "MWAIT 0x00",
 718                 .flags = MWAIT2flg(0x00),
 719                 .exit_latency = 1,
 720                 .target_residency = 4,
 721                 .enter = &intel_idle,
 722                 .enter_s2idle = intel_idle_s2idle, },
 723         {
 724                 .name = "C4",
 725                 .desc = "MWAIT 0x30",
 726                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 727                 .exit_latency = 100,
 728                 .target_residency = 400,
 729                 .enter = &intel_idle,
 730                 .enter_s2idle = intel_idle_s2idle, },
 731         {
 732                 .name = "C6",
 733                 .desc = "MWAIT 0x52",
 734                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 735                 .exit_latency = 140,
 736                 .target_residency = 560,
 737                 .enter = &intel_idle,
 738                 .enter_s2idle = intel_idle_s2idle, },
 739         {
 740                 .name = "C7",
 741                 .desc = "MWAIT 0x60",
 742                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 743                 .exit_latency = 1200,
 744                 .target_residency = 4000,
 745                 .enter = &intel_idle,
 746                 .enter_s2idle = intel_idle_s2idle, },
 747         {
 748                 .name = "C9",
 749                 .desc = "MWAIT 0x64",
 750                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 751                 .exit_latency = 10000,
 752                 .target_residency = 20000,
 753                 .enter = &intel_idle,
 754                 .enter_s2idle = intel_idle_s2idle, },
 755         {
 756                 .enter = NULL }
 757 };
 758 static struct cpuidle_state avn_cstates[] = {
 759         {
 760                 .name = "C1",
 761                 .desc = "MWAIT 0x00",
 762                 .flags = MWAIT2flg(0x00),
 763                 .exit_latency = 2,
 764                 .target_residency = 2,
 765                 .enter = &intel_idle,
 766                 .enter_s2idle = intel_idle_s2idle, },
 767         {
 768                 .name = "C6",
 769                 .desc = "MWAIT 0x51",
 770                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
 771                 .exit_latency = 15,
 772                 .target_residency = 45,
 773                 .enter = &intel_idle,
 774                 .enter_s2idle = intel_idle_s2idle, },
 775         {
 776                 .enter = NULL }
 777 };
 778 static struct cpuidle_state knl_cstates[] = {
 779         {
 780                 .name = "C1",
 781                 .desc = "MWAIT 0x00",
 782                 .flags = MWAIT2flg(0x00),
 783                 .exit_latency = 1,
 784                 .target_residency = 2,
 785                 .enter = &intel_idle,
 786                 .enter_s2idle = intel_idle_s2idle },
 787         {
 788                 .name = "C6",
 789                 .desc = "MWAIT 0x10",
 790                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 791                 .exit_latency = 120,
 792                 .target_residency = 500,
 793                 .enter = &intel_idle,
 794                 .enter_s2idle = intel_idle_s2idle },
 795         {
 796                 .enter = NULL }
 797 };
 798 
 799 static struct cpuidle_state bxt_cstates[] = {
 800         {
 801                 .name = "C1",
 802                 .desc = "MWAIT 0x00",
 803                 .flags = MWAIT2flg(0x00),
 804                 .exit_latency = 2,
 805                 .target_residency = 2,
 806                 .enter = &intel_idle,
 807                 .enter_s2idle = intel_idle_s2idle, },
 808         {
 809                 .name = "C1E",
 810                 .desc = "MWAIT 0x01",
 811                 .flags = MWAIT2flg(0x01),
 812                 .exit_latency = 10,
 813                 .target_residency = 20,
 814                 .enter = &intel_idle,
 815                 .enter_s2idle = intel_idle_s2idle, },
 816         {
 817                 .name = "C6",
 818                 .desc = "MWAIT 0x20",
 819                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 820                 .exit_latency = 133,
 821                 .target_residency = 133,
 822                 .enter = &intel_idle,
 823                 .enter_s2idle = intel_idle_s2idle, },
 824         {
 825                 .name = "C7s",
 826                 .desc = "MWAIT 0x31",
 827                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
 828                 .exit_latency = 155,
 829                 .target_residency = 155,
 830                 .enter = &intel_idle,
 831                 .enter_s2idle = intel_idle_s2idle, },
 832         {
 833                 .name = "C8",
 834                 .desc = "MWAIT 0x40",
 835                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 836                 .exit_latency = 1000,
 837                 .target_residency = 1000,
 838                 .enter = &intel_idle,
 839                 .enter_s2idle = intel_idle_s2idle, },
 840         {
 841                 .name = "C9",
 842                 .desc = "MWAIT 0x50",
 843                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 844                 .exit_latency = 2000,
 845                 .target_residency = 2000,
 846                 .enter = &intel_idle,
 847                 .enter_s2idle = intel_idle_s2idle, },
 848         {
 849                 .name = "C10",
 850                 .desc = "MWAIT 0x60",
 851                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 852                 .exit_latency = 10000,
 853                 .target_residency = 10000,
 854                 .enter = &intel_idle,
 855                 .enter_s2idle = intel_idle_s2idle, },
 856         {
 857                 .enter = NULL }
 858 };
 859 
 860 static struct cpuidle_state dnv_cstates[] = {
 861         {
 862                 .name = "C1",
 863                 .desc = "MWAIT 0x00",
 864                 .flags = MWAIT2flg(0x00),
 865                 .exit_latency = 2,
 866                 .target_residency = 2,
 867                 .enter = &intel_idle,
 868                 .enter_s2idle = intel_idle_s2idle, },
 869         {
 870                 .name = "C1E",
 871                 .desc = "MWAIT 0x01",
 872                 .flags = MWAIT2flg(0x01),
 873                 .exit_latency = 10,
 874                 .target_residency = 20,
 875                 .enter = &intel_idle,
 876                 .enter_s2idle = intel_idle_s2idle, },
 877         {
 878                 .name = "C6",
 879                 .desc = "MWAIT 0x20",
 880                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 881                 .exit_latency = 50,
 882                 .target_residency = 500,
 883                 .enter = &intel_idle,
 884                 .enter_s2idle = intel_idle_s2idle, },
 885         {
 886                 .enter = NULL }
 887 };
 888 
 889 /**
 890  * intel_idle
 891  * @dev: cpuidle_device
 892  * @drv: cpuidle driver
 893  * @index: index of cpuidle state
 894  *
 895  * Must be called under local_irq_disable().
 896  */
 897 static __cpuidle int intel_idle(struct cpuidle_device *dev,
 898                                 struct cpuidle_driver *drv, int index)
 899 {
 900         unsigned long ecx = 1; /* break on interrupt flag */
 901         struct cpuidle_state *state = &drv->states[index];
 902         unsigned long eax = flg2MWAIT(state->flags);
 903         unsigned int cstate;
 904         bool uninitialized_var(tick);
 905         int cpu = smp_processor_id();
 906 
 907         /*
 908          * leave_mm() to avoid costly and often unnecessary wakeups
 909          * for flushing the user TLB's associated with the active mm.
 910          */
 911         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
 912                 leave_mm(cpu);
 913 
 914         if (!static_cpu_has(X86_FEATURE_ARAT)) {
 915                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
 916                                 MWAIT_CSTATE_MASK) + 1;
 917                 tick = false;
 918                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
 919                         tick = true;
 920                         tick_broadcast_enter();
 921                 }
 922         }
 923 
 924         mwait_idle_with_hints(eax, ecx);
 925 
 926         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
 927                 tick_broadcast_exit();
 928 
 929         return index;
 930 }
 931 
 932 /**
 933  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
 934  * @dev: cpuidle_device
 935  * @drv: cpuidle driver
 936  * @index: state index
 937  */
 938 static void intel_idle_s2idle(struct cpuidle_device *dev,
 939                              struct cpuidle_driver *drv, int index)
 940 {
 941         unsigned long ecx = 1; /* break on interrupt flag */
 942         unsigned long eax = flg2MWAIT(drv->states[index].flags);
 943 
 944         mwait_idle_with_hints(eax, ecx);
 945 }
 946 
 947 static void __setup_broadcast_timer(bool on)
 948 {
 949         if (on)
 950                 tick_broadcast_enable();
 951         else
 952                 tick_broadcast_disable();
 953 }
 954 
 955 static void auto_demotion_disable(void)
 956 {
 957         unsigned long long msr_bits;
 958 
 959         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 960         msr_bits &= ~(icpu->auto_demotion_disable_flags);
 961         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 962 }
 963 static void c1e_promotion_disable(void)
 964 {
 965         unsigned long long msr_bits;
 966 
 967         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
 968         msr_bits &= ~0x2;
 969         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
 970 }
 971 
 972 static const struct idle_cpu idle_cpu_nehalem = {
 973         .state_table = nehalem_cstates,
 974         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
 975         .disable_promotion_to_c1e = true,
 976 };
 977 
 978 static const struct idle_cpu idle_cpu_atom = {
 979         .state_table = atom_cstates,
 980 };
 981 
 982 static const struct idle_cpu idle_cpu_tangier = {
 983         .state_table = tangier_cstates,
 984 };
 985 
 986 static const struct idle_cpu idle_cpu_lincroft = {
 987         .state_table = atom_cstates,
 988         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
 989 };
 990 
 991 static const struct idle_cpu idle_cpu_snb = {
 992         .state_table = snb_cstates,
 993         .disable_promotion_to_c1e = true,
 994 };
 995 
 996 static const struct idle_cpu idle_cpu_byt = {
 997         .state_table = byt_cstates,
 998         .disable_promotion_to_c1e = true,
 999         .byt_auto_demotion_disable_flag = true,
1000 };
1001 
1002 static const struct idle_cpu idle_cpu_cht = {
1003         .state_table = cht_cstates,
1004         .disable_promotion_to_c1e = true,
1005         .byt_auto_demotion_disable_flag = true,
1006 };
1007 
1008 static const struct idle_cpu idle_cpu_ivb = {
1009         .state_table = ivb_cstates,
1010         .disable_promotion_to_c1e = true,
1011 };
1012 
1013 static const struct idle_cpu idle_cpu_ivt = {
1014         .state_table = ivt_cstates,
1015         .disable_promotion_to_c1e = true,
1016 };
1017 
1018 static const struct idle_cpu idle_cpu_hsw = {
1019         .state_table = hsw_cstates,
1020         .disable_promotion_to_c1e = true,
1021 };
1022 
1023 static const struct idle_cpu idle_cpu_bdw = {
1024         .state_table = bdw_cstates,
1025         .disable_promotion_to_c1e = true,
1026 };
1027 
1028 static const struct idle_cpu idle_cpu_skl = {
1029         .state_table = skl_cstates,
1030         .disable_promotion_to_c1e = true,
1031 };
1032 
1033 static const struct idle_cpu idle_cpu_skx = {
1034         .state_table = skx_cstates,
1035         .disable_promotion_to_c1e = true,
1036 };
1037 
1038 static const struct idle_cpu idle_cpu_avn = {
1039         .state_table = avn_cstates,
1040         .disable_promotion_to_c1e = true,
1041 };
1042 
1043 static const struct idle_cpu idle_cpu_knl = {
1044         .state_table = knl_cstates,
1045 };
1046 
1047 static const struct idle_cpu idle_cpu_bxt = {
1048         .state_table = bxt_cstates,
1049         .disable_promotion_to_c1e = true,
1050 };
1051 
1052 static const struct idle_cpu idle_cpu_dnv = {
1053         .state_table = dnv_cstates,
1054         .disable_promotion_to_c1e = true,
1055 };
1056 
1057 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1058         INTEL_CPU_FAM6(NEHALEM_EP,              idle_cpu_nehalem),
1059         INTEL_CPU_FAM6(NEHALEM,                 idle_cpu_nehalem),
1060         INTEL_CPU_FAM6(NEHALEM_G,               idle_cpu_nehalem),
1061         INTEL_CPU_FAM6(WESTMERE,                idle_cpu_nehalem),
1062         INTEL_CPU_FAM6(WESTMERE_EP,             idle_cpu_nehalem),
1063         INTEL_CPU_FAM6(NEHALEM_EX,              idle_cpu_nehalem),
1064         INTEL_CPU_FAM6(ATOM_BONNELL,            idle_cpu_atom),
1065         INTEL_CPU_FAM6(ATOM_BONNELL_MID,        idle_cpu_lincroft),
1066         INTEL_CPU_FAM6(WESTMERE_EX,             idle_cpu_nehalem),
1067         INTEL_CPU_FAM6(SANDYBRIDGE,             idle_cpu_snb),
1068         INTEL_CPU_FAM6(SANDYBRIDGE_X,           idle_cpu_snb),
1069         INTEL_CPU_FAM6(ATOM_SALTWELL,           idle_cpu_atom),
1070         INTEL_CPU_FAM6(ATOM_SILVERMONT,         idle_cpu_byt),
1071         INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,     idle_cpu_tangier),
1072         INTEL_CPU_FAM6(ATOM_AIRMONT,            idle_cpu_cht),
1073         INTEL_CPU_FAM6(IVYBRIDGE,               idle_cpu_ivb),
1074         INTEL_CPU_FAM6(IVYBRIDGE_X,             idle_cpu_ivt),
1075         INTEL_CPU_FAM6(HASWELL,                 idle_cpu_hsw),
1076         INTEL_CPU_FAM6(HASWELL_X,               idle_cpu_hsw),
1077         INTEL_CPU_FAM6(HASWELL_L,               idle_cpu_hsw),
1078         INTEL_CPU_FAM6(HASWELL_G,               idle_cpu_hsw),
1079         INTEL_CPU_FAM6(ATOM_SILVERMONT_D,       idle_cpu_avn),
1080         INTEL_CPU_FAM6(BROADWELL,               idle_cpu_bdw),
1081         INTEL_CPU_FAM6(BROADWELL_G,             idle_cpu_bdw),
1082         INTEL_CPU_FAM6(BROADWELL_X,             idle_cpu_bdw),
1083         INTEL_CPU_FAM6(BROADWELL_D,             idle_cpu_bdw),
1084         INTEL_CPU_FAM6(SKYLAKE_L,               idle_cpu_skl),
1085         INTEL_CPU_FAM6(SKYLAKE,                 idle_cpu_skl),
1086         INTEL_CPU_FAM6(KABYLAKE_L,              idle_cpu_skl),
1087         INTEL_CPU_FAM6(KABYLAKE,                idle_cpu_skl),
1088         INTEL_CPU_FAM6(SKYLAKE_X,               idle_cpu_skx),
1089         INTEL_CPU_FAM6(XEON_PHI_KNL,            idle_cpu_knl),
1090         INTEL_CPU_FAM6(XEON_PHI_KNM,            idle_cpu_knl),
1091         INTEL_CPU_FAM6(ATOM_GOLDMONT,           idle_cpu_bxt),
1092         INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,      idle_cpu_bxt),
1093         INTEL_CPU_FAM6(ATOM_GOLDMONT_D,         idle_cpu_dnv),
1094         INTEL_CPU_FAM6(ATOM_TREMONT_D,          idle_cpu_dnv),
1095         {}
1096 };
1097 
1098 /*
1099  * intel_idle_probe()
1100  */
1101 static int __init intel_idle_probe(void)
1102 {
1103         unsigned int eax, ebx, ecx;
1104         const struct x86_cpu_id *id;
1105 
1106         if (max_cstate == 0) {
1107                 pr_debug("disabled\n");
1108                 return -EPERM;
1109         }
1110 
1111         id = x86_match_cpu(intel_idle_ids);
1112         if (!id) {
1113                 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1114                     boot_cpu_data.x86 == 6)
1115                         pr_debug("does not run on family %d model %d\n",
1116                                  boot_cpu_data.x86, boot_cpu_data.x86_model);
1117                 return -ENODEV;
1118         }
1119 
1120         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1121                 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1122                 return -ENODEV;
1123         }
1124 
1125         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1126                 return -ENODEV;
1127 
1128         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1129 
1130         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1131             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1132             !mwait_substates)
1133                         return -ENODEV;
1134 
1135         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1136 
1137         icpu = (const struct idle_cpu *)id->driver_data;
1138         cpuidle_state_table = icpu->state_table;
1139 
1140         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1141                  boot_cpu_data.x86_model);
1142 
1143         return 0;
1144 }
1145 
1146 /*
1147  * intel_idle_cpuidle_devices_uninit()
1148  * Unregisters the cpuidle devices.
1149  */
1150 static void intel_idle_cpuidle_devices_uninit(void)
1151 {
1152         int i;
1153         struct cpuidle_device *dev;
1154 
1155         for_each_online_cpu(i) {
1156                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1157                 cpuidle_unregister_device(dev);
1158         }
1159 }
1160 
1161 /*
1162  * ivt_idle_state_table_update(void)
1163  *
1164  * Tune IVT multi-socket targets
1165  * Assumption: num_sockets == (max_package_num + 1)
1166  */
1167 static void ivt_idle_state_table_update(void)
1168 {
1169         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1170         int cpu, package_num, num_sockets = 1;
1171 
1172         for_each_online_cpu(cpu) {
1173                 package_num = topology_physical_package_id(cpu);
1174                 if (package_num + 1 > num_sockets) {
1175                         num_sockets = package_num + 1;
1176 
1177                         if (num_sockets > 4) {
1178                                 cpuidle_state_table = ivt_cstates_8s;
1179                                 return;
1180                         }
1181                 }
1182         }
1183 
1184         if (num_sockets > 2)
1185                 cpuidle_state_table = ivt_cstates_4s;
1186 
1187         /* else, 1 and 2 socket systems use default ivt_cstates */
1188 }
1189 
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

/*
 * Nanoseconds per count for each value of the 3-bit unit field
 * (bits 12:10 of the raw value).  Entries 6 and 7 are 0, so those
 * encodings translate to 0 usec.  Read-only lookup table.
 */
static const unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec()
 * @irtl: raw IRTL MSR value
 *
 * Multiplies the 10-bit count (bits 9:0) by the unit selected through
 * bits 12:10 and converts the result from nanoseconds to microseconds.
 * Returns 0 when @irtl is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1208 /*
1209  * bxt_idle_state_table_update(void)
1210  *
1211  * On BXT, we trust the IRTL to show the definitive maximum latency
1212  * We use the same value for target_residency.
1213  */
1214 static void bxt_idle_state_table_update(void)
1215 {
1216         unsigned long long msr;
1217         unsigned int usec;
1218 
1219         rdmsrl(MSR_PKGC6_IRTL, msr);
1220         usec = irtl_2_usec(msr);
1221         if (usec) {
1222                 bxt_cstates[2].exit_latency = usec;
1223                 bxt_cstates[2].target_residency = usec;
1224         }
1225 
1226         rdmsrl(MSR_PKGC7_IRTL, msr);
1227         usec = irtl_2_usec(msr);
1228         if (usec) {
1229                 bxt_cstates[3].exit_latency = usec;
1230                 bxt_cstates[3].target_residency = usec;
1231         }
1232 
1233         rdmsrl(MSR_PKGC8_IRTL, msr);
1234         usec = irtl_2_usec(msr);
1235         if (usec) {
1236                 bxt_cstates[4].exit_latency = usec;
1237                 bxt_cstates[4].target_residency = usec;
1238         }
1239 
1240         rdmsrl(MSR_PKGC9_IRTL, msr);
1241         usec = irtl_2_usec(msr);
1242         if (usec) {
1243                 bxt_cstates[5].exit_latency = usec;
1244                 bxt_cstates[5].target_residency = usec;
1245         }
1246 
1247         rdmsrl(MSR_PKGC10_IRTL, msr);
1248         usec = irtl_2_usec(msr);
1249         if (usec) {
1250                 bxt_cstates[6].exit_latency = usec;
1251                 bxt_cstates[6].target_residency = usec;
1252         }
1253 
1254 }
1255 /*
1256  * sklh_idle_state_table_update(void)
1257  *
1258  * On SKL-H (model 0x5e) disable C8 and C9 if:
1259  * C10 is enabled and SGX disabled
1260  */
1261 static void sklh_idle_state_table_update(void)
1262 {
1263         unsigned long long msr;
1264         unsigned int eax, ebx, ecx, edx;
1265 
1266 
1267         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1268         if (max_cstate <= 7)
1269                 return;
1270 
1271         /* if PC10 not present in CPUID.MWAIT.EDX */
1272         if ((mwait_substates & (0xF << 28)) == 0)
1273                 return;
1274 
1275         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1276 
1277         /* PC10 is not enabled in PKG C-state limit */
1278         if ((msr & 0xF) != 8)
1279                 return;
1280 
1281         ecx = 0;
1282         cpuid(7, &eax, &ebx, &ecx, &edx);
1283 
1284         /* if SGX is present */
1285         if (ebx & (1 << 2)) {
1286 
1287                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1288 
1289                 /* if SGX is enabled */
1290                 if (msr & (1 << 18))
1291                         return;
1292         }
1293 
1294         skl_cstates[5].disabled = 1;    /* C8-SKL */
1295         skl_cstates[6].disabled = 1;    /* C9-SKL */
1296 }
1297 /*
1298  * intel_idle_state_table_update()
1299  *
1300  * Update the default state_table for this CPU-id
1301  */
1302 
1303 static void intel_idle_state_table_update(void)
1304 {
1305         switch (boot_cpu_data.x86_model) {
1306 
1307         case INTEL_FAM6_IVYBRIDGE_X:
1308                 ivt_idle_state_table_update();
1309                 break;
1310         case INTEL_FAM6_ATOM_GOLDMONT:
1311         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1312                 bxt_idle_state_table_update();
1313                 break;
1314         case INTEL_FAM6_SKYLAKE:
1315                 sklh_idle_state_table_update();
1316                 break;
1317         }
1318 }
1319 
1320 /*
1321  * intel_idle_cpuidle_driver_init()
1322  * allocate, initialize cpuidle_states
1323  */
1324 static void __init intel_idle_cpuidle_driver_init(void)
1325 {
1326         int cstate;
1327         struct cpuidle_driver *drv = &intel_idle_driver;
1328 
1329         intel_idle_state_table_update();
1330 
1331         cpuidle_poll_state_init(drv);
1332         drv->state_count = 1;
1333 
1334         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1335                 int num_substates, mwait_hint, mwait_cstate;
1336 
1337                 if ((cpuidle_state_table[cstate].enter == NULL) &&
1338                     (cpuidle_state_table[cstate].enter_s2idle == NULL))
1339                         break;
1340 
1341                 if (cstate + 1 > max_cstate) {
1342                         pr_info("max_cstate %d reached\n", max_cstate);
1343                         break;
1344                 }
1345 
1346                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1347                 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1348 
1349                 /* number of sub-states for this state in CPUID.MWAIT */
1350                 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1351                                         & MWAIT_SUBSTATE_MASK;
1352 
1353                 /* if NO sub-states for this state in CPUID, skip it */
1354                 if (num_substates == 0)
1355                         continue;
1356 
1357                 /* if state marked as disabled, skip it */
1358                 if (cpuidle_state_table[cstate].disabled != 0) {
1359                         pr_debug("state %s is disabled\n",
1360                                  cpuidle_state_table[cstate].name);
1361                         continue;
1362                 }
1363 
1364 
1365                 if (((mwait_cstate + 1) > 2) &&
1366                         !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1367                         mark_tsc_unstable("TSC halts in idle"
1368                                         " states deeper than C2");
1369 
1370                 drv->states[drv->state_count] = /* structure copy */
1371                         cpuidle_state_table[cstate];
1372 
1373                 drv->state_count += 1;
1374         }
1375 
1376         if (icpu->byt_auto_demotion_disable_flag) {
1377                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1378                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1379         }
1380 }
1381 
1382 
1383 /*
1384  * intel_idle_cpu_init()
1385  * allocate, initialize, register cpuidle_devices
1386  * @cpu: cpu/core to initialize
1387  */
1388 static int intel_idle_cpu_init(unsigned int cpu)
1389 {
1390         struct cpuidle_device *dev;
1391 
1392         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1393         dev->cpu = cpu;
1394 
1395         if (cpuidle_register_device(dev)) {
1396                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1397                 return -EIO;
1398         }
1399 
1400         if (icpu->auto_demotion_disable_flags)
1401                 auto_demotion_disable();
1402 
1403         if (icpu->disable_promotion_to_c1e)
1404                 c1e_promotion_disable();
1405 
1406         return 0;
1407 }
1408 
1409 static int intel_idle_cpu_online(unsigned int cpu)
1410 {
1411         struct cpuidle_device *dev;
1412 
1413         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1414                 __setup_broadcast_timer(true);
1415 
1416         /*
1417          * Some systems can hotplug a cpu at runtime after
1418          * the kernel has booted, we have to initialize the
1419          * driver in this case
1420          */
1421         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1422         if (!dev->registered)
1423                 return intel_idle_cpu_init(cpu);
1424 
1425         return 0;
1426 }
1427 
1428 static int __init intel_idle_init(void)
1429 {
1430         int retval;
1431 
1432         /* Do not load intel_idle at all for now if idle= is passed */
1433         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1434                 return -ENODEV;
1435 
1436         retval = intel_idle_probe();
1437         if (retval)
1438                 return retval;
1439 
1440         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1441         if (intel_idle_cpuidle_devices == NULL)
1442                 return -ENOMEM;
1443 
1444         intel_idle_cpuidle_driver_init();
1445         retval = cpuidle_register_driver(&intel_idle_driver);
1446         if (retval) {
1447                 struct cpuidle_driver *drv = cpuidle_get_driver();
1448                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1449                        drv ? drv->name : "none");
1450                 goto init_driver_fail;
1451         }
1452 
1453         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1454                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1455 
1456         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1457                                    intel_idle_cpu_online, NULL);
1458         if (retval < 0)
1459                 goto hp_setup_fail;
1460 
1461         pr_debug("lapic_timer_reliable_states 0x%x\n",
1462                  lapic_timer_reliable_states);
1463 
1464         return 0;
1465 
1466 hp_setup_fail:
1467         intel_idle_cpuidle_devices_uninit();
1468         cpuidle_unregister_driver(&intel_idle_driver);
1469 init_driver_fail:
1470         free_percpu(intel_idle_cpuidle_devices);
1471         return retval;
1472 
1473 }
1474 device_initcall(intel_idle_init);
1475 
1476 /*
1477  * We are not really modular, but we used to support that.  Meaning we also
1478  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1479  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1480  * is the easiest way (currently) to continue doing that.
1481  */
1482 module_param(max_cstate, int, 0444);

/* [<][>][^][v][top][bottom][index][help] */