root/drivers/thermal/intel/intel_powerclamp.c


DEFINITIONS

This source file includes the following definitions.
  1. duration_set
  2. window_size_set
  3. find_target_mwait
  4. has_pkg_state_counter
  5. pkg_state_counter
  6. get_compensation
  7. adjust_compensation
  8. powerclamp_adjust_controls
  9. clamp_balancing_func
  10. clamp_idle_injection_func
  11. poll_pkg_cstate
  12. start_power_clamp_worker
  13. stop_power_clamp_worker
  14. start_power_clamp
  15. end_power_clamp
  16. powerclamp_cpu_online
  17. powerclamp_cpu_predown
  18. powerclamp_get_max_state
  19. powerclamp_get_cur_state
  20. powerclamp_set_cur_state
  21. powerclamp_probe
  22. powerclamp_debug_show
  23. powerclamp_create_debug_files
  24. powerclamp_init
  25. powerclamp_exit

// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 *      TODO:
 *           1. Better handling of wakeups from external interrupts. Currently
 *              a fixed compensation is added to the clamping duration when an
 *              excessive number of wakeups is observed during idle time. The
 *              reason is that, for external interrupts that need no ack,
 *              clamping down a cpu in non-irq context does not reduce the irq
 *              rate. In the majority of cases clamping down a cpu does help
 *              reduce irqs as well; we should be able to differentiate the two
 *              cases and give a quantitative solution for the irqs that we
 *              can control, perhaps based on get_cpu_iowait_time_us().
 *
 *           2. Synchronization with other hw blocks.
 */

#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>

#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wakeups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level at which runtime calibration results are
 * considered valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration; the driver adjusts the sleep time to meet
 * the target idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static unsigned int target_mwait;
static struct dentry *debug_dir;

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;
static bool should_skip;
static bool reduce_irq;
static atomic_t idle_wakeup_counter;
static unsigned int control_cpu; /* The cpu assigned to collect stats and
                                  * update control parameters. Defaults to
                                  * the BSP, but the BSP can be offlined.
                                  */
static bool clamping;

static const struct sched_param sparam = {
        .sched_priority = MAX_USER_RT_PRIO / 2,
};
struct powerclamp_worker_data {
        struct kthread_worker *worker;
        struct kthread_work balancing_work;
        struct kthread_delayed_work idle_injection_work;
        unsigned int cpu;
        unsigned int count;
        unsigned int guard;
        unsigned int window_size_now;
        unsigned int target_ratio;
        unsigned int duration_jiffies;
        bool clamping;
};

static struct powerclamp_worker_data __percpu *worker_data;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
                                           * clamping kthread workers
                                           */

static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

static int duration_set(const char *arg, const struct kernel_param *kp)
{
        int ret = 0;
        unsigned long new_duration;

        ret = kstrtoul(arg, 10, &new_duration);
        if (ret)
                goto exit;
        if (new_duration > 25 || new_duration < 6) {
                pr_err("Out of recommended range %lu, between 6-25ms\n",
                        new_duration);
                ret = -EINVAL;
        }

        duration = clamp(new_duration, 6ul, 25ul);
        smp_mb();

exit:
        return ret;
}

static const struct kernel_param_ops duration_ops = {
        .set = duration_set,
        .get = param_get_int,
};

module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");

struct powerclamp_calibration_data {
        unsigned long confidence;  /* Used for calibration: a counter that is
                                    * incremented each time a clamping period
                                    * completes without extra wakeups. Once
                                    * the counter reaches a given level, the
                                    * compensation is deemed usable.
                                    */
        unsigned long steady_comp; /* steady state compensation used when
                                    * no extra wakeups occurred.
                                    */
        unsigned long dynamic_comp; /* compensates for excessive wakeups from
                                     * idle, mostly from external interrupts.
                                     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

static int window_size_set(const char *arg, const struct kernel_param *kp)
{
        int ret = 0;
        unsigned long new_window_size;

        ret = kstrtoul(arg, 10, &new_window_size);
        if (ret)
                goto exit_win;
        if (new_window_size > 10 || new_window_size < 2) {
                pr_err("Out of recommended window size %lu, between 2-10\n",
                        new_window_size);
                ret = -EINVAL;
        }

        window_size = clamp(new_window_size, 2ul, 10ul);
        smp_mb();

exit_win:
        return ret;
}

static const struct kernel_param_ops window_size_ops = {
        .set = window_size_set,
        .get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
        "\tpowerclamp controls the idle ratio within this window. A larger\n"
        "\twindow size results in slower response time but smoother\n"
        "\tclamping results. Defaults to 2.");

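/*
 * CPUID leaf 5 (CPUID_MWAIT_LEAF) enumerates MWAIT sub-states: each 4-bit
 * (MWAIT_SUBSTATE_SIZE) field in EDX reports the number of sub-states for
 * one C-state, and the C0 field is shifted off first below. The resulting
 * hint packs the deepest C-state index into the upper nibble and the
 * zero-based deepest sub-state into the lower nibble.
 */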
static void find_target_mwait(void)
{
        unsigned int eax, ebx, ecx, edx;
        unsigned int highest_cstate = 0;
        unsigned int highest_subcstate = 0;
        int i;

        if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
                return;

        cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

        if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
            !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
                return;

        edx >>= MWAIT_SUBSTATE_SIZE;
        for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
                if (edx & MWAIT_SUBSTATE_MASK) {
                        highest_cstate = i;
                        highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
                }
        }
        target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
                (highest_subcstate - 1);
}

struct pkg_cstate_info {
        bool skip;
        int msr_index;
        int cstate_id;
};

#define PKG_CSTATE_INIT(id) {                           \
                .msr_index = MSR_PKG_C##id##_RESIDENCY, \
                .cstate_id = id                         \
                        }

static struct pkg_cstate_info pkg_cstates[] = {
        PKG_CSTATE_INIT(2),
        PKG_CSTATE_INIT(3),
        PKG_CSTATE_INIT(6),
        PKG_CSTATE_INIT(7),
        PKG_CSTATE_INIT(8),
        PKG_CSTATE_INIT(9),
        PKG_CSTATE_INIT(10),
        {},
};

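/*
 * rdmsrl_safe() lets us probe for the residency counters: not every CPU
 * model implements every MSR in the table above, and counters that fault
 * are flagged and skipped on subsequent reads.
 */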
static bool has_pkg_state_counter(void)
{
        u64 val;
        struct pkg_cstate_info *info = pkg_cstates;

        /* check if any one of the counter MSRs exists */
        while (info->msr_index) {
                if (!rdmsrl_safe(info->msr_index, &val))
                        return true;
                info++;
        }

        return false;
}

static u64 pkg_state_counter(void)
{
        u64 val;
        u64 count = 0;
        struct pkg_cstate_info *info = pkg_cstates;

        while (info->msr_index) {
                if (!info->skip) {
                        if (!rdmsrl_safe(info->msr_index, &val))
                                count += val;
                        else
                                info->skip = true;
                }
                info++;
        }

        return count;
}

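/*
 * Look up the calibrated compensation for a target ratio. A ratio's own
 * steady_comp is only trusted once its confidence counter and those of its
 * immediate neighbours have reached CONFIDENCE_OK; the returned value is
 * the average of the three, which smooths out per-ratio calibration noise.
 */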
static unsigned int get_compensation(int ratio)
{
        unsigned int comp = 0;

        /* we only use compensation if all adjacent ones are good */
        if (ratio == 1 &&
                cal_data[ratio].confidence >= CONFIDENCE_OK &&
                cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
                cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
                comp = (cal_data[ratio].steady_comp +
                        cal_data[ratio + 1].steady_comp +
                        cal_data[ratio + 2].steady_comp) / 3;
        } else if (ratio == MAX_TARGET_RATIO - 1 &&
                cal_data[ratio].confidence >= CONFIDENCE_OK &&
                cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
                cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
                comp = (cal_data[ratio].steady_comp +
                        cal_data[ratio - 1].steady_comp +
                        cal_data[ratio - 2].steady_comp) / 3;
        } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
                cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
                cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
                comp = (cal_data[ratio].steady_comp +
                        cal_data[ratio - 1].steady_comp +
                        cal_data[ratio + 1].steady_comp) / 3;
        }

        /* REVISIT: simple penalty of double idle injection */
        if (reduce_irq)
                comp = ratio;
        /* do not exceed limit */
        if (comp + ratio >= MAX_TARGET_RATIO)
                comp = MAX_TARGET_RATIO - ratio - 1;

        return comp;
}

static void adjust_compensation(int target_ratio, unsigned int win)
{
        int delta;
        struct powerclamp_calibration_data *d = &cal_data[target_ratio];

        /*
         * Skip the adjustment if the confidence level has already been
         * reached, or if there were too many wakeups during the last idle
         * injection period: in that case the data cannot be trusted for
         * compensation.
         */
        if (d->confidence >= CONFIDENCE_OK ||
                atomic_read(&idle_wakeup_counter) >
                win * num_online_cpus())
                return;

        delta = set_target_ratio - current_ratio;
        /* filter out bad data */
        if (delta >= 0 && delta <= (1 + target_ratio / 10)) {
                if (d->steady_comp)
                        d->steady_comp =
                                roundup(delta + d->steady_comp, 2) / 2;
                else
                        d->steady_comp = delta;
                d->confidence++;
        }
}

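/*
 * The package C-state residency MSRs summed by pkg_state_counter() count at
 * (roughly) TSC frequency, so the idle ratio achieved over the last window
 * can be computed as 100 * delta(residency) / delta(TSC). See the SDM for
 * the exact per-model counter semantics.
 */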
static bool powerclamp_adjust_controls(unsigned int target_ratio,
                                unsigned int guard, unsigned int win)
{
        static u64 msr_last, tsc_last;
        u64 msr_now, tsc_now;
        u64 val64;

        /* check result for the last window */
        msr_now = pkg_state_counter();
        tsc_now = rdtsc();

        /* calculate pkg cstate vs tsc ratio */
        if (!msr_last || !tsc_last)
                current_ratio = 1;
        else if (tsc_now - tsc_last) {
                val64 = 100 * (msr_now - msr_last);
                do_div(val64, (tsc_now - tsc_last));
                current_ratio = val64;
        }

        /* update record */
        msr_last = msr_now;
        tsc_last = tsc_now;

        adjust_compensation(target_ratio, win);
        /*
         * too many external interrupts: set the flag so that we can take
         * measures later.
         */
        reduce_irq = atomic_read(&idle_wakeup_counter) >=
                2 * win * num_online_cpus();

        atomic_set(&idle_wakeup_counter, 0);
        /* if we are above target+guard, skip */
        return set_target_ratio + guard <= current_ratio;
}

static void clamp_balancing_func(struct kthread_work *work)
{
        struct powerclamp_worker_data *w_data;
        int sleeptime;
        unsigned long target_jiffies;
        unsigned int compensated_ratio;
        int interval; /* jiffies to sleep for each attempt */

        w_data = container_of(work, struct powerclamp_worker_data,
                              balancing_work);

        /*
         * Make sure a user-selected ratio does not take effect until
         * the next round. Adjust target_ratio if the user has changed
         * the target, so that we can converge quickly.
         */
        w_data->target_ratio = READ_ONCE(set_target_ratio);
        w_data->guard = 1 + w_data->target_ratio / 20;
        w_data->window_size_now = window_size;
        w_data->duration_jiffies = msecs_to_jiffies(duration);
        w_data->count++;

        /*
         * Systems may differ in their ability to enter package-level
         * c-states, thus we need to compensate the injected idle ratio
         * to achieve the actual target reported by the HW.
         */
        compensated_ratio = w_data->target_ratio +
                get_compensation(w_data->target_ratio);
        if (compensated_ratio <= 0)
                compensated_ratio = 1;
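        /*
         * Choose the period so that duration/interval equals the
         * compensated idle ratio in percent: e.g. a 6-jiffy injection
         * at a 30% target yields a 20-jiffy period.
         */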
        interval = w_data->duration_jiffies * 100 / compensated_ratio;

        /* align idle time */
        target_jiffies = roundup(jiffies, interval);
        sleeptime = target_jiffies - jiffies;
        if (sleeptime <= 0)
                sleeptime = 1;

        if (clamping && w_data->clamping && cpu_online(w_data->cpu))
                kthread_queue_delayed_work(w_data->worker,
                                           &w_data->idle_injection_work,
                                           sleeptime);
}

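/*
 * Second half of the injection cycle: runs at the aligned deadline, skips
 * the injection when the measured ratio already exceeds target + guard,
 * then requeues the balancing work to set up the next period.
 */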
static void clamp_idle_injection_func(struct kthread_work *work)
{
        struct powerclamp_worker_data *w_data;

        w_data = container_of(work, struct powerclamp_worker_data,
                              idle_injection_work.work);

        /*
         * Only the elected controlling cpu can collect stats and update
         * the control parameters.
         */
        if (w_data->cpu == control_cpu &&
            !(w_data->count % w_data->window_size_now)) {
                should_skip =
                        powerclamp_adjust_controls(w_data->target_ratio,
                                                   w_data->guard,
                                                   w_data->window_size_now);
                smp_mb();
        }

        if (should_skip)
                goto balance;

        play_idle(jiffies_to_usecs(w_data->duration_jiffies));

balance:
        if (clamping && w_data->clamping && cpu_online(w_data->cpu))
                kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

/*
 * 1 Hz polling while clamping is active, useful for userspace
 * to monitor the actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
        static u64 msr_last;
        static u64 tsc_last;

        u64 msr_now;
        u64 tsc_now;
        u64 val64;

        msr_now = pkg_state_counter();
        tsc_now = rdtsc();

        /* calculate pkg cstate vs tsc ratio */
        if (!msr_last || !tsc_last)
                pkg_cstate_ratio_cur = 1;
        else {
                if (tsc_now - tsc_last) {
                        val64 = 100 * (msr_now - msr_last);
                        do_div(val64, (tsc_now - tsc_last));
                        pkg_cstate_ratio_cur = val64;
                }
        }

        /* update record */
        msr_last = msr_now;
        tsc_last = tsc_now;

        if (clamping)
                schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

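/*
 * One kthread worker is created per clamped CPU, pinned to that CPU and
 * raised to SCHED_FIFO (at half of MAX_USER_RT_PRIO) so that the idle
 * injection work can preempt normal tasks when it fires.
 */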
static void start_power_clamp_worker(unsigned long cpu)
{
        struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
        struct kthread_worker *worker;

        worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
        if (IS_ERR(worker))
                return;

        w_data->worker = worker;
        w_data->count = 0;
        w_data->cpu = cpu;
        w_data->clamping = true;
        set_bit(cpu, cpu_clamping_mask);
        sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
        kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
        kthread_init_delayed_work(&w_data->idle_injection_work,
                                  clamp_idle_injection_func);
        kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

static void stop_power_clamp_worker(unsigned long cpu)
{
        struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);

        if (!w_data->worker)
                return;

        w_data->clamping = false;
        /*
         * Make sure that all works that get queued after this point see
         * the clamping disabled. The counterpart barrier is not needed
         * because there is an implicit memory barrier when the queued
         * work is processed.
         */
        smp_wmb();
        kthread_cancel_work_sync(&w_data->balancing_work);
        kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
        /*
         * The balancing work might still be queued here because
         * the handling of the "clamping" variable, cancel, and queue
         * operations are not synchronized via a lock. But it is not
         * a big deal. The balancing work is fast and destroying the
         * kthread worker will wait for it.
         */
        clear_bit(w_data->cpu, cpu_clamping_mask);
        kthread_destroy_worker(w_data->worker);

        w_data->worker = NULL;
}

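/*
 * Start clamping: elect a control CPU (preferring the BSP), kick off the
 * 1 Hz package C-state poll, and spawn one injection worker per online
 * CPU while holding the hotplug lock.
 */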
static int start_power_clamp(void)
{
        unsigned long cpu;

        set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
        /* prevent cpu hotplug */
        get_online_cpus();

        /* prefer BSP */
        control_cpu = 0;
        if (!cpu_online(control_cpu))
                control_cpu = smp_processor_id();

        clamping = true;
        schedule_delayed_work(&poll_pkg_cstate_work, 0);

        /* start one kthread worker per online cpu */
        for_each_online_cpu(cpu) {
                start_power_clamp_worker(cpu);
        }
        put_online_cpus();

        return 0;
}

static void end_power_clamp(void)
{
        int i;

        /*
         * Block requeuing in all the kthread workers. They will flush and
         * stop faster.
         */
        clamping = false;
        if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
                for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
                        pr_debug("clamping worker for cpu %d alive, destroy\n",
                                 i);
                        stop_power_clamp_worker(i);
                }
        }
}

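/*
 * CPU hotplug callbacks: keep one worker per online CPU while clamping is
 * active, and migrate the control CPU role away from a CPU that is going
 * down (preferring the BSP when it comes back online).
 */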
static int powerclamp_cpu_online(unsigned int cpu)
{
        if (!clamping)
                return 0;
        start_power_clamp_worker(cpu);
        /* prefer BSP as controlling CPU */
        if (cpu == 0) {
                control_cpu = 0;
                smp_mb();
        }
        return 0;
}

static int powerclamp_cpu_predown(unsigned int cpu)
{
        if (!clamping)
                return 0;

        stop_power_clamp_worker(cpu);
        if (cpu != control_cpu)
                return 0;

        control_cpu = cpumask_first(cpu_online_mask);
        if (control_cpu == cpu)
                control_cpu = cpumask_next(cpu, cpu_online_mask);
        smp_mb();
        return 0;
}

static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
                                 unsigned long *state)
{
        *state = MAX_TARGET_RATIO;

        return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
                                 unsigned long *state)
{
        if (clamping)
                *state = pkg_cstate_ratio_cur;
        else
                /* to save power, do not poll idle ratio while not clamping */
                *state = -1; /* indicates invalid state */

        return 0;
}

static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
                                 unsigned long new_target_ratio)
{
        int ret = 0;

        new_target_ratio = clamp(new_target_ratio, 0UL,
                                (unsigned long) (MAX_TARGET_RATIO - 1));
        if (set_target_ratio == 0 && new_target_ratio > 0) {
                pr_info("Start idle injection to reduce power\n");
                set_target_ratio = new_target_ratio;
                ret = start_power_clamp();
                goto exit_set;
        } else if (set_target_ratio > 0 && new_target_ratio == 0) {
                pr_info("Stop forced idle injection\n");
                end_power_clamp();
                set_target_ratio = 0;
        } else /* adjust currently running */ {
                set_target_ratio = new_target_ratio;
                /* make new set_target_ratio visible to other cpus */
                smp_mb();
        }

exit_set:
        return ret;
}

/* bind to the generic thermal layer as a cooling device */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
        .get_max_state = powerclamp_get_max_state,
        .get_cur_state = powerclamp_get_cur_state,
        .set_cur_state = powerclamp_set_cur_state,
};

static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
        { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

static int __init powerclamp_probe(void)
{
        if (!x86_match_cpu(intel_powerclamp_ids)) {
                pr_err("CPU does not support MWAIT\n");
                return -ENODEV;
        }

        /* The goal of aligned idle time injection is to reach package
         * C-states; bail out if package C-state residency is unavailable.
         */
        if (!has_pkg_state_counter()) {
                pr_info("No package C-state available\n");
                return -ENODEV;
        }

        /* find the deepest mwait value */
        find_target_mwait();

        return 0;
}

static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
        int i = 0;

        seq_printf(m, "controlling cpu: %d\n", control_cpu);
        seq_printf(m, "pct confidence steady dynamic (compensation)\n");
        for (i = 0; i < MAX_TARGET_RATIO; i++) {
                seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
                        i,
                        cal_data[i].confidence,
                        cal_data[i].steady_comp,
                        cal_data[i].dynamic_comp);
        }

        return 0;
}

DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);

static inline void powerclamp_create_debug_files(void)
{
        debug_dir = debugfs_create_dir("intel_powerclamp", NULL);

        debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
                            &powerclamp_debug_fops);
}

static enum cpuhp_state hp_state;

static int __init powerclamp_init(void)
{
        int retval;
        int bitmap_size;

        bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
        cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
        if (!cpu_clamping_mask)
                return -ENOMEM;

        /* probe cpu features and ids here */
        retval = powerclamp_probe();
        if (retval)
                goto exit_free;

        /* set default limit, may be adjusted at runtime based on feedback */
        window_size = 2;
        retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                           "thermal/intel_powerclamp:online",
                                           powerclamp_cpu_online,
                                           powerclamp_cpu_predown);
        if (retval < 0)
                goto exit_free;

        hp_state = retval;

        worker_data = alloc_percpu(struct powerclamp_worker_data);
        if (!worker_data) {
                retval = -ENOMEM;
                goto exit_unregister;
        }

        cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
                                                &powerclamp_cooling_ops);
        if (IS_ERR(cooling_dev)) {
                retval = -ENODEV;
                goto exit_free_thread;
        }

        if (!duration)
                duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

        powerclamp_create_debug_files();

        return 0;

exit_free_thread:
        free_percpu(worker_data);
exit_unregister:
        cpuhp_remove_state_nocalls(hp_state);
exit_free:
        kfree(cpu_clamping_mask);
        return retval;
}
module_init(powerclamp_init);

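/*
 * Module teardown: stop all idle injection and remove the hotplug state
 * before freeing per-CPU data, then unregister the cooling device and
 * clean up the polling work and debugfs entries.
 */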
static void __exit powerclamp_exit(void)
{
        end_power_clamp();
        cpuhp_remove_state_nocalls(hp_state);
        free_percpu(worker_data);
        thermal_cooling_device_unregister(cooling_dev);
        kfree(cpu_clamping_mask);

        cancel_delayed_work_sync(&poll_pkg_cstate_work);
        debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");
