1/* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19#include <linux/export.h> 20#include <linux/kernel_stat.h> 21#include <linux/slab.h> 22 23#include "cpufreq_governor.h" 24 25static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 26{ 27 if (have_governor_per_policy()) 28 return dbs_data->cdata->attr_group_gov_pol; 29 else 30 return dbs_data->cdata->attr_group_gov_sys; 31} 32 33void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 34{ 35 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 36 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 37 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 38 struct cpufreq_policy *policy = cdbs->shared->policy; 39 unsigned int sampling_rate; 40 unsigned int max_load = 0; 41 unsigned int ignore_nice; 42 unsigned int j; 43 44 if (dbs_data->cdata->governor == GOV_ONDEMAND) { 45 struct od_cpu_dbs_info_s *od_dbs_info = 46 dbs_data->cdata->get_cpu_dbs_info_s(cpu); 47 48 /* 49 * Sometimes, the ondemand governor uses an additional 50 * multiplier to give long delays. So apply this multiplier to 51 * the 'sampling_rate', so as to keep the wake-up-from-idle 52 * detection logic a bit conservative. 53 */ 54 sampling_rate = od_tuners->sampling_rate; 55 sampling_rate *= od_dbs_info->rate_mult; 56 57 ignore_nice = od_tuners->ignore_nice_load; 58 } else { 59 sampling_rate = cs_tuners->sampling_rate; 60 ignore_nice = cs_tuners->ignore_nice_load; 61 } 62 63 /* Get Absolute Load */ 64 for_each_cpu(j, policy->cpus) { 65 struct cpu_dbs_info *j_cdbs; 66 u64 cur_wall_time, cur_idle_time; 67 unsigned int idle_time, wall_time; 68 unsigned int load; 69 int io_busy = 0; 70 71 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 72 73 /* 74 * For the purpose of ondemand, waiting for disk IO is 75 * an indication that you're performance critical, and 76 * not that the system is actually idle. So do not add 77 * the iowait time to the cpu idle time. 78 */ 79 if (dbs_data->cdata->governor == GOV_ONDEMAND) 80 io_busy = od_tuners->io_is_busy; 81 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 82 83 wall_time = (unsigned int) 84 (cur_wall_time - j_cdbs->prev_cpu_wall); 85 j_cdbs->prev_cpu_wall = cur_wall_time; 86 87 idle_time = (unsigned int) 88 (cur_idle_time - j_cdbs->prev_cpu_idle); 89 j_cdbs->prev_cpu_idle = cur_idle_time; 90 91 if (ignore_nice) { 92 u64 cur_nice; 93 unsigned long cur_nice_jiffies; 94 95 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 96 cdbs->prev_cpu_nice; 97 /* 98 * Assumption: nice time between sampling periods will 99 * be less than 2^32 jiffies for 32 bit sys 100 */ 101 cur_nice_jiffies = (unsigned long) 102 cputime64_to_jiffies64(cur_nice); 103 104 cdbs->prev_cpu_nice = 105 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 106 idle_time += jiffies_to_usecs(cur_nice_jiffies); 107 } 108 109 if (unlikely(!wall_time || wall_time < idle_time)) 110 continue; 111 112 /* 113 * If the CPU had gone completely idle, and a task just woke up 114 * on this CPU now, it would be unfair to calculate 'load' the 115 * usual way for this elapsed time-window, because it will show 116 * near-zero load, irrespective of how CPU intensive that task 117 * actually is. This is undesirable for latency-sensitive bursty 118 * workloads. 119 * 120 * To avoid this, we reuse the 'load' from the previous 121 * time-window and give this task a chance to start with a 122 * reasonably high CPU frequency. (However, we shouldn't over-do 123 * this copy, lest we get stuck at a high load (high frequency) 124 * for too long, even when the current system load has actually 125 * dropped down. So we perform the copy only once, upon the 126 * first wake-up from idle.) 127 * 128 * Detecting this situation is easy: the governor's deferrable 129 * timer would not have fired during CPU-idle periods. Hence 130 * an unusually large 'wall_time' (as compared to the sampling 131 * rate) indicates this scenario. 132 * 133 * prev_load can be zero in two cases and we must recalculate it 134 * for both cases: 135 * - during long idle intervals 136 * - explicitly set to zero 137 */ 138 if (unlikely(wall_time > (2 * sampling_rate) && 139 j_cdbs->prev_load)) { 140 load = j_cdbs->prev_load; 141 142 /* 143 * Perform a destructive copy, to ensure that we copy 144 * the previous load only once, upon the first wake-up 145 * from idle. 146 */ 147 j_cdbs->prev_load = 0; 148 } else { 149 load = 100 * (wall_time - idle_time) / wall_time; 150 j_cdbs->prev_load = load; 151 } 152 153 if (load > max_load) 154 max_load = load; 155 } 156 157 dbs_data->cdata->gov_check_cpu(cpu, max_load); 158} 159EXPORT_SYMBOL_GPL(dbs_check_cpu); 160 161static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, 162 unsigned int delay) 163{ 164 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 165 166 mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); 167} 168 169void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, 170 unsigned int delay, bool all_cpus) 171{ 172 int i; 173 174 if (!all_cpus) { 175 /* 176 * Use raw_smp_processor_id() to avoid preemptible warnings. 177 * We know that this is only called with all_cpus == false from 178 * works that have been queued with *_work_on() functions and 179 * those works are canceled during CPU_DOWN_PREPARE so they 180 * can't possibly run on any other CPU. 181 */ 182 __gov_queue_work(raw_smp_processor_id(), dbs_data, delay); 183 } else { 184 for_each_cpu(i, policy->cpus) 185 __gov_queue_work(i, dbs_data, delay); 186 } 187} 188EXPORT_SYMBOL_GPL(gov_queue_work); 189 190static inline void gov_cancel_work(struct dbs_data *dbs_data, 191 struct cpufreq_policy *policy) 192{ 193 struct cpu_dbs_info *cdbs; 194 int i; 195 196 for_each_cpu(i, policy->cpus) { 197 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 198 cancel_delayed_work_sync(&cdbs->dwork); 199 } 200} 201 202/* Will return if we need to evaluate cpu load again or not */ 203static bool need_load_eval(struct cpu_common_dbs_info *shared, 204 unsigned int sampling_rate) 205{ 206 if (policy_is_shared(shared->policy)) { 207 ktime_t time_now = ktime_get(); 208 s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); 209 210 /* Do nothing if we recently have sampled */ 211 if (delta_us < (s64)(sampling_rate / 2)) 212 return false; 213 else 214 shared->time_stamp = time_now; 215 } 216 217 return true; 218} 219 220static void dbs_timer(struct work_struct *work) 221{ 222 struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, 223 dwork.work); 224 struct cpu_common_dbs_info *shared = cdbs->shared; 225 struct cpufreq_policy *policy; 226 struct dbs_data *dbs_data; 227 unsigned int sampling_rate, delay; 228 bool modify_all = true; 229 230 mutex_lock(&shared->timer_mutex); 231 232 policy = shared->policy; 233 234 /* 235 * Governor might already be disabled and there is no point continuing 236 * with the work-handler. 237 */ 238 if (!policy) 239 goto unlock; 240 241 dbs_data = policy->governor_data; 242 243 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 244 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 245 246 sampling_rate = cs_tuners->sampling_rate; 247 } else { 248 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 249 250 sampling_rate = od_tuners->sampling_rate; 251 } 252 253 if (!need_load_eval(cdbs->shared, sampling_rate)) 254 modify_all = false; 255 256 delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); 257 gov_queue_work(dbs_data, policy, delay, modify_all); 258 259unlock: 260 mutex_unlock(&shared->timer_mutex); 261} 262 263static void set_sampling_rate(struct dbs_data *dbs_data, 264 unsigned int sampling_rate) 265{ 266 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 267 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 268 cs_tuners->sampling_rate = sampling_rate; 269 } else { 270 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 271 od_tuners->sampling_rate = sampling_rate; 272 } 273} 274 275static int alloc_common_dbs_info(struct cpufreq_policy *policy, 276 struct common_dbs_data *cdata) 277{ 278 struct cpu_common_dbs_info *shared; 279 int j; 280 281 /* Allocate memory for the common information for policy->cpus */ 282 shared = kzalloc(sizeof(*shared), GFP_KERNEL); 283 if (!shared) 284 return -ENOMEM; 285 286 /* Set shared for all CPUs, online+offline */ 287 for_each_cpu(j, policy->related_cpus) 288 cdata->get_cpu_cdbs(j)->shared = shared; 289 290 return 0; 291} 292 293static void free_common_dbs_info(struct cpufreq_policy *policy, 294 struct common_dbs_data *cdata) 295{ 296 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 297 struct cpu_common_dbs_info *shared = cdbs->shared; 298 int j; 299 300 for_each_cpu(j, policy->cpus) 301 cdata->get_cpu_cdbs(j)->shared = NULL; 302 303 kfree(shared); 304} 305 306static int cpufreq_governor_init(struct cpufreq_policy *policy, 307 struct dbs_data *dbs_data, 308 struct common_dbs_data *cdata) 309{ 310 unsigned int latency; 311 int ret; 312 313 /* State should be equivalent to EXIT */ 314 if (policy->governor_data) 315 return -EBUSY; 316 317 if (dbs_data) { 318 if (WARN_ON(have_governor_per_policy())) 319 return -EINVAL; 320 321 ret = alloc_common_dbs_info(policy, cdata); 322 if (ret) 323 return ret; 324 325 dbs_data->usage_count++; 326 policy->governor_data = dbs_data; 327 return 0; 328 } 329 330 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 331 if (!dbs_data) 332 return -ENOMEM; 333 334 ret = alloc_common_dbs_info(policy, cdata); 335 if (ret) 336 goto free_dbs_data; 337 338 dbs_data->cdata = cdata; 339 dbs_data->usage_count = 1; 340 341 ret = cdata->init(dbs_data, !policy->governor->initialized); 342 if (ret) 343 goto free_common_dbs_info; 344 345 /* policy latency is in ns. Convert it to us first */ 346 latency = policy->cpuinfo.transition_latency / 1000; 347 if (latency == 0) 348 latency = 1; 349 350 /* Bring kernel and HW constraints together */ 351 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 352 MIN_LATENCY_MULTIPLIER * latency); 353 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 354 latency * LATENCY_MULTIPLIER)); 355 356 if (!have_governor_per_policy()) 357 cdata->gdbs_data = dbs_data; 358 359 policy->governor_data = dbs_data; 360 361 ret = sysfs_create_group(get_governor_parent_kobj(policy), 362 get_sysfs_attr(dbs_data)); 363 if (ret) 364 goto reset_gdbs_data; 365 366 return 0; 367 368reset_gdbs_data: 369 policy->governor_data = NULL; 370 371 if (!have_governor_per_policy()) 372 cdata->gdbs_data = NULL; 373 cdata->exit(dbs_data, !policy->governor->initialized); 374free_common_dbs_info: 375 free_common_dbs_info(policy, cdata); 376free_dbs_data: 377 kfree(dbs_data); 378 return ret; 379} 380 381static int cpufreq_governor_exit(struct cpufreq_policy *policy, 382 struct dbs_data *dbs_data) 383{ 384 struct common_dbs_data *cdata = dbs_data->cdata; 385 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 386 387 /* State should be equivalent to INIT */ 388 if (!cdbs->shared || cdbs->shared->policy) 389 return -EBUSY; 390 391 if (!--dbs_data->usage_count) { 392 sysfs_remove_group(get_governor_parent_kobj(policy), 393 get_sysfs_attr(dbs_data)); 394 395 policy->governor_data = NULL; 396 397 if (!have_governor_per_policy()) 398 cdata->gdbs_data = NULL; 399 400 cdata->exit(dbs_data, policy->governor->initialized == 1); 401 kfree(dbs_data); 402 } else { 403 policy->governor_data = NULL; 404 } 405 406 free_common_dbs_info(policy, cdata); 407 return 0; 408} 409 410static int cpufreq_governor_start(struct cpufreq_policy *policy, 411 struct dbs_data *dbs_data) 412{ 413 struct common_dbs_data *cdata = dbs_data->cdata; 414 unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; 415 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 416 struct cpu_common_dbs_info *shared = cdbs->shared; 417 int io_busy = 0; 418 419 if (!policy->cur) 420 return -EINVAL; 421 422 /* State should be equivalent to INIT */ 423 if (!shared || shared->policy) 424 return -EBUSY; 425 426 if (cdata->governor == GOV_CONSERVATIVE) { 427 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 428 429 sampling_rate = cs_tuners->sampling_rate; 430 ignore_nice = cs_tuners->ignore_nice_load; 431 } else { 432 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 433 434 sampling_rate = od_tuners->sampling_rate; 435 ignore_nice = od_tuners->ignore_nice_load; 436 io_busy = od_tuners->io_is_busy; 437 } 438 439 shared->policy = policy; 440 shared->time_stamp = ktime_get(); 441 mutex_init(&shared->timer_mutex); 442 443 for_each_cpu(j, policy->cpus) { 444 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); 445 unsigned int prev_load; 446 447 j_cdbs->prev_cpu_idle = 448 get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); 449 450 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - 451 j_cdbs->prev_cpu_idle); 452 j_cdbs->prev_load = 100 * prev_load / 453 (unsigned int)j_cdbs->prev_cpu_wall; 454 455 if (ignore_nice) 456 j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 457 458 INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); 459 } 460 461 if (cdata->governor == GOV_CONSERVATIVE) { 462 struct cs_cpu_dbs_info_s *cs_dbs_info = 463 cdata->get_cpu_dbs_info_s(cpu); 464 465 cs_dbs_info->down_skip = 0; 466 cs_dbs_info->requested_freq = policy->cur; 467 } else { 468 struct od_ops *od_ops = cdata->gov_ops; 469 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); 470 471 od_dbs_info->rate_mult = 1; 472 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 473 od_ops->powersave_bias_init_cpu(cpu); 474 } 475 476 gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), 477 true); 478 return 0; 479} 480 481static int cpufreq_governor_stop(struct cpufreq_policy *policy, 482 struct dbs_data *dbs_data) 483{ 484 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); 485 struct cpu_common_dbs_info *shared = cdbs->shared; 486 487 /* State should be equivalent to START */ 488 if (!shared || !shared->policy) 489 return -EBUSY; 490 491 /* 492 * Work-handler must see this updated, as it should not proceed any 493 * further after governor is disabled. And so timer_mutex is taken while 494 * updating this value. 495 */ 496 mutex_lock(&shared->timer_mutex); 497 shared->policy = NULL; 498 mutex_unlock(&shared->timer_mutex); 499 500 gov_cancel_work(dbs_data, policy); 501 502 mutex_destroy(&shared->timer_mutex); 503 return 0; 504} 505 506static int cpufreq_governor_limits(struct cpufreq_policy *policy, 507 struct dbs_data *dbs_data) 508{ 509 struct common_dbs_data *cdata = dbs_data->cdata; 510 unsigned int cpu = policy->cpu; 511 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 512 513 /* State should be equivalent to START */ 514 if (!cdbs->shared || !cdbs->shared->policy) 515 return -EBUSY; 516 517 mutex_lock(&cdbs->shared->timer_mutex); 518 if (policy->max < cdbs->shared->policy->cur) 519 __cpufreq_driver_target(cdbs->shared->policy, policy->max, 520 CPUFREQ_RELATION_H); 521 else if (policy->min > cdbs->shared->policy->cur) 522 __cpufreq_driver_target(cdbs->shared->policy, policy->min, 523 CPUFREQ_RELATION_L); 524 dbs_check_cpu(dbs_data, cpu); 525 mutex_unlock(&cdbs->shared->timer_mutex); 526 527 return 0; 528} 529 530int cpufreq_governor_dbs(struct cpufreq_policy *policy, 531 struct common_dbs_data *cdata, unsigned int event) 532{ 533 struct dbs_data *dbs_data; 534 int ret; 535 536 /* Lock governor to block concurrent initialization of governor */ 537 mutex_lock(&cdata->mutex); 538 539 if (have_governor_per_policy()) 540 dbs_data = policy->governor_data; 541 else 542 dbs_data = cdata->gdbs_data; 543 544 if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { 545 ret = -EINVAL; 546 goto unlock; 547 } 548 549 switch (event) { 550 case CPUFREQ_GOV_POLICY_INIT: 551 ret = cpufreq_governor_init(policy, dbs_data, cdata); 552 break; 553 case CPUFREQ_GOV_POLICY_EXIT: 554 ret = cpufreq_governor_exit(policy, dbs_data); 555 break; 556 case CPUFREQ_GOV_START: 557 ret = cpufreq_governor_start(policy, dbs_data); 558 break; 559 case CPUFREQ_GOV_STOP: 560 ret = cpufreq_governor_stop(policy, dbs_data); 561 break; 562 case CPUFREQ_GOV_LIMITS: 563 ret = cpufreq_governor_limits(policy, dbs_data); 564 break; 565 default: 566 ret = -EINVAL; 567 } 568 569unlock: 570 mutex_unlock(&cdata->mutex); 571 572 return ret; 573} 574EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 575