1/* 2 * fam15h_power.c - AMD Family 15h processor power monitoring 3 * 4 * Copyright (c) 2011 Advanced Micro Devices, Inc. 5 * Author: Andreas Herrmann <herrmann.der.user@googlemail.com> 6 * 7 * 8 * This driver is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This driver is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 * See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this driver; if not, see <http://www.gnu.org/licenses/>. 19 */ 20 21#include <linux/err.h> 22#include <linux/hwmon.h> 23#include <linux/hwmon-sysfs.h> 24#include <linux/init.h> 25#include <linux/module.h> 26#include <linux/pci.h> 27#include <linux/bitops.h> 28#include <asm/processor.h> 29#include <asm/msr.h> 30 31MODULE_DESCRIPTION("AMD Family 15h CPU processor power monitor"); 32MODULE_AUTHOR("Andreas Herrmann <herrmann.der.user@googlemail.com>"); 33MODULE_LICENSE("GPL"); 34 35/* D18F3 */ 36#define REG_NORTHBRIDGE_CAP 0xe8 37 38/* D18F4 */ 39#define REG_PROCESSOR_TDP 0x1b8 40 41/* D18F5 */ 42#define REG_TDP_RUNNING_AVERAGE 0xe0 43#define REG_TDP_LIMIT3 0xe8 44 45#define FAM15H_MIN_NUM_ATTRS 2 46#define FAM15H_NUM_GROUPS 2 47 48#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b 49 50struct fam15h_power_data { 51 struct pci_dev *pdev; 52 unsigned int tdp_to_watts; 53 unsigned int base_tdp; 54 unsigned int processor_pwr_watts; 55 unsigned int cpu_pwr_sample_ratio; 56 const struct attribute_group *groups[FAM15H_NUM_GROUPS]; 57 struct attribute_group group; 58 /* maximum accumulated power of a compute unit */ 59 u64 max_cu_acc_power; 60}; 61 62static ssize_t show_power(struct device *dev, 63 struct device_attribute *attr, char *buf) 64{ 65 u32 val, tdp_limit, running_avg_range; 66 s32 running_avg_capture; 67 u64 curr_pwr_watts; 68 struct fam15h_power_data *data = dev_get_drvdata(dev); 69 struct pci_dev *f4 = data->pdev; 70 71 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), 72 REG_TDP_RUNNING_AVERAGE, &val); 73 74 /* 75 * On Carrizo and later platforms, TdpRunAvgAccCap bit field 76 * is extended to 4:31 from 4:25. 77 */ 78 if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) { 79 running_avg_capture = val >> 4; 80 running_avg_capture = sign_extend32(running_avg_capture, 27); 81 } else { 82 running_avg_capture = (val >> 4) & 0x3fffff; 83 running_avg_capture = sign_extend32(running_avg_capture, 21); 84 } 85 86 running_avg_range = (val & 0xf) + 1; 87 88 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), 89 REG_TDP_LIMIT3, &val); 90 91 tdp_limit = val >> 16; 92 curr_pwr_watts = ((u64)(tdp_limit + 93 data->base_tdp)) << running_avg_range; 94 curr_pwr_watts -= running_avg_capture; 95 curr_pwr_watts *= data->tdp_to_watts; 96 97 /* 98 * Convert to microWatt 99 * 100 * power is in Watt provided as fixed point integer with 101 * scaling factor 1/(2^16). For conversion we use 102 * (10^6)/(2^16) = 15625/(2^10) 103 */ 104 curr_pwr_watts = (curr_pwr_watts * 15625) >> (10 + running_avg_range); 105 return sprintf(buf, "%u\n", (unsigned int) curr_pwr_watts); 106} 107static DEVICE_ATTR(power1_input, S_IRUGO, show_power, NULL); 108 109static ssize_t show_power_crit(struct device *dev, 110 struct device_attribute *attr, char *buf) 111{ 112 struct fam15h_power_data *data = dev_get_drvdata(dev); 113 114 return sprintf(buf, "%u\n", data->processor_pwr_watts); 115} 116static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); 117 118static int fam15h_power_init_attrs(struct pci_dev *pdev, 119 struct fam15h_power_data *data) 120{ 121 int n = FAM15H_MIN_NUM_ATTRS; 122 struct attribute **fam15h_power_attrs; 123 struct cpuinfo_x86 *c = &boot_cpu_data; 124 125 if (c->x86 == 0x15 && 126 (c->x86_model <= 0xf || 127 (c->x86_model >= 0x60 && c->x86_model <= 0x6f))) 128 n += 1; 129 130 fam15h_power_attrs = devm_kcalloc(&pdev->dev, n, 131 sizeof(*fam15h_power_attrs), 132 GFP_KERNEL); 133 134 if (!fam15h_power_attrs) 135 return -ENOMEM; 136 137 n = 0; 138 fam15h_power_attrs[n++] = &dev_attr_power1_crit.attr; 139 if (c->x86 == 0x15 && 140 (c->x86_model <= 0xf || 141 (c->x86_model >= 0x60 && c->x86_model <= 0x6f))) 142 fam15h_power_attrs[n++] = &dev_attr_power1_input.attr; 143 144 data->group.attrs = fam15h_power_attrs; 145 146 return 0; 147} 148 149static bool should_load_on_this_node(struct pci_dev *f4) 150{ 151 u32 val; 152 153 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 3), 154 REG_NORTHBRIDGE_CAP, &val); 155 if ((val & BIT(29)) && ((val >> 30) & 3)) 156 return false; 157 158 return true; 159} 160 161/* 162 * Newer BKDG versions have an updated recommendation on how to properly 163 * initialize the running average range (was: 0xE, now: 0x9). This avoids 164 * counter saturations resulting in bogus power readings. 165 * We correct this value ourselves to cope with older BIOSes. 166 */ 167static const struct pci_device_id affected_device[] = { 168 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, 169 { 0 } 170}; 171 172static void tweak_runavg_range(struct pci_dev *pdev) 173{ 174 u32 val; 175 176 /* 177 * let this quirk apply only to the current version of the 178 * northbridge, since future versions may change the behavior 179 */ 180 if (!pci_match_id(affected_device, pdev)) 181 return; 182 183 pci_bus_read_config_dword(pdev->bus, 184 PCI_DEVFN(PCI_SLOT(pdev->devfn), 5), 185 REG_TDP_RUNNING_AVERAGE, &val); 186 if ((val & 0xf) != 0xe) 187 return; 188 189 val &= ~0xf; 190 val |= 0x9; 191 pci_bus_write_config_dword(pdev->bus, 192 PCI_DEVFN(PCI_SLOT(pdev->devfn), 5), 193 REG_TDP_RUNNING_AVERAGE, val); 194} 195 196#ifdef CONFIG_PM 197static int fam15h_power_resume(struct pci_dev *pdev) 198{ 199 tweak_runavg_range(pdev); 200 return 0; 201} 202#else 203#define fam15h_power_resume NULL 204#endif 205 206static int fam15h_power_init_data(struct pci_dev *f4, 207 struct fam15h_power_data *data) 208{ 209 u32 val, eax, ebx, ecx, edx; 210 u64 tmp; 211 int ret; 212 213 pci_read_config_dword(f4, REG_PROCESSOR_TDP, &val); 214 data->base_tdp = val >> 16; 215 tmp = val & 0xffff; 216 217 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), 218 REG_TDP_LIMIT3, &val); 219 220 data->tdp_to_watts = ((val & 0x3ff) << 6) | ((val >> 10) & 0x3f); 221 tmp *= data->tdp_to_watts; 222 223 /* result not allowed to be >= 256W */ 224 if ((tmp >> 16) >= 256) 225 dev_warn(&f4->dev, 226 "Bogus value for ProcessorPwrWatts (processor_pwr_watts>=%u)\n", 227 (unsigned int) (tmp >> 16)); 228 229 /* convert to microWatt */ 230 data->processor_pwr_watts = (tmp * 15625) >> 10; 231 232 ret = fam15h_power_init_attrs(f4, data); 233 if (ret) 234 return ret; 235 236 cpuid(0x80000007, &eax, &ebx, &ecx, &edx); 237 238 /* CPUID Fn8000_0007:EDX[12] indicates to support accumulated power */ 239 if (!(edx & BIT(12))) 240 return 0; 241 242 /* 243 * determine the ratio of the compute unit power accumulator 244 * sample period to the PTSC counter period by executing CPUID 245 * Fn8000_0007:ECX 246 */ 247 data->cpu_pwr_sample_ratio = ecx; 248 249 if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) { 250 pr_err("Failed to read max compute unit power accumulator MSR\n"); 251 return -ENODEV; 252 } 253 254 data->max_cu_acc_power = tmp; 255 256 return 0; 257} 258 259static int fam15h_power_probe(struct pci_dev *pdev, 260 const struct pci_device_id *id) 261{ 262 struct fam15h_power_data *data; 263 struct device *dev = &pdev->dev; 264 struct device *hwmon_dev; 265 int ret; 266 267 /* 268 * though we ignore every other northbridge, we still have to 269 * do the tweaking on _each_ node in MCM processors as the counters 270 * are working hand-in-hand 271 */ 272 tweak_runavg_range(pdev); 273 274 if (!should_load_on_this_node(pdev)) 275 return -ENODEV; 276 277 data = devm_kzalloc(dev, sizeof(struct fam15h_power_data), GFP_KERNEL); 278 if (!data) 279 return -ENOMEM; 280 281 ret = fam15h_power_init_data(pdev, data); 282 if (ret) 283 return ret; 284 285 data->pdev = pdev; 286 287 data->groups[0] = &data->group; 288 289 hwmon_dev = devm_hwmon_device_register_with_groups(dev, "fam15h_power", 290 data, 291 &data->groups[0]); 292 return PTR_ERR_OR_ZERO(hwmon_dev); 293} 294 295static const struct pci_device_id fam15h_power_id_table[] = { 296 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, 297 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, 298 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, 299 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, 300 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, 301 {} 302}; 303MODULE_DEVICE_TABLE(pci, fam15h_power_id_table); 304 305static struct pci_driver fam15h_power_driver = { 306 .name = "fam15h_power", 307 .id_table = fam15h_power_id_table, 308 .probe = fam15h_power_probe, 309 .resume = fam15h_power_resume, 310}; 311 312module_pci_driver(fam15h_power_driver); 313