1/** 2 * @file op_model_p4.c 3 * P4 model-specific MSR operations 4 * 5 * @remark Copyright 2002 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author Graydon Hoare 9 */ 10 11#include <linux/oprofile.h> 12#include <linux/smp.h> 13#include <linux/ptrace.h> 14#include <asm/nmi.h> 15#include <asm/msr.h> 16#include <asm/fixmap.h> 17#include <asm/apic.h> 18 19 20#include "op_x86_model.h" 21#include "op_counter.h" 22 23#define NUM_EVENTS 39 24 25#define NUM_COUNTERS_NON_HT 8 26#define NUM_ESCRS_NON_HT 45 27#define NUM_CCCRS_NON_HT 18 28#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) 29 30#define NUM_COUNTERS_HT2 4 31#define NUM_ESCRS_HT2 23 32#define NUM_CCCRS_HT2 9 33#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) 34 35#define OP_CTR_OVERFLOW (1ULL<<31) 36 37static unsigned int num_counters = NUM_COUNTERS_NON_HT; 38static unsigned int num_controls = NUM_CONTROLS_NON_HT; 39 40/* this has to be checked dynamically since the 41 hyper-threadedness of a chip is discovered at 42 kernel boot-time. */ 43static inline void setup_num_counters(void) 44{ 45#ifdef CONFIG_SMP 46 if (smp_num_siblings == 2) { 47 num_counters = NUM_COUNTERS_HT2; 48 num_controls = NUM_CONTROLS_HT2; 49 } 50#endif 51} 52 53static inline int addr_increment(void) 54{ 55#ifdef CONFIG_SMP 56 return smp_num_siblings == 2 ? 2 : 1; 57#else 58 return 1; 59#endif 60} 61 62 63/* tables to simulate simplified hardware view of p4 registers */ 64struct p4_counter_binding { 65 int virt_counter; 66 int counter_address; 67 int cccr_address; 68}; 69 70struct p4_event_binding { 71 int escr_select; /* value to put in CCCR */ 72 int event_select; /* value to put in ESCR */ 73 struct { 74 int virt_counter; /* for this counter... */ 75 int escr_address; /* use this ESCR */ 76 } bindings[2]; 77}; 78 79/* nb: these CTR_* defines are a duplicate of defines in 80 event/i386.p4*events. */ 81 82 83#define CTR_BPU_0 (1 << 0) 84#define CTR_MS_0 (1 << 1) 85#define CTR_FLAME_0 (1 << 2) 86#define CTR_IQ_4 (1 << 3) 87#define CTR_BPU_2 (1 << 4) 88#define CTR_MS_2 (1 << 5) 89#define CTR_FLAME_2 (1 << 6) 90#define CTR_IQ_5 (1 << 7) 91 92static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = { 93 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, 94 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, 95 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, 96 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, 97 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, 98 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, 99 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, 100 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } 101}; 102 103#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT) 104 105/* p4 event codes in libop/op_event.h are indices into this table. */ 106 107static struct p4_event_binding p4_events[NUM_EVENTS] = { 108 109 { /* BRANCH_RETIRED */ 110 0x05, 0x06, 111 { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, 112 {CTR_IQ_5, MSR_P4_CRU_ESCR3} } 113 }, 114 115 { /* MISPRED_BRANCH_RETIRED */ 116 0x04, 0x03, 117 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 118 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 119 }, 120 121 { /* TC_DELIVER_MODE */ 122 0x01, 0x01, 123 { { CTR_MS_0, MSR_P4_TC_ESCR0}, 124 { CTR_MS_2, MSR_P4_TC_ESCR1} } 125 }, 126 127 { /* BPU_FETCH_REQUEST */ 128 0x00, 0x03, 129 { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, 130 { CTR_BPU_2, MSR_P4_BPU_ESCR1} } 131 }, 132 133 { /* ITLB_REFERENCE */ 134 0x03, 0x18, 135 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, 136 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } 137 }, 138 139 { /* MEMORY_CANCEL */ 140 0x05, 0x02, 141 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, 142 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } 143 }, 144 145 { /* MEMORY_COMPLETE */ 146 0x02, 0x08, 147 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 148 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 149 }, 150 151 { /* LOAD_PORT_REPLAY */ 152 0x02, 0x04, 153 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 154 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 155 }, 156 157 { /* STORE_PORT_REPLAY */ 158 0x02, 0x05, 159 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 160 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 161 }, 162 163 { /* MOB_LOAD_REPLAY */ 164 0x02, 0x03, 165 { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, 166 { CTR_BPU_2, MSR_P4_MOB_ESCR1} } 167 }, 168 169 { /* PAGE_WALK_TYPE */ 170 0x04, 0x01, 171 { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, 172 { CTR_BPU_2, MSR_P4_PMH_ESCR1} } 173 }, 174 175 { /* BSQ_CACHE_REFERENCE */ 176 0x07, 0x0c, 177 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 178 { CTR_BPU_2, MSR_P4_BSU_ESCR1} } 179 }, 180 181 { /* IOQ_ALLOCATION */ 182 0x06, 0x03, 183 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 184 { 0, 0 } } 185 }, 186 187 { /* IOQ_ACTIVE_ENTRIES */ 188 0x06, 0x1a, 189 { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, 190 { 0, 0 } } 191 }, 192 193 { /* FSB_DATA_ACTIVITY */ 194 0x06, 0x17, 195 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 196 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 197 }, 198 199 { /* BSQ_ALLOCATION */ 200 0x07, 0x05, 201 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 202 { 0, 0 } } 203 }, 204 205 { /* BSQ_ACTIVE_ENTRIES */ 206 0x07, 0x06, 207 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, 208 { 0, 0 } } 209 }, 210 211 { /* X87_ASSIST */ 212 0x05, 0x03, 213 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 214 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 215 }, 216 217 { /* SSE_INPUT_ASSIST */ 218 0x01, 0x34, 219 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 220 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 221 }, 222 223 { /* PACKED_SP_UOP */ 224 0x01, 0x08, 225 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 226 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 227 }, 228 229 { /* PACKED_DP_UOP */ 230 0x01, 0x0c, 231 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 232 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 233 }, 234 235 { /* SCALAR_SP_UOP */ 236 0x01, 0x0a, 237 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 238 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 239 }, 240 241 { /* SCALAR_DP_UOP */ 242 0x01, 0x0e, 243 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 244 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 245 }, 246 247 { /* 64BIT_MMX_UOP */ 248 0x01, 0x02, 249 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 250 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 251 }, 252 253 { /* 128BIT_MMX_UOP */ 254 0x01, 0x1a, 255 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 256 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 257 }, 258 259 { /* X87_FP_UOP */ 260 0x01, 0x04, 261 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 262 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 263 }, 264 265 { /* X87_SIMD_MOVES_UOP */ 266 0x01, 0x2e, 267 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 268 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 269 }, 270 271 { /* MACHINE_CLEAR */ 272 0x05, 0x02, 273 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 274 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 275 }, 276 277 { /* GLOBAL_POWER_EVENTS */ 278 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, 279 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 280 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 281 }, 282 283 { /* TC_MS_XFER */ 284 0x00, 0x05, 285 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 286 { CTR_MS_2, MSR_P4_MS_ESCR1} } 287 }, 288 289 { /* UOP_QUEUE_WRITES */ 290 0x00, 0x09, 291 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 292 { CTR_MS_2, MSR_P4_MS_ESCR1} } 293 }, 294 295 { /* FRONT_END_EVENT */ 296 0x05, 0x08, 297 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 298 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 299 }, 300 301 { /* EXECUTION_EVENT */ 302 0x05, 0x0c, 303 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 304 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 305 }, 306 307 { /* REPLAY_EVENT */ 308 0x05, 0x09, 309 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 310 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 311 }, 312 313 { /* INSTR_RETIRED */ 314 0x04, 0x02, 315 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 316 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 317 }, 318 319 { /* UOPS_RETIRED */ 320 0x04, 0x01, 321 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 322 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 323 }, 324 325 { /* UOP_TYPE */ 326 0x02, 0x02, 327 { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, 328 { CTR_IQ_5, MSR_P4_RAT_ESCR1} } 329 }, 330 331 { /* RETIRED_MISPRED_BRANCH_TYPE */ 332 0x02, 0x05, 333 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 334 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 335 }, 336 337 { /* RETIRED_BRANCH_TYPE */ 338 0x02, 0x04, 339 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 340 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 341 } 342}; 343 344 345#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) 346 347#define ESCR_RESERVED_BITS 0x80000003 348#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) 349#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) 350#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) 351#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) 352#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) 353#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) 354#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) 355 356#define CCCR_RESERVED_BITS 0x38030FFF 357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) 358#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) 359#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) 360#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) 361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) 362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) 363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) 364#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 365#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 366 367 368/* this assigns a "stagger" to the current CPU, which is used throughout 369 the code in this module as an extra array offset, to select the "even" 370 or "odd" part of all the divided resources. */ 371static unsigned int get_stagger(void) 372{ 373#ifdef CONFIG_SMP 374 int cpu = smp_processor_id(); 375 return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); 376#endif 377 return 0; 378} 379 380 381/* finally, mediate access to a real hardware counter 382 by passing a "virtual" counter numer to this macro, 383 along with your stagger setting. */ 384#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) 385 386static unsigned long reset_value[NUM_COUNTERS_NON_HT]; 387 388static void p4_shutdown(struct op_msrs const * const msrs) 389{ 390 int i; 391 392 for (i = 0; i < num_counters; ++i) { 393 if (msrs->counters[i].addr) 394 release_perfctr_nmi(msrs->counters[i].addr); 395 } 396 /* 397 * some of the control registers are specially reserved in 398 * conjunction with the counter registers (hence the starting offset). 399 * This saves a few bits. 400 */ 401 for (i = num_counters; i < num_controls; ++i) { 402 if (msrs->controls[i].addr) 403 release_evntsel_nmi(msrs->controls[i].addr); 404 } 405} 406 407static int p4_fill_in_addresses(struct op_msrs * const msrs) 408{ 409 unsigned int i; 410 unsigned int addr, cccraddr, stag; 411 412 setup_num_counters(); 413 stag = get_stagger(); 414 415 /* the counter & cccr registers we pay attention to */ 416 for (i = 0; i < num_counters; ++i) { 417 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 418 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; 419 if (reserve_perfctr_nmi(addr)) { 420 msrs->counters[i].addr = addr; 421 msrs->controls[i].addr = cccraddr; 422 } 423 } 424 425 /* 43 ESCR registers in three or four discontiguous group */ 426 for (addr = MSR_P4_BSU_ESCR0 + stag; 427 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { 428 if (reserve_evntsel_nmi(addr)) 429 msrs->controls[i].addr = addr; 430 } 431 432 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 433 * to avoid special case in nmi_{save|restore}_registers() */ 434 if (boot_cpu_data.x86_model >= 0x3) { 435 for (addr = MSR_P4_BSU_ESCR0 + stag; 436 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { 437 if (reserve_evntsel_nmi(addr)) 438 msrs->controls[i].addr = addr; 439 } 440 } else { 441 for (addr = MSR_P4_IQ_ESCR0 + stag; 442 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { 443 if (reserve_evntsel_nmi(addr)) 444 msrs->controls[i].addr = addr; 445 } 446 } 447 448 for (addr = MSR_P4_RAT_ESCR0 + stag; 449 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { 450 if (reserve_evntsel_nmi(addr)) 451 msrs->controls[i].addr = addr; 452 } 453 454 for (addr = MSR_P4_MS_ESCR0 + stag; 455 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 456 if (reserve_evntsel_nmi(addr)) 457 msrs->controls[i].addr = addr; 458 } 459 460 for (addr = MSR_P4_IX_ESCR0 + stag; 461 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 462 if (reserve_evntsel_nmi(addr)) 463 msrs->controls[i].addr = addr; 464 } 465 466 /* there are 2 remaining non-contiguously located ESCRs */ 467 468 if (num_counters == NUM_COUNTERS_NON_HT) { 469 /* standard non-HT CPUs handle both remaining ESCRs*/ 470 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) 471 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 472 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) 473 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; 474 475 } else if (stag == 0) { 476 /* HT CPUs give the first remainder to the even thread, as 477 the 32nd control register */ 478 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) 479 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; 480 481 } else { 482 /* and two copies of the second to the odd thread, 483 for the 22st and 23nd control registers */ 484 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { 485 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 486 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 487 } 488 } 489 490 for (i = 0; i < num_counters; ++i) { 491 if (!counter_config[i].enabled) 492 continue; 493 if (msrs->controls[i].addr) 494 continue; 495 op_x86_warn_reserved(i); 496 p4_shutdown(msrs); 497 return -EBUSY; 498 } 499 500 return 0; 501} 502 503 504static void pmc_setup_one_p4_counter(unsigned int ctr) 505{ 506 int i; 507 int const maxbind = 2; 508 unsigned int cccr = 0; 509 unsigned int escr = 0; 510 unsigned int high = 0; 511 unsigned int counter_bit; 512 struct p4_event_binding *ev = NULL; 513 unsigned int stag; 514 515 stag = get_stagger(); 516 517 /* convert from counter *number* to counter *bit* */ 518 counter_bit = 1 << VIRT_CTR(stag, ctr); 519 520 /* find our event binding structure. */ 521 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { 522 printk(KERN_ERR 523 "oprofile: P4 event code 0x%lx out of range\n", 524 counter_config[ctr].event); 525 return; 526 } 527 528 ev = &(p4_events[counter_config[ctr].event - 1]); 529 530 for (i = 0; i < maxbind; i++) { 531 if (ev->bindings[i].virt_counter & counter_bit) { 532 533 /* modify ESCR */ 534 rdmsr(ev->bindings[i].escr_address, escr, high); 535 ESCR_CLEAR(escr); 536 if (stag == 0) { 537 ESCR_SET_USR_0(escr, counter_config[ctr].user); 538 ESCR_SET_OS_0(escr, counter_config[ctr].kernel); 539 } else { 540 ESCR_SET_USR_1(escr, counter_config[ctr].user); 541 ESCR_SET_OS_1(escr, counter_config[ctr].kernel); 542 } 543 ESCR_SET_EVENT_SELECT(escr, ev->event_select); 544 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); 545 wrmsr(ev->bindings[i].escr_address, escr, high); 546 547 /* modify CCCR */ 548 rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, 549 cccr, high); 550 CCCR_CLEAR(cccr); 551 CCCR_SET_REQUIRED_BITS(cccr); 552 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); 553 if (stag == 0) 554 CCCR_SET_PMI_OVF_0(cccr); 555 else 556 CCCR_SET_PMI_OVF_1(cccr); 557 wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, 558 cccr, high); 559 return; 560 } 561 } 562 563 printk(KERN_ERR 564 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", 565 counter_config[ctr].event, stag, ctr); 566} 567 568 569static void p4_setup_ctrs(struct op_x86_model_spec const *model, 570 struct op_msrs const * const msrs) 571{ 572 unsigned int i; 573 unsigned int low, high; 574 unsigned int stag; 575 576 stag = get_stagger(); 577 578 rdmsr(MSR_IA32_MISC_ENABLE, low, high); 579 if (!MISC_PMC_ENABLED_P(low)) { 580 printk(KERN_ERR "oprofile: P4 PMC not available\n"); 581 return; 582 } 583 584 /* clear the cccrs we will use */ 585 for (i = 0; i < num_counters; i++) { 586 if (unlikely(!msrs->controls[i].addr)) 587 continue; 588 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 589 CCCR_CLEAR(low); 590 CCCR_SET_REQUIRED_BITS(low); 591 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 592 } 593 594 /* clear all escrs (including those outside our concern) */ 595 for (i = num_counters; i < num_controls; i++) { 596 if (unlikely(!msrs->controls[i].addr)) 597 continue; 598 wrmsr(msrs->controls[i].addr, 0, 0); 599 } 600 601 /* setup all counters */ 602 for (i = 0; i < num_counters; ++i) { 603 if (counter_config[i].enabled && msrs->controls[i].addr) { 604 reset_value[i] = counter_config[i].count; 605 pmc_setup_one_p4_counter(i); 606 wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, 607 -(u64)counter_config[i].count); 608 } else { 609 reset_value[i] = 0; 610 } 611 } 612} 613 614 615static int p4_check_ctrs(struct pt_regs * const regs, 616 struct op_msrs const * const msrs) 617{ 618 unsigned long ctr, low, high, stag, real; 619 int i; 620 621 stag = get_stagger(); 622 623 for (i = 0; i < num_counters; ++i) { 624 625 if (!reset_value[i]) 626 continue; 627 628 /* 629 * there is some eccentricity in the hardware which 630 * requires that we perform 2 extra corrections: 631 * 632 * - check both the CCCR:OVF flag for overflow and the 633 * counter high bit for un-flagged overflows. 634 * 635 * - write the counter back twice to ensure it gets 636 * updated properly. 637 * 638 * the former seems to be related to extra NMIs happening 639 * during the current NMI; the latter is reported as errata 640 * N15 in intel doc 249199-029, pentium 4 specification 641 * update, though their suggested work-around does not 642 * appear to solve the problem. 643 */ 644 645 real = VIRT_CTR(stag, i); 646 647 rdmsr(p4_counters[real].cccr_address, low, high); 648 rdmsr(p4_counters[real].counter_address, ctr, high); 649 if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { 650 oprofile_add_sample(regs, i); 651 wrmsrl(p4_counters[real].counter_address, 652 -(u64)reset_value[i]); 653 CCCR_CLEAR_OVF(low); 654 wrmsr(p4_counters[real].cccr_address, low, high); 655 wrmsrl(p4_counters[real].counter_address, 656 -(u64)reset_value[i]); 657 } 658 } 659 660 /* P4 quirk: you have to re-unmask the apic vector */ 661 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 662 663 /* See op_model_ppro.c */ 664 return 1; 665} 666 667 668static void p4_start(struct op_msrs const * const msrs) 669{ 670 unsigned int low, high, stag; 671 int i; 672 673 stag = get_stagger(); 674 675 for (i = 0; i < num_counters; ++i) { 676 if (!reset_value[i]) 677 continue; 678 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 679 CCCR_SET_ENABLE(low); 680 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 681 } 682} 683 684 685static void p4_stop(struct op_msrs const * const msrs) 686{ 687 unsigned int low, high, stag; 688 int i; 689 690 stag = get_stagger(); 691 692 for (i = 0; i < num_counters; ++i) { 693 if (!reset_value[i]) 694 continue; 695 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 696 CCCR_SET_DISABLE(low); 697 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 698 } 699} 700 701#ifdef CONFIG_SMP 702struct op_x86_model_spec op_p4_ht2_spec = { 703 .num_counters = NUM_COUNTERS_HT2, 704 .num_controls = NUM_CONTROLS_HT2, 705 .fill_in_addresses = &p4_fill_in_addresses, 706 .setup_ctrs = &p4_setup_ctrs, 707 .check_ctrs = &p4_check_ctrs, 708 .start = &p4_start, 709 .stop = &p4_stop, 710 .shutdown = &p4_shutdown 711}; 712#endif 713 714struct op_x86_model_spec op_p4_spec = { 715 .num_counters = NUM_COUNTERS_NON_HT, 716 .num_controls = NUM_CONTROLS_NON_HT, 717 .fill_in_addresses = &p4_fill_in_addresses, 718 .setup_ctrs = &p4_setup_ctrs, 719 .check_ctrs = &p4_check_ctrs, 720 .start = &p4_start, 721 .stop = &p4_stop, 722 .shutdown = &p4_shutdown 723}; 724