#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "perf_event.h"

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_INFO,
};

static enum {
	LBR_EIP_FLAGS		= 1,
	LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)

/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY		 \
	(X86_BR_CALL		|\
	 X86_BR_RET		|\
	 X86_BR_SYSCALL		|\
	 X86_BR_SYSRET		|\
	 X86_BR_INT		|\
	 X86_BR_IRET		|\
	 X86_BR_JCC		|\
	 X86_BR_JMP		|\
	 X86_BR_IRQ		|\
	 X86_BR_ABORT		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_IND_JMP		|\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
 * otherwise it becomes near impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config;
	if (!pmi)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;
	debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (!(lbr_select & LBR_CALL_STACK))
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};

static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0 ||
	    task_ctx->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = task_ctx->tos;
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	wrmsrl(x86_pmu.lbr_tos, tos);
	task_ctx->lbr_stack_state = LBR_NONE;
}

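/*
 * Counterpart of __intel_pmu_lbr_restore(): snapshot the live LBR stack
 * (FROM/TO and, on LBR_FORMAT_INFO parts, LBR_INFO) into the task context
 * on sched-out so that a call-stack LBR survives a context switch.
 */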
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0) {
		task_ctx->lbr_stack_state = LBR_NONE;
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	task_ctx->tos = tos;
	task_ctx->lbr_stack_state = LBR_VALID;
}

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	/*
	 * If the LBR callstack feature is enabled and the stack was saved
	 * when the task was scheduled out, restore the stack. Otherwise
	 * flush the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in) {
			__intel_pmu_lbr_restore(task_ctx);
			cpuc->lbr_context = ctx;
		} else {
			__intel_pmu_lbr_save(task_ctx);
		}
		return;
	}

	/*
	 * When sampling the branch stack in system-wide mode, it may be
	 * necessary to flush the stack on context switch. This happens
	 * when the branch stack does not tag its entries with the pid
	 * of the current task. Otherwise it becomes impossible to
	 * associate a branch entry with a task. This ambiguity is more
	 * likely to appear when the branch stack supports priv level
	 * filtering and the user sets it to monitor only at the user
	 * level (which could be a useful measurement in system-wide
	 * mode). In that case, the risk is high of having a branch
	 * stack with branches from multiple tasks.
	 */
	if (sched_in) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = ctx;
	}
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

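/*
 * Account a new LBR user on this CPU. For user-space call-stack events,
 * also bump lbr_callstack_users in the task context so the LBR state is
 * saved and restored across context switches (see intel_pmu_lbr_sched_task).
 */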
void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */
	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}
	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
	    event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users++;
	}

	cpuc->lbr_users++;
	perf_sched_cb_inc(event->ctx->pmu);
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
	    event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users--;
	}

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);

	if (cpuc->enabled && !cpuc->lbr_users) {
		__intel_pmu_lbr_disable();
		/* avoid stale pointer */
		cpuc->lbr_context = NULL;
	}
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}

static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64 lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from = msr_lastbranch.from;
		cpuc->lbr_entries[i].to = msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred = 0;
		cpuc->lbr_entries[i].predicted = 0;
		cpuc->lbr_entries[i].reserved = 0;
	}
	cpuc->lbr_stack.nr = i;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
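/*
 * Where the extra branch flags live depends on the LBR format: on the
 * LBR_FORMAT_EIP_FLAGS* formats the mispredict (and TSX) bits sit in the
 * top bits of the FROM MSR and must be stripped below, whereas on
 * LBR_FORMAT_INFO they come from the separate MSR_LBR_INFO_* registers
 * and the FROM address needs no fixup.
 */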
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel->config & LBR_CALL_STACK)
		num = tos;

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		int skip = 0;
		u16 cycles = 0;
		int lbr_flags = lbr_desc[lbr_format];

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_INFO) {
			u64 info;

			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
			mis = !!(info & LBR_INFO_MISPRED);
			pred = !mis;
			in_tx = !!(info & LBR_INFO_IN_TX);
			abort = !!(info & LBR_INFO_ABORT);
			cycles = (info & LBR_INFO_CYCLES);
		}
		if (lbr_flags & LBR_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
		}
		if (lbr_flags & LBR_TSX) {
			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
			abort = !!(from & LBR_FROM_FLAG_ABORT);
			skip = 3;
		}
		from = (u64)((((s64)from) << skip) >> skip);

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		cpuc->lbr_entries[out].from = from;
		cpuc->lbr_entries[out].to = to;
		cpuc->lbr_entries[out].mispred = mis;
		cpuc->lbr_entries[out].predicted = pred;
		cpuc->lbr_entries[out].in_tx = in_tx;
		cpuc->lbr_entries[out].abort = abort;
		cpuc->lbr_entries[out].cycles = cycles;
		cpuc->lbr_entries[out].reserved = 0;
		out++;
	}
	cpuc->lbr_stack.nr = out;
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
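/*
 * Worked example of the suppress-mode encoding done below: with
 * branch_sample_type == PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_COND,
 * x86_pmu.lbr_sel_map yields mask = LBR_USER | LBR_JCC = 0x006, so
 * reg->config = 0x006 ^ LBR_SEL_MASK = 0x1f9: ring 0 and every branch
 * class other than conditionals are suppressed, leaving only user-level
 * Jcc branches in the LBR.
 */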
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}
	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 */
	reg->config = mask ^ x86_pmu.lbr_sel_mask;

	return 0;
}

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}

/*
 * return the type of control flow change at address "from";
 * the instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if the LBR did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it
		 * is a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data. This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, bytes_read, is64);
	insn_get_opcode(&insn);
	if (!insn.opcode.got)
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		insn_get_immediate(&insn);
		if (insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transitions) may
	 * occur on any instruction. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
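/*
 * For instance, PERF_SAMPLE_BRANCH_ANY_RETURN on NHM/WSM also programs the
 * HW filter with LBR_REL_JMP | LBR_IND_JMP | LBR_FAR (see nhm_lbr_sel_map
 * below), so the plain jumps that slip through are weeded out here by the
 * opcode check.
 */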
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;

		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

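/*
 * Per-microarchitecture LBR setup: each init function below fills in the
 * LBR depth, the TOS/FROM/TO MSR addresses and, where the hardware
 * supports it, the LBR_SELECT mapping used by the HW filter.
 */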
/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr = 4;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("4-deep LBR, ");
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr = 16;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr = 16;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	x86_pmu.lbr_nr = 16;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = hsw_lbr_sel_map;

	pr_cont("16-deep LBR, ");
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	x86_pmu.lbr_nr = 32;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = hsw_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("32-deep LBR, ");
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_mask < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr = 8;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}