root/arch/x86/events/amd/ibs.c

DEFINITIONS

This source file includes the following definitions.
  1. perf_event_set_period
  2. perf_event_try_update
  3. get_ibs_pmu
  4. perf_ibs_precise_event
  5. perf_ibs_init
  6. perf_ibs_set_period
  7. get_ibs_fetch_count
  8. get_ibs_op_count
  9. perf_ibs_event_update
  10. perf_ibs_enable_event
  11. perf_ibs_disable_event
  12. perf_ibs_start
  13. perf_ibs_stop
  14. perf_ibs_add
  15. perf_ibs_del
  16. perf_ibs_read
  17. perf_ibs_handle_irq
  18. perf_ibs_nmi_handler
  19. perf_ibs_pmu_init
  20. perf_event_ibs_init
  21. perf_event_ibs_init
  22. __get_ibs_caps
  23. get_ibs_caps
  24. get_eilvt
  25. put_eilvt
  26. ibs_eilvt_valid
  27. setup_ibs_ctl
  28. force_ibs_eilvt_setup
  29. ibs_eilvt_setup
  30. get_ibs_lvt_offset
  31. setup_APIC_ibs
  32. clear_APIC_ibs
  33. x86_pmu_amd_ibs_starting_cpu
  34. perf_ibs_suspend
  35. perf_ibs_resume
  36. perf_ibs_pm_init
  37. perf_ibs_pm_init
  38. x86_pmu_amd_ibs_dying_cpu
  39. amd_ibs_init

/*
 * Performance events - AMD IBS
 *
 *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
#include <linux/sched/clock.h>

#include <asm/apic.h>

#include "../perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK   (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK      IBS_OP_MAX_CNT


/*
 * IBS states:
 *
 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
 * and any further add()s must fail.
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 * complicated by the fact that the IBS hardware can send late NMIs (ie. after
 * we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our bit and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If that event has the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
 *
 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
 * nesting, and clear STARTED late, so that we have a well defined state over
 * the clearing of the EN bit.
 *
 * XXX: we could probably be using !atomic bitops for all this.
 */

enum ibs_states {
        IBS_ENABLED     = 0,
        IBS_STARTED     = 1,
        IBS_STOPPING    = 2,
        IBS_STOPPED     = 3,

        IBS_MAX_STATES,
};

struct cpu_perf_ibs {
        struct perf_event       *event;
        unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
        struct pmu                      pmu;
        unsigned int                    msr;
        u64                             config_mask;
        u64                             cnt_mask;
        u64                             enable_mask;
        u64                             valid_mask;
        u64                             max_period;
        unsigned long                   offset_mask[1];
        int                             offset_max;
        struct cpu_perf_ibs __percpu    *pcpu;

        struct attribute                **format_attrs;
        struct attribute_group          format_group;
        const struct attribute_group    *attr_groups[2];

        u64                             (*get_count)(u64 config);
};

struct perf_ibs_data {
        u32             size;
        union {
                u32     data[0];        /* data buffer starts here */
                u32     caps;
        };
        u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};

static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int overflow = 0;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        if (unlikely(left < (s64)min)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        /*
         * If the hw period that triggers the sw overflow is too short
         * we might end up back in the irq handler immediately, which
         * biases the results. Thus we shorten the next-to-last period
         * and let the last period run at the max period.
         */
        if (left > max) {
                left -= max;
                if (left > max)
                        left = max;
                else if (left < min)
                        left = min;
        }

        *hw_period = (u64)left;

        return overflow;
}
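
/*
 * Worked example (illustrative numbers): for the op PMU, max is
 * IBS_OP_MAX_CNT << 4 == 0xffff0 (1048560).  With a sample period of
 * 2000000 the first call yields a hw period of 2000000 - 1048560 ==
 * 951440; once that expires 1048560 is left, and the next call
 * programs exactly the max period, so the sw overflow lands on the
 * requested period boundary.
 */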

static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - width;
        u64 prev_raw_count;
        u64 delta;

        /*
         * Careful: an NMI might modify the previous event value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic event atomically:
         */
        prev_raw_count = local64_read(&hwc->prev_count);
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                return 0;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (event-)time and add that to the generic event.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count.
         */
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return 1;
}
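
/*
 * Example of the shift trick above with a hypothetical 48-bit counter
 * (width == 48, shift == 16): prev == 0xffffffffff00 and
 * new == 0x000000000100.  (new << 16) - (prev << 16) wraps modulo 2^64
 * and the logical right shift by 16 yields delta == 0x200, the true
 * event count despite the rollover.  IBS always passes width == 64,
 * so shift == 0 and the subtraction wraps modulo 2^64 directly.
 */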

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
        if (perf_ibs_fetch.pmu.type == type)
                return &perf_ibs_fetch;
        if (perf_ibs_op.pmu.type == type)
                return &perf_ibs_op;
        return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2, and PERF_EFLAGS_EXACT is set. In the rare case
 * that IBS was unable to record the rip correctly, the rip is
 * invalid; we then clear PERF_EFLAGS_EXACT and take the rip from
 * pt_regs instead.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
        switch (event->attr.precise_ip) {
        case 0:
                return -ENOENT;
        case 1:
        case 2:
                break;
        default:
                return -EOPNOTSUPP;
        }

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                switch (event->attr.config) {
                case PERF_COUNT_HW_CPU_CYCLES:
                        *config = 0;
                        return 0;
                }
                break;
        case PERF_TYPE_RAW:
                switch (event->attr.config) {
                case 0x0076:
                        *config = 0;
                        return 0;
                case 0x00C1:
                        *config = IBS_OP_CNT_CTL;
                        return 0;
                }
                break;
        default:
                return -ENOENT;
        }

        return -EOPNOTSUPP;
}

static int perf_ibs_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs;
        u64 max_cnt, config;
        int ret;

        perf_ibs = get_ibs_pmu(event->attr.type);
        if (perf_ibs) {
                config = event->attr.config;
        } else {
                perf_ibs = &perf_ibs_op;
                ret = perf_ibs_precise_event(event, &config);
                if (ret)
                        return ret;
        }

        if (event->pmu != &perf_ibs->pmu)
                return -ENOENT;

        if (config & ~perf_ibs->config_mask)
                return -EINVAL;

        if (hwc->sample_period) {
                if (config & perf_ibs->cnt_mask)
                        /* raw max_cnt may not be set */
                        return -EINVAL;
                if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
                        /*
                         * The lower 4 bits cannot be set in the ibs
                         * max cnt, but we allow them in case we later
                         * adjust the sample period to set a frequency.
                         */
                        return -EINVAL;
                hwc->sample_period &= ~0x0FULL;
                if (!hwc->sample_period)
                        hwc->sample_period = 0x10;
        } else {
                max_cnt = config & perf_ibs->cnt_mask;
                config &= ~perf_ibs->cnt_mask;
                event->attr.sample_period = max_cnt << 4;
                hwc->sample_period = event->attr.sample_period;
        }

        if (!hwc->sample_period)
                return -EINVAL;

        /*
         * If we modify hwc->sample_period, we also need to update
         * hwc->last_period and hwc->period_left.
         */
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);

        hwc->config_base = perf_ibs->msr;
        hwc->config = config;

        return 0;
}
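
/*
 * Note on granularity: MaxCnt is programmed as period >> 4, so the
 * effective sample period is always a multiple of 16.  A fixed period
 * with any of the low 4 bits set is rejected above, while a
 * frequency-driven period is silently rounded down (minimum 0x10).
 */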

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
                               struct hw_perf_event *hwc, u64 *period)
{
        int overflow;

        /* ignore lower 4 bits in min count: */
        overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
        local64_set(&hwc->prev_count, 0);

        return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
        return (config & IBS_FETCH_CNT) >> 12;
}
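
/*
 * IbsFetchCnt occupies bits 31:16 of IBSFETCHCTL (the IBS_FETCH_CNT
 * mask); shifting right by 12 instead of 16 scales the count by 16,
 * matching the << 4 granularity of max_period above.
 */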

static u64 get_ibs_op_count(u64 config)
{
        u64 count = 0;

        if (config & IBS_OP_VAL)
                count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */

        if (ibs_caps & IBS_CAPS_RDWROPCNT)
                count += (config & IBS_OP_CUR_CNT) >> 32;

        return count;
}
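
/*
 * On overflow (IBS_OP_VAL set) the op counter has wrapped after
 * MaxCnt << 4 counted units (ops or cycles, depending on IbsOpCntCtl).
 * If the current count is readable (IBS_CAPS_RDWROPCNT), the units
 * accumulated since the wrap are added on top; note that the
 * IBS_OP_CUR_CNT mask excludes the low, hardware-randomized bits of
 * IbsOpCurCnt.
 */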

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
                      u64 *config)
{
        u64 count = perf_ibs->get_count(*config);

        /*
         * Set width to 64 since we do not overflow on max width but
         * instead on max count. In perf_ibs_set_period() we clear
         * prev count manually on overflow.
         */
        while (!perf_event_try_update(event, count, 64)) {
                rdmsrl(event->hw.config_base, *config);
                count = perf_ibs->get_count(*config);
        }
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
                                         struct hw_perf_event *hwc, u64 config)
{
        wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
                                          struct hw_perf_event *hwc, u64 config)
{
        config &= ~perf_ibs->cnt_mask;
        if (boot_cpu_data.x86 == 0x10)
                wrmsrl(hwc->config_base, config);
        config &= ~perf_ibs->enable_mask;
        wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do both.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 period;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
        hwc->state = 0;

        perf_ibs_set_period(perf_ibs, hwc, &period);
        /*
         * Set STARTED before enabling the hardware, such that a subsequent NMI
         * must observe it.
         */
        set_bit(IBS_STARTED,    pcpu->state);
        clear_bit(IBS_STOPPING, pcpu->state);
        perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 config;
        int stopping;

        if (test_and_set_bit(IBS_STOPPING, pcpu->state))
                return;

        stopping = test_bit(IBS_STARTED, pcpu->state);

        if (!stopping && (hwc->state & PERF_HES_UPTODATE))
                return;

        rdmsrl(hwc->config_base, config);

        if (stopping) {
                /*
                 * Set STOPPED before disabling the hardware, such that it
                 * must be visible to NMIs the moment we clear the EN bit,
                 * at which point we can generate an !VALID sample which
                 * we need to consume.
                 */
                set_bit(IBS_STOPPED, pcpu->state);
                perf_ibs_disable_event(perf_ibs, hwc, config);
                /*
                 * Clear STARTED after disabling the hardware; were it
                 * cleared before, an NMI that hits after the clear but
                 * before the EN bit is cleared might be treated as
                 * spurious and go unhandled.
                 *
                 * Clearing it after, however, creates the problem of the NMI
                 * handler seeing STARTED but not having a valid sample.
                 */
                clear_bit(IBS_STARTED, pcpu->state);
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
        }

        if (hwc->state & PERF_HES_UPTODATE)
                return;

        /*
         * Clear the valid bit so rollovers are not counted in this
         * update; rollovers are only accounted in the irq handler.
         */
        config &= ~perf_ibs->valid_mask;

        perf_ibs_event_update(perf_ibs, event, &config);
        hwc->state |= PERF_HES_UPTODATE;
}

static int perf_ibs_add(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (test_and_set_bit(IBS_ENABLED, pcpu->state))
                return -ENOSPC;

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        pcpu->event = event;

        if (flags & PERF_EF_START)
                perf_ibs_start(event, PERF_EF_RELOAD);

        return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
                return;

        perf_ibs_stop(event, PERF_EF_UPDATE);

        pcpu->event = NULL;

        perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

PMU_FORMAT_ATTR(rand_en,        "config:57");
PMU_FORMAT_ATTR(cnt_ctl,        "config:19");
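
/*
 * These format attributes end up in sysfs under
 * /sys/bus/event_source/devices/ibs_{fetch,op}/format/ so the bits can
 * be set by name, e.g. (assuming the respective capability is present):
 *
 *   perf record -a -e ibs_op/cnt_ctl=1/ ...
 *   perf record -a -e ibs_fetch/rand_en=1/ ...
 */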

static struct attribute *ibs_fetch_format_attrs[] = {
        &format_attr_rand_en.attr,
        NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
        NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
        NULL,
};

static struct perf_ibs perf_ibs_fetch = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
                .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
        },
        .msr                    = MSR_AMD64_IBSFETCHCTL,
        .config_mask            = IBS_FETCH_CONFIG_MASK,
        .cnt_mask               = IBS_FETCH_MAX_CNT,
        .enable_mask            = IBS_FETCH_ENABLE,
        .valid_mask             = IBS_FETCH_VAL,
        .max_period             = IBS_FETCH_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
        .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
        .format_attrs           = ibs_fetch_format_attrs,

        .get_count              = get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSOPCTL,
        .config_mask            = IBS_OP_CONFIG_MASK,
        .cnt_mask               = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
                                  IBS_OP_CUR_CNT_RAND,
        .enable_mask            = IBS_OP_ENABLE,
        .valid_mask             = IBS_OP_VAL,
        .max_period             = IBS_OP_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
        .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
        .format_attrs           = ibs_op_format_attrs,

        .get_count              = get_ibs_op_count,
};

static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        struct perf_event *event = pcpu->event;
        struct hw_perf_event *hwc;
        struct perf_sample_data data;
        struct perf_raw_record raw;
        struct pt_regs regs;
        struct perf_ibs_data ibs_data;
        int offset, size, check_rip, offset_max, throttle = 0;
        unsigned int msr;
        u64 *buf, *config, period;

        if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
                /*
                 * Catch spurious interrupts after stopping IBS: After
                 * disabling IBS there could still be incoming NMIs
                 * with samples that even have the valid bit cleared.
                 * Mark all these NMIs as handled.
                 */
                if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
                        return 1;

                return 0;
        }

        if (WARN_ON_ONCE(!event))
                goto fail;

        hwc = &event->hw;
        msr = hwc->config_base;
        buf = ibs_data.regs;
        rdmsrl(msr, *buf);
        if (!(*buf++ & perf_ibs->valid_mask))
                goto fail;

        config = &ibs_data.regs[0];
        perf_ibs_event_update(perf_ibs, event, config);
        perf_sample_data_init(&data, 0, hwc->last_period);
        if (!perf_ibs_set_period(perf_ibs, hwc, &period))
                goto out;       /* no sw counter overflow */

        ibs_data.caps = ibs_caps;
        size = 1;
        offset = 1;
        check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
        if (event->attr.sample_type & PERF_SAMPLE_RAW)
                offset_max = perf_ibs->offset_max;
        else if (check_rip)
                offset_max = 3;
        else
                offset_max = 1;
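        /*
         * regs[0] already holds the control MSR; the loop below always
         * reads at least the MSR at offset 1 (IbsFetchLinAd resp.
         * IbsOpRip).  offset_max == 3 additionally pulls in IbsOpData
         * (regs[2]) for the RIP-invalid check further down, and for
         * PERF_SAMPLE_RAW every implemented IBS register is captured.
         */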
        do {
                rdmsrl(msr + offset, *buf++);
                size++;
                offset = find_next_bit(perf_ibs->offset_mask,
                                       perf_ibs->offset_max,
                                       offset + 1);
        } while (offset < offset_max);
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                /*
                 * Read IbsBrTarget and IbsOpData4 separately
                 * depending on their availability; they cannot be
                 * added to offset_max because their MSR offsets are
                 * not contiguous with the other IBS registers.
                 */
                if (ibs_caps & IBS_CAPS_BRNTRGT) {
                        rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
                        size++;
                }
                if (ibs_caps & IBS_CAPS_OPDATA4) {
                        rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
                        size++;
                }
        }
        ibs_data.size = sizeof(u64) * size;

        regs = *iregs;
        if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
                regs.flags &= ~PERF_EFLAGS_EXACT;
        } else {
                set_linear_ip(&regs, ibs_data.regs[1]);
                regs.flags |= PERF_EFLAGS_EXACT;
        }

        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                raw = (struct perf_raw_record){
                        .frag = {
                                .size = sizeof(u32) + ibs_data.size,
                                .data = ibs_data.data,
                        },
                };
                data.raw = &raw;
        }

        throttle = perf_event_overflow(event, &data, &regs);
out:
        if (throttle) {
                perf_ibs_stop(event, 0);
        } else {
                period >>= 4;

                if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
                    (*config & IBS_OP_CNT_CTL))
                        period |= *config & IBS_OP_CUR_CNT_RAND;

                perf_ibs_enable_event(perf_ibs, hwc, period);
        }

        perf_event_update_userpage(event);

        return 1;
}

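/*
 * Both PMUs share one NMI: each irq handler checks its own state and
 * the sum is returned, so the NMI is marked handled if either PMU
 * claimed it.  The sched_clock() delta is reported via
 * perf_sample_event_took() so the core can throttle the maximum
 * sample rate when NMI processing consumes too much CPU time.
 */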
static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        u64 stamp = sched_clock();
        int handled = 0;

        handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
        handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

        if (handled)
                inc_irq_stat(apic_perf_irqs);

        perf_sample_event_took(sched_clock() - stamp);

        return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
        struct cpu_perf_ibs __percpu *pcpu;
        int ret;

        pcpu = alloc_percpu(struct cpu_perf_ibs);
        if (!pcpu)
                return -ENOMEM;

        perf_ibs->pcpu = pcpu;

        /* register attributes */
        if (perf_ibs->format_attrs[0]) {
                memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
                perf_ibs->format_group.name     = "format";
                perf_ibs->format_group.attrs    = perf_ibs->format_attrs;

                memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
                perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
                perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
        }

        ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
        if (ret) {
                perf_ibs->pcpu = NULL;
                free_percpu(pcpu);
        }

        return ret;
}

static __init void perf_event_ibs_init(void)
{
        struct attribute **attr = ibs_op_format_attrs;

        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

        if (ibs_caps & IBS_CAPS_OPCNT) {
                perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
                *attr++ = &format_attr_cnt_ctl.attr;
        }
        perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init void perf_event_ibs_init(void) { }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
        u32 caps;
        unsigned int max_level;

        if (!boot_cpu_has(X86_FEATURE_IBS))
                return 0;

        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
                return IBS_CAPS_DEFAULT;

        caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
                return IBS_CAPS_DEFAULT;

        return caps;
}

u32 get_ibs_caps(void)
{
        return ibs_caps;
}
EXPORT_SYMBOL(get_ibs_caps);

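/*
 * get_eilvt()/put_eilvt() reserve and release an extended APIC LVT
 * entry for the NMI delivery mode; setup_APIC_eilvt() returns 0 on
 * success, hence the negation.
 */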
static inline int get_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
        int offset;
        u64 val;
        int valid = 0;

        preempt_disable();

        rdmsrl(MSR_AMD64_IBSCTL, val);
        offset = val & IBSCTL_LVT_OFFSET_MASK;

        if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
                pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        if (!get_eilvt(offset)) {
                pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        valid = 1;
out:
        preempt_enable();

        return valid;
}

static int setup_ibs_ctl(int ibs_eilvt_off)
{
        struct pci_dev *cpu_cfg;
        int nodes;
        u32 value = 0;

        nodes = 0;
        cpu_cfg = NULL;
        do {
                cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
                                         PCI_DEVICE_ID_AMD_10H_NB_MISC,
                                         cpu_cfg);
                if (!cpu_cfg)
                        break;
                ++nodes;
                pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
                                       | IBSCTL_LVT_OFFSET_VALID);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
                if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
                        pci_dev_put(cpu_cfg);
                        pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
                                 value);
                        return -EINVAL;
                }
        } while (1);

        if (!nodes) {
                pr_debug("No CPU node configured for IBS\n");
                return -ENODEV;
        }

        return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * set up the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset, which updates the
 * offset in the per-node IBS_CTL msr. The per-core APIC setup of the
 * IBS interrupt vector is then handled by the cpu hotplug callback
 * (x86_pmu_amd_ibs_starting_cpu), which uses the new offset.
 */
static void force_ibs_eilvt_setup(void)
{
        int offset;
        int ret;

        preempt_disable();
        /* find the next available EILVT entry, skip offset 0 */
        for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
                if (get_eilvt(offset))
                        break;
        }
        preempt_enable();

        if (offset == APIC_EILVT_NR_MAX) {
                pr_debug("No EILVT entry available\n");
                return;
        }

        ret = setup_ibs_ctl(offset);
        if (ret)
                goto out;

        if (!ibs_eilvt_valid())
                goto out;

        pr_info("LVT offset %d assigned\n", offset);

        return;
out:
        preempt_disable();
        put_eilvt(offset);
        preempt_enable();
        return;
}

static void ibs_eilvt_setup(void)
{
        /*
         * Force LVT offset assignment for family 10h: The offsets are
         * not assigned by the BIOS for this family, so the OS is
         * responsible for doing it. If the OS assignment fails, fall
         * back to the BIOS settings and try to set it up from there.
         */
        if (boot_cpu_data.x86 == 0x10)
                force_ibs_eilvt_setup();
}

static inline int get_ibs_lvt_offset(void)
{
        u64 val;

        rdmsrl(MSR_AMD64_IBSCTL, val);
        if (!(val & IBSCTL_LVT_OFFSET_VALID))
                return -EINVAL;

        return val & IBSCTL_LVT_OFFSET_MASK;
}

static void setup_APIC_ibs(void)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset < 0)
                goto failed;

        if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
                return;
failed:
        pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
                smp_processor_id());
}

static void clear_APIC_ibs(void)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset >= 0)
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
        setup_APIC_ibs();
        return 0;
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
        clear_APIC_ibs();
        return 0;
}

static void perf_ibs_resume(void)
{
        ibs_eilvt_setup();
        setup_APIC_ibs();
}

static struct syscore_ops perf_ibs_syscore_ops = {
        .resume         = perf_ibs_resume,
        .suspend        = perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
        register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
        clear_APIC_ibs();
        return 0;
}

static __init int amd_ibs_init(void)
{
        u32 caps;

        caps = __get_ibs_caps();
        if (!caps)
                return -ENODEV; /* ibs not supported by the cpu */

        ibs_eilvt_setup();

        if (!ibs_eilvt_valid())
                return -EINVAL;

        perf_ibs_pm_init();

        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
        /*
         * x86_pmu_amd_ibs_starting_cpu will be called from core on
         * all online cpus.
         */
        cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
                          "perf/x86/amd/ibs:starting",
                          x86_pmu_amd_ibs_starting_cpu,
                          x86_pmu_amd_ibs_dying_cpu);

        perf_event_ibs_init();

        return 0;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);
