#include "perf_event_intel_uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];

static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

int uncore_pcibus_to_physid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int phys_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			phys_id = map->pbus_to_physid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return phys_id;
}

struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

	if (!alloc) {
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_physid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	kfree(alloc);
	return map;
}

ssize_t uncore_event_show(struct kobject *kobj,
			  struct kobj_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}
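
/*
 * Find the box this pmu uses on @cpu. The per-cpu pointer acts as a
 * cache; on a miss the pmu's box list is scanned for a box on the same
 * physical package and the result is cached under uncore_box_lock.
 */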
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	struct intel_uncore_box *box;

	box = *per_cpu_ptr(pmu->box, cpu);
	if (box)
		return box;

	raw_spin_lock(&uncore_box_lock);
	/* Recheck in lock to handle races. */
	if (*per_cpu_ptr(pmu->box, cpu))
		goto out;
	list_for_each_entry(box, &pmu->box_list, list) {
		if (box->phys_id == topology_physical_package_id(cpu)) {
			atomic_inc(&box->refcnt);
			*per_cpu_ptr(pmu->box, cpu) = box;
			break;
		}
	}
out:
	raw_spin_unlock(&uncore_box_lock);

	return *per_cpu_ptr(pmu->box, cpu);
}

struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	/*
	 * perf core schedules events on the basis of cpu; uncore events are
	 * collected by one of the cpus inside a physical package.
	 */
	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put the constraint if the extra reg was actually allocated.
	 * This also takes care of events which do not use an extra shared
	 * reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}
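
/*
 * Point the hw event at the control and counter registers selected by
 * @idx; the fixed counter uses its dedicated register pair.
 */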
static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
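
/*
 * Read the counter and accumulate the delta into event->count. The
 * counters are narrower than 64 bit, so both values are shifted up to
 * bit 63 before subtracting to make the delta wrap correctly.
 */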
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP and is broken
 * for SandyBridge, so we use a hrtimer to periodically poll the counter
 * to avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;
	/*
	 * disable local interrupts to prevent uncore_pmu_event_start/stop
	 * from interrupting the update process
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	box->hrtimer.function = uncore_pmu_hrtimer;
}

static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
	struct intel_uncore_box *box;
	int i, size;

	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < type->num_shared_regs; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	atomic_set(&box->refcnt, 1);
	box->cpu = -1;
	box->phys_id = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Use the uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_uncore_event(struct perf_event *event)
{
	return event->pmu->event_init == uncore_pmu_event_init;
}
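
/*
 * Add @leader (and, if @dogrp, its uncore siblings) to the box's event
 * list. Returns the new number of events, or -EINVAL if the box would
 * exceed its available counters.
 */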
static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_uncore_event(leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_uncore_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}
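
/*
 * Assign a counter index to each of the first @n events on the box.
 * The fast path keeps events on the counters they already occupy; if
 * that fails, perf_assign_events() reschedules them by constraint
 * weight. On failure, or when called without an @assign array (group
 * validation dry run), the constraints are released again.
 */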
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1) {
		uncore_enable_box(box);
		uncore_pmu_start_hrtimer(box);
	}
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0) {
			uncore_disable_box(box);
			uncore_pmu_cancel_hrtimer(box);
		}
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}
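
/*
 * pmu::add callback: collect the new event into the box, rerun counter
 * assignment, stop any events that have to move, and (re)start every
 * event on its assigned counter.
 */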
static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx == assign[i] &&
		    hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
		    hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			while (++i < box->n_events)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				 struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * The event is not yet connected with its siblings, therefore
	 * we must first collect the existing siblings and then add the
	 * new event before we can simulate the scheduling.
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}
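
/*
 * pmu::event_init callback: reject sampling and exclude_* requests,
 * bind the event to the cpu that collects uncore events for its
 * package, and validate the group on a fake box before accepting it.
 */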
static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

	/*
	 * The uncore PMU measures at all privilege levels all the time,
	 * so it doesn't make sense to specify any exclude bits.
	 */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
	    event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = 0ULL;
	} else {
		hwc->config = event->attr.config & pmu->type->event_mask;
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};
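
/*
 * Register one uncore pmu with perf. Types with a single box register
 * as "uncore_<name>" (or plain "uncore"); types with several boxes get
 * the box index appended to the name.
 */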
static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
			.event_init	= uncore_pmu_event_init,
			.add		= uncore_pmu_event_add,
			.del		= uncore_pmu_event_del,
			.start		= uncore_pmu_event_start,
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
	}

	if (pmu->type->num_boxes == 1) {
		if (strlen(pmu->type->name) > 0)
			sprintf(pmu->name, "uncore_%s", pmu->type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
			pmu->pmu_idx);
	}

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	return ret;
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
	int i;

	for (i = 0; i < type->num_boxes; i++)
		free_percpu(type->pmus[i].box);
	kfree(type->pmus);
	type->pmus = NULL;
	kfree(type->events_group);
	type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
{
	int i;
	for (i = 0; types[i]; i++)
		uncore_type_exit(types[i]);
}

static int __init uncore_type_init(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmus;
	struct attribute_group *attr_group;
	struct attribute **attrs;
	int i, j;

	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	type->pmus = pmus;

	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				0, type->num_counters, 0, 0);

	for (i = 0; i < type->num_boxes; i++) {
		pmus[i].func_id = -1;
		pmus[i].pmu_idx = i;
		pmus[i].type = type;
		INIT_LIST_HEAD(&pmus[i].box_list);
		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
		if (!pmus[i].box)
			goto fail;
	}

	if (type->event_descs) {
		i = 0;
		while (type->event_descs[i].attr.attr.name)
			i++;

		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
					sizeof(*attr_group), GFP_KERNEL);
		if (!attr_group)
			goto fail;

		attrs = (struct attribute **)(attr_group + 1);
		attr_group->name = "events";
		attr_group->attrs = attrs;

		for (j = 0; j < i; j++)
			attrs[j] = &type->event_descs[j].attr.attr;

		type->events_group = attr_group;
	}

	type->pmu_group = &uncore_pmu_attr_group;
	return 0;
fail:
	uncore_type_exit(type);
	return -ENOMEM;
}

static int __init uncore_types_init(struct intel_uncore_type **types)
{
	int i, ret;

	for (i = 0; types[i]; i++) {
		ret = uncore_type_init(types[i]);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	while (--i >= 0)
		uncore_type_exit(types[i]);
	return ret;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct intel_uncore_type *type;
	int phys_id;
	bool first_box = false;

	phys_id = uncore_pcibus_to_physid(pdev->bus);
	if (phys_id < 0)
		return -ENODEV;

	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
		uncore_extra_pci_dev[phys_id][idx] = pdev;
		pci_set_drvdata(pdev, NULL);
		return 0;
	}

	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;

	/*
	 * for a performance monitoring unit with multiple boxes,
	 * each box has a different function id.
	 */
	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
	if (pmu->func_id < 0)
		pmu->func_id = pdev->devfn;
	else
		WARN_ON_ONCE(pmu->func_id != pdev->devfn);

	box->phys_id = phys_id;
	box->pci_dev = pdev;
	box->pmu = pmu;
	uncore_box_init(box);
	pci_set_drvdata(pdev, box);

	raw_spin_lock(&uncore_box_lock);
	if (list_empty(&pmu->box_list))
		first_box = true;
	list_add_tail(&box->list, &pmu->box_list);
	raw_spin_unlock(&uncore_box_lock);

	if (first_box)
		uncore_pmu_register(pmu);
	return 0;
}
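
/*
 * Tear down the box (or extra-dev slot) backing @pdev, drop the cached
 * per-cpu references, and unregister the pmu when its last box goes
 * away.
 */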
static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box = pci_get_drvdata(pdev);
	struct intel_uncore_pmu *pmu;
	int i, cpu, phys_id;
	bool last_box = false;

	phys_id = uncore_pcibus_to_physid(pdev->bus);
	box = pci_get_drvdata(pdev);
	if (!box) {
		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
				uncore_extra_pci_dev[phys_id][i] = NULL;
				break;
			}
		}
		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
		return;
	}

	pmu = box->pmu;
	if (WARN_ON_ONCE(phys_id != box->phys_id))
		return;

	pci_set_drvdata(pdev, NULL);

	raw_spin_lock(&uncore_box_lock);
	list_del(&box->list);
	if (list_empty(&pmu->box_list))
		last_box = true;
	raw_spin_unlock(&uncore_box_lock);

	for_each_possible_cpu(cpu) {
		if (*per_cpu_ptr(pmu->box, cpu) == box) {
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			atomic_dec(&box->refcnt);
		}
	}

	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
	kfree(box);

	if (last_box)
		perf_pmu_unregister(&pmu->pmu);
}

static int __init uncore_pci_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 45: /* Sandy Bridge-EP */
		ret = snbep_uncore_pci_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ret = ivbep_uncore_pci_init();
		break;
	case 63: /* Haswell-EP */
		ret = hswep_uncore_pci_init();
		break;
	case 86: /* BDX-DE */
		ret = bdx_uncore_pci_init();
		break;
	case 42: /* Sandy Bridge */
		ret = snb_uncore_pci_init();
		break;
	case 58: /* Ivy Bridge */
		ret = ivb_uncore_pci_init();
		break;
	case 60: /* Haswell */
	case 69: /* Haswell Celeron */
		ret = hsw_uncore_pci_init();
		break;
	case 61: /* Broadwell */
		ret = bdw_uncore_pci_init();
		break;
	default:
		return 0;
	}

	if (ret)
		return ret;

	ret = uncore_types_init(uncore_pci_uncores);
	if (ret)
		return ret;

	uncore_pci_driver->probe = uncore_pci_probe;
	uncore_pci_driver->remove = uncore_pci_remove;

	ret = pci_register_driver(uncore_pci_driver);
	if (ret == 0)
		pcidrv_registered = true;
	else
		uncore_types_exit(uncore_pci_uncores);

	return ret;
}

static void __init uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		pci_unregister_driver(uncore_pci_driver);
		uncore_types_exit(uncore_pci_uncores);
	}
}

/* CPU hotplug/unplug is serialized by the cpu_add_remove_lock mutex */
static LIST_HEAD(boxes_to_free);

static void uncore_kfree_boxes(void)
{
	struct intel_uncore_box *box;

	while (!list_empty(&boxes_to_free)) {
		box = list_entry(boxes_to_free.next,
				 struct intel_uncore_box, list);
		list_del(&box->list);
		kfree(box);
	}
}
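
/*
 * The dying cpu drops its reference to every box it was using; boxes
 * whose refcount reaches zero are queued on boxes_to_free and freed
 * later from the CPU_DEAD/CPU_ONLINE notifications (see
 * uncore_kfree_boxes()).
 */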
static void uncore_cpu_dying(int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			box = *per_cpu_ptr(pmu->box, cpu);
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			if (box && atomic_dec_and_test(&box->refcnt))
				list_add(&box->list, &boxes_to_free);
		}
	}
}

static int uncore_cpu_starting(int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box, *exist;
	int i, j, k, phys_id;

	phys_id = topology_physical_package_id(cpu);

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			box = *per_cpu_ptr(pmu->box, cpu);
			/* called by uncore_cpu_init? */
			if (box && box->phys_id >= 0) {
				uncore_box_init(box);
				continue;
			}

			for_each_online_cpu(k) {
				exist = *per_cpu_ptr(pmu->box, k);
				if (exist && exist->phys_id == phys_id) {
					atomic_inc(&exist->refcnt);
					*per_cpu_ptr(pmu->box, cpu) = exist;
					if (box) {
						list_add(&box->list,
							 &boxes_to_free);
						box = NULL;
					}
					break;
				}
			}

			if (box) {
				box->phys_id = phys_id;
				uncore_box_init(box);
			}
		}
	}
	return 0;
}

static int uncore_cpu_prepare(int cpu, int phys_id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			if (pmu->func_id < 0)
				pmu->func_id = j;

			box = uncore_alloc_box(type, cpu_to_node(cpu));
			if (!box)
				return -ENOMEM;

			box->pmu = pmu;
			box->phys_id = phys_id;
			*per_cpu_ptr(pmu->box, cpu) = box;
		}
	}
	return 0;
}
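
/*
 * Move uncore event collection for the given uncores from @old_cpu to
 * @new_cpu, migrating the perf context so running events keep counting.
 * An @old_cpu of -1 means the boxes are being bound for the first time;
 * a @new_cpu of -1 leaves them without a collecting cpu.
 */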
static void
uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncores[i]; i++) {
		type = uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			if (old_cpu < 0)
				box = uncore_pmu_to_box(pmu, new_cpu);
			else
				box = uncore_pmu_to_box(pmu, old_cpu);
			if (!box)
				continue;

			if (old_cpu < 0) {
				WARN_ON_ONCE(box->cpu != -1);
				box->cpu = new_cpu;
				continue;
			}

			WARN_ON_ONCE(box->cpu != old_cpu);
			if (new_cpu >= 0) {
				uncore_pmu_cancel_hrtimer(box);
				perf_pmu_migrate_context(&pmu->pmu,
						old_cpu, new_cpu);
				box->cpu = new_cpu;
			} else {
				box->cpu = -1;
			}
		}
	}
}

static void uncore_event_exit_cpu(int cpu)
{
	int i, phys_id, target;

	/* if exiting cpu is used for collecting uncore events */
	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
		return;

	/* find a new cpu to collect uncore events */
	phys_id = topology_physical_package_id(cpu);
	target = -1;
	for_each_online_cpu(i) {
		if (i == cpu)
			continue;
		if (phys_id == topology_physical_package_id(i)) {
			target = i;
			break;
		}
	}

	/* migrate uncore events to the new cpu */
	if (target >= 0)
		cpumask_set_cpu(target, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, cpu, target);
	uncore_change_context(uncore_pci_uncores, cpu, target);
}

static void uncore_event_init_cpu(int cpu)
{
	int i, phys_id;

	phys_id = topology_physical_package_id(cpu);
	for_each_cpu(i, &uncore_cpu_mask) {
		if (phys_id == topology_physical_package_id(i))
			return;
	}

	cpumask_set_cpu(cpu, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, -1, cpu);
	uncore_change_context(uncore_pci_uncores, -1, cpu);
}

static int uncore_cpu_notifier(struct notifier_block *self,
			       unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	/* allocate/free data structure for uncore box */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		uncore_cpu_prepare(cpu, -1);
		break;
	case CPU_STARTING:
		uncore_cpu_starting(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_DYING:
		uncore_cpu_dying(cpu);
		break;
	case CPU_ONLINE:
	case CPU_DEAD:
		uncore_kfree_boxes();
		break;
	default:
		break;
	}

	/* select the cpu that collects uncore events */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_DOWN_FAILED:
	case CPU_STARTING:
		uncore_event_init_cpu(cpu);
		break;
	case CPU_DOWN_PREPARE:
		uncore_event_exit_cpu(cpu);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb = {
	.notifier_call	= uncore_cpu_notifier,
	/*
	 * to migrate uncore events, our notifier should be executed
	 * before perf core's notifier.
	 */
	.priority	= CPU_PRI_PERF + 1,
};

static void __init uncore_cpu_setup(void *dummy)
{
	uncore_cpu_starting(smp_processor_id());
}
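
/*
 * Select the MSR-based uncore support for this CPU model and set up the
 * corresponding uncore types.
 */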
static int __init uncore_cpu_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 26: /* Nehalem */
	case 30:
	case 37: /* Westmere */
	case 44:
		nhm_uncore_cpu_init();
		break;
	case 42: /* Sandy Bridge */
	case 58: /* Ivy Bridge */
	case 60: /* Haswell */
	case 69: /* Haswell */
	case 70: /* Haswell */
	case 61: /* Broadwell */
	case 71: /* Broadwell */
		snb_uncore_cpu_init();
		break;
	case 45: /* Sandy Bridge-EP */
		snbep_uncore_cpu_init();
		break;
	case 46: /* Nehalem-EX */
	case 47: /* Westmere-EX aka. Xeon E7 */
		nhmex_uncore_cpu_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ivbep_uncore_cpu_init();
		break;
	case 63: /* Haswell-EP */
		hswep_uncore_cpu_init();
		break;
	case 86: /* BDX-DE */
		bdx_uncore_cpu_init();
		break;
	default:
		return 0;
	}

	ret = uncore_types_init(uncore_msr_uncores);
	if (ret)
		return ret;

	return 0;
}

static int __init uncore_pmus_register(void)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_type *type;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			uncore_pmu_register(pmu);
		}
	}

	return 0;
}

static void __init uncore_cpumask_init(void)
{
	int cpu;

	/*
	 * only invoke once from msr or pci init code
	 */
	if (!cpumask_empty(&uncore_cpu_mask))
		return;

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu) {
		int i, phys_id = topology_physical_package_id(cpu);

		for_each_cpu(i, &uncore_cpu_mask) {
			if (phys_id == topology_physical_package_id(i)) {
				phys_id = -1;
				break;
			}
		}
		if (phys_id < 0)
			continue;

		uncore_cpu_prepare(cpu, phys_id);
		uncore_event_init_cpu(cpu);
	}
	on_each_cpu(uncore_cpu_setup, NULL, 1);

	__register_cpu_notifier(&uncore_cpu_nb);

	cpu_notifier_register_done();
}

static int __init intel_uncore_init(void)
{
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	if (cpu_has_hypervisor)
		return -ENODEV;

	ret = uncore_pci_init();
	if (ret)
		goto fail;
	ret = uncore_cpu_init();
	if (ret) {
		uncore_pci_exit();
		goto fail;
	}
	uncore_cpumask_init();

	uncore_pmus_register();
	return 0;
fail:
	return ret;
}
device_initcall(intel_uncore_init);