#include "perf_event_intel_uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];

static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

ssize_t uncore_event_show(struct kobject *kobj,
			  struct kobj_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	struct intel_uncore_box *box;

	box = *per_cpu_ptr(pmu->box, cpu);
	if (box)
		return box;

	raw_spin_lock(&uncore_box_lock);
	/* Recheck in lock to handle races. */
	if (*per_cpu_ptr(pmu->box, cpu))
		goto out;
	list_for_each_entry(box, &pmu->box_list, list) {
		if (box->phys_id == topology_physical_package_id(cpu)) {
			atomic_inc(&box->refcnt);
			*per_cpu_ptr(pmu->box, cpu) = box;
			break;
		}
	}
out:
	raw_spin_unlock(&uncore_box_lock);

	return *per_cpu_ptr(pmu->box, cpu);
}

struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	/*
	 * perf core schedules events on the basis of cpu; uncore events are
	 * collected by one of the cpus inside a physical package.
	 */
	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put the constraint if the extra reg was actually allocated.
	 * This also takes care of events which do not use an extra shared
	 * reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}

void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP and is broken
 * for SandyBridge. So we use an hrtimer to periodically poll the counter
 * to avoid overflow.
 */
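/*
 * The polling period is per box (box->hrtimer_duration); it defaults to
 * UNCORE_PMU_HRTIMER_INTERVAL, set in uncore_alloc_box().
 */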
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;
	/*
	 * disable local interrupts to prevent uncore_pmu_event_start/stop
	 * from interrupting the update process
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	__hrtimer_start_range_ns(&box->hrtimer,
			ns_to_ktime(box->hrtimer_duration), 0,
			HRTIMER_MODE_REL_PINNED, 0);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	box->hrtimer.function = uncore_pmu_hrtimer;
}

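/*
 * Allocate a box together with its trailing array of num_shared_regs extra
 * registers. cpu and phys_id start out as -1; they are filled in once the
 * CPU or PCI bring-up code binds the box to a package.
 */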
static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
	struct intel_uncore_box *box;
	int i, size;

	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < type->num_shared_regs; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	atomic_set(&box->refcnt, 1);
	box->cpu = -1;
	box->phys_id = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_uncore_event(struct perf_event *event)
{
	return event->pmu->event_init == uncore_pmu_event_init;
}

static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_uncore_event(leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_uncore_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}

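/*
 * Assign counters to the n collected events: first try to keep every event
 * on the counter it already had, and fall back to perf_assign_events() for
 * a full constraint-aware reschedule. On failure (or when only validating,
 * i.e. assign == NULL) the acquired constraints are released again.
 */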
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1) {
		uncore_enable_box(box);
		uncore_pmu_start_hrtimer(box);
	}
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0) {
			uncore_disable_box(box);
			uncore_pmu_cancel_hrtimer(box);
		}
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

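/*
 * Add an event to the box: collect it together with the already programmed
 * events, rerun counter assignment, stop events that have to move, and
 * (re)start everything that is allowed to run.
 */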
static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx == assign[i] &&
		    hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
		    hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			while (++i < box->n_events)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				 struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}

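/*
 * Per-event setup: reject sampling and exclude_* requests, pin the event to
 * the cpu that collects events for the target package, and let the uncore
 * type's hw_config hook validate the raw config.
 */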
static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

	/*
	 * The uncore PMU measures at all privilege levels all the time,
	 * so it doesn't make sense to specify any exclude bits.
	 */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
	    event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = 0ULL;
	} else {
		hwc->config = event->attr.config & pmu->type->event_mask;
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
			.event_init	= uncore_pmu_event_init,
			.add		= uncore_pmu_event_add,
			.del		= uncore_pmu_event_del,
			.start		= uncore_pmu_event_start,
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
	}

	if (pmu->type->num_boxes == 1) {
		if (strlen(pmu->type->name) > 0)
			sprintf(pmu->name, "uncore_%s", pmu->type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
			pmu->pmu_idx);
	}

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	return ret;
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
	int i;

	for (i = 0; i < type->num_boxes; i++)
		free_percpu(type->pmus[i].box);
	kfree(type->pmus);
	type->pmus = NULL;
	kfree(type->events_group);
	type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
{
	int i;

	for (i = 0; types[i]; i++)
		uncore_type_exit(types[i]);
}

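/*
 * Set up one intel_uncore_pmu per box of this type, build the default
 * "unconstrainted" constraint from num_counters, and assemble the optional
 * "events" sysfs attribute group from the type's event_descs.
 */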
static int __init uncore_type_init(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmus;
	struct attribute_group *attr_group;
	struct attribute **attrs;
	int i, j;

	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	type->pmus = pmus;

	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				0, type->num_counters, 0, 0);

	for (i = 0; i < type->num_boxes; i++) {
		pmus[i].func_id = -1;
		pmus[i].pmu_idx = i;
		pmus[i].type = type;
		INIT_LIST_HEAD(&pmus[i].box_list);
		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
		if (!pmus[i].box)
			goto fail;
	}

	if (type->event_descs) {
		i = 0;
		while (type->event_descs[i].attr.attr.name)
			i++;

		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
					sizeof(*attr_group), GFP_KERNEL);
		if (!attr_group)
			goto fail;

		attrs = (struct attribute **)(attr_group + 1);
		attr_group->name = "events";
		attr_group->attrs = attrs;

		for (j = 0; j < i; j++)
			attrs[j] = &type->event_descs[j].attr.attr;

		type->events_group = attr_group;
	}

	type->pmu_group = &uncore_pmu_attr_group;
	return 0;
fail:
	uncore_type_exit(type);
	return -ENOMEM;
}

static int __init uncore_types_init(struct intel_uncore_type **types)
{
	int i, ret;

	for (i = 0; types[i]; i++) {
		ret = uncore_type_init(types[i]);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	while (--i >= 0)
		uncore_type_exit(types[i]);
	return ret;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct intel_uncore_type *type;
	int phys_id;
	bool first_box = false;

	phys_id = uncore_pcibus_to_physid[pdev->bus->number];
	if (phys_id < 0)
		return -ENODEV;

	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
		uncore_extra_pci_dev[phys_id][idx] = pdev;
		pci_set_drvdata(pdev, NULL);
		return 0;
	}

	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;

	/*
	 * for performance monitoring units with multiple boxes,
	 * each box has a different function id.
	 */
	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
	if (pmu->func_id < 0)
		pmu->func_id = pdev->devfn;
	else
		WARN_ON_ONCE(pmu->func_id != pdev->devfn);

	box->phys_id = phys_id;
	box->pci_dev = pdev;
	box->pmu = pmu;
	uncore_box_init(box);
	pci_set_drvdata(pdev, box);

	raw_spin_lock(&uncore_box_lock);
	if (list_empty(&pmu->box_list))
		first_box = true;
	list_add_tail(&box->list, &pmu->box_list);
	raw_spin_unlock(&uncore_box_lock);

	if (first_box)
		uncore_pmu_register(pmu);
	return 0;
}

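/*
 * Undo uncore_pci_probe(): extra devices only need their table slot cleared;
 * a real box is unlinked, its per-cpu references are dropped, and the PMU is
 * unregistered once its last box disappears.
 */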
static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box = pci_get_drvdata(pdev);
	struct intel_uncore_pmu *pmu;
	int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
	bool last_box = false;

	if (!box) {
		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
				uncore_extra_pci_dev[phys_id][i] = NULL;
				break;
			}
		}
		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
		return;
	}

	pmu = box->pmu;
	if (WARN_ON_ONCE(phys_id != box->phys_id))
		return;

	pci_set_drvdata(pdev, NULL);

	raw_spin_lock(&uncore_box_lock);
	list_del(&box->list);
	if (list_empty(&pmu->box_list))
		last_box = true;
	raw_spin_unlock(&uncore_box_lock);

	for_each_possible_cpu(cpu) {
		if (*per_cpu_ptr(pmu->box, cpu) == box) {
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			atomic_dec(&box->refcnt);
		}
	}

	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
	kfree(box);

	if (last_box)
		perf_pmu_unregister(&pmu->pmu);
}

static int __init uncore_pci_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 45: /* Sandy Bridge-EP */
		ret = snbep_uncore_pci_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ret = ivbep_uncore_pci_init();
		break;
	case 63: /* Haswell-EP */
		ret = hswep_uncore_pci_init();
		break;
	case 42: /* Sandy Bridge */
		ret = snb_uncore_pci_init();
		break;
	case 58: /* Ivy Bridge */
		ret = ivb_uncore_pci_init();
		break;
	case 60: /* Haswell */
	case 69: /* Haswell Celeron */
		ret = hsw_uncore_pci_init();
		break;
	default:
		return 0;
	}

	if (ret)
		return ret;

	ret = uncore_types_init(uncore_pci_uncores);
	if (ret)
		return ret;

	uncore_pci_driver->probe = uncore_pci_probe;
	uncore_pci_driver->remove = uncore_pci_remove;

	ret = pci_register_driver(uncore_pci_driver);
	if (ret == 0)
		pcidrv_registered = true;
	else
		uncore_types_exit(uncore_pci_uncores);

	return ret;
}

static void __init uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		pci_unregister_driver(uncore_pci_driver);
		uncore_types_exit(uncore_pci_uncores);
	}
}

/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
static LIST_HEAD(boxes_to_free);

static void uncore_kfree_boxes(void)
{
	struct intel_uncore_box *box;

	while (!list_empty(&boxes_to_free)) {
		box = list_entry(boxes_to_free.next,
				 struct intel_uncore_box, list);
		list_del(&box->list);
		kfree(box);
	}
}

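/*
 * CPU_DYING: detach this cpu's reference to each MSR box; when the last
 * reference goes away the box is queued on boxes_to_free and reclaimed
 * later from the CPU_DEAD/CPU_ONLINE notifier via uncore_kfree_boxes().
 */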
static void uncore_cpu_dying(int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			box = *per_cpu_ptr(pmu->box, cpu);
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			if (box && atomic_dec_and_test(&box->refcnt))
				list_add(&box->list, &boxes_to_free);
		}
	}
}

static int uncore_cpu_starting(int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box, *exist;
	int i, j, k, phys_id;

	phys_id = topology_physical_package_id(cpu);

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			box = *per_cpu_ptr(pmu->box, cpu);
			/* called by uncore_cpu_init? */
			if (box && box->phys_id >= 0) {
				uncore_box_init(box);
				continue;
			}

			for_each_online_cpu(k) {
				exist = *per_cpu_ptr(pmu->box, k);
				if (exist && exist->phys_id == phys_id) {
					atomic_inc(&exist->refcnt);
					*per_cpu_ptr(pmu->box, cpu) = exist;
					if (box) {
						list_add(&box->list,
							 &boxes_to_free);
						box = NULL;
					}
					break;
				}
			}

			if (box) {
				box->phys_id = phys_id;
				uncore_box_init(box);
			}
		}
	}
	return 0;
}

static int uncore_cpu_prepare(int cpu, int phys_id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			if (pmu->func_id < 0)
				pmu->func_id = j;

			box = uncore_alloc_box(type, cpu_to_node(cpu));
			if (!box)
				return -ENOMEM;

			box->pmu = pmu;
			box->phys_id = phys_id;
			*per_cpu_ptr(pmu->box, cpu) = box;
		}
	}
	return 0;
}

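/*
 * Move a package's boxes from old_cpu to new_cpu. old_cpu < 0 means this is
 * the initial assignment; new_cpu < 0 only orphans the boxes; otherwise the
 * active events are migrated along via perf_pmu_migrate_context().
 */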
static void
uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, j;

	for (i = 0; uncores[i]; i++) {
		type = uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			if (old_cpu < 0)
				box = uncore_pmu_to_box(pmu, new_cpu);
			else
				box = uncore_pmu_to_box(pmu, old_cpu);
			if (!box)
				continue;

			if (old_cpu < 0) {
				WARN_ON_ONCE(box->cpu != -1);
				box->cpu = new_cpu;
				continue;
			}

			WARN_ON_ONCE(box->cpu != old_cpu);
			if (new_cpu >= 0) {
				uncore_pmu_cancel_hrtimer(box);
				perf_pmu_migrate_context(&pmu->pmu,
						old_cpu, new_cpu);
				box->cpu = new_cpu;
			} else {
				box->cpu = -1;
			}
		}
	}
}

static void uncore_event_exit_cpu(int cpu)
{
	int i, phys_id, target;

	/* if exiting cpu is used for collecting uncore events */
	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
		return;

	/* find a new cpu to collect uncore events */
	phys_id = topology_physical_package_id(cpu);
	target = -1;
	for_each_online_cpu(i) {
		if (i == cpu)
			continue;
		if (phys_id == topology_physical_package_id(i)) {
			target = i;
			break;
		}
	}

	/* migrate uncore events to the new cpu */
	if (target >= 0)
		cpumask_set_cpu(target, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, cpu, target);
	uncore_change_context(uncore_pci_uncores, cpu, target);
}

static void uncore_event_init_cpu(int cpu)
{
	int i, phys_id;

	phys_id = topology_physical_package_id(cpu);
	for_each_cpu(i, &uncore_cpu_mask) {
		if (phys_id == topology_physical_package_id(i))
			return;
	}

	cpumask_set_cpu(cpu, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, -1, cpu);
	uncore_change_context(uncore_pci_uncores, -1, cpu);
}

static int uncore_cpu_notifier(struct notifier_block *self,
			       unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	/* allocate/free data structure for uncore box */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		uncore_cpu_prepare(cpu, -1);
		break;
	case CPU_STARTING:
		uncore_cpu_starting(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_DYING:
		uncore_cpu_dying(cpu);
		break;
	case CPU_ONLINE:
	case CPU_DEAD:
		uncore_kfree_boxes();
		break;
	default:
		break;
	}

	/* select the cpu that collects uncore events */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_DOWN_FAILED:
	case CPU_STARTING:
		uncore_event_init_cpu(cpu);
		break;
	case CPU_DOWN_PREPARE:
		uncore_event_exit_cpu(cpu);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb = {
	.notifier_call	= uncore_cpu_notifier,
	/*
	 * to migrate uncore events, our notifier should be executed
	 * before perf core's notifier.
	 */
	.priority	= CPU_PRI_PERF + 1,
};

static void __init uncore_cpu_setup(void *dummy)
{
	uncore_cpu_starting(smp_processor_id());
}

static int __init uncore_cpu_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 26: /* Nehalem */
	case 30:
	case 37: /* Westmere */
	case 44:
		nhm_uncore_cpu_init();
		break;
	case 42: /* Sandy Bridge */
	case 58: /* Ivy Bridge */
		snb_uncore_cpu_init();
		break;
	case 45: /* Sandy Bridge-EP */
		snbep_uncore_cpu_init();
		break;
	case 46: /* Nehalem-EX */
	case 47: /* Westmere-EX aka. Xeon E7 */
		nhmex_uncore_cpu_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ivbep_uncore_cpu_init();
		break;
	case 63: /* Haswell-EP */
		hswep_uncore_cpu_init();
		break;
	default:
		return 0;
	}

	ret = uncore_types_init(uncore_msr_uncores);
	if (ret)
		return ret;

	return 0;
}

static int __init uncore_pmus_register(void)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_type *type;
	int i, j;

	for (i = 0; uncore_msr_uncores[i]; i++) {
		type = uncore_msr_uncores[i];
		for (j = 0; j < type->num_boxes; j++) {
			pmu = &type->pmus[j];
			uncore_pmu_register(pmu);
		}
	}

	return 0;
}

static void __init uncore_cpumask_init(void)
{
	int cpu;

	/*
	 * only invoked once, from either the msr or pci init code
	 */
	if (!cpumask_empty(&uncore_cpu_mask))
		return;

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu) {
		int i, phys_id = topology_physical_package_id(cpu);

		for_each_cpu(i, &uncore_cpu_mask) {
			if (phys_id == topology_physical_package_id(i)) {
				phys_id = -1;
				break;
			}
		}
		if (phys_id < 0)
			continue;

		uncore_cpu_prepare(cpu, phys_id);
		uncore_event_init_cpu(cpu);
	}
	on_each_cpu(uncore_cpu_setup, NULL, 1);

	__register_cpu_notifier(&uncore_cpu_nb);

	cpu_notifier_register_done();
}

static int __init intel_uncore_init(void)
{
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	if (cpu_has_hypervisor)
		return -ENODEV;

	ret = uncore_pci_init();
	if (ret)
		goto fail;
	ret = uncore_cpu_init();
	if (ret) {
		uncore_pci_exit();
		goto fail;
	}
	uncore_cpumask_init();

	uncore_pmus_register();
	return 0;
fail:
	return ret;
}
device_initcall(intel_uncore_init);