root/arch/s390/kernel/perf_cpum_cf.c


DEFINITIONS

This source file includes the following definitions.
  1. get_counter_set
  2. validate_ctr_version
  3. validate_ctr_auth
  4. cpumf_pmu_enable
  5. cpumf_pmu_disable
  6. hw_perf_event_destroy
  7. __hw_perf_event_init
  8. cpumf_pmu_event_init
  9. hw_perf_event_reset
  10. hw_perf_event_update
  11. cpumf_pmu_read
  12. cpumf_pmu_start
  13. cpumf_pmu_stop
  14. cpumf_pmu_add
  15. cpumf_pmu_del
  16. cpumf_pmu_start_txn
  17. cpumf_pmu_cancel_txn
  18. cpumf_pmu_commit_txn
  19. cpumf_pmu_init

// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x - CPU-measurement Counter Facility
 *
 *  Copyright IBM Corp. 2012, 2019
 *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 */
#define KMSG_COMPONENT  "cpum_cf"
#define pr_fmt(fmt)     KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
#include <asm/cpu_mcf.h>

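/*
 * Map a counter number to the CPU-measurement counter set it belongs to:
 * 0-31 basic, 32-63 problem-state, 64-127 crypto-activity, 128-287
 * extended, and 448-495 MT-diagnostic.  Any other counter number yields
 * CPUMF_CTR_SET_MAX, meaning the counter belongs to no known set.
 */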
static enum cpumf_ctr_set get_counter_set(u64 event)
{
        int set = CPUMF_CTR_SET_MAX;

        if (event < 32)
                set = CPUMF_CTR_SET_BASIC;
        else if (event < 64)
                set = CPUMF_CTR_SET_USER;
        else if (event < 128)
                set = CPUMF_CTR_SET_CRYPTO;
        else if (event < 288)
                set = CPUMF_CTR_SET_EXT;
        else if (event >= 448 && event < 496)
                set = CPUMF_CTR_SET_MT_DIAG;

        return set;
}

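/*
 * Check whether the counter set and counter number of the event are
 * supported by the counter facility versions (CFVN/CSVN) reported for
 * this CPU.  Returns 0 if supported, -EOPNOTSUPP otherwise.
 */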
static int validate_ctr_version(const struct hw_perf_event *hwc)
{
        struct cpu_cf_events *cpuhw;
        int err = 0;
        u16 mtdiag_ctl;

        cpuhw = &get_cpu_var(cpu_cf_events);

        /* check required version for counter sets */
        switch (hwc->config_base) {
        case CPUMF_CTR_SET_BASIC:
        case CPUMF_CTR_SET_USER:
                if (cpuhw->info.cfvn < 1)
                        err = -EOPNOTSUPP;
                break;
        case CPUMF_CTR_SET_CRYPTO:
                if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
                     hwc->config > 79) ||
                    (cpuhw->info.csvn >= 6 && hwc->config > 83))
                        err = -EOPNOTSUPP;
                break;
        case CPUMF_CTR_SET_EXT:
                if (cpuhw->info.csvn < 1)
                        err = -EOPNOTSUPP;
                if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
                    (cpuhw->info.csvn == 2 && hwc->config > 175) ||
                    (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
                     && hwc->config > 255) ||
                    (cpuhw->info.csvn >= 6 && hwc->config > 287))
                        err = -EOPNOTSUPP;
                break;
        case CPUMF_CTR_SET_MT_DIAG:
                if (cpuhw->info.csvn <= 3)
                        err = -EOPNOTSUPP;
                /*
                 * MT-diagnostic counters are read-only.  The counter set
                 * is automatically enabled and activated on all CPUs with
                 * multithreading (SMT).  Deactivation of multithreading
                 * also disables the counter set.  State changes are ignored
                 * by lcctl().  Because Linux controls SMT enablement through
                 * a kernel parameter only, the counter set is either disabled
                 * or enabled and active.
                 *
                 * Thus, the counters can only be used if SMT is on and the
                 * counter set is enabled and active.
                 */
                mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
                if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
                      (cpuhw->info.enable_ctl & mtdiag_ctl) &&
                      (cpuhw->info.act_ctl & mtdiag_ctl)))
                        err = -EOPNOTSUPP;
                break;
        }

        put_cpu_var(cpu_cf_events);
        return err;
}

static int validate_ctr_auth(const struct hw_perf_event *hwc)
{
        struct cpu_cf_events *cpuhw;
        u64 ctrs_state;
        int err = 0;

        cpuhw = &get_cpu_var(cpu_cf_events);

        /* Check authorization for cpu counter sets.
         * If the particular CPU counter set is not authorized,
         * return with -ENOENT in order to fall back to other
         * PMUs that might satisfy the event request.
         */
        ctrs_state = cpumf_ctr_ctl[hwc->config_base];
        if (!(ctrs_state & cpuhw->info.auth_ctl))
                err = -ENOENT;

        put_cpu_var(cpu_cf_events);
        return err;
}

/*
 * Change the CPUMF state to active.
 * Enable and activate the CPU-counter sets according
 * to the per-cpu control state.
 */
static void cpumf_pmu_enable(struct pmu *pmu)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        int err;

        if (cpuhw->flags & PMU_F_ENABLED)
                return;

        err = lcctl(cpuhw->state);
        if (err) {
                pr_err("Enabling the performance measuring unit "
                       "failed with rc=%x\n", err);
                return;
        }

        cpuhw->flags |= PMU_F_ENABLED;
}

/*
 * Change the CPUMF state to inactive.
 * Deactivate the CPU-counter sets but leave them enabled (inactive
 * state) according to the per-cpu control state.
 */
static void cpumf_pmu_disable(struct pmu *pmu)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        int err;
        u64 inactive;

        if (!(cpuhw->flags & PMU_F_ENABLED))
                return;

        inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
        err = lcctl(inactive);
        if (err) {
                pr_err("Disabling the performance measuring unit "
                       "failed with rc=%x\n", err);
                return;
        }

        cpuhw->flags &= ~PMU_F_ENABLED;
}


/* Number of perf events counting hardware events */
static atomic_t num_events = ATOMIC_INIT(0);
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_dec_return(&num_events) == 0)
                        __kernel_cpumcf_end();
                mutex_unlock(&pmc_reserve_mutex);
        }
}

/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
static const int cpumf_generic_events_basic[] = {
        [PERF_COUNT_HW_CPU_CYCLES]          = 0,
        [PERF_COUNT_HW_INSTRUCTIONS]        = 1,
        [PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
        [PERF_COUNT_HW_CACHE_MISSES]        = -1,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
        [PERF_COUNT_HW_BRANCH_MISSES]       = -1,
        [PERF_COUNT_HW_BUS_CYCLES]          = -1,
};
/* CPUMF <-> perf event mappings for userspace (problem-state set) */
static const int cpumf_generic_events_user[] = {
        [PERF_COUNT_HW_CPU_CYCLES]          = 32,
        [PERF_COUNT_HW_INSTRUCTIONS]        = 33,
        [PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
        [PERF_COUNT_HW_CACHE_MISSES]        = -1,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
        [PERF_COUNT_HW_BRANCH_MISSES]       = -1,
        [PERF_COUNT_HW_BUS_CYCLES]          = -1,
};

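/*
 * Validate the event attributes and set up the hardware configuration:
 * translate generic hardware events to CPUMF counter numbers, determine
 * the counter set, reserve the counter facility on first use, and check
 * authorization and version support for the counter set.
 */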
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        enum cpumf_ctr_set set;
        int err = 0;
        u64 ev;

        switch (type) {
        case PERF_TYPE_RAW:
                /* Raw events are used to access counters directly,
                 * hence do not permit excludes */
                if (attr->exclude_kernel || attr->exclude_user ||
                    attr->exclude_hv)
                        return -EOPNOTSUPP;
                ev = attr->config;
                break;

        case PERF_TYPE_HARDWARE:
                if (is_sampling_event(event))   /* No sampling support */
                        return -ENOENT;
                ev = attr->config;
                /* Count user space (problem-state) only */
                if (!attr->exclude_user && attr->exclude_kernel) {
                        if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
                                return -EOPNOTSUPP;
                        ev = cpumf_generic_events_user[ev];

                /* No support for kernel space counters only */
                } else if (!attr->exclude_kernel && attr->exclude_user) {
                        return -EOPNOTSUPP;

                /* Count user and kernel space */
                } else {
                        if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
                                return -EOPNOTSUPP;
                        ev = cpumf_generic_events_basic[ev];
                }
                break;

        default:
                return -ENOENT;
        }

        if (ev == -1)
                return -ENOENT;

        if (ev > PERF_CPUM_CF_MAX_CTR)
                return -ENOENT;

        /* Obtain the counter set to which the specified counter belongs */
        set = get_counter_set(ev);
        switch (set) {
        case CPUMF_CTR_SET_BASIC:
        case CPUMF_CTR_SET_USER:
        case CPUMF_CTR_SET_CRYPTO:
        case CPUMF_CTR_SET_EXT:
        case CPUMF_CTR_SET_MT_DIAG:
                /*
                 * Use the hardware perf event structure to store the
                 * counter number in the 'config' member and the counter
                 * set number in the 'config_base'.  The counter set number
                 * is then later used to enable/disable the counter(s).
                 */
                hwc->config = ev;
                hwc->config_base = set;
                break;
        case CPUMF_CTR_SET_MAX:
                /* The counter could not be associated with a counter set */
                return -EINVAL;
        }

        /* Initialize for using the CPU-measurement counter facility */
        if (!atomic_inc_not_zero(&num_events)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_read(&num_events) == 0 && __kernel_cpumcf_begin())
                        err = -EBUSY;
                else
                        atomic_inc(&num_events);
                mutex_unlock(&pmc_reserve_mutex);
        }
        if (err)
                return err;
        event->destroy = hw_perf_event_destroy;

        /* Finally, validate version and authorization of the counter set */
        err = validate_ctr_auth(hwc);
        if (!err)
                err = validate_ctr_version(hwc);

        return err;
}

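/*
 * Initialize a counter event.  Hardware and raw events are handled
 * directly; events of this PMU's dynamically assigned type are treated
 * as raw events.  All other event types are rejected with -ENOENT.
 */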
static int cpumf_pmu_event_init(struct perf_event *event)
{
        unsigned int type = event->attr.type;
        int err;

        if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
                err = __hw_perf_event_init(event, type);
        else if (event->pmu->type == type)
                /* Registered as unknown PMU */
                err = __hw_perf_event_init(event, PERF_TYPE_RAW);
        else
                return -ENOENT;

        if (unlikely(err) && event->destroy)
                event->destroy(event);

        return err;
}

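/*
 * Read the current counter value and store it as the new baseline in
 * event->hw.prev_count.  A condition code of 3 from ecctr() means the
 * counter is not (yet) available because its counter set is disabled;
 * the baseline then starts at zero.
 */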
static int hw_perf_event_reset(struct perf_event *event)
{
        u64 prev, new;
        int err;

        do {
                prev = local64_read(&event->hw.prev_count);
                err = ecctr(event->hw.config, &new);
                if (err) {
                        if (err != 3)
                                break;
                        /* The counter is not (yet) available. This
                         * might happen if the counter set to which
                         * this counter belongs is in the disabled
                         * state.
                         */
                        new = 0;
                }
        } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

        return err;
}

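/*
 * Read the counter and add the delta since the previous value to the
 * event count.  Counter wrap-around is accounted for in the delta
 * calculation.
 */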
static void hw_perf_event_update(struct perf_event *event)
{
        u64 prev, new, delta;
        int err;

        do {
                prev = local64_read(&event->hw.prev_count);
                err = ecctr(event->hw.config, &new);
                if (err)
                        return;
        } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

        delta = (prev <= new) ? new - prev
                              : (-1ULL - prev) + new + 1;        /* overflow */
        local64_add(delta, &event->count);
}

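/* Update the event count unless the event is stopped. */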
static void cpumf_pmu_read(struct perf_event *event)
{
        if (event->hw.state & PERF_HES_STOPPED)
                return;

        hw_perf_event_update(event);
}

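/*
 * Start the event on this CPU: enable and activate its counter set,
 * synchronize the prev_count baseline with the current counter value,
 * and take a reference on the counter set.
 */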
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        struct hw_perf_event *hwc = &event->hw;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(hwc->config == -1))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /* (Re-)enable and activate the counter set */
        ctr_set_enable(&cpuhw->state, hwc->config_base);
        ctr_set_start(&cpuhw->state, hwc->config_base);

        /* The counter set to which this counter belongs can already be
         * active.  Because all counters in a set are active, the
         * event->hw.prev_count needs to be synchronized.  At this point,
         * the counter set can be in the inactive or disabled state.
         */
        hw_perf_event_reset(event);

        /* increment refcount for this counter set */
        atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
}

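/*
 * Stop the event on this CPU: drop the counter set reference (stopping
 * the set when the last counter in it is released) and, if requested,
 * update the event count.
 */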
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        struct hw_perf_event *hwc = &event->hw;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                /* Decrement reference count for this counter set and if this
                 * is the last used counter in the set, clear activation
                 * control and set the counter set state to inactive.
                 */
                if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
                        ctr_set_stop(&cpuhw->state, hwc->config_base);
                event->hw.state |= PERF_HES_STOPPED;
        }

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                hw_perf_event_update(event);
                event->hw.state |= PERF_HES_UPTODATE;
        }
}

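/*
 * Add the event to this CPU: check counter set authorization (unless it
 * is deferred to a transaction commit), enable the counter set, and
 * optionally start the event.
 */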
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        /* Check authorization for the counter set to which this
         * counter belongs.
         * For group event transactions, the authorization check is
         * done in cpumf_pmu_commit_txn().
         */
        if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD))
                if (validate_ctr_auth(&event->hw))
                        return -ENOENT;

        ctr_set_enable(&cpuhw->state, event->hw.config_base);
        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                cpumf_pmu_start(event, PERF_EF_RELOAD);

        perf_event_update_userpage(event);

        return 0;
}

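/*
 * Delete the event from this CPU: stop it with a final count update and
 * disable its counter set once no counter in the set is in use anymore.
 */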
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        cpumf_pmu_stop(event, PERF_EF_UPDATE);

        /* Check if any counter in the counter set is still used.  If not used,
         * change the counter set to the disabled state.  This also clears the
         * content of all counters in the set.
         *
         * When a new perf event has been added but not yet started, this can
         * clear enable control and reset all counters in the set.  Therefore,
         * cpumf_pmu_start() always has to reenable a counter set.
         */
        if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
                ctr_set_disable(&cpuhw->state, event->hw.config_base);

        perf_event_update_userpage(event);
}

/*
 * Start group events scheduling transaction.
 * Set flags to perform a single test at commit time.
 *
 * We only support PERF_PMU_TXN_ADD transactions. Save the
 * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
 * transactions.
 */
static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        WARN_ON_ONCE(cpuhw->txn_flags);         /* txn already in flight */

        cpuhw->txn_flags = txn_flags;
        if (txn_flags & ~PERF_PMU_TXN_ADD)
                return;

        perf_pmu_disable(pmu);
        cpuhw->tx_state = cpuhw->state;
}

/*
 * Stop and cancel a group events scheduling transaction.
 * Assumes cpumf_pmu_del() is called for each event that was
 * successfully added with cpumf_pmu_add() during the transaction.
 */
static void cpumf_pmu_cancel_txn(struct pmu *pmu)
{
        unsigned int txn_flags;
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        WARN_ON_ONCE(!cpuhw->txn_flags);        /* no txn in flight */

        txn_flags = cpuhw->txn_flags;
        cpuhw->txn_flags = 0;
        if (txn_flags & ~PERF_PMU_TXN_ADD)
                return;

        WARN_ON(cpuhw->tx_state != cpuhw->state);

        perf_pmu_enable(pmu);
}

/*
 * Commit the group events scheduling transaction.  On success, the
 * transaction is closed.  On error, the transaction is kept open
 * until cpumf_pmu_cancel_txn() is called.
 */
static int cpumf_pmu_commit_txn(struct pmu *pmu)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        u64 state;

        WARN_ON_ONCE(!cpuhw->txn_flags);        /* no txn in flight */

        if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
                cpuhw->txn_flags = 0;
                return 0;
        }

        /* check if the updated state can be scheduled */
        state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
        state >>= CPUMF_LCCTL_ENABLE_SHIFT;
        if ((state & cpuhw->info.auth_ctl) != state)
                return -ENOENT;

        cpuhw->txn_flags = 0;
        perf_pmu_enable(pmu);
        return 0;
}

/* Performance monitoring unit for s390x */
static struct pmu cpumf_pmu = {
        .task_ctx_nr  = perf_sw_context,
        .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
        .pmu_enable   = cpumf_pmu_enable,
        .pmu_disable  = cpumf_pmu_disable,
        .event_init   = cpumf_pmu_event_init,
        .add          = cpumf_pmu_add,
        .del          = cpumf_pmu_del,
        .start        = cpumf_pmu_start,
        .stop         = cpumf_pmu_stop,
        .read         = cpumf_pmu_read,
        .start_txn    = cpumf_pmu_start_txn,
        .commit_txn   = cpumf_pmu_commit_txn,
        .cancel_txn   = cpumf_pmu_cancel_txn,
};

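/*
 * Register the cpum_cf PMU if the CPU-measurement counter facility is
 * available on this machine.  Invoked as a subsystem initcall.
 */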
static int __init cpumf_pmu_init(void)
{
        int rc;

        if (!kernel_cpumcf_avail())
                return -ENODEV;

        cpumf_pmu.attr_groups = cpumf_cf_event_group();
        rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
        if (rc)
                pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
        return rc;
}
subsys_initcall(cpumf_pmu_init);
