1/* 2 * This file implements the perfmon-2 subsystem which is used 3 * to program the IA-64 Performance Monitoring Unit (PMU). 4 * 5 * The initial version of perfmon.c was written by 6 * Ganesh Venkitachalam, IBM Corp. 7 * 8 * Then it was modified for perfmon-1.x by Stephane Eranian and 9 * David Mosberger, Hewlett Packard Co. 10 * 11 * Version Perfmon-2.x is a rewrite of perfmon-1.x 12 * by Stephane Eranian, Hewlett Packard Co. 13 * 14 * Copyright (C) 1999-2005 Hewlett Packard Co 15 * Stephane Eranian <eranian@hpl.hp.com> 16 * David Mosberger-Tang <davidm@hpl.hp.com> 17 * 18 * More information about perfmon available at: 19 * http://www.hpl.hp.com/research/linux/perfmon 20 */ 21 22#include <linux/module.h> 23#include <linux/kernel.h> 24#include <linux/sched.h> 25#include <linux/interrupt.h> 26#include <linux/proc_fs.h> 27#include <linux/seq_file.h> 28#include <linux/init.h> 29#include <linux/vmalloc.h> 30#include <linux/mm.h> 31#include <linux/sysctl.h> 32#include <linux/list.h> 33#include <linux/file.h> 34#include <linux/poll.h> 35#include <linux/vfs.h> 36#include <linux/smp.h> 37#include <linux/pagemap.h> 38#include <linux/mount.h> 39#include <linux/bitops.h> 40#include <linux/capability.h> 41#include <linux/rcupdate.h> 42#include <linux/completion.h> 43#include <linux/tracehook.h> 44#include <linux/slab.h> 45#include <linux/cpu.h> 46 47#include <asm/errno.h> 48#include <asm/intrinsics.h> 49#include <asm/page.h> 50#include <asm/perfmon.h> 51#include <asm/processor.h> 52#include <asm/signal.h> 53#include <asm/uaccess.h> 54#include <asm/delay.h> 55 56#ifdef CONFIG_PERFMON 57/* 58 * perfmon context state 59 */ 60#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ 61#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ 62#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ 63#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ 64 65#define PFM_INVALID_ACTIVATION (~0UL) 66 67#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ 68#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ 69 70/* 71 * depth of message queue 72 */ 73#define PFM_MAX_MSGS 32 74#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) 75 76/* 77 * type of a PMU register (bitmask). 
78 * bitmask structure: 79 * bit0 : register implemented 80 * bit1 : end marker 81 * bit2-3 : reserved 82 * bit4 : pmc has pmc.pm 83 * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter 84 * bit6-7 : register type 85 * bit8-31: reserved 86 */ 87#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ 88#define PFM_REG_IMPL 0x1 /* register implemented */ 89#define PFM_REG_END 0x2 /* end marker */ 90#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ 91#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ 92#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ 93#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ 94#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ 95 96#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) 97#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) 98 99#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) 100 101/* i assumed unsigned */ 102#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) 103#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) 104 105/* XXX: these assume that register i is implemented */ 106#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 107#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 108#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) 109#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) 110 111#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value 112#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask 113#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] 114#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] 115 116#define PFM_NUM_IBRS IA64_NUM_DBG_REGS 117#define PFM_NUM_DBRS IA64_NUM_DBG_REGS 118 119#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) 120#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) 121#define PFM_CTX_TASK(h) (h)->ctx_task 122 123#define PMU_PMC_OI 5 /* position of pmc.oi bit */ 124 125/* XXX: does not support more than 64 PMDs */ 126#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) 127#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) 128 129#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) 130 131#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) 132#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) 133#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) 134#define PFM_CODE_RR 0 /* requesting code range restriction */ 135#define PFM_DATA_RR 1 /* requestion data range restriction */ 136 137#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) 138#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) 139#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) 140 141#define RDEP(x) (1UL<<(x)) 142 143/* 144 * context protection macros 145 * in SMP: 146 * - we need to protect against CPU concurrency (spin_lock) 147 * - we need to protect against PMU overflow interrupts (local_irq_disable) 148 * in UP: 149 * - we need to protect against PMU overflow interrupts (local_irq_disable) 150 * 151 * spin_lock_irqsave()/spin_unlock_irqrestore(): 152 * in SMP: local_irq_disable + 
spin_lock 153 * in UP : local_irq_disable 154 * 155 * spin_lock()/spin_lock(): 156 * in UP : removed automatically 157 * in SMP: protect against context accesses from other CPU. interrupts 158 * are not masked. This is useful for the PMU interrupt handler 159 * because we know we will not get PMU concurrency in that code. 160 */ 161#define PROTECT_CTX(c, f) \ 162 do { \ 163 DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \ 164 spin_lock_irqsave(&(c)->ctx_lock, f); \ 165 DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \ 166 } while(0) 167 168#define UNPROTECT_CTX(c, f) \ 169 do { \ 170 DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \ 171 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 172 } while(0) 173 174#define PROTECT_CTX_NOPRINT(c, f) \ 175 do { \ 176 spin_lock_irqsave(&(c)->ctx_lock, f); \ 177 } while(0) 178 179 180#define UNPROTECT_CTX_NOPRINT(c, f) \ 181 do { \ 182 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 183 } while(0) 184 185 186#define PROTECT_CTX_NOIRQ(c) \ 187 do { \ 188 spin_lock(&(c)->ctx_lock); \ 189 } while(0) 190 191#define UNPROTECT_CTX_NOIRQ(c) \ 192 do { \ 193 spin_unlock(&(c)->ctx_lock); \ 194 } while(0) 195 196 197#ifdef CONFIG_SMP 198 199#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) 200#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ 201#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() 202 203#else /* !CONFIG_SMP */ 204#define SET_ACTIVATION(t) do {} while(0) 205#define GET_ACTIVATION(t) do {} while(0) 206#define INC_ACTIVATION(t) do {} while(0) 207#endif /* CONFIG_SMP */ 208 209#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) 210#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) 211#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) 212 213#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) 214#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) 215 216#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) 217 218/* 219 * cmp0 must be the value of pmc0 220 */ 221#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) 222 223#define PFMFS_MAGIC 0xa0b4d889 224 225/* 226 * debugging 227 */ 228#define PFM_DEBUGGING 1 229#ifdef PFM_DEBUGGING 230#define DPRINT(a) \ 231 do { \ 232 if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 233 } while (0) 234 235#define DPRINT_ovfl(a) \ 236 do { \ 237 if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 238 } while (0) 239#endif 240 241/* 242 * 64-bit software counter structure 243 * 244 * the next_reset_type is applied to the next call to pfm_reset_regs() 245 */ 246typedef struct { 247 unsigned long val; /* virtual 64bit counter value */ 248 unsigned long lval; /* last reset value */ 249 unsigned long long_reset; /* reset value on sampling overflow */ 250 unsigned long short_reset; /* reset value on overflow */ 251 unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ 252 unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ 253 unsigned long seed; /* seed for random-number generator */ 254 unsigned long mask; /* mask for random-number generator */ 255 unsigned int flags; /* notify/do not notify */ 256 unsigned long 
eventid; /* overflow event identifier */ 257} pfm_counter_t; 258 259/* 260 * context flags 261 */ 262typedef struct { 263 unsigned int block:1; /* when 1, task will blocked on user notifications */ 264 unsigned int system:1; /* do system wide monitoring */ 265 unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ 266 unsigned int is_sampling:1; /* true if using a custom format */ 267 unsigned int excl_idle:1; /* exclude idle task in system wide session */ 268 unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ 269 unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ 270 unsigned int no_msg:1; /* no message sent on overflow */ 271 unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ 272 unsigned int reserved:22; 273} pfm_context_flags_t; 274 275#define PFM_TRAP_REASON_NONE 0x0 /* default value */ 276#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ 277#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ 278 279 280/* 281 * perfmon context: encapsulates all the state of a monitoring session 282 */ 283 284typedef struct pfm_context { 285 spinlock_t ctx_lock; /* context protection */ 286 287 pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ 288 unsigned int ctx_state; /* state: active/inactive (no bitfield) */ 289 290 struct task_struct *ctx_task; /* task to which context is attached */ 291 292 unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ 293 294 struct completion ctx_restart_done; /* use for blocking notification mode */ 295 296 unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ 297 unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ 298 unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ 299 300 unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ 301 unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ 302 unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ 303 304 unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ 305 306 unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ 307 unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ 308 unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ 309 unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ 310 311 pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ 312 313 unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ 314 unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ 315 316 unsigned long ctx_saved_psr_up; /* only contains psr.up value */ 317 318 unsigned long ctx_last_activation; /* context last activation number for last_cpu */ 319 unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ 320 unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ 321 322 int ctx_fd; /* file descriptor used my this context */ 323 pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ 324 325 pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ 326 void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ 327 unsigned long ctx_smpl_size; /* size of sampling buffer */ 328 void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ 329 330 wait_queue_head_t ctx_msgq_wait; 331 pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; 332 
int ctx_msgq_head; 333 int ctx_msgq_tail; 334 struct fasync_struct *ctx_async_queue; 335 336 wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ 337} pfm_context_t; 338 339/* 340 * magic number used to verify that structure is really 341 * a perfmon context 342 */ 343#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) 344 345#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) 346 347#ifdef CONFIG_SMP 348#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) 349#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu 350#else 351#define SET_LAST_CPU(ctx, v) do {} while(0) 352#define GET_LAST_CPU(ctx) do {} while(0) 353#endif 354 355 356#define ctx_fl_block ctx_flags.block 357#define ctx_fl_system ctx_flags.system 358#define ctx_fl_using_dbreg ctx_flags.using_dbreg 359#define ctx_fl_is_sampling ctx_flags.is_sampling 360#define ctx_fl_excl_idle ctx_flags.excl_idle 361#define ctx_fl_going_zombie ctx_flags.going_zombie 362#define ctx_fl_trap_reason ctx_flags.trap_reason 363#define ctx_fl_no_msg ctx_flags.no_msg 364#define ctx_fl_can_restart ctx_flags.can_restart 365 366#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); 367#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking 368 369/* 370 * global information about all sessions 371 * mostly used to synchronize between system wide and per-process 372 */ 373typedef struct { 374 spinlock_t pfs_lock; /* lock the structure */ 375 376 unsigned int pfs_task_sessions; /* number of per task sessions */ 377 unsigned int pfs_sys_sessions; /* number of per system wide sessions */ 378 unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ 379 unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ 380 struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ 381} pfm_session_t; 382 383/* 384 * information about a PMC or PMD. 385 * dep_pmd[]: a bitmask of dependent PMD registers 386 * dep_pmc[]: a bitmask of dependent PMC registers 387 */ 388typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); 389typedef struct { 390 unsigned int type; 391 int pm_pos; 392 unsigned long default_value; /* power-on default value */ 393 unsigned long reserved_mask; /* bitmask of reserved bits */ 394 pfm_reg_check_t read_check; 395 pfm_reg_check_t write_check; 396 unsigned long dep_pmd[4]; 397 unsigned long dep_pmc[4]; 398} pfm_reg_desc_t; 399 400/* assume cnum is a valid monitor */ 401#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) 402 403/* 404 * This structure is initialized at boot time and contains 405 * a description of the PMU main characteristics. 
406 * 407 * If the probe function is defined, detection is based 408 * on its return value: 409 * - 0 means recognized PMU 410 * - anything else means not supported 411 * When the probe function is not defined, then the pmu_family field 412 * is used and it must match the host CPU family such that: 413 * - cpu->family & config->pmu_family != 0 414 */ 415typedef struct { 416 unsigned long ovfl_val; /* overflow value for counters */ 417 418 pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ 419 pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ 420 421 unsigned int num_pmcs; /* number of PMCS: computed at init time */ 422 unsigned int num_pmds; /* number of PMDS: computed at init time */ 423 unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ 424 unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ 425 426 char *pmu_name; /* PMU family name */ 427 unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ 428 unsigned int flags; /* pmu specific flags */ 429 unsigned int num_ibrs; /* number of IBRS: computed at init time */ 430 unsigned int num_dbrs; /* number of DBRS: computed at init time */ 431 unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ 432 int (*probe)(void); /* customized probe routine */ 433 unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ 434} pmu_config_t; 435/* 436 * PMU specific flags 437 */ 438#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ 439 440/* 441 * debug register related type definitions 442 */ 443typedef struct { 444 unsigned long ibr_mask:56; 445 unsigned long ibr_plm:4; 446 unsigned long ibr_ig:3; 447 unsigned long ibr_x:1; 448} ibr_mask_reg_t; 449 450typedef struct { 451 unsigned long dbr_mask:56; 452 unsigned long dbr_plm:4; 453 unsigned long dbr_ig:2; 454 unsigned long dbr_w:1; 455 unsigned long dbr_r:1; 456} dbr_mask_reg_t; 457 458typedef union { 459 unsigned long val; 460 ibr_mask_reg_t ibr; 461 dbr_mask_reg_t dbr; 462} dbreg_t; 463 464 465/* 466 * perfmon command descriptions 467 */ 468typedef struct { 469 int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 470 char *cmd_name; 471 int cmd_flags; 472 unsigned int cmd_narg; 473 size_t cmd_argsize; 474 int (*cmd_getsize)(void *arg, size_t *sz); 475} pfm_cmd_desc_t; 476 477#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ 478#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ 479#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ 480#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ 481 482 483#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name 484#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) 485#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) 486#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) 487#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) 488 489#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ 490 491typedef struct { 492 unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ 493 unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ 494 unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ 495 unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ 496 unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing 
ovfl interrupts */ 497 unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ 498 unsigned long pfm_smpl_handler_calls; 499 unsigned long pfm_smpl_handler_cycles; 500 char pad[SMP_CACHE_BYTES] ____cacheline_aligned; 501} pfm_stats_t; 502 503/* 504 * perfmon internal variables 505 */ 506static pfm_stats_t pfm_stats[NR_CPUS]; 507static pfm_session_t pfm_sessions; /* global sessions information */ 508 509static DEFINE_SPINLOCK(pfm_alt_install_check); 510static pfm_intr_handler_desc_t *pfm_alt_intr_handler; 511 512static struct proc_dir_entry *perfmon_dir; 513static pfm_uuid_t pfm_null_uuid = {0,}; 514 515static spinlock_t pfm_buffer_fmt_lock; 516static LIST_HEAD(pfm_buffer_fmt_list); 517 518static pmu_config_t *pmu_conf; 519 520/* sysctl() controls */ 521pfm_sysctl_t pfm_sysctl; 522EXPORT_SYMBOL(pfm_sysctl); 523 524static struct ctl_table pfm_ctl_table[] = { 525 { 526 .procname = "debug", 527 .data = &pfm_sysctl.debug, 528 .maxlen = sizeof(int), 529 .mode = 0666, 530 .proc_handler = proc_dointvec, 531 }, 532 { 533 .procname = "debug_ovfl", 534 .data = &pfm_sysctl.debug_ovfl, 535 .maxlen = sizeof(int), 536 .mode = 0666, 537 .proc_handler = proc_dointvec, 538 }, 539 { 540 .procname = "fastctxsw", 541 .data = &pfm_sysctl.fastctxsw, 542 .maxlen = sizeof(int), 543 .mode = 0600, 544 .proc_handler = proc_dointvec, 545 }, 546 { 547 .procname = "expert_mode", 548 .data = &pfm_sysctl.expert_mode, 549 .maxlen = sizeof(int), 550 .mode = 0600, 551 .proc_handler = proc_dointvec, 552 }, 553 {} 554}; 555static struct ctl_table pfm_sysctl_dir[] = { 556 { 557 .procname = "perfmon", 558 .mode = 0555, 559 .child = pfm_ctl_table, 560 }, 561 {} 562}; 563static struct ctl_table pfm_sysctl_root[] = { 564 { 565 .procname = "kernel", 566 .mode = 0555, 567 .child = pfm_sysctl_dir, 568 }, 569 {} 570}; 571static struct ctl_table_header *pfm_sysctl_header; 572 573static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 574 575#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) 576#define pfm_get_cpu_data(a,b) per_cpu(a, b) 577 578static inline void 579pfm_put_task(struct task_struct *task) 580{ 581 if (task != current) put_task_struct(task); 582} 583 584static inline void 585pfm_reserve_page(unsigned long a) 586{ 587 SetPageReserved(vmalloc_to_page((void *)a)); 588} 589static inline void 590pfm_unreserve_page(unsigned long a) 591{ 592 ClearPageReserved(vmalloc_to_page((void*)a)); 593} 594 595static inline unsigned long 596pfm_protect_ctx_ctxsw(pfm_context_t *x) 597{ 598 spin_lock(&(x)->ctx_lock); 599 return 0UL; 600} 601 602static inline void 603pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) 604{ 605 spin_unlock(&(x)->ctx_lock); 606} 607 608/* forward declaration */ 609static const struct dentry_operations pfmfs_dentry_operations; 610 611static struct dentry * 612pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) 613{ 614 return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, 615 PFMFS_MAGIC); 616} 617 618static struct file_system_type pfm_fs_type = { 619 .name = "pfmfs", 620 .mount = pfmfs_mount, 621 .kill_sb = kill_anon_super, 622}; 623MODULE_ALIAS_FS("pfmfs"); 624 625DEFINE_PER_CPU(unsigned long, pfm_syst_info); 626DEFINE_PER_CPU(struct task_struct *, pmu_owner); 627DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); 628DEFINE_PER_CPU(unsigned long, pmu_activation_number); 629EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); 630 631 632/* forward declaration */ 633static const struct file_operations 
pfm_file_ops; 634 635/* 636 * forward declarations 637 */ 638#ifndef CONFIG_SMP 639static void pfm_lazy_save_regs (struct task_struct *ta); 640#endif 641 642void dump_pmu_state(const char *); 643static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 644 645#include "perfmon_itanium.h" 646#include "perfmon_mckinley.h" 647#include "perfmon_montecito.h" 648#include "perfmon_generic.h" 649 650static pmu_config_t *pmu_confs[]={ 651 &pmu_conf_mont, 652 &pmu_conf_mck, 653 &pmu_conf_ita, 654 &pmu_conf_gen, /* must be last */ 655 NULL 656}; 657 658 659static int pfm_end_notify_user(pfm_context_t *ctx); 660 661static inline void 662pfm_clear_psr_pp(void) 663{ 664 ia64_rsm(IA64_PSR_PP); 665 ia64_srlz_i(); 666} 667 668static inline void 669pfm_set_psr_pp(void) 670{ 671 ia64_ssm(IA64_PSR_PP); 672 ia64_srlz_i(); 673} 674 675static inline void 676pfm_clear_psr_up(void) 677{ 678 ia64_rsm(IA64_PSR_UP); 679 ia64_srlz_i(); 680} 681 682static inline void 683pfm_set_psr_up(void) 684{ 685 ia64_ssm(IA64_PSR_UP); 686 ia64_srlz_i(); 687} 688 689static inline unsigned long 690pfm_get_psr(void) 691{ 692 unsigned long tmp; 693 tmp = ia64_getreg(_IA64_REG_PSR); 694 ia64_srlz_i(); 695 return tmp; 696} 697 698static inline void 699pfm_set_psr_l(unsigned long val) 700{ 701 ia64_setreg(_IA64_REG_PSR_L, val); 702 ia64_srlz_i(); 703} 704 705static inline void 706pfm_freeze_pmu(void) 707{ 708 ia64_set_pmc(0,1UL); 709 ia64_srlz_d(); 710} 711 712static inline void 713pfm_unfreeze_pmu(void) 714{ 715 ia64_set_pmc(0,0UL); 716 ia64_srlz_d(); 717} 718 719static inline void 720pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) 721{ 722 int i; 723 724 for (i=0; i < nibrs; i++) { 725 ia64_set_ibr(i, ibrs[i]); 726 ia64_dv_serialize_instruction(); 727 } 728 ia64_srlz_i(); 729} 730 731static inline void 732pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) 733{ 734 int i; 735 736 for (i=0; i < ndbrs; i++) { 737 ia64_set_dbr(i, dbrs[i]); 738 ia64_dv_serialize_data(); 739 } 740 ia64_srlz_d(); 741} 742 743/* 744 * PMD[i] must be a counter. no check is made 745 */ 746static inline unsigned long 747pfm_read_soft_counter(pfm_context_t *ctx, int i) 748{ 749 return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); 750} 751 752/* 753 * PMD[i] must be a counter. 
no check is made 754 */ 755static inline void 756pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) 757{ 758 unsigned long ovfl_val = pmu_conf->ovfl_val; 759 760 ctx->ctx_pmds[i].val = val & ~ovfl_val; 761 /* 762 * writing to unimplemented part is ignore, so we do not need to 763 * mask off top part 764 */ 765 ia64_set_pmd(i, val & ovfl_val); 766} 767 768static pfm_msg_t * 769pfm_get_new_msg(pfm_context_t *ctx) 770{ 771 int idx, next; 772 773 next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; 774 775 DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 776 if (next == ctx->ctx_msgq_head) return NULL; 777 778 idx = ctx->ctx_msgq_tail; 779 ctx->ctx_msgq_tail = next; 780 781 DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); 782 783 return ctx->ctx_msgq+idx; 784} 785 786static pfm_msg_t * 787pfm_get_next_msg(pfm_context_t *ctx) 788{ 789 pfm_msg_t *msg; 790 791 DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 792 793 if (PFM_CTXQ_EMPTY(ctx)) return NULL; 794 795 /* 796 * get oldest message 797 */ 798 msg = ctx->ctx_msgq+ctx->ctx_msgq_head; 799 800 /* 801 * and move forward 802 */ 803 ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; 804 805 DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); 806 807 return msg; 808} 809 810static void 811pfm_reset_msgq(pfm_context_t *ctx) 812{ 813 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 814 DPRINT(("ctx=%p msgq reset\n", ctx)); 815} 816 817static void * 818pfm_rvmalloc(unsigned long size) 819{ 820 void *mem; 821 unsigned long addr; 822 823 size = PAGE_ALIGN(size); 824 mem = vzalloc(size); 825 if (mem) { 826 //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); 827 addr = (unsigned long)mem; 828 while (size > 0) { 829 pfm_reserve_page(addr); 830 addr+=PAGE_SIZE; 831 size-=PAGE_SIZE; 832 } 833 } 834 return mem; 835} 836 837static void 838pfm_rvfree(void *mem, unsigned long size) 839{ 840 unsigned long addr; 841 842 if (mem) { 843 DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); 844 addr = (unsigned long) mem; 845 while ((long) size > 0) { 846 pfm_unreserve_page(addr); 847 addr+=PAGE_SIZE; 848 size-=PAGE_SIZE; 849 } 850 vfree(mem); 851 } 852 return; 853} 854 855static pfm_context_t * 856pfm_context_alloc(int ctx_flags) 857{ 858 pfm_context_t *ctx; 859 860 /* 861 * allocate context descriptor 862 * must be able to free with interrupts disabled 863 */ 864 ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); 865 if (ctx) { 866 DPRINT(("alloc ctx @%p\n", ctx)); 867 868 /* 869 * init context protection lock 870 */ 871 spin_lock_init(&ctx->ctx_lock); 872 873 /* 874 * context is unloaded 875 */ 876 ctx->ctx_state = PFM_CTX_UNLOADED; 877 878 /* 879 * initialization of context's flags 880 */ 881 ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; 882 ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; 883 ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; 884 /* 885 * will move to set properties 886 * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 
1: 0; 887 */ 888 889 /* 890 * init restart semaphore to locked 891 */ 892 init_completion(&ctx->ctx_restart_done); 893 894 /* 895 * activation is used in SMP only 896 */ 897 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 898 SET_LAST_CPU(ctx, -1); 899 900 /* 901 * initialize notification message queue 902 */ 903 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 904 init_waitqueue_head(&ctx->ctx_msgq_wait); 905 init_waitqueue_head(&ctx->ctx_zombieq); 906 907 } 908 return ctx; 909} 910 911static void 912pfm_context_free(pfm_context_t *ctx) 913{ 914 if (ctx) { 915 DPRINT(("free ctx @%p\n", ctx)); 916 kfree(ctx); 917 } 918} 919 920static void 921pfm_mask_monitoring(struct task_struct *task) 922{ 923 pfm_context_t *ctx = PFM_GET_CTX(task); 924 unsigned long mask, val, ovfl_mask; 925 int i; 926 927 DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task))); 928 929 ovfl_mask = pmu_conf->ovfl_val; 930 /* 931 * monitoring can only be masked as a result of a valid 932 * counter overflow. In UP, it means that the PMU still 933 * has an owner. Note that the owner can be different 934 * from the current task. However the PMU state belongs 935 * to the owner. 936 * In SMP, a valid overflow only happens when task is 937 * current. Therefore if we come here, we know that 938 * the PMU state belongs to the current task, therefore 939 * we can access the live registers. 940 * 941 * So in both cases, the live register contains the owner's 942 * state. We can ONLY touch the PMU registers and NOT the PSR. 943 * 944 * As a consequence to this call, the ctx->th_pmds[] array 945 * contains stale information which must be ignored 946 * when context is reloaded AND monitoring is active (see 947 * pfm_restart). 948 */ 949 mask = ctx->ctx_used_pmds[0]; 950 for (i = 0; mask; i++, mask>>=1) { 951 /* skip non used pmds */ 952 if ((mask & 0x1) == 0) continue; 953 val = ia64_get_pmd(i); 954 955 if (PMD_IS_COUNTING(i)) { 956 /* 957 * we rebuild the full 64 bit value of the counter 958 */ 959 ctx->ctx_pmds[i].val += (val & ovfl_mask); 960 } else { 961 ctx->ctx_pmds[i].val = val; 962 } 963 DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 964 i, 965 ctx->ctx_pmds[i].val, 966 val & ovfl_mask)); 967 } 968 /* 969 * mask monitoring by setting the privilege level to 0 970 * we cannot use psr.pp/psr.up for this, it is controlled by 971 * the user 972 * 973 * if task is current, modify actual registers, otherwise modify 974 * thread save state, i.e., what will be restored in pfm_load_regs() 975 */ 976 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 977 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 978 if ((mask & 0x1) == 0UL) continue; 979 ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); 980 ctx->th_pmcs[i] &= ~0xfUL; 981 DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 982 } 983 /* 984 * make all of this visible 985 */ 986 ia64_srlz_d(); 987} 988 989/* 990 * must always be done with task == current 991 * 992 * context must be in MASKED state when calling 993 */ 994static void 995pfm_restore_monitoring(struct task_struct *task) 996{ 997 pfm_context_t *ctx = PFM_GET_CTX(task); 998 unsigned long mask, ovfl_mask; 999 unsigned long psr, val; 1000 int i, is_system; 1001 1002 is_system = ctx->ctx_fl_system; 1003 ovfl_mask = pmu_conf->ovfl_val; 1004 1005 if (task != current) { 1006 printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current)); 1007 return; 1008 } 1009 if (ctx->ctx_state != PFM_CTX_MASKED) { 1010 printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid 
state=%d\n", __LINE__, 1011 task_pid_nr(task), task_pid_nr(current), ctx->ctx_state); 1012 return; 1013 } 1014 psr = pfm_get_psr(); 1015 /* 1016 * monitoring is masked via the PMC. 1017 * As we restore their value, we do not want each counter to 1018 * restart right away. We stop monitoring using the PSR, 1019 * restore the PMC (and PMD) and then re-establish the psr 1020 * as it was. Note that there can be no pending overflow at 1021 * this point, because monitoring was MASKED. 1022 * 1023 * system-wide session are pinned and self-monitoring 1024 */ 1025 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1026 /* disable dcr pp */ 1027 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 1028 pfm_clear_psr_pp(); 1029 } else { 1030 pfm_clear_psr_up(); 1031 } 1032 /* 1033 * first, we restore the PMD 1034 */ 1035 mask = ctx->ctx_used_pmds[0]; 1036 for (i = 0; mask; i++, mask>>=1) { 1037 /* skip non used pmds */ 1038 if ((mask & 0x1) == 0) continue; 1039 1040 if (PMD_IS_COUNTING(i)) { 1041 /* 1042 * we split the 64bit value according to 1043 * counter width 1044 */ 1045 val = ctx->ctx_pmds[i].val & ovfl_mask; 1046 ctx->ctx_pmds[i].val &= ~ovfl_mask; 1047 } else { 1048 val = ctx->ctx_pmds[i].val; 1049 } 1050 ia64_set_pmd(i, val); 1051 1052 DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 1053 i, 1054 ctx->ctx_pmds[i].val, 1055 val)); 1056 } 1057 /* 1058 * restore the PMCs 1059 */ 1060 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 1061 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 1062 if ((mask & 0x1) == 0UL) continue; 1063 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1064 ia64_set_pmc(i, ctx->th_pmcs[i]); 1065 DPRINT(("[%d] pmc[%d]=0x%lx\n", 1066 task_pid_nr(task), i, ctx->th_pmcs[i])); 1067 } 1068 ia64_srlz_d(); 1069 1070 /* 1071 * must restore DBR/IBR because could be modified while masked 1072 * XXX: need to optimize 1073 */ 1074 if (ctx->ctx_fl_using_dbreg) { 1075 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 1076 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 1077 } 1078 1079 /* 1080 * now restore PSR 1081 */ 1082 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1083 /* enable dcr pp */ 1084 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 1085 ia64_srlz_i(); 1086 } 1087 pfm_set_psr_l(psr); 1088} 1089 1090static inline void 1091pfm_save_pmds(unsigned long *pmds, unsigned long mask) 1092{ 1093 int i; 1094 1095 ia64_srlz_d(); 1096 1097 for (i=0; mask; i++, mask>>=1) { 1098 if (mask & 0x1) pmds[i] = ia64_get_pmd(i); 1099 } 1100} 1101 1102/* 1103 * reload from thread state (used for ctxw only) 1104 */ 1105static inline void 1106pfm_restore_pmds(unsigned long *pmds, unsigned long mask) 1107{ 1108 int i; 1109 unsigned long val, ovfl_val = pmu_conf->ovfl_val; 1110 1111 for (i=0; mask; i++, mask>>=1) { 1112 if ((mask & 0x1) == 0) continue; 1113 val = PMD_IS_COUNTING(i) ? 
pmds[i] & ovfl_val : pmds[i]; 1114 ia64_set_pmd(i, val); 1115 } 1116 ia64_srlz_d(); 1117} 1118 1119/* 1120 * propagate PMD from context to thread-state 1121 */ 1122static inline void 1123pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) 1124{ 1125 unsigned long ovfl_val = pmu_conf->ovfl_val; 1126 unsigned long mask = ctx->ctx_all_pmds[0]; 1127 unsigned long val; 1128 int i; 1129 1130 DPRINT(("mask=0x%lx\n", mask)); 1131 1132 for (i=0; mask; i++, mask>>=1) { 1133 1134 val = ctx->ctx_pmds[i].val; 1135 1136 /* 1137 * We break up the 64 bit value into 2 pieces 1138 * the lower bits go to the machine state in the 1139 * thread (will be reloaded on ctxsw in). 1140 * The upper part stays in the soft-counter. 1141 */ 1142 if (PMD_IS_COUNTING(i)) { 1143 ctx->ctx_pmds[i].val = val & ~ovfl_val; 1144 val &= ovfl_val; 1145 } 1146 ctx->th_pmds[i] = val; 1147 1148 DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", 1149 i, 1150 ctx->th_pmds[i], 1151 ctx->ctx_pmds[i].val)); 1152 } 1153} 1154 1155/* 1156 * propagate PMC from context to thread-state 1157 */ 1158static inline void 1159pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) 1160{ 1161 unsigned long mask = ctx->ctx_all_pmcs[0]; 1162 int i; 1163 1164 DPRINT(("mask=0x%lx\n", mask)); 1165 1166 for (i=0; mask; i++, mask>>=1) { 1167 /* masking 0 with ovfl_val yields 0 */ 1168 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1169 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 1170 } 1171} 1172 1173 1174 1175static inline void 1176pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) 1177{ 1178 int i; 1179 1180 for (i=0; mask; i++, mask>>=1) { 1181 if ((mask & 0x1) == 0) continue; 1182 ia64_set_pmc(i, pmcs[i]); 1183 } 1184 ia64_srlz_d(); 1185} 1186 1187static inline int 1188pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) 1189{ 1190 return memcmp(a, b, sizeof(pfm_uuid_t)); 1191} 1192 1193static inline int 1194pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) 1195{ 1196 int ret = 0; 1197 if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); 1198 return ret; 1199} 1200 1201static inline int 1202pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) 1203{ 1204 int ret = 0; 1205 if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); 1206 return ret; 1207} 1208 1209 1210static inline int 1211pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, 1212 int cpu, void *arg) 1213{ 1214 int ret = 0; 1215 if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); 1216 return ret; 1217} 1218 1219static inline int 1220pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, 1221 int cpu, void *arg) 1222{ 1223 int ret = 0; 1224 if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); 1225 return ret; 1226} 1227 1228static inline int 1229pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1230{ 1231 int ret = 0; 1232 if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); 1233 return ret; 1234} 1235 1236static inline int 1237pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1238{ 1239 int ret = 0; 1240 if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); 1241 return ret; 1242} 1243 1244static pfm_buffer_fmt_t * 
1245__pfm_find_buffer_fmt(pfm_uuid_t uuid) 1246{ 1247 struct list_head * pos; 1248 pfm_buffer_fmt_t * entry; 1249 1250 list_for_each(pos, &pfm_buffer_fmt_list) { 1251 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 1252 if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) 1253 return entry; 1254 } 1255 return NULL; 1256} 1257 1258/* 1259 * find a buffer format based on its uuid 1260 */ 1261static pfm_buffer_fmt_t * 1262pfm_find_buffer_fmt(pfm_uuid_t uuid) 1263{ 1264 pfm_buffer_fmt_t * fmt; 1265 spin_lock(&pfm_buffer_fmt_lock); 1266 fmt = __pfm_find_buffer_fmt(uuid); 1267 spin_unlock(&pfm_buffer_fmt_lock); 1268 return fmt; 1269} 1270 1271int 1272pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) 1273{ 1274 int ret = 0; 1275 1276 /* some sanity checks */ 1277 if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; 1278 1279 /* we need at least a handler */ 1280 if (fmt->fmt_handler == NULL) return -EINVAL; 1281 1282 /* 1283 * XXX: need check validity of fmt_arg_size 1284 */ 1285 1286 spin_lock(&pfm_buffer_fmt_lock); 1287 1288 if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { 1289 printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); 1290 ret = -EBUSY; 1291 goto out; 1292 } 1293 list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); 1294 printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); 1295 1296out: 1297 spin_unlock(&pfm_buffer_fmt_lock); 1298 return ret; 1299} 1300EXPORT_SYMBOL(pfm_register_buffer_fmt); 1301 1302int 1303pfm_unregister_buffer_fmt(pfm_uuid_t uuid) 1304{ 1305 pfm_buffer_fmt_t *fmt; 1306 int ret = 0; 1307 1308 spin_lock(&pfm_buffer_fmt_lock); 1309 1310 fmt = __pfm_find_buffer_fmt(uuid); 1311 if (!fmt) { 1312 printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); 1313 ret = -EINVAL; 1314 goto out; 1315 } 1316 list_del_init(&fmt->fmt_list); 1317 printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); 1318 1319out: 1320 spin_unlock(&pfm_buffer_fmt_lock); 1321 return ret; 1322 1323} 1324EXPORT_SYMBOL(pfm_unregister_buffer_fmt); 1325 1326static int 1327pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) 1328{ 1329 unsigned long flags; 1330 /* 1331 * validity checks on cpu_mask have been done upstream 1332 */ 1333 LOCK_PFS(flags); 1334 1335 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1336 pfm_sessions.pfs_sys_sessions, 1337 pfm_sessions.pfs_task_sessions, 1338 pfm_sessions.pfs_sys_use_dbregs, 1339 is_syswide, 1340 cpu)); 1341 1342 if (is_syswide) { 1343 /* 1344 * cannot mix system wide and per-task sessions 1345 */ 1346 if (pfm_sessions.pfs_task_sessions > 0UL) { 1347 DPRINT(("system wide not possible, %u conflicting task_sessions\n", 1348 pfm_sessions.pfs_task_sessions)); 1349 goto abort; 1350 } 1351 1352 if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; 1353 1354 DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); 1355 1356 pfm_sessions.pfs_sys_session[cpu] = task; 1357 1358 pfm_sessions.pfs_sys_sessions++ ; 1359 1360 } else { 1361 if (pfm_sessions.pfs_sys_sessions) goto abort; 1362 pfm_sessions.pfs_task_sessions++; 1363 } 1364 1365 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1366 pfm_sessions.pfs_sys_sessions, 1367 pfm_sessions.pfs_task_sessions, 1368 pfm_sessions.pfs_sys_use_dbregs, 1369 is_syswide, 1370 cpu)); 1371 1372 /* 1373 * Force idle() into poll mode 1374 */ 1375 cpu_idle_poll_ctrl(true); 1376 1377 UNLOCK_PFS(flags); 1378 1379 return 0; 1380 1381error_conflict: 1382 
DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", 1383 task_pid_nr(pfm_sessions.pfs_sys_session[cpu]), 1384 cpu)); 1385abort: 1386 UNLOCK_PFS(flags); 1387 1388 return -EBUSY; 1389 1390} 1391 1392static int 1393pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) 1394{ 1395 unsigned long flags; 1396 /* 1397 * validity checks on cpu_mask have been done upstream 1398 */ 1399 LOCK_PFS(flags); 1400 1401 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1402 pfm_sessions.pfs_sys_sessions, 1403 pfm_sessions.pfs_task_sessions, 1404 pfm_sessions.pfs_sys_use_dbregs, 1405 is_syswide, 1406 cpu)); 1407 1408 1409 if (is_syswide) { 1410 pfm_sessions.pfs_sys_session[cpu] = NULL; 1411 /* 1412 * would not work with perfmon+more than one bit in cpu_mask 1413 */ 1414 if (ctx && ctx->ctx_fl_using_dbreg) { 1415 if (pfm_sessions.pfs_sys_use_dbregs == 0) { 1416 printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); 1417 } else { 1418 pfm_sessions.pfs_sys_use_dbregs--; 1419 } 1420 } 1421 pfm_sessions.pfs_sys_sessions--; 1422 } else { 1423 pfm_sessions.pfs_task_sessions--; 1424 } 1425 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1426 pfm_sessions.pfs_sys_sessions, 1427 pfm_sessions.pfs_task_sessions, 1428 pfm_sessions.pfs_sys_use_dbregs, 1429 is_syswide, 1430 cpu)); 1431 1432 /* Undo forced polling. Last session reenables pal_halt */ 1433 cpu_idle_poll_ctrl(false); 1434 1435 UNLOCK_PFS(flags); 1436 1437 return 0; 1438} 1439 1440/* 1441 * removes virtual mapping of the sampling buffer. 1442 * IMPORTANT: cannot be called with interrupts disable, e.g. inside 1443 * a PROTECT_CTX() section. 1444 */ 1445static int 1446pfm_remove_smpl_mapping(void *vaddr, unsigned long size) 1447{ 1448 struct task_struct *task = current; 1449 int r; 1450 1451 /* sanity checks */ 1452 if (task->mm == NULL || size == 0UL || vaddr == NULL) { 1453 printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm); 1454 return -EINVAL; 1455 } 1456 1457 DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); 1458 1459 /* 1460 * does the actual unmapping 1461 */ 1462 r = vm_munmap((unsigned long)vaddr, size); 1463 1464 if (r !=0) { 1465 printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size); 1466 } 1467 1468 DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); 1469 1470 return 0; 1471} 1472 1473/* 1474 * free actual physical storage used by sampling buffer 1475 */ 1476#if 0 1477static int 1478pfm_free_smpl_buffer(pfm_context_t *ctx) 1479{ 1480 pfm_buffer_fmt_t *fmt; 1481 1482 if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; 1483 1484 /* 1485 * we won't use the buffer format anymore 1486 */ 1487 fmt = ctx->ctx_buf_fmt; 1488 1489 DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", 1490 ctx->ctx_smpl_hdr, 1491 ctx->ctx_smpl_size, 1492 ctx->ctx_smpl_vaddr)); 1493 1494 pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1495 1496 /* 1497 * free the buffer 1498 */ 1499 pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); 1500 1501 ctx->ctx_smpl_hdr = NULL; 1502 ctx->ctx_smpl_size = 0UL; 1503 1504 return 0; 1505 1506invalid_free: 1507 printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current)); 1508 return -EINVAL; 1509} 1510#endif 1511 1512static inline void 1513pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) 1514{ 1515 if (fmt == NULL) return; 1516 1517 pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1518 1519} 1520 
1521/* 1522 * pfmfs should _never_ be mounted by userland - too much of security hassle, 1523 * no real gain from having the whole whorehouse mounted. So we don't need 1524 * any operations on the root directory. However, we need a non-trivial 1525 * d_name - pfm: will go nicely and kill the special-casing in procfs. 1526 */ 1527static struct vfsmount *pfmfs_mnt __read_mostly; 1528 1529static int __init 1530init_pfm_fs(void) 1531{ 1532 int err = register_filesystem(&pfm_fs_type); 1533 if (!err) { 1534 pfmfs_mnt = kern_mount(&pfm_fs_type); 1535 err = PTR_ERR(pfmfs_mnt); 1536 if (IS_ERR(pfmfs_mnt)) 1537 unregister_filesystem(&pfm_fs_type); 1538 else 1539 err = 0; 1540 } 1541 return err; 1542} 1543 1544static ssize_t 1545pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) 1546{ 1547 pfm_context_t *ctx; 1548 pfm_msg_t *msg; 1549 ssize_t ret; 1550 unsigned long flags; 1551 DECLARE_WAITQUEUE(wait, current); 1552 if (PFM_IS_FILE(filp) == 0) { 1553 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1554 return -EINVAL; 1555 } 1556 1557 ctx = filp->private_data; 1558 if (ctx == NULL) { 1559 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); 1560 return -EINVAL; 1561 } 1562 1563 /* 1564 * check even when there is no message 1565 */ 1566 if (size < sizeof(pfm_msg_t)) { 1567 DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); 1568 return -EINVAL; 1569 } 1570 1571 PROTECT_CTX(ctx, flags); 1572 1573 /* 1574 * put ourselves on the wait queue 1575 */ 1576 add_wait_queue(&ctx->ctx_msgq_wait, &wait); 1577 1578 1579 for(;;) { 1580 /* 1581 * check wait queue 1582 */ 1583 1584 set_current_state(TASK_INTERRUPTIBLE); 1585 1586 DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 1587 1588 ret = 0; 1589 if(PFM_CTXQ_EMPTY(ctx) == 0) break; 1590 1591 UNPROTECT_CTX(ctx, flags); 1592 1593 /* 1594 * check non-blocking read 1595 */ 1596 ret = -EAGAIN; 1597 if(filp->f_flags & O_NONBLOCK) break; 1598 1599 /* 1600 * check pending signals 1601 */ 1602 if(signal_pending(current)) { 1603 ret = -EINTR; 1604 break; 1605 } 1606 /* 1607 * no message, so wait 1608 */ 1609 schedule(); 1610 1611 PROTECT_CTX(ctx, flags); 1612 } 1613 DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret)); 1614 set_current_state(TASK_RUNNING); 1615 remove_wait_queue(&ctx->ctx_msgq_wait, &wait); 1616 1617 if (ret < 0) goto abort; 1618 1619 ret = -EINVAL; 1620 msg = pfm_get_next_msg(ctx); 1621 if (msg == NULL) { 1622 printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current)); 1623 goto abort_locked; 1624 } 1625 1626 DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); 1627 1628 ret = -EFAULT; 1629 if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); 1630 1631abort_locked: 1632 UNPROTECT_CTX(ctx, flags); 1633abort: 1634 return ret; 1635} 1636 1637static ssize_t 1638pfm_write(struct file *file, const char __user *ubuf, 1639 size_t size, loff_t *ppos) 1640{ 1641 DPRINT(("pfm_write called\n")); 1642 return -EINVAL; 1643} 1644 1645static unsigned int 1646pfm_poll(struct file *filp, poll_table * wait) 1647{ 1648 pfm_context_t *ctx; 1649 unsigned long flags; 1650 unsigned int mask = 0; 1651 1652 if (PFM_IS_FILE(filp) == 0) { 1653 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1654 return 0; 1655 } 1656 1657 ctx = filp->private_data; 1658 if (ctx == NULL) { 1659 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", 
task_pid_nr(current)); 1660 return 0; 1661 } 1662 1663 1664 DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); 1665 1666 poll_wait(filp, &ctx->ctx_msgq_wait, wait); 1667 1668 PROTECT_CTX(ctx, flags); 1669 1670 if (PFM_CTXQ_EMPTY(ctx) == 0) 1671 mask = POLLIN | POLLRDNORM; 1672 1673 UNPROTECT_CTX(ctx, flags); 1674 1675 DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); 1676 1677 return mask; 1678} 1679 1680static long 1681pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1682{ 1683 DPRINT(("pfm_ioctl called\n")); 1684 return -EINVAL; 1685} 1686 1687/* 1688 * interrupt cannot be masked when coming here 1689 */ 1690static inline int 1691pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) 1692{ 1693 int ret; 1694 1695 ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); 1696 1697 DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1698 task_pid_nr(current), 1699 fd, 1700 on, 1701 ctx->ctx_async_queue, ret)); 1702 1703 return ret; 1704} 1705 1706static int 1707pfm_fasync(int fd, struct file *filp, int on) 1708{ 1709 pfm_context_t *ctx; 1710 int ret; 1711 1712 if (PFM_IS_FILE(filp) == 0) { 1713 printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current)); 1714 return -EBADF; 1715 } 1716 1717 ctx = filp->private_data; 1718 if (ctx == NULL) { 1719 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); 1720 return -EBADF; 1721 } 1722 /* 1723 * we cannot mask interrupts during this call because this may 1724 * may go to sleep if memory is not readily avalaible. 1725 * 1726 * We are protected from the conetxt disappearing by the get_fd()/put_fd() 1727 * done in caller. Serialization of this function is ensured by caller. 1728 */ 1729 ret = pfm_do_fasync(fd, filp, ctx, on); 1730 1731 1732 DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1733 fd, 1734 on, 1735 ctx->ctx_async_queue, ret)); 1736 1737 return ret; 1738} 1739 1740#ifdef CONFIG_SMP 1741/* 1742 * this function is exclusively called from pfm_close(). 1743 * The context is not protected at that time, nor are interrupts 1744 * on the remote CPU. That's necessary to avoid deadlocks. 
1745 */ 1746static void 1747pfm_syswide_force_stop(void *info) 1748{ 1749 pfm_context_t *ctx = (pfm_context_t *)info; 1750 struct pt_regs *regs = task_pt_regs(current); 1751 struct task_struct *owner; 1752 unsigned long flags; 1753 int ret; 1754 1755 if (ctx->ctx_cpu != smp_processor_id()) { 1756 printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", 1757 ctx->ctx_cpu, 1758 smp_processor_id()); 1759 return; 1760 } 1761 owner = GET_PMU_OWNER(); 1762 if (owner != ctx->ctx_task) { 1763 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", 1764 smp_processor_id(), 1765 task_pid_nr(owner), task_pid_nr(ctx->ctx_task)); 1766 return; 1767 } 1768 if (GET_PMU_CTX() != ctx) { 1769 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", 1770 smp_processor_id(), 1771 GET_PMU_CTX(), ctx); 1772 return; 1773 } 1774 1775 DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task))); 1776 /* 1777 * the context is already protected in pfm_close(), we simply 1778 * need to mask interrupts to avoid a PMU interrupt race on 1779 * this CPU 1780 */ 1781 local_irq_save(flags); 1782 1783 ret = pfm_context_unload(ctx, NULL, 0, regs); 1784 if (ret) { 1785 DPRINT(("context_unload returned %d\n", ret)); 1786 } 1787 1788 /* 1789 * unmask interrupts, PMU interrupts are now spurious here 1790 */ 1791 local_irq_restore(flags); 1792} 1793 1794static void 1795pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) 1796{ 1797 int ret; 1798 1799 DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); 1800 ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1); 1801 DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); 1802} 1803#endif /* CONFIG_SMP */ 1804 1805/* 1806 * called for each close(). Partially free resources. 1807 * When caller is self-monitoring, the context is unloaded. 1808 */ 1809static int 1810pfm_flush(struct file *filp, fl_owner_t id) 1811{ 1812 pfm_context_t *ctx; 1813 struct task_struct *task; 1814 struct pt_regs *regs; 1815 unsigned long flags; 1816 unsigned long smpl_buf_size = 0UL; 1817 void *smpl_buf_vaddr = NULL; 1818 int state, is_system; 1819 1820 if (PFM_IS_FILE(filp) == 0) { 1821 DPRINT(("bad magic for\n")); 1822 return -EBADF; 1823 } 1824 1825 ctx = filp->private_data; 1826 if (ctx == NULL) { 1827 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); 1828 return -EBADF; 1829 } 1830 1831 /* 1832 * remove our file from the async queue, if we use this mode. 1833 * This can be done without the context being protected. We come 1834 * here when the context has become unreachable by other tasks. 1835 * 1836 * We may still have active monitoring at this point and we may 1837 * end up in pfm_overflow_handler(). However, fasync_helper() 1838 * operates with interrupts disabled and it cleans up the 1839 * queue. If the PMU handler is called prior to entering 1840 * fasync_helper() then it will send a signal. If it is 1841 * invoked after, it will find an empty queue and no 1842 * signal will be sent. In both case, we are safe 1843 */ 1844 PROTECT_CTX(ctx, flags); 1845 1846 state = ctx->ctx_state; 1847 is_system = ctx->ctx_fl_system; 1848 1849 task = PFM_CTX_TASK(ctx); 1850 regs = task_pt_regs(task); 1851 1852 DPRINT(("ctx_state=%d is_current=%d\n", 1853 state, 1854 task == current ? 
1 : 0)); 1855 1856 /* 1857 * if state == UNLOADED, then task is NULL 1858 */ 1859 1860 /* 1861 * we must stop and unload because we are losing access to the context. 1862 */ 1863 if (task == current) { 1864#ifdef CONFIG_SMP 1865 /* 1866 * the task IS the owner but it migrated to another CPU: that's bad 1867 * but we must handle this cleanly. Unfortunately, the kernel does 1868 * not provide a mechanism to block migration (while the context is loaded). 1869 * 1870 * We need to release the resource on the ORIGINAL cpu. 1871 */ 1872 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 1873 1874 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 1875 /* 1876 * keep context protected but unmask interrupt for IPI 1877 */ 1878 local_irq_restore(flags); 1879 1880 pfm_syswide_cleanup_other_cpu(ctx); 1881 1882 /* 1883 * restore interrupt masking 1884 */ 1885 local_irq_save(flags); 1886 1887 /* 1888 * context is unloaded at this point 1889 */ 1890 } else 1891#endif /* CONFIG_SMP */ 1892 { 1893 1894 DPRINT(("forcing unload\n")); 1895 /* 1896 * stop and unload, returning with state UNLOADED 1897 * and session unreserved. 1898 */ 1899 pfm_context_unload(ctx, NULL, 0, regs); 1900 1901 DPRINT(("ctx_state=%d\n", ctx->ctx_state)); 1902 } 1903 } 1904 1905 /* 1906 * remove virtual mapping, if any, for the calling task. 1907 * cannot reset ctx field until last user is calling close(). 1908 * 1909 * ctx_smpl_vaddr must never be cleared because it is needed 1910 * by every task with access to the context 1911 * 1912 * When called from do_exit(), the mm context is gone already, therefore 1913 * mm is NULL, i.e., the VMA is already gone and we do not have to 1914 * do anything here 1915 */ 1916 if (ctx->ctx_smpl_vaddr && current->mm) { 1917 smpl_buf_vaddr = ctx->ctx_smpl_vaddr; 1918 smpl_buf_size = ctx->ctx_smpl_size; 1919 } 1920 1921 UNPROTECT_CTX(ctx, flags); 1922 1923 /* 1924 * if there was a mapping, then we systematically remove it 1925 * at this point. Cannot be done inside critical section 1926 * because some VM function reenables interrupts. 1927 * 1928 */ 1929 if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size); 1930 1931 return 0; 1932} 1933/* 1934 * called either on explicit close() or from exit_files(). 1935 * Only the LAST user of the file gets to this point, i.e., it is 1936 * called only ONCE. 1937 * 1938 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 1939 * (fput()),i.e, last task to access the file. Nobody else can access the 1940 * file at this point. 1941 * 1942 * When called from exit_files(), the VMA has been freed because exit_mm() 1943 * is executed before exit_files(). 1944 * 1945 * When called from exit_files(), the current task is not yet ZOMBIE but we 1946 * flush the PMU state to the context. 
1947 */ 1948static int 1949pfm_close(struct inode *inode, struct file *filp) 1950{ 1951 pfm_context_t *ctx; 1952 struct task_struct *task; 1953 struct pt_regs *regs; 1954 DECLARE_WAITQUEUE(wait, current); 1955 unsigned long flags; 1956 unsigned long smpl_buf_size = 0UL; 1957 void *smpl_buf_addr = NULL; 1958 int free_possible = 1; 1959 int state, is_system; 1960 1961 DPRINT(("pfm_close called private=%p\n", filp->private_data)); 1962 1963 if (PFM_IS_FILE(filp) == 0) { 1964 DPRINT(("bad magic\n")); 1965 return -EBADF; 1966 } 1967 1968 ctx = filp->private_data; 1969 if (ctx == NULL) { 1970 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); 1971 return -EBADF; 1972 } 1973 1974 PROTECT_CTX(ctx, flags); 1975 1976 state = ctx->ctx_state; 1977 is_system = ctx->ctx_fl_system; 1978 1979 task = PFM_CTX_TASK(ctx); 1980 regs = task_pt_regs(task); 1981 1982 DPRINT(("ctx_state=%d is_current=%d\n", 1983 state, 1984 task == current ? 1 : 0)); 1985 1986 /* 1987 * if task == current, then pfm_flush() unloaded the context 1988 */ 1989 if (state == PFM_CTX_UNLOADED) goto doit; 1990 1991 /* 1992 * context is loaded/masked and task != current, we need to 1993 * either force an unload or go zombie 1994 */ 1995 1996 /* 1997 * The task is currently blocked or will block after an overflow. 1998 * we must force it to wakeup to get out of the 1999 * MASKED state and transition to the unloaded state by itself. 2000 * 2001 * This situation is only possible for per-task mode 2002 */ 2003 if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { 2004 2005 /* 2006 * set a "partial" zombie state to be checked 2007 * upon return from down() in pfm_handle_work(). 2008 * 2009 * We cannot use the ZOMBIE state, because it is checked 2010 * by pfm_load_regs() which is called upon wakeup from down(). 2011 * In such case, it would free the context and then we would 2012 * return to pfm_handle_work() which would access the 2013 * stale context. Instead, we set a flag invisible to pfm_load_regs() 2014 * but visible to pfm_handle_work(). 2015 * 2016 * For some window of time, we have a zombie context with 2017 * ctx_state = MASKED and not ZOMBIE 2018 */ 2019 ctx->ctx_fl_going_zombie = 1; 2020 2021 /* 2022 * force task to wake up from MASKED state 2023 */ 2024 complete(&ctx->ctx_restart_done); 2025 2026 DPRINT(("waking up ctx_state=%d\n", state)); 2027 2028 /* 2029 * put ourself to sleep waiting for the other 2030 * task to report completion 2031 * 2032 * the context is protected by mutex, therefore there 2033 * is no risk of being notified of completion before 2034 * begin actually on the waitq. 2035 */ 2036 set_current_state(TASK_INTERRUPTIBLE); 2037 add_wait_queue(&ctx->ctx_zombieq, &wait); 2038 2039 UNPROTECT_CTX(ctx, flags); 2040 2041 /* 2042 * XXX: check for signals : 2043 * - ok for explicit close 2044 * - not ok when coming from exit_files() 2045 */ 2046 schedule(); 2047 2048 2049 PROTECT_CTX(ctx, flags); 2050 2051 2052 remove_wait_queue(&ctx->ctx_zombieq, &wait); 2053 set_current_state(TASK_RUNNING); 2054 2055 /* 2056 * context is unloaded at this point 2057 */ 2058 DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); 2059 } 2060 else if (task != current) { 2061#ifdef CONFIG_SMP 2062 /* 2063 * switch context to zombie state 2064 */ 2065 ctx->ctx_state = PFM_CTX_ZOMBIE; 2066 2067 DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task))); 2068 /* 2069 * cannot free the context on the spot. 
deferred until
		 * the task notices the ZOMBIE state
		 */
		free_possible = 0;
#else
		pfm_context_unload(ctx, NULL, 0, regs);
#endif
	}

doit:
	/* reload state, may have changed during opening of critical section */
	state = ctx->ctx_state;

	/*
	 * the context is still attached to a task (possibly current)
	 * we cannot destroy it right now
	 */

	/*
	 * we must free the sampling buffer right here because
	 * we cannot rely on it being cleaned up later by the
	 * monitored task. It is not possible to free vmalloc'ed
	 * memory in pfm_load_regs(). Instead, we remove the buffer
	 * now. Should there be a subsequent PMU overflow originally
	 * meant for sampling, it will be treated as spurious, and
	 * that is fine because the monitoring tool is gone anyway.
	 */
	if (ctx->ctx_smpl_hdr) {
		smpl_buf_addr = ctx->ctx_smpl_hdr;
		smpl_buf_size = ctx->ctx_smpl_size;
		/* no more sampling */
		ctx->ctx_smpl_hdr = NULL;
		ctx->ctx_fl_is_sampling = 0;
	}

	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
		state,
		free_possible,
		smpl_buf_addr,
		smpl_buf_size));

	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);

	/*
	 * if the state is UNLOADED, the session has already been unreserved;
	 * only a ZOMBIE context still holds its session reservation here.
	 */
	if (state == PFM_CTX_ZOMBIE) {
		pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu);
	}

	/*
	 * disconnecting the file descriptor from the context must be done
	 * before we unlock.
	 */
	filp->private_data = NULL;

	/*
	 * if we free on the spot, the context is now completely unreachable
	 * from the caller's side. The monitored task side is also cut, so we
	 * can free it safely.
	 *
	 * If we have a deferred free, only the caller side is disconnected.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
2137 */ 2138 if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); 2139 2140 /* 2141 * return the memory used by the context 2142 */ 2143 if (free_possible) pfm_context_free(ctx); 2144 2145 return 0; 2146} 2147 2148static const struct file_operations pfm_file_ops = { 2149 .llseek = no_llseek, 2150 .read = pfm_read, 2151 .write = pfm_write, 2152 .poll = pfm_poll, 2153 .unlocked_ioctl = pfm_ioctl, 2154 .fasync = pfm_fasync, 2155 .release = pfm_close, 2156 .flush = pfm_flush 2157}; 2158 2159static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) 2160{ 2161 return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", 2162 d_inode(dentry)->i_ino); 2163} 2164 2165static const struct dentry_operations pfmfs_dentry_operations = { 2166 .d_delete = always_delete_dentry, 2167 .d_dname = pfmfs_dname, 2168}; 2169 2170 2171static struct file * 2172pfm_alloc_file(pfm_context_t *ctx) 2173{ 2174 struct file *file; 2175 struct inode *inode; 2176 struct path path; 2177 struct qstr this = { .name = "" }; 2178 2179 /* 2180 * allocate a new inode 2181 */ 2182 inode = new_inode(pfmfs_mnt->mnt_sb); 2183 if (!inode) 2184 return ERR_PTR(-ENOMEM); 2185 2186 DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); 2187 2188 inode->i_mode = S_IFCHR|S_IRUGO; 2189 inode->i_uid = current_fsuid(); 2190 inode->i_gid = current_fsgid(); 2191 2192 /* 2193 * allocate a new dcache entry 2194 */ 2195 path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this); 2196 if (!path.dentry) { 2197 iput(inode); 2198 return ERR_PTR(-ENOMEM); 2199 } 2200 path.mnt = mntget(pfmfs_mnt); 2201 2202 d_add(path.dentry, inode); 2203 2204 file = alloc_file(&path, FMODE_READ, &pfm_file_ops); 2205 if (IS_ERR(file)) { 2206 path_put(&path); 2207 return file; 2208 } 2209 2210 file->f_flags = O_RDONLY; 2211 file->private_data = ctx; 2212 2213 return file; 2214} 2215 2216static int 2217pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) 2218{ 2219 DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); 2220 2221 while (size > 0) { 2222 unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; 2223 2224 2225 if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) 2226 return -ENOMEM; 2227 2228 addr += PAGE_SIZE; 2229 buf += PAGE_SIZE; 2230 size -= PAGE_SIZE; 2231 } 2232 return 0; 2233} 2234 2235/* 2236 * allocate a sampling buffer and remaps it into the user address space of the task 2237 */ 2238static int 2239pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) 2240{ 2241 struct mm_struct *mm = task->mm; 2242 struct vm_area_struct *vma = NULL; 2243 unsigned long size; 2244 void *smpl_buf; 2245 2246 2247 /* 2248 * the fixed header + requested size and align to page boundary 2249 */ 2250 size = PAGE_ALIGN(rsize); 2251 2252 DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); 2253 2254 /* 2255 * check requested size to avoid Denial-of-service attacks 2256 * XXX: may have to refine this test 2257 * Check against address space limit. 2258 * 2259 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) 2260 * return -ENOMEM; 2261 */ 2262 if (size > task_rlimit(task, RLIMIT_MEMLOCK)) 2263 return -ENOMEM; 2264 2265 /* 2266 * We do the easy to undo allocations first. 
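	 *
	 * Big picture (illustrative summary of the code below): the buffer is
	 * vmalloc'ed and zeroed first, a vma is prepared, and only then, under
	 * mmap_sem, an unmapped area is found, the pages are remapped read-only
	 * and the vma is inserted. On success the kernel address ends up in
	 * ctx_smpl_hdr (used by the buffer format) and the user address in
	 * ctx_smpl_vaddr, which pfm_setup_buffer_fmt() copies back to the tool
	 * in pfarg_context_t.ctx_smpl_vaddr, so after context creation with a
	 * sampling format the tool simply reads samples at that address. The
	 * aligned size was already checked against RLIMIT_MEMLOCK above.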
2267 * 2268 * pfm_rvmalloc(), clears the buffer, so there is no leak 2269 */ 2270 smpl_buf = pfm_rvmalloc(size); 2271 if (smpl_buf == NULL) { 2272 DPRINT(("Can't allocate sampling buffer\n")); 2273 return -ENOMEM; 2274 } 2275 2276 DPRINT(("smpl_buf @%p\n", smpl_buf)); 2277 2278 /* allocate vma */ 2279 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 2280 if (!vma) { 2281 DPRINT(("Cannot allocate vma\n")); 2282 goto error_kmem; 2283 } 2284 INIT_LIST_HEAD(&vma->anon_vma_chain); 2285 2286 /* 2287 * partially initialize the vma for the sampling buffer 2288 */ 2289 vma->vm_mm = mm; 2290 vma->vm_file = get_file(filp); 2291 vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; 2292 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2293 2294 /* 2295 * Now we have everything we need and we can initialize 2296 * and connect all the data structures 2297 */ 2298 2299 ctx->ctx_smpl_hdr = smpl_buf; 2300 ctx->ctx_smpl_size = size; /* aligned size */ 2301 2302 /* 2303 * Let's do the difficult operations next. 2304 * 2305 * now we atomically find some area in the address space and 2306 * remap the buffer in it. 2307 */ 2308 down_write(&task->mm->mmap_sem); 2309 2310 /* find some free area in address space, must have mmap sem held */ 2311 vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); 2312 if (IS_ERR_VALUE(vma->vm_start)) { 2313 DPRINT(("Cannot find unmapped area for size %ld\n", size)); 2314 up_write(&task->mm->mmap_sem); 2315 goto error; 2316 } 2317 vma->vm_end = vma->vm_start + size; 2318 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 2319 2320 DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); 2321 2322 /* can only be applied to current task, need to have the mm semaphore held when called */ 2323 if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { 2324 DPRINT(("Can't remap buffer\n")); 2325 up_write(&task->mm->mmap_sem); 2326 goto error; 2327 } 2328 2329 /* 2330 * now insert the vma in the vm list for the process, must be 2331 * done with mmap lock held 2332 */ 2333 insert_vm_struct(mm, vma); 2334 2335 vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, 2336 vma_pages(vma)); 2337 up_write(&task->mm->mmap_sem); 2338 2339 /* 2340 * keep track of user level virtual address 2341 */ 2342 ctx->ctx_smpl_vaddr = (void *)vma->vm_start; 2343 *(unsigned long *)user_vaddr = vma->vm_start; 2344 2345 return 0; 2346 2347error: 2348 kmem_cache_free(vm_area_cachep, vma); 2349error_kmem: 2350 pfm_rvfree(smpl_buf, size); 2351 2352 return -ENOMEM; 2353} 2354 2355/* 2356 * XXX: do something better here 2357 */ 2358static int 2359pfm_bad_permissions(struct task_struct *task) 2360{ 2361 const struct cred *tcred; 2362 kuid_t uid = current_uid(); 2363 kgid_t gid = current_gid(); 2364 int ret; 2365 2366 rcu_read_lock(); 2367 tcred = __task_cred(task); 2368 2369 /* inspired by ptrace_attach() */ 2370 DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", 2371 from_kuid(&init_user_ns, uid), 2372 from_kgid(&init_user_ns, gid), 2373 from_kuid(&init_user_ns, tcred->euid), 2374 from_kuid(&init_user_ns, tcred->suid), 2375 from_kuid(&init_user_ns, tcred->uid), 2376 from_kgid(&init_user_ns, tcred->egid), 2377 from_kgid(&init_user_ns, tcred->sgid))); 2378 2379 ret = ((!uid_eq(uid, tcred->euid)) 2380 || (!uid_eq(uid, tcred->suid)) 2381 || (!uid_eq(uid, tcred->uid)) 2382 || (!gid_eq(gid, tcred->egid)) 2383 || (!gid_eq(gid, tcred->sgid)) 2384 || (!gid_eq(gid, tcred->gid))) && 
!capable(CAP_SYS_PTRACE); 2385 2386 rcu_read_unlock(); 2387 return ret; 2388} 2389 2390static int 2391pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) 2392{ 2393 int ctx_flags; 2394 2395 /* valid signal */ 2396 2397 ctx_flags = pfx->ctx_flags; 2398 2399 if (ctx_flags & PFM_FL_SYSTEM_WIDE) { 2400 2401 /* 2402 * cannot block in this mode 2403 */ 2404 if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { 2405 DPRINT(("cannot use blocking mode when in system wide monitoring\n")); 2406 return -EINVAL; 2407 } 2408 } else { 2409 } 2410 /* probably more to add here */ 2411 2412 return 0; 2413} 2414 2415static int 2416pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags, 2417 unsigned int cpu, pfarg_context_t *arg) 2418{ 2419 pfm_buffer_fmt_t *fmt = NULL; 2420 unsigned long size = 0UL; 2421 void *uaddr = NULL; 2422 void *fmt_arg = NULL; 2423 int ret = 0; 2424#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) 2425 2426 /* invoke and lock buffer format, if found */ 2427 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); 2428 if (fmt == NULL) { 2429 DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task))); 2430 return -EINVAL; 2431 } 2432 2433 /* 2434 * buffer argument MUST be contiguous to pfarg_context_t 2435 */ 2436 if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); 2437 2438 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); 2439 2440 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret)); 2441 2442 if (ret) goto error; 2443 2444 /* link buffer format and context */ 2445 ctx->ctx_buf_fmt = fmt; 2446 ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */ 2447 2448 /* 2449 * check if buffer format wants to use perfmon buffer allocation/mapping service 2450 */ 2451 ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); 2452 if (ret) goto error; 2453 2454 if (size) { 2455 /* 2456 * buffer is always remapped into the caller's address space 2457 */ 2458 ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr); 2459 if (ret) goto error; 2460 2461 /* keep track of user address of buffer */ 2462 arg->ctx_smpl_vaddr = uaddr; 2463 } 2464 ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); 2465 2466error: 2467 return ret; 2468} 2469 2470static void 2471pfm_reset_pmu_state(pfm_context_t *ctx) 2472{ 2473 int i; 2474 2475 /* 2476 * install reset values for PMC. 2477 */ 2478 for (i=1; PMC_IS_LAST(i) == 0; i++) { 2479 if (PMC_IS_IMPL(i) == 0) continue; 2480 ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); 2481 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); 2482 } 2483 /* 2484 * PMD registers are set to 0UL when the context in memset() 2485 */ 2486 2487 /* 2488 * On context switched restore, we must restore ALL pmc and ALL pmd even 2489 * when they are not actively used by the task. In UP, the incoming process 2490 * may otherwise pick up left over PMC, PMD state from the previous process. 2491 * As opposed to PMD, stale PMC can cause harm to the incoming 2492 * process because they may change what is being measured. 2493 * Therefore, we must systematically reinstall the entire 2494 * PMC state. In SMP, the same thing is possible on the 2495 * same CPU but also on between 2 CPUs. 2496 * 2497 * The problem with PMD is information leaking especially 2498 * to user level when psr.sp=0 2499 * 2500 * There is unfortunately no easy way to avoid this problem 2501 * on either UP or SMP. This definitively slows down the 2502 * pfm_load_regs() function. 
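	 *
	 * Concrete example of the risk (illustrative): on UP, if task A programs
	 * PMC4 for some event and task B's context never touches PMC4, skipping
	 * the PMC4 restore on the switch to B would leave B silently measuring
	 * A's event. Stale PMDs are less harmful to the measurement itself, but
	 * they can leak A's counts to B at user level whenever psr.sp is 0,
	 * hence the blanket restore described above.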
2503 */ 2504 2505 /* 2506 * bitmask of all PMCs accessible to this context 2507 * 2508 * PMC0 is treated differently. 2509 */ 2510 ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; 2511 2512 /* 2513 * bitmask of all PMDs that are accessible to this context 2514 */ 2515 ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; 2516 2517 DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); 2518 2519 /* 2520 * useful in case of re-enable after disable 2521 */ 2522 ctx->ctx_used_ibrs[0] = 0UL; 2523 ctx->ctx_used_dbrs[0] = 0UL; 2524} 2525 2526static int 2527pfm_ctx_getsize(void *arg, size_t *sz) 2528{ 2529 pfarg_context_t *req = (pfarg_context_t *)arg; 2530 pfm_buffer_fmt_t *fmt; 2531 2532 *sz = 0; 2533 2534 if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; 2535 2536 fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); 2537 if (fmt == NULL) { 2538 DPRINT(("cannot find buffer format\n")); 2539 return -EINVAL; 2540 } 2541 /* get just enough to copy in user parameters */ 2542 *sz = fmt->fmt_arg_size; 2543 DPRINT(("arg_size=%lu\n", *sz)); 2544 2545 return 0; 2546} 2547 2548 2549 2550/* 2551 * cannot attach if : 2552 * - kernel task 2553 * - task not owned by caller 2554 * - task incompatible with context mode 2555 */ 2556static int 2557pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) 2558{ 2559 /* 2560 * no kernel task or task not owner by caller 2561 */ 2562 if (task->mm == NULL) { 2563 DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task))); 2564 return -EPERM; 2565 } 2566 if (pfm_bad_permissions(task)) { 2567 DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task))); 2568 return -EPERM; 2569 } 2570 /* 2571 * cannot block in self-monitoring mode 2572 */ 2573 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { 2574 DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task))); 2575 return -EINVAL; 2576 } 2577 2578 if (task->exit_state == EXIT_ZOMBIE) { 2579 DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task))); 2580 return -EBUSY; 2581 } 2582 2583 /* 2584 * always ok for self 2585 */ 2586 if (task == current) return 0; 2587 2588 if (!task_is_stopped_or_traced(task)) { 2589 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state)); 2590 return -EBUSY; 2591 } 2592 /* 2593 * make sure the task is off any CPU 2594 */ 2595 wait_task_inactive(task, 0); 2596 2597 /* more to come... 
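	 *
	 * A typical attach sequence that satisfies the checks above
	 * (illustrative only; PFM_LOAD_CONTEXT is assumed from the perfmon ABI):
	 *
	 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);
	 *	waitpid(pid, &status, 0);		target now stopped/traced
	 *	load.load_pid = pid;			pfarg_load_t
	 *	perfmonctl(fd, PFM_LOAD_CONTEXT, &load, 1);
	 *
	 * Self-monitoring (task == current) needs none of this, but then the
	 * context must not use blocking notification (checked above).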
*/ 2598 2599 return 0; 2600} 2601 2602static int 2603pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) 2604{ 2605 struct task_struct *p = current; 2606 int ret; 2607 2608 /* XXX: need to add more checks here */ 2609 if (pid < 2) return -EPERM; 2610 2611 if (pid != task_pid_vnr(current)) { 2612 2613 read_lock(&tasklist_lock); 2614 2615 p = find_task_by_vpid(pid); 2616 2617 /* make sure task cannot go away while we operate on it */ 2618 if (p) get_task_struct(p); 2619 2620 read_unlock(&tasklist_lock); 2621 2622 if (p == NULL) return -ESRCH; 2623 } 2624 2625 ret = pfm_task_incompatible(ctx, p); 2626 if (ret == 0) { 2627 *task = p; 2628 } else if (p != current) { 2629 pfm_put_task(p); 2630 } 2631 return ret; 2632} 2633 2634 2635 2636static int 2637pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2638{ 2639 pfarg_context_t *req = (pfarg_context_t *)arg; 2640 struct file *filp; 2641 struct path path; 2642 int ctx_flags; 2643 int fd; 2644 int ret; 2645 2646 /* let's check the arguments first */ 2647 ret = pfarg_is_sane(current, req); 2648 if (ret < 0) 2649 return ret; 2650 2651 ctx_flags = req->ctx_flags; 2652 2653 ret = -ENOMEM; 2654 2655 fd = get_unused_fd_flags(0); 2656 if (fd < 0) 2657 return fd; 2658 2659 ctx = pfm_context_alloc(ctx_flags); 2660 if (!ctx) 2661 goto error; 2662 2663 filp = pfm_alloc_file(ctx); 2664 if (IS_ERR(filp)) { 2665 ret = PTR_ERR(filp); 2666 goto error_file; 2667 } 2668 2669 req->ctx_fd = ctx->ctx_fd = fd; 2670 2671 /* 2672 * does the user want to sample? 2673 */ 2674 if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { 2675 ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); 2676 if (ret) 2677 goto buffer_error; 2678 } 2679 2680 DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", 2681 ctx, 2682 ctx_flags, 2683 ctx->ctx_fl_system, 2684 ctx->ctx_fl_block, 2685 ctx->ctx_fl_excl_idle, 2686 ctx->ctx_fl_no_msg, 2687 ctx->ctx_fd)); 2688 2689 /* 2690 * initialize soft PMU state 2691 */ 2692 pfm_reset_pmu_state(ctx); 2693 2694 fd_install(fd, filp); 2695 2696 return 0; 2697 2698buffer_error: 2699 path = filp->f_path; 2700 put_filp(filp); 2701 path_put(&path); 2702 2703 if (ctx->ctx_buf_fmt) { 2704 pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); 2705 } 2706error_file: 2707 pfm_context_free(ctx); 2708 2709error: 2710 put_unused_fd(fd); 2711 return ret; 2712} 2713 2714static inline unsigned long 2715pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) 2716{ 2717 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; 2718 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; 2719 extern unsigned long carta_random32 (unsigned long seed); 2720 2721 if (reg->flags & PFM_REGFL_RANDOM) { 2722 new_seed = carta_random32(old_seed); 2723 val -= (old_seed & mask); /* counter values are negative numbers! 
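		 * e.g., with illustrative numbers, short_reset = -100000 and
		 * (old_seed & mask) = 37 give val = -100037, so the counter is
		 * re-armed to overflow only after roughly 100037 more events; the
		 * randomization thus widens each period by a pseudo-random amount
		 * bounded by reg->mask.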
*/ 2724 if ((mask >> 32) != 0) 2725 /* construct a full 64-bit random value: */ 2726 new_seed |= carta_random32(old_seed >> 32) << 32; 2727 reg->seed = new_seed; 2728 } 2729 reg->lval = val; 2730 return val; 2731} 2732 2733static void 2734pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2735{ 2736 unsigned long mask = ovfl_regs[0]; 2737 unsigned long reset_others = 0UL; 2738 unsigned long val; 2739 int i; 2740 2741 /* 2742 * now restore reset value on sampling overflowed counters 2743 */ 2744 mask >>= PMU_FIRST_COUNTER; 2745 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2746 2747 if ((mask & 0x1UL) == 0UL) continue; 2748 2749 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2750 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2751 2752 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2753 } 2754 2755 /* 2756 * Now take care of resetting the other registers 2757 */ 2758 for(i = 0; reset_others; i++, reset_others >>= 1) { 2759 2760 if ((reset_others & 0x1) == 0) continue; 2761 2762 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2763 2764 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2765 is_long_reset ? "long" : "short", i, val)); 2766 } 2767} 2768 2769static void 2770pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2771{ 2772 unsigned long mask = ovfl_regs[0]; 2773 unsigned long reset_others = 0UL; 2774 unsigned long val; 2775 int i; 2776 2777 DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); 2778 2779 if (ctx->ctx_state == PFM_CTX_MASKED) { 2780 pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); 2781 return; 2782 } 2783 2784 /* 2785 * now restore reset value on sampling overflowed counters 2786 */ 2787 mask >>= PMU_FIRST_COUNTER; 2788 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2789 2790 if ((mask & 0x1UL) == 0UL) continue; 2791 2792 val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2793 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2794 2795 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2796 2797 pfm_write_soft_counter(ctx, i, val); 2798 } 2799 2800 /* 2801 * Now take care of resetting the other registers 2802 */ 2803 for(i = 0; reset_others; i++, reset_others >>= 1) { 2804 2805 if ((reset_others & 0x1) == 0) continue; 2806 2807 val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2808 2809 if (PMD_IS_COUNTING(i)) { 2810 pfm_write_soft_counter(ctx, i, val); 2811 } else { 2812 ia64_set_pmd(i, val); 2813 } 2814 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2815 is_long_reset ? "long" : "short", i, val)); 2816 } 2817 ia64_srlz_d(); 2818} 2819 2820static int 2821pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2822{ 2823 struct task_struct *task; 2824 pfarg_reg_t *req = (pfarg_reg_t *)arg; 2825 unsigned long value, pmc_pm; 2826 unsigned long smpl_pmds, reset_pmds, impl_pmds; 2827 unsigned int cnum, reg_flags, flags, pmc_type; 2828 int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; 2829 int is_monitor, is_counting, state; 2830 int ret = -EINVAL; 2831 pfm_reg_check_t wr_func; 2832#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) 2833 2834 state = ctx->ctx_state; 2835 is_loaded = state == PFM_CTX_LOADED ? 
1 : 0; 2836 is_system = ctx->ctx_fl_system; 2837 task = ctx->ctx_task; 2838 impl_pmds = pmu_conf->impl_pmds[0]; 2839 2840 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 2841 2842 if (is_loaded) { 2843 /* 2844 * In system wide and when the context is loaded, access can only happen 2845 * when the caller is running on the CPU being monitored by the session. 2846 * It does not have to be the owner (ctx_task) of the context per se. 2847 */ 2848 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 2849 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 2850 return -EBUSY; 2851 } 2852 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 2853 } 2854 expert_mode = pfm_sysctl.expert_mode; 2855 2856 for (i = 0; i < count; i++, req++) { 2857 2858 cnum = req->reg_num; 2859 reg_flags = req->reg_flags; 2860 value = req->reg_value; 2861 smpl_pmds = req->reg_smpl_pmds[0]; 2862 reset_pmds = req->reg_reset_pmds[0]; 2863 flags = 0; 2864 2865 2866 if (cnum >= PMU_MAX_PMCS) { 2867 DPRINT(("pmc%u is invalid\n", cnum)); 2868 goto error; 2869 } 2870 2871 pmc_type = pmu_conf->pmc_desc[cnum].type; 2872 pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; 2873 is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; 2874 is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; 2875 2876 /* 2877 * we reject all non implemented PMC as well 2878 * as attempts to modify PMC[0-3] which are used 2879 * as status registers by the PMU 2880 */ 2881 if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { 2882 DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); 2883 goto error; 2884 } 2885 wr_func = pmu_conf->pmc_desc[cnum].write_check; 2886 /* 2887 * If the PMC is a monitor, then if the value is not the default: 2888 * - system-wide session: PMCx.pm=1 (privileged monitor) 2889 * - per-task : PMCx.pm=0 (user monitor) 2890 */ 2891 if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { 2892 DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", 2893 cnum, 2894 pmc_pm, 2895 is_system)); 2896 goto error; 2897 } 2898 2899 if (is_counting) { 2900 /* 2901 * enforce generation of overflow interrupt. Necessary on all 2902 * CPUs. 
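			 *
			 * A minimal (illustrative) request that ends up here, assuming a
			 * PMU where PMC4 controls PMD4 and assuming PFM_WRITE_PMCS is the
			 * command routed to pfm_write_pmcs():
			 *
			 *	pfarg_reg_t pc = {0};
			 *	pc.reg_num           = 4;
			 *	pc.reg_value         = event_encoding;	caller-chosen
			 *	pc.reg_flags         = PFM_REGFL_OVFL_NOTIFY;
			 *	pc.reg_reset_pmds[0] = 1UL << 4;
			 *	perfmonctl(fd, PFM_WRITE_PMCS, &pc, 1);
			 *
			 * The caller does not have to set pmc.oi itself; it is OR-ed in
			 * right below for every counting monitor.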
2903 */ 2904 value |= 1 << PMU_PMC_OI; 2905 2906 if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { 2907 flags |= PFM_REGFL_OVFL_NOTIFY; 2908 } 2909 2910 if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; 2911 2912 /* verify validity of smpl_pmds */ 2913 if ((smpl_pmds & impl_pmds) != smpl_pmds) { 2914 DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); 2915 goto error; 2916 } 2917 2918 /* verify validity of reset_pmds */ 2919 if ((reset_pmds & impl_pmds) != reset_pmds) { 2920 DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); 2921 goto error; 2922 } 2923 } else { 2924 if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { 2925 DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); 2926 goto error; 2927 } 2928 /* eventid on non-counting monitors are ignored */ 2929 } 2930 2931 /* 2932 * execute write checker, if any 2933 */ 2934 if (likely(expert_mode == 0 && wr_func)) { 2935 ret = (*wr_func)(task, ctx, cnum, &value, regs); 2936 if (ret) goto error; 2937 ret = -EINVAL; 2938 } 2939 2940 /* 2941 * no error on this register 2942 */ 2943 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 2944 2945 /* 2946 * Now we commit the changes to the software state 2947 */ 2948 2949 /* 2950 * update overflow information 2951 */ 2952 if (is_counting) { 2953 /* 2954 * full flag update each time a register is programmed 2955 */ 2956 ctx->ctx_pmds[cnum].flags = flags; 2957 2958 ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; 2959 ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; 2960 ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; 2961 2962 /* 2963 * Mark all PMDS to be accessed as used. 2964 * 2965 * We do not keep track of PMC because we have to 2966 * systematically restore ALL of them. 2967 * 2968 * We do not update the used_monitors mask, because 2969 * if we have not programmed them, then will be in 2970 * a quiescent state, therefore we will not need to 2971 * mask/restore then when context is MASKED. 2972 */ 2973 CTX_USED_PMD(ctx, reset_pmds); 2974 CTX_USED_PMD(ctx, smpl_pmds); 2975 /* 2976 * make sure we do not try to reset on 2977 * restart because we have established new values 2978 */ 2979 if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 2980 } 2981 /* 2982 * Needed in case the user does not initialize the equivalent 2983 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no 2984 * possible leak here. 2985 */ 2986 CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); 2987 2988 /* 2989 * keep track of the monitor PMC that we are using. 2990 * we save the value of the pmc in ctx_pmcs[] and if 2991 * the monitoring is not stopped for the context we also 2992 * place it in the saved state area so that it will be 2993 * picked up later by the context switch code. 2994 * 2995 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). 2996 * 2997 * The value in th_pmcs[] may be modified on overflow, i.e., when 2998 * monitoring needs to be stopped. 
2999 */ 3000 if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); 3001 3002 /* 3003 * update context state 3004 */ 3005 ctx->ctx_pmcs[cnum] = value; 3006 3007 if (is_loaded) { 3008 /* 3009 * write thread state 3010 */ 3011 if (is_system == 0) ctx->th_pmcs[cnum] = value; 3012 3013 /* 3014 * write hardware register if we can 3015 */ 3016 if (can_access_pmu) { 3017 ia64_set_pmc(cnum, value); 3018 } 3019#ifdef CONFIG_SMP 3020 else { 3021 /* 3022 * per-task SMP only here 3023 * 3024 * we are guaranteed that the task is not running on the other CPU, 3025 * we indicate that this PMD will need to be reloaded if the task 3026 * is rescheduled on the CPU it ran last on. 3027 */ 3028 ctx->ctx_reload_pmcs[0] |= 1UL << cnum; 3029 } 3030#endif 3031 } 3032 3033 DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", 3034 cnum, 3035 value, 3036 is_loaded, 3037 can_access_pmu, 3038 flags, 3039 ctx->ctx_all_pmcs[0], 3040 ctx->ctx_used_pmds[0], 3041 ctx->ctx_pmds[cnum].eventid, 3042 smpl_pmds, 3043 reset_pmds, 3044 ctx->ctx_reload_pmcs[0], 3045 ctx->ctx_used_monitors[0], 3046 ctx->ctx_ovfl_regs[0])); 3047 } 3048 3049 /* 3050 * make sure the changes are visible 3051 */ 3052 if (can_access_pmu) ia64_srlz_d(); 3053 3054 return 0; 3055error: 3056 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3057 return ret; 3058} 3059 3060static int 3061pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3062{ 3063 struct task_struct *task; 3064 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3065 unsigned long value, hw_value, ovfl_mask; 3066 unsigned int cnum; 3067 int i, can_access_pmu = 0, state; 3068 int is_counting, is_loaded, is_system, expert_mode; 3069 int ret = -EINVAL; 3070 pfm_reg_check_t wr_func; 3071 3072 3073 state = ctx->ctx_state; 3074 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3075 is_system = ctx->ctx_fl_system; 3076 ovfl_mask = pmu_conf->ovfl_val; 3077 task = ctx->ctx_task; 3078 3079 if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; 3080 3081 /* 3082 * on both UP and SMP, we can only write to the PMC when the task is 3083 * the owner of the local PMU. 3084 */ 3085 if (likely(is_loaded)) { 3086 /* 3087 * In system wide and when the context is loaded, access can only happen 3088 * when the caller is running on the CPU being monitored by the session. 3089 * It does not have to be the owner (ctx_task) of the context per se. 3090 */ 3091 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3092 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3093 return -EBUSY; 3094 } 3095 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; 3096 } 3097 expert_mode = pfm_sysctl.expert_mode; 3098 3099 for (i = 0; i < count; i++, req++) { 3100 3101 cnum = req->reg_num; 3102 value = req->reg_value; 3103 3104 if (!PMD_IS_IMPL(cnum)) { 3105 DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); 3106 goto abort_mission; 3107 } 3108 is_counting = PMD_IS_COUNTING(cnum); 3109 wr_func = pmu_conf->pmd_desc[cnum].write_check; 3110 3111 /* 3112 * execute write checker, if any 3113 */ 3114 if (unlikely(expert_mode == 0 && wr_func)) { 3115 unsigned long v = value; 3116 3117 ret = (*wr_func)(task, ctx, cnum, &v, regs); 3118 if (ret) goto abort_mission; 3119 3120 value = v; 3121 ret = -EINVAL; 3122 } 3123 3124 /* 3125 * no error on this register 3126 */ 3127 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 3128 3129 /* 3130 * now commit changes to software state 3131 */ 3132 hw_value = value; 3133 3134 /* 3135 * update virtualized (64bits) counter 3136 */ 3137 if (is_counting) { 3138 /* 3139 * write context state 3140 */ 3141 ctx->ctx_pmds[cnum].lval = value; 3142 3143 /* 3144 * when context is load we use the split value 3145 */ 3146 if (is_loaded) { 3147 hw_value = value & ovfl_mask; 3148 value = value & ~ovfl_mask; 3149 } 3150 } 3151 /* 3152 * update reset values (not just for counters) 3153 */ 3154 ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; 3155 ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; 3156 3157 /* 3158 * update randomization parameters (not just for counters) 3159 */ 3160 ctx->ctx_pmds[cnum].seed = req->reg_random_seed; 3161 ctx->ctx_pmds[cnum].mask = req->reg_random_mask; 3162 3163 /* 3164 * update context value 3165 */ 3166 ctx->ctx_pmds[cnum].val = value; 3167 3168 /* 3169 * Keep track of what we use 3170 * 3171 * We do not keep track of PMC because we have to 3172 * systematically restore ALL of them. 3173 */ 3174 CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); 3175 3176 /* 3177 * mark this PMD register used as well 3178 */ 3179 CTX_USED_PMD(ctx, RDEP(cnum)); 3180 3181 /* 3182 * make sure we do not try to reset on 3183 * restart because we have established new values 3184 */ 3185 if (is_counting && state == PFM_CTX_MASKED) { 3186 ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3187 } 3188 3189 if (is_loaded) { 3190 /* 3191 * write thread state 3192 */ 3193 if (is_system == 0) ctx->th_pmds[cnum] = hw_value; 3194 3195 /* 3196 * write hardware register if we can 3197 */ 3198 if (can_access_pmu) { 3199 ia64_set_pmd(cnum, hw_value); 3200 } else { 3201#ifdef CONFIG_SMP 3202 /* 3203 * we are guaranteed that the task is not running on the other CPU, 3204 * we indicate that this PMD will need to be reloaded if the task 3205 * is rescheduled on the CPU it ran last on. 3206 */ 3207 ctx->ctx_reload_pmds[0] |= 1UL << cnum; 3208#endif 3209 } 3210 } 3211 3212 DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " 3213 "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", 3214 cnum, 3215 value, 3216 is_loaded, 3217 can_access_pmu, 3218 hw_value, 3219 ctx->ctx_pmds[cnum].val, 3220 ctx->ctx_pmds[cnum].short_reset, 3221 ctx->ctx_pmds[cnum].long_reset, 3222 PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N', 3223 ctx->ctx_pmds[cnum].seed, 3224 ctx->ctx_pmds[cnum].mask, 3225 ctx->ctx_used_pmds[0], 3226 ctx->ctx_pmds[cnum].reset_pmds[0], 3227 ctx->ctx_reload_pmds[0], 3228 ctx->ctx_all_pmds[0], 3229 ctx->ctx_ovfl_regs[0])); 3230 } 3231 3232 /* 3233 * make changes visible 3234 */ 3235 if (can_access_pmu) ia64_srlz_d(); 3236 3237 return 0; 3238 3239abort_mission: 3240 /* 3241 * for now, we have only one possibility for error 3242 */ 3243 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3244 return ret; 3245} 3246 3247/* 3248 * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. 3249 * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an 3250 * interrupt is delivered during the call, it will be kept pending until we leave, making 3251 * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are 3252 * guaranteed to return consistent data to the user, it may simply be old. It is not 3253 * trivial to treat the overflow while inside the call because you may end up in 3254 * some module sampling buffer code causing deadlocks. 3255 */ 3256static int 3257pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3258{ 3259 struct task_struct *task; 3260 unsigned long val = 0UL, lval, ovfl_mask, sval; 3261 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3262 unsigned int cnum, reg_flags = 0; 3263 int i, can_access_pmu = 0, state; 3264 int is_loaded, is_system, is_counting, expert_mode; 3265 int ret = -EINVAL; 3266 pfm_reg_check_t rd_func; 3267 3268 /* 3269 * access is possible when loaded only for 3270 * self-monitoring tasks or in UP mode 3271 */ 3272 3273 state = ctx->ctx_state; 3274 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3275 is_system = ctx->ctx_fl_system; 3276 ovfl_mask = pmu_conf->ovfl_val; 3277 task = ctx->ctx_task; 3278 3279 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3280 3281 if (likely(is_loaded)) { 3282 /* 3283 * In system wide and when the context is loaded, access can only happen 3284 * when the caller is running on the CPU being monitored by the session. 3285 * It does not have to be the owner (ctx_task) of the context per se. 3286 */ 3287 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3288 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3289 return -EBUSY; 3290 } 3291 /* 3292 * this can be true when not self-monitoring only in UP 3293 */ 3294 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3295 3296 if (can_access_pmu) ia64_srlz_d(); 3297 } 3298 expert_mode = pfm_sysctl.expert_mode; 3299 3300 DPRINT(("ld=%d apmu=%d ctx_state=%d\n", 3301 is_loaded, 3302 can_access_pmu, 3303 state)); 3304 3305 /* 3306 * on both UP and SMP, we can only read the PMD from the hardware register when 3307 * the task is the owner of the local PMU. 3308 */ 3309 3310 for (i = 0; i < count; i++, req++) { 3311 3312 cnum = req->reg_num; 3313 reg_flags = req->reg_flags; 3314 3315 if (unlikely(!PMD_IS_IMPL(cnum))) goto error; 3316 /* 3317 * we can only read the register that we use. That includes 3318 * the one we explicitly initialize AND the one we want included 3319 * in the sampling buffer (smpl_regs). 
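		 *
		 * For example, if PMC4 was programmed via pfm_write_pmcs() with
		 * reg_smpl_pmds[0] or reg_reset_pmds[0] covering PMD5, then PMD5 was
		 * marked used (CTX_USED_PMD) and is readable here even though it was
		 * never written through pfm_write_pmds(). For counting PMDs the value
		 * returned below is rebuilt as (hw_value & ovfl_mask) + ctx_pmds[].val,
		 * i.e., the live hardware bits plus the software-maintained upper bits.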
3320 * 3321 * Having this restriction allows optimization in the ctxsw routine 3322 * without compromising security (leaks) 3323 */ 3324 if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; 3325 3326 sval = ctx->ctx_pmds[cnum].val; 3327 lval = ctx->ctx_pmds[cnum].lval; 3328 is_counting = PMD_IS_COUNTING(cnum); 3329 3330 /* 3331 * If the task is not the current one, then we check if the 3332 * PMU state is still in the local live register due to lazy ctxsw. 3333 * If true, then we read directly from the registers. 3334 */ 3335 if (can_access_pmu){ 3336 val = ia64_get_pmd(cnum); 3337 } else { 3338 /* 3339 * context has been saved 3340 * if context is zombie, then task does not exist anymore. 3341 * In this case, we use the full value saved in the context (pfm_flush_regs()). 3342 */ 3343 val = is_loaded ? ctx->th_pmds[cnum] : 0UL; 3344 } 3345 rd_func = pmu_conf->pmd_desc[cnum].read_check; 3346 3347 if (is_counting) { 3348 /* 3349 * XXX: need to check for overflow when loaded 3350 */ 3351 val &= ovfl_mask; 3352 val += sval; 3353 } 3354 3355 /* 3356 * execute read checker, if any 3357 */ 3358 if (unlikely(expert_mode == 0 && rd_func)) { 3359 unsigned long v = val; 3360 ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); 3361 if (ret) goto error; 3362 val = v; 3363 ret = -EINVAL; 3364 } 3365 3366 PFM_REG_RETFLAG_SET(reg_flags, 0); 3367 3368 DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); 3369 3370 /* 3371 * update register return value, abort all if problem during copy. 3372 * we only modify the reg_flags field. no check mode is fine because 3373 * access has been verified upfront in sys_perfmonctl(). 3374 */ 3375 req->reg_value = val; 3376 req->reg_flags = reg_flags; 3377 req->reg_last_reset_val = lval; 3378 } 3379 3380 return 0; 3381 3382error: 3383 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3384 return ret; 3385} 3386 3387int 3388pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3389{ 3390 pfm_context_t *ctx; 3391 3392 if (req == NULL) return -EINVAL; 3393 3394 ctx = GET_PMU_CTX(); 3395 3396 if (ctx == NULL) return -EINVAL; 3397 3398 /* 3399 * for now limit to current task, which is enough when calling 3400 * from overflow handler 3401 */ 3402 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3403 3404 return pfm_write_pmcs(ctx, req, nreq, regs); 3405} 3406EXPORT_SYMBOL(pfm_mod_write_pmcs); 3407 3408int 3409pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3410{ 3411 pfm_context_t *ctx; 3412 3413 if (req == NULL) return -EINVAL; 3414 3415 ctx = GET_PMU_CTX(); 3416 3417 if (ctx == NULL) return -EINVAL; 3418 3419 /* 3420 * for now limit to current task, which is enough when calling 3421 * from overflow handler 3422 */ 3423 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3424 3425 return pfm_read_pmds(ctx, req, nreq, regs); 3426} 3427EXPORT_SYMBOL(pfm_mod_read_pmds); 3428 3429/* 3430 * Only call this function when a process it trying to 3431 * write the debug registers (reading is always allowed) 3432 */ 3433int 3434pfm_use_debug_registers(struct task_struct *task) 3435{ 3436 pfm_context_t *ctx = task->thread.pfm_context; 3437 unsigned long flags; 3438 int ret = 0; 3439 3440 if (pmu_conf->use_rr_dbregs == 0) return 0; 3441 3442 DPRINT(("called for [%d]\n", task_pid_nr(task))); 3443 3444 /* 3445 * do it only once 3446 */ 3447 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; 3448 3449 /* 3450 * Even on SMP, we do not need to use an atomic here because 3451 * 
the only way in is via ptrace() and this is possible only when the 3452 * process is stopped. Even in the case where the ctxsw out is not totally 3453 * completed by the time we come here, there is no way the 'stopped' process 3454 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 3455 * So this is always safe. 3456 */ 3457 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; 3458 3459 LOCK_PFS(flags); 3460 3461 /* 3462 * We cannot allow setting breakpoints when system wide monitoring 3463 * sessions are using the debug registers. 3464 */ 3465 if (pfm_sessions.pfs_sys_use_dbregs> 0) 3466 ret = -1; 3467 else 3468 pfm_sessions.pfs_ptrace_use_dbregs++; 3469 3470 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3471 pfm_sessions.pfs_ptrace_use_dbregs, 3472 pfm_sessions.pfs_sys_use_dbregs, 3473 task_pid_nr(task), ret)); 3474 3475 UNLOCK_PFS(flags); 3476 3477 return ret; 3478} 3479 3480/* 3481 * This function is called for every task that exits with the 3482 * IA64_THREAD_DBG_VALID set. This indicates a task which was 3483 * able to use the debug registers for debugging purposes via 3484 * ptrace(). Therefore we know it was not using them for 3485 * performance monitoring, so we only decrement the number 3486 * of "ptraced" debug register users to keep the count up to date 3487 */ 3488int 3489pfm_release_debug_registers(struct task_struct *task) 3490{ 3491 unsigned long flags; 3492 int ret; 3493 3494 if (pmu_conf->use_rr_dbregs == 0) return 0; 3495 3496 LOCK_PFS(flags); 3497 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3498 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); 3499 ret = -1; 3500 } else { 3501 pfm_sessions.pfs_ptrace_use_dbregs--; 3502 ret = 0; 3503 } 3504 UNLOCK_PFS(flags); 3505 3506 return ret; 3507} 3508 3509static int 3510pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3511{ 3512 struct task_struct *task; 3513 pfm_buffer_fmt_t *fmt; 3514 pfm_ovfl_ctrl_t rst_ctrl; 3515 int state, is_system; 3516 int ret = 0; 3517 3518 state = ctx->ctx_state; 3519 fmt = ctx->ctx_buf_fmt; 3520 is_system = ctx->ctx_fl_system; 3521 task = PFM_CTX_TASK(ctx); 3522 3523 switch(state) { 3524 case PFM_CTX_MASKED: 3525 break; 3526 case PFM_CTX_LOADED: 3527 if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; 3528 /* fall through */ 3529 case PFM_CTX_UNLOADED: 3530 case PFM_CTX_ZOMBIE: 3531 DPRINT(("invalid state=%d\n", state)); 3532 return -EBUSY; 3533 default: 3534 DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); 3535 return -EINVAL; 3536 } 3537 3538 /* 3539 * In system wide and when the context is loaded, access can only happen 3540 * when the caller is running on the CPU being monitored by the session. 3541 * It does not have to be the owner (ctx_task) of the context per se. 
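	 *
	 * Typical user-level cycle that reaches this point (illustrative; msg is
	 * the notification message read from the context fd, see pfm_read in
	 * pfm_file_ops, and the PFM_RESTART command name is assumed from the
	 * perfmon ABI):
	 *
	 *	read(fd, &msg, sizeof(msg));		wait for the overflow message
	 *	... process the sampling buffer ...
	 *	perfmonctl(fd, PFM_RESTART, NULL, 0);	unmask and resume monitoring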
3542 */ 3543 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3544 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3545 return -EBUSY; 3546 } 3547 3548 /* sanity check */ 3549 if (unlikely(task == NULL)) { 3550 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); 3551 return -EINVAL; 3552 } 3553 3554 if (task == current || is_system) { 3555 3556 fmt = ctx->ctx_buf_fmt; 3557 3558 DPRINT(("restarting self %d ovfl=0x%lx\n", 3559 task_pid_nr(task), 3560 ctx->ctx_ovfl_regs[0])); 3561 3562 if (CTX_HAS_SMPL(ctx)) { 3563 3564 prefetch(ctx->ctx_smpl_hdr); 3565 3566 rst_ctrl.bits.mask_monitoring = 0; 3567 rst_ctrl.bits.reset_ovfl_pmds = 0; 3568 3569 if (state == PFM_CTX_LOADED) 3570 ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3571 else 3572 ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3573 } else { 3574 rst_ctrl.bits.mask_monitoring = 0; 3575 rst_ctrl.bits.reset_ovfl_pmds = 1; 3576 } 3577 3578 if (ret == 0) { 3579 if (rst_ctrl.bits.reset_ovfl_pmds) 3580 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3581 3582 if (rst_ctrl.bits.mask_monitoring == 0) { 3583 DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); 3584 3585 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3586 } else { 3587 DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); 3588 3589 // cannot use pfm_stop_monitoring(task, regs); 3590 } 3591 } 3592 /* 3593 * clear overflowed PMD mask to remove any stale information 3594 */ 3595 ctx->ctx_ovfl_regs[0] = 0UL; 3596 3597 /* 3598 * back to LOADED state 3599 */ 3600 ctx->ctx_state = PFM_CTX_LOADED; 3601 3602 /* 3603 * XXX: not really useful for self monitoring 3604 */ 3605 ctx->ctx_fl_can_restart = 0; 3606 3607 return 0; 3608 } 3609 3610 /* 3611 * restart another task 3612 */ 3613 3614 /* 3615 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 3616 * one is seen by the task. 3617 */ 3618 if (state == PFM_CTX_MASKED) { 3619 if (ctx->ctx_fl_can_restart == 0) return -EINVAL; 3620 /* 3621 * will prevent subsequent restart before this one is 3622 * seen by other task 3623 */ 3624 ctx->ctx_fl_can_restart = 0; 3625 } 3626 3627 /* 3628 * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. 3629 * the task is blocked or on its way to block. That's the normal 3630 * restart path. If the monitoring is not masked, then the task 3631 * can be actively monitoring and we cannot directly intervene. 3632 * Therefore we use the trap mechanism to catch the task and 3633 * force it to reset the buffer/reset PMDs. 3634 * 3635 * if non-blocking, then we ensure that the task will go into 3636 * pfm_handle_work() before returning to user mode. 3637 * 3638 * We cannot explicitly reset another task, it MUST always 3639 * be done by the task itself. This works for system wide because 3640 * the tool that is controlling the session is logically doing 3641 * "self-monitoring". 
3642 */ 3643 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3644 DPRINT(("unblocking [%d]\n", task_pid_nr(task))); 3645 complete(&ctx->ctx_restart_done); 3646 } else { 3647 DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); 3648 3649 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3650 3651 PFM_SET_WORK_PENDING(task, 1); 3652 3653 set_notify_resume(task); 3654 3655 /* 3656 * XXX: send reschedule if task runs on another CPU 3657 */ 3658 } 3659 return 0; 3660} 3661 3662static int 3663pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3664{ 3665 unsigned int m = *(unsigned int *)arg; 3666 3667 pfm_sysctl.debug = m == 0 ? 0 : 1; 3668 3669 printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); 3670 3671 if (m == 0) { 3672 memset(pfm_stats, 0, sizeof(pfm_stats)); 3673 for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; 3674 } 3675 return 0; 3676} 3677 3678/* 3679 * arg can be NULL and count can be zero for this function 3680 */ 3681static int 3682pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3683{ 3684 struct thread_struct *thread = NULL; 3685 struct task_struct *task; 3686 pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; 3687 unsigned long flags; 3688 dbreg_t dbreg; 3689 unsigned int rnum; 3690 int first_time; 3691 int ret = 0, state; 3692 int i, can_access_pmu = 0; 3693 int is_system, is_loaded; 3694 3695 if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; 3696 3697 state = ctx->ctx_state; 3698 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3699 is_system = ctx->ctx_fl_system; 3700 task = ctx->ctx_task; 3701 3702 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3703 3704 /* 3705 * on both UP and SMP, we can only write to the PMC when the task is 3706 * the owner of the local PMU. 3707 */ 3708 if (is_loaded) { 3709 thread = &task->thread; 3710 /* 3711 * In system wide and when the context is loaded, access can only happen 3712 * when the caller is running on the CPU being monitored by the session. 3713 * It does not have to be the owner (ctx_task) of the context per se. 3714 */ 3715 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3716 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3717 return -EBUSY; 3718 } 3719 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3720 } 3721 3722 /* 3723 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w 3724 * ensuring that no real breakpoint can be installed via this call. 3725 * 3726 * IMPORTANT: regs can be NULL in this function 3727 */ 3728 3729 first_time = ctx->ctx_fl_using_dbreg == 0; 3730 3731 /* 3732 * don't bother if we are loaded and task is being debugged 3733 */ 3734 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3735 DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); 3736 return -EBUSY; 3737 } 3738 3739 /* 3740 * check for debug registers in system wide mode 3741 * 3742 * If though a check is done in pfm_context_load(), 3743 * we must repeat it here, in case the registers are 3744 * written after the context is loaded 3745 */ 3746 if (is_loaded) { 3747 LOCK_PFS(flags); 3748 3749 if (first_time && is_system) { 3750 if (pfm_sessions.pfs_ptrace_use_dbregs) 3751 ret = -EBUSY; 3752 else 3753 pfm_sessions.pfs_sys_use_dbregs++; 3754 } 3755 UNLOCK_PFS(flags); 3756 } 3757 3758 if (ret != 0) return ret; 3759 3760 /* 3761 * mark ourself as user of the debug registers for 3762 * perfmon purposes. 
3763 */ 3764 ctx->ctx_fl_using_dbreg = 1; 3765 3766 /* 3767 * clear hardware registers to make sure we don't 3768 * pick up stale state. 3769 * 3770 * for a system wide session, we do not use 3771 * thread.dbr, thread.ibr because this process 3772 * never leaves the current CPU and the state 3773 * is shared by all processes running on it 3774 */ 3775 if (first_time && can_access_pmu) { 3776 DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task))); 3777 for (i=0; i < pmu_conf->num_ibrs; i++) { 3778 ia64_set_ibr(i, 0UL); 3779 ia64_dv_serialize_instruction(); 3780 } 3781 ia64_srlz_i(); 3782 for (i=0; i < pmu_conf->num_dbrs; i++) { 3783 ia64_set_dbr(i, 0UL); 3784 ia64_dv_serialize_data(); 3785 } 3786 ia64_srlz_d(); 3787 } 3788 3789 /* 3790 * Now install the values into the registers 3791 */ 3792 for (i = 0; i < count; i++, req++) { 3793 3794 rnum = req->dbreg_num; 3795 dbreg.val = req->dbreg_value; 3796 3797 ret = -EINVAL; 3798 3799 if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { 3800 DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", 3801 rnum, dbreg.val, mode, i, count)); 3802 3803 goto abort_mission; 3804 } 3805 3806 /* 3807 * make sure we do not install enabled breakpoint 3808 */ 3809 if (rnum & 0x1) { 3810 if (mode == PFM_CODE_RR) 3811 dbreg.ibr.ibr_x = 0; 3812 else 3813 dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; 3814 } 3815 3816 PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); 3817 3818 /* 3819 * Debug registers, just like PMC, can only be modified 3820 * by a kernel call. Moreover, perfmon() access to those 3821 * registers are centralized in this routine. The hardware 3822 * does not modify the value of these registers, therefore, 3823 * if we save them as they are written, we can avoid having 3824 * to save them on context switch out. This is made possible 3825 * by the fact that when perfmon uses debug registers, ptrace() 3826 * won't be able to modify them concurrently. 
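	 *
	 * Note that the values mirrored into ctx_ibrs[] and ctx_dbrs[] below are
	 * the sanitized ones: for an odd-numbered register (the one carrying the
	 * enable bits, see the rnum & 0x1 test above) the x, r and w bits were
	 * cleared, so the saved state can never re-arm a real breakpoint when it
	 * is reloaded on context switch in.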
3827 */ 3828 if (mode == PFM_CODE_RR) { 3829 CTX_USED_IBR(ctx, rnum); 3830 3831 if (can_access_pmu) { 3832 ia64_set_ibr(rnum, dbreg.val); 3833 ia64_dv_serialize_instruction(); 3834 } 3835 3836 ctx->ctx_ibrs[rnum] = dbreg.val; 3837 3838 DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", 3839 rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); 3840 } else { 3841 CTX_USED_DBR(ctx, rnum); 3842 3843 if (can_access_pmu) { 3844 ia64_set_dbr(rnum, dbreg.val); 3845 ia64_dv_serialize_data(); 3846 } 3847 ctx->ctx_dbrs[rnum] = dbreg.val; 3848 3849 DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", 3850 rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); 3851 } 3852 } 3853 3854 return 0; 3855 3856abort_mission: 3857 /* 3858 * in case it was our first attempt, we undo the global modifications 3859 */ 3860 if (first_time) { 3861 LOCK_PFS(flags); 3862 if (ctx->ctx_fl_system) { 3863 pfm_sessions.pfs_sys_use_dbregs--; 3864 } 3865 UNLOCK_PFS(flags); 3866 ctx->ctx_fl_using_dbreg = 0; 3867 } 3868 /* 3869 * install error return flag 3870 */ 3871 PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); 3872 3873 return ret; 3874} 3875 3876static int 3877pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3878{ 3879 return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); 3880} 3881 3882static int 3883pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3884{ 3885 return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); 3886} 3887 3888int 3889pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3890{ 3891 pfm_context_t *ctx; 3892 3893 if (req == NULL) return -EINVAL; 3894 3895 ctx = GET_PMU_CTX(); 3896 3897 if (ctx == NULL) return -EINVAL; 3898 3899 /* 3900 * for now limit to current task, which is enough when calling 3901 * from overflow handler 3902 */ 3903 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3904 3905 return pfm_write_ibrs(ctx, req, nreq, regs); 3906} 3907EXPORT_SYMBOL(pfm_mod_write_ibrs); 3908 3909int 3910pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3911{ 3912 pfm_context_t *ctx; 3913 3914 if (req == NULL) return -EINVAL; 3915 3916 ctx = GET_PMU_CTX(); 3917 3918 if (ctx == NULL) return -EINVAL; 3919 3920 /* 3921 * for now limit to current task, which is enough when calling 3922 * from overflow handler 3923 */ 3924 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3925 3926 return pfm_write_dbrs(ctx, req, nreq, regs); 3927} 3928EXPORT_SYMBOL(pfm_mod_write_dbrs); 3929 3930 3931static int 3932pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3933{ 3934 pfarg_features_t *req = (pfarg_features_t *)arg; 3935 3936 req->ft_version = PFM_VERSION; 3937 return 0; 3938} 3939 3940static int 3941pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3942{ 3943 struct pt_regs *tregs; 3944 struct task_struct *task = PFM_CTX_TASK(ctx); 3945 int state, is_system; 3946 3947 state = ctx->ctx_state; 3948 is_system = ctx->ctx_fl_system; 3949 3950 /* 3951 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) 3952 */ 3953 if (state == PFM_CTX_UNLOADED) return -EINVAL; 3954 3955 /* 3956 * In system wide and when the context is loaded, access can only happen 3957 * when the caller is running on the CPU being monitored by the session. 3958 * It does not have to be the owner (ctx_task) of the context per se. 
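	 *
	 * Illustrative usage for a system wide session (the command name and the
	 * affinity step are assumptions, not defined in this file): the
	 * controlling tool first binds itself to the monitored CPU, then stops:
	 *
	 *	CPU_ZERO(&set); CPU_SET(monitored_cpu, &set);
	 *	sched_setaffinity(0, sizeof(set), &set);
	 *	perfmonctl(fd, PFM_STOP, NULL, 0);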
3959 */ 3960 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3961 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3962 return -EBUSY; 3963 } 3964 DPRINT(("task [%d] ctx_state=%d is_system=%d\n", 3965 task_pid_nr(PFM_CTX_TASK(ctx)), 3966 state, 3967 is_system)); 3968 /* 3969 * in system mode, we need to update the PMU directly 3970 * and the user level state of the caller, which may not 3971 * necessarily be the creator of the context. 3972 */ 3973 if (is_system) { 3974 /* 3975 * Update local PMU first 3976 * 3977 * disable dcr pp 3978 */ 3979 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 3980 ia64_srlz_i(); 3981 3982 /* 3983 * update local cpuinfo 3984 */ 3985 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 3986 3987 /* 3988 * stop monitoring, does srlz.i 3989 */ 3990 pfm_clear_psr_pp(); 3991 3992 /* 3993 * stop monitoring in the caller 3994 */ 3995 ia64_psr(regs)->pp = 0; 3996 3997 return 0; 3998 } 3999 /* 4000 * per-task mode 4001 */ 4002 4003 if (task == current) { 4004 /* stop monitoring at kernel level */ 4005 pfm_clear_psr_up(); 4006 4007 /* 4008 * stop monitoring at the user level 4009 */ 4010 ia64_psr(regs)->up = 0; 4011 } else { 4012 tregs = task_pt_regs(task); 4013 4014 /* 4015 * stop monitoring at the user level 4016 */ 4017 ia64_psr(tregs)->up = 0; 4018 4019 /* 4020 * monitoring disabled in kernel at next reschedule 4021 */ 4022 ctx->ctx_saved_psr_up = 0; 4023 DPRINT(("task=[%d]\n", task_pid_nr(task))); 4024 } 4025 return 0; 4026} 4027 4028 4029static int 4030pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4031{ 4032 struct pt_regs *tregs; 4033 int state, is_system; 4034 4035 state = ctx->ctx_state; 4036 is_system = ctx->ctx_fl_system; 4037 4038 if (state != PFM_CTX_LOADED) return -EINVAL; 4039 4040 /* 4041 * In system wide and when the context is loaded, access can only happen 4042 * when the caller is running on the CPU being monitored by the session. 4043 * It does not have to be the owner (ctx_task) of the context per se. 4044 */ 4045 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 4046 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 4047 return -EBUSY; 4048 } 4049 4050 /* 4051 * in system mode, we need to update the PMU directly 4052 * and the user level state of the caller, which may not 4053 * necessarily be the creator of the context. 
4054 */ 4055 if (is_system) { 4056 4057 /* 4058 * set user level psr.pp for the caller 4059 */ 4060 ia64_psr(regs)->pp = 1; 4061 4062 /* 4063 * now update the local PMU and cpuinfo 4064 */ 4065 PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); 4066 4067 /* 4068 * start monitoring at kernel level 4069 */ 4070 pfm_set_psr_pp(); 4071 4072 /* enable dcr pp */ 4073 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 4074 ia64_srlz_i(); 4075 4076 return 0; 4077 } 4078 4079 /* 4080 * per-process mode 4081 */ 4082 4083 if (ctx->ctx_task == current) { 4084 4085 /* start monitoring at kernel level */ 4086 pfm_set_psr_up(); 4087 4088 /* 4089 * activate monitoring at user level 4090 */ 4091 ia64_psr(regs)->up = 1; 4092 4093 } else { 4094 tregs = task_pt_regs(ctx->ctx_task); 4095 4096 /* 4097 * start monitoring at the kernel level the next 4098 * time the task is scheduled 4099 */ 4100 ctx->ctx_saved_psr_up = IA64_PSR_UP; 4101 4102 /* 4103 * activate monitoring at user level 4104 */ 4105 ia64_psr(tregs)->up = 1; 4106 } 4107 return 0; 4108} 4109 4110static int 4111pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4112{ 4113 pfarg_reg_t *req = (pfarg_reg_t *)arg; 4114 unsigned int cnum; 4115 int i; 4116 int ret = -EINVAL; 4117 4118 for (i = 0; i < count; i++, req++) { 4119 4120 cnum = req->reg_num; 4121 4122 if (!PMC_IS_IMPL(cnum)) goto abort_mission; 4123 4124 req->reg_value = PMC_DFL_VAL(cnum); 4125 4126 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 4127 4128 DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); 4129 } 4130 return 0; 4131 4132abort_mission: 4133 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 4134 return ret; 4135} 4136 4137static int 4138pfm_check_task_exist(pfm_context_t *ctx) 4139{ 4140 struct task_struct *g, *t; 4141 int ret = -ESRCH; 4142 4143 read_lock(&tasklist_lock); 4144 4145 do_each_thread (g, t) { 4146 if (t->thread.pfm_context == ctx) { 4147 ret = 0; 4148 goto out; 4149 } 4150 } while_each_thread (g, t); 4151out: 4152 read_unlock(&tasklist_lock); 4153 4154 DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); 4155 4156 return ret; 4157} 4158 4159static int 4160pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4161{ 4162 struct task_struct *task; 4163 struct thread_struct *thread; 4164 struct pfm_context_t *old; 4165 unsigned long flags; 4166#ifndef CONFIG_SMP 4167 struct task_struct *owner_task = NULL; 4168#endif 4169 pfarg_load_t *req = (pfarg_load_t *)arg; 4170 unsigned long *pmcs_source, *pmds_source; 4171 int the_cpu; 4172 int ret = 0; 4173 int state, is_system, set_dbregs = 0; 4174 4175 state = ctx->ctx_state; 4176 is_system = ctx->ctx_fl_system; 4177 /* 4178 * can only load from unloaded or terminated state 4179 */ 4180 if (state != PFM_CTX_UNLOADED) { 4181 DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", 4182 req->load_pid, 4183 ctx->ctx_state)); 4184 return -EBUSY; 4185 } 4186 4187 DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); 4188 4189 if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { 4190 DPRINT(("cannot use blocking mode on self\n")); 4191 return -EINVAL; 4192 } 4193 4194 ret = pfm_get_task(ctx, req->load_pid, &task); 4195 if (ret) { 4196 DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); 4197 return ret; 4198 } 4199 4200 ret = -EINVAL; 4201 4202 /* 4203 * system wide is self monitoring only 4204 */ 4205 if (is_system && task != current) { 4206 DPRINT(("system wide is self monitoring only 
load_pid=%d\n",
4207 req->load_pid));
4208 goto error;
4209 }
4210
4211 thread = &task->thread;
4212
4213 ret = 0;
4214 /*
4215 * cannot load a context which is using range restrictions,
4216 * into a task that is being debugged.
4217 */
4218 if (ctx->ctx_fl_using_dbreg) {
4219 if (thread->flags & IA64_THREAD_DBG_VALID) {
4220 ret = -EBUSY;
4221 DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
4222 goto error;
4223 }
4224 LOCK_PFS(flags);
4225
4226 if (is_system) {
4227 if (pfm_sessions.pfs_ptrace_use_dbregs) {
4228 DPRINT(("cannot load [%d] dbregs in use\n",
4229 task_pid_nr(task)));
4230 ret = -EBUSY;
4231 } else {
4232 pfm_sessions.pfs_sys_use_dbregs++;
4233 DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
4234 set_dbregs = 1;
4235 }
4236 }
4237
4238 UNLOCK_PFS(flags);
4239
4240 if (ret) goto error;
4241 }
4242
4243 /*
4244 * SMP system-wide monitoring implies self-monitoring.
4245 *
4246 * The programming model expects the task to
4247 * be pinned on a CPU throughout the session.
4248 * Here we take note of the current CPU at the
4249 * time the context is loaded. No call from
4250 * another CPU will be allowed.
4251 *
4252 * The pinning via sched_setaffinity()
4253 * must be done by the calling task prior
4254 * to this call.
4255 *
4256 * systemwide: keep track of CPU this session is supposed to run on
4257 */
4258 the_cpu = ctx->ctx_cpu = smp_processor_id();
4259
4260 ret = -EBUSY;
4261 /*
4262 * now reserve the session
4263 */
4264 ret = pfm_reserve_session(current, is_system, the_cpu);
4265 if (ret) goto error;
4266
4267 /*
4268 * task is necessarily stopped at this point.
4269 *
4270 * If the previous context was zombie, then it got removed in
4271 * pfm_save_regs(). Therefore we should not see it here.
4272 * If we see a context, then this is an active context 4273 * 4274 * XXX: needs to be atomic 4275 */ 4276 DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", 4277 thread->pfm_context, ctx)); 4278 4279 ret = -EBUSY; 4280 old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); 4281 if (old != NULL) { 4282 DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); 4283 goto error_unres; 4284 } 4285 4286 pfm_reset_msgq(ctx); 4287 4288 ctx->ctx_state = PFM_CTX_LOADED; 4289 4290 /* 4291 * link context to task 4292 */ 4293 ctx->ctx_task = task; 4294 4295 if (is_system) { 4296 /* 4297 * we load as stopped 4298 */ 4299 PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); 4300 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 4301 4302 if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); 4303 } else { 4304 thread->flags |= IA64_THREAD_PM_VALID; 4305 } 4306 4307 /* 4308 * propagate into thread-state 4309 */ 4310 pfm_copy_pmds(task, ctx); 4311 pfm_copy_pmcs(task, ctx); 4312 4313 pmcs_source = ctx->th_pmcs; 4314 pmds_source = ctx->th_pmds; 4315 4316 /* 4317 * always the case for system-wide 4318 */ 4319 if (task == current) { 4320 4321 if (is_system == 0) { 4322 4323 /* allow user level control */ 4324 ia64_psr(regs)->sp = 0; 4325 DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task))); 4326 4327 SET_LAST_CPU(ctx, smp_processor_id()); 4328 INC_ACTIVATION(); 4329 SET_ACTIVATION(ctx); 4330#ifndef CONFIG_SMP 4331 /* 4332 * push the other task out, if any 4333 */ 4334 owner_task = GET_PMU_OWNER(); 4335 if (owner_task) pfm_lazy_save_regs(owner_task); 4336#endif 4337 } 4338 /* 4339 * load all PMD from ctx to PMU (as opposed to thread state) 4340 * restore all PMC from ctx to PMU 4341 */ 4342 pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); 4343 pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); 4344 4345 ctx->ctx_reload_pmcs[0] = 0UL; 4346 ctx->ctx_reload_pmds[0] = 0UL; 4347 4348 /* 4349 * guaranteed safe by earlier check against DBG_VALID 4350 */ 4351 if (ctx->ctx_fl_using_dbreg) { 4352 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 4353 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 4354 } 4355 /* 4356 * set new ownership 4357 */ 4358 SET_PMU_OWNER(task, ctx); 4359 4360 DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task))); 4361 } else { 4362 /* 4363 * when not current, task MUST be stopped, so this is safe 4364 */ 4365 regs = task_pt_regs(task); 4366 4367 /* force a full reload */ 4368 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4369 SET_LAST_CPU(ctx, -1); 4370 4371 /* initial saved psr (stopped) */ 4372 ctx->ctx_saved_psr_up = 0UL; 4373 ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; 4374 } 4375 4376 ret = 0; 4377 4378error_unres: 4379 if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); 4380error: 4381 /* 4382 * we must undo the dbregs setting (for system-wide) 4383 */ 4384 if (ret && set_dbregs) { 4385 LOCK_PFS(flags); 4386 pfm_sessions.pfs_sys_use_dbregs--; 4387 UNLOCK_PFS(flags); 4388 } 4389 /* 4390 * release task, there is now a link with the context 4391 */ 4392 if (is_system == 0 && task != current) { 4393 pfm_put_task(task); 4394 4395 if (ret == 0) { 4396 ret = pfm_check_task_exist(ctx); 4397 if (ret) { 4398 ctx->ctx_state = PFM_CTX_UNLOADED; 4399 ctx->ctx_task = NULL; 4400 } 4401 } 4402 } 4403 return ret; 4404} 4405 4406/* 4407 * in this function, we do not need to increase the use count 4408 * for the task via get_task_struct(), because we hold the 4409 * context lock. 
If the task were to disappear while having 4410 * a context attached, it would go through pfm_exit_thread() 4411 * which also grabs the context lock and would therefore be blocked 4412 * until we are here. 4413 */ 4414static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); 4415 4416static int 4417pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4418{ 4419 struct task_struct *task = PFM_CTX_TASK(ctx); 4420 struct pt_regs *tregs; 4421 int prev_state, is_system; 4422 int ret; 4423 4424 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1)); 4425 4426 prev_state = ctx->ctx_state; 4427 is_system = ctx->ctx_fl_system; 4428 4429 /* 4430 * unload only when necessary 4431 */ 4432 if (prev_state == PFM_CTX_UNLOADED) { 4433 DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); 4434 return 0; 4435 } 4436 4437 /* 4438 * clear psr and dcr bits 4439 */ 4440 ret = pfm_stop(ctx, NULL, 0, regs); 4441 if (ret) return ret; 4442 4443 ctx->ctx_state = PFM_CTX_UNLOADED; 4444 4445 /* 4446 * in system mode, we need to update the PMU directly 4447 * and the user level state of the caller, which may not 4448 * necessarily be the creator of the context. 4449 */ 4450 if (is_system) { 4451 4452 /* 4453 * Update cpuinfo 4454 * 4455 * local PMU is taken care of in pfm_stop() 4456 */ 4457 PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); 4458 PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); 4459 4460 /* 4461 * save PMDs in context 4462 * release ownership 4463 */ 4464 pfm_flush_pmds(current, ctx); 4465 4466 /* 4467 * at this point we are done with the PMU 4468 * so we can unreserve the resource. 4469 */ 4470 if (prev_state != PFM_CTX_ZOMBIE) 4471 pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); 4472 4473 /* 4474 * disconnect context from task 4475 */ 4476 task->thread.pfm_context = NULL; 4477 /* 4478 * disconnect task from context 4479 */ 4480 ctx->ctx_task = NULL; 4481 4482 /* 4483 * There is nothing more to cleanup here. 4484 */ 4485 return 0; 4486 } 4487 4488 /* 4489 * per-task mode 4490 */ 4491 tregs = task == current ? regs : task_pt_regs(task); 4492 4493 if (task == current) { 4494 /* 4495 * cancel user level control 4496 */ 4497 ia64_psr(regs)->sp = 1; 4498 4499 DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task))); 4500 } 4501 /* 4502 * save PMDs to context 4503 * release ownership 4504 */ 4505 pfm_flush_pmds(task, ctx); 4506 4507 /* 4508 * at this point we are done with the PMU 4509 * so we can unreserve the resource. 4510 * 4511 * when state was ZOMBIE, we have already unreserved. 
4512 */ 4513 if (prev_state != PFM_CTX_ZOMBIE) 4514 pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); 4515 4516 /* 4517 * reset activation counter and psr 4518 */ 4519 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4520 SET_LAST_CPU(ctx, -1); 4521 4522 /* 4523 * PMU state will not be restored 4524 */ 4525 task->thread.flags &= ~IA64_THREAD_PM_VALID; 4526 4527 /* 4528 * break links between context and task 4529 */ 4530 task->thread.pfm_context = NULL; 4531 ctx->ctx_task = NULL; 4532 4533 PFM_SET_WORK_PENDING(task, 0); 4534 4535 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4536 ctx->ctx_fl_can_restart = 0; 4537 ctx->ctx_fl_going_zombie = 0; 4538 4539 DPRINT(("disconnected [%d] from context\n", task_pid_nr(task))); 4540 4541 return 0; 4542} 4543 4544 4545/* 4546 * called only from exit_thread(): task == current 4547 * we come here only if current has a context attached (loaded or masked) 4548 */ 4549void 4550pfm_exit_thread(struct task_struct *task) 4551{ 4552 pfm_context_t *ctx; 4553 unsigned long flags; 4554 struct pt_regs *regs = task_pt_regs(task); 4555 int ret, state; 4556 int free_ok = 0; 4557 4558 ctx = PFM_GET_CTX(task); 4559 4560 PROTECT_CTX(ctx, flags); 4561 4562 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task))); 4563 4564 state = ctx->ctx_state; 4565 switch(state) { 4566 case PFM_CTX_UNLOADED: 4567 /* 4568 * only comes to this function if pfm_context is not NULL, i.e., cannot 4569 * be in unloaded state 4570 */ 4571 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task)); 4572 break; 4573 case PFM_CTX_LOADED: 4574 case PFM_CTX_MASKED: 4575 ret = pfm_context_unload(ctx, NULL, 0, regs); 4576 if (ret) { 4577 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4578 } 4579 DPRINT(("ctx unloaded for current state was %d\n", state)); 4580 4581 pfm_end_notify_user(ctx); 4582 break; 4583 case PFM_CTX_ZOMBIE: 4584 ret = pfm_context_unload(ctx, NULL, 0, regs); 4585 if (ret) { 4586 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4587 } 4588 free_ok = 1; 4589 break; 4590 default: 4591 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state); 4592 break; 4593 } 4594 UNPROTECT_CTX(ctx, flags); 4595 4596 { u64 psr = pfm_get_psr(); 4597 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 4598 BUG_ON(GET_PMU_OWNER()); 4599 BUG_ON(ia64_psr(regs)->up); 4600 BUG_ON(ia64_psr(regs)->pp); 4601 } 4602 4603 /* 4604 * All memory free operations (especially for vmalloc'ed memory) 4605 * MUST be done with interrupts ENABLED. 
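 * (Freeing vmalloc'ed areas may sleep and may trigger cross-CPU TLB
 * flushes, neither of which is allowed with interrupts masked; this is
 * why the free below happens only after UNPROTECT_CTX() has restored
 * the interrupt state.)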
4606 */
4607 if (free_ok) pfm_context_free(ctx);
4608}
4609
4610/*
4611 * functions MUST be listed in the increasing order of their index (see perfmon.h)
4612 */
4613#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
4614#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
4615#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
4616#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW)
4617#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL}
4618
4619static pfm_cmd_desc_t pfm_cmd_tab[]={
4620/* 0 */PFM_CMD_NONE,
4621/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4622/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4623/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4624/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
4625/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
4626/* 6 */PFM_CMD_NONE,
4627/* 7 */PFM_CMD_NONE,
4628/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
4629/* 9 */PFM_CMD_NONE,
4630/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
4631/* 11 */PFM_CMD_NONE,
4632/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
4633/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
4634/* 14 */PFM_CMD_NONE,
4635/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4636/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
4637/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
4638/* 18 */PFM_CMD_NONE,
4639/* 19 */PFM_CMD_NONE,
4640/* 20 */PFM_CMD_NONE,
4641/* 21 */PFM_CMD_NONE,
4642/* 22 */PFM_CMD_NONE,
4643/* 23 */PFM_CMD_NONE,
4644/* 24 */PFM_CMD_NONE,
4645/* 25 */PFM_CMD_NONE,
4646/* 26 */PFM_CMD_NONE,
4647/* 27 */PFM_CMD_NONE,
4648/* 28 */PFM_CMD_NONE,
4649/* 29 */PFM_CMD_NONE,
4650/* 30 */PFM_CMD_NONE,
4651/* 31 */PFM_CMD_NONE,
4652/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
4653/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
4654};
4655#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
4656
4657static int
4658pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
4659{
4660 struct task_struct *task;
4661 int state, old_state;
4662
4663recheck:
4664 state = ctx->ctx_state;
4665 task = ctx->ctx_task;
4666
4667 if (task == NULL) {
4668 DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
4669 return 0;
4670 }
4671
4672 DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
4673 ctx->ctx_fd,
4674 state,
4675 task_pid_nr(task),
4676 task->state, PFM_CMD_STOPPED(cmd)));
4677
4678 /*
4679 * self-monitoring always ok.
4680 *
4681 * for system-wide the caller can either be the creator of the
4682 * context (the one to which the context is attached) OR
4683 * a task running on the same CPU as the session.
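 *
 * (For system-wide sessions the "same CPU" restriction is not checked
 * here: it is enforced by the individual command handlers, e.g.
 * pfm_stop()/pfm_start() return -EBUSY when the caller is not running
 * on ctx_cpu, so returning 0 for ctx_fl_system below is sufficient.)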
4684 */ 4685 if (task == current || ctx->ctx_fl_system) return 0; 4686 4687 /* 4688 * we are monitoring another thread 4689 */ 4690 switch(state) { 4691 case PFM_CTX_UNLOADED: 4692 /* 4693 * if context is UNLOADED we are safe to go 4694 */ 4695 return 0; 4696 case PFM_CTX_ZOMBIE: 4697 /* 4698 * no command can operate on a zombie context 4699 */ 4700 DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); 4701 return -EINVAL; 4702 case PFM_CTX_MASKED: 4703 /* 4704 * PMU state has been saved to software even though 4705 * the thread may still be running. 4706 */ 4707 if (cmd != PFM_UNLOAD_CONTEXT) return 0; 4708 } 4709 4710 /* 4711 * context is LOADED or MASKED. Some commands may need to have 4712 * the task stopped. 4713 * 4714 * We could lift this restriction for UP but it would mean that 4715 * the user has no guarantee the task would not run between 4716 * two successive calls to perfmonctl(). That's probably OK. 4717 * If this user wants to ensure the task does not run, then 4718 * the task must be stopped. 4719 */ 4720 if (PFM_CMD_STOPPED(cmd)) { 4721 if (!task_is_stopped_or_traced(task)) { 4722 DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task))); 4723 return -EBUSY; 4724 } 4725 /* 4726 * task is now stopped, wait for ctxsw out 4727 * 4728 * This is an interesting point in the code. 4729 * We need to unprotect the context because 4730 * the pfm_save_regs() routines needs to grab 4731 * the same lock. There are danger in doing 4732 * this because it leaves a window open for 4733 * another task to get access to the context 4734 * and possibly change its state. The one thing 4735 * that is not possible is for the context to disappear 4736 * because we are protected by the VFS layer, i.e., 4737 * get_fd()/put_fd(). 4738 */ 4739 old_state = state; 4740 4741 UNPROTECT_CTX(ctx, flags); 4742 4743 wait_task_inactive(task, 0); 4744 4745 PROTECT_CTX(ctx, flags); 4746 4747 /* 4748 * we must recheck to verify if state has changed 4749 */ 4750 if (ctx->ctx_state != old_state) { 4751 DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); 4752 goto recheck; 4753 } 4754 } 4755 return 0; 4756} 4757 4758/* 4759 * system-call entry point (must return long) 4760 */ 4761asmlinkage long 4762sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4763{ 4764 struct fd f = {NULL, 0}; 4765 pfm_context_t *ctx = NULL; 4766 unsigned long flags = 0UL; 4767 void *args_k = NULL; 4768 long ret; /* will expand int return types */ 4769 size_t base_sz, sz, xtra_sz = 0; 4770 int narg, completed_args = 0, call_made = 0, cmd_flags; 4771 int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 4772 int (*getsize)(void *arg, size_t *sz); 4773#define PFM_MAX_ARGSIZE 4096 4774 4775 /* 4776 * reject any call if perfmon was disabled at initialization 4777 */ 4778 if (unlikely(pmu_conf == NULL)) return -ENOSYS; 4779 4780 if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { 4781 DPRINT(("invalid cmd=%d\n", cmd)); 4782 return -EINVAL; 4783 } 4784 4785 func = pfm_cmd_tab[cmd].cmd_func; 4786 narg = pfm_cmd_tab[cmd].cmd_narg; 4787 base_sz = pfm_cmd_tab[cmd].cmd_argsize; 4788 getsize = pfm_cmd_tab[cmd].cmd_getsize; 4789 cmd_flags = pfm_cmd_tab[cmd].cmd_flags; 4790 4791 if (unlikely(func == NULL)) { 4792 DPRINT(("invalid cmd=%d\n", cmd)); 4793 return -EINVAL; 4794 } 4795 4796 DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", 4797 PFM_CMD_NAME(cmd), 4798 cmd, 4799 narg, 4800 base_sz, 4801 count)); 4802 4803 /* 4804 * check if number of arguments matches what the command expects 
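 *
 * Illustrative user-level sketch (command and field names as defined in
 * <asm/perfmon.h>, values made up): commands declared PFM_CMD_ARG_MANY
 * take a caller-chosen vector, e.g. programming two PMCs on context fd:
 *
 *	pfarg_reg_t pc[2];
 *	memset(pc, 0, sizeof(pc));
 *	pc[0].reg_num   = 4;
 *	pc[0].reg_value = event_encoding_for_pmc4;
 *	pc[1].reg_num   = 5;
 *	pc[1].reg_value = event_encoding_for_pmc5;
 *	perfmonctl(fd, PFM_WRITE_PMCS, pc, 2);
 *
 * whereas a fixed-narg command such as PFM_LOAD_CONTEXT must be invoked
 * with count exactly equal to 1.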
4805 */ 4806 if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) 4807 return -EINVAL; 4808 4809restart_args: 4810 sz = xtra_sz + base_sz*count; 4811 /* 4812 * limit abuse to min page size 4813 */ 4814 if (unlikely(sz > PFM_MAX_ARGSIZE)) { 4815 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz); 4816 return -E2BIG; 4817 } 4818 4819 /* 4820 * allocate default-sized argument buffer 4821 */ 4822 if (likely(count && args_k == NULL)) { 4823 args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); 4824 if (args_k == NULL) return -ENOMEM; 4825 } 4826 4827 ret = -EFAULT; 4828 4829 /* 4830 * copy arguments 4831 * 4832 * assume sz = 0 for command without parameters 4833 */ 4834 if (sz && copy_from_user(args_k, arg, sz)) { 4835 DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); 4836 goto error_args; 4837 } 4838 4839 /* 4840 * check if command supports extra parameters 4841 */ 4842 if (completed_args == 0 && getsize) { 4843 /* 4844 * get extra parameters size (based on main argument) 4845 */ 4846 ret = (*getsize)(args_k, &xtra_sz); 4847 if (ret) goto error_args; 4848 4849 completed_args = 1; 4850 4851 DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); 4852 4853 /* retry if necessary */ 4854 if (likely(xtra_sz)) goto restart_args; 4855 } 4856 4857 if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; 4858 4859 ret = -EBADF; 4860 4861 f = fdget(fd); 4862 if (unlikely(f.file == NULL)) { 4863 DPRINT(("invalid fd %d\n", fd)); 4864 goto error_args; 4865 } 4866 if (unlikely(PFM_IS_FILE(f.file) == 0)) { 4867 DPRINT(("fd %d not related to perfmon\n", fd)); 4868 goto error_args; 4869 } 4870 4871 ctx = f.file->private_data; 4872 if (unlikely(ctx == NULL)) { 4873 DPRINT(("no context for fd %d\n", fd)); 4874 goto error_args; 4875 } 4876 prefetch(&ctx->ctx_state); 4877 4878 PROTECT_CTX(ctx, flags); 4879 4880 /* 4881 * check task is stopped 4882 */ 4883 ret = pfm_check_task_state(ctx, cmd, flags); 4884 if (unlikely(ret)) goto abort_locked; 4885 4886skip_fd: 4887 ret = (*func)(ctx, args_k, count, task_pt_regs(current)); 4888 4889 call_made = 1; 4890 4891abort_locked: 4892 if (likely(ctx)) { 4893 DPRINT(("context unlocked\n")); 4894 UNPROTECT_CTX(ctx, flags); 4895 } 4896 4897 /* copy argument back to user, if needed */ 4898 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4899 4900error_args: 4901 if (f.file) 4902 fdput(f); 4903 4904 kfree(args_k); 4905 4906 DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); 4907 4908 return ret; 4909} 4910 4911static void 4912pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) 4913{ 4914 pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; 4915 pfm_ovfl_ctrl_t rst_ctrl; 4916 int state; 4917 int ret = 0; 4918 4919 state = ctx->ctx_state; 4920 /* 4921 * Unlock sampling buffer and reset index atomically 4922 * XXX: not really needed when blocking 4923 */ 4924 if (CTX_HAS_SMPL(ctx)) { 4925 4926 rst_ctrl.bits.mask_monitoring = 0; 4927 rst_ctrl.bits.reset_ovfl_pmds = 0; 4928 4929 if (state == PFM_CTX_LOADED) 4930 ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4931 else 4932 ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4933 } else { 4934 rst_ctrl.bits.mask_monitoring = 0; 4935 rst_ctrl.bits.reset_ovfl_pmds = 1; 4936 } 4937 4938 if (ret == 0) { 4939 if (rst_ctrl.bits.reset_ovfl_pmds) { 4940 pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); 4941 } 4942 if 
(rst_ctrl.bits.mask_monitoring == 0) { 4943 DPRINT(("resuming monitoring\n")); 4944 if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); 4945 } else { 4946 DPRINT(("stopping monitoring\n")); 4947 //pfm_stop_monitoring(current, regs); 4948 } 4949 ctx->ctx_state = PFM_CTX_LOADED; 4950 } 4951} 4952 4953/* 4954 * context MUST BE LOCKED when calling 4955 * can only be called for current 4956 */ 4957static void 4958pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) 4959{ 4960 int ret; 4961 4962 DPRINT(("entering for [%d]\n", task_pid_nr(current))); 4963 4964 ret = pfm_context_unload(ctx, NULL, 0, regs); 4965 if (ret) { 4966 printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret); 4967 } 4968 4969 /* 4970 * and wakeup controlling task, indicating we are now disconnected 4971 */ 4972 wake_up_interruptible(&ctx->ctx_zombieq); 4973 4974 /* 4975 * given that context is still locked, the controlling 4976 * task will only get access when we return from 4977 * pfm_handle_work(). 4978 */ 4979} 4980 4981static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); 4982 4983 /* 4984 * pfm_handle_work() can be called with interrupts enabled 4985 * (TIF_NEED_RESCHED) or disabled. The down_interruptible 4986 * call may sleep, therefore we must re-enable interrupts 4987 * to avoid deadlocks. It is safe to do so because this function 4988 * is called ONLY when returning to user level (pUStk=1), in which case 4989 * there is no risk of kernel stack overflow due to deep 4990 * interrupt nesting. 4991 */ 4992void 4993pfm_handle_work(void) 4994{ 4995 pfm_context_t *ctx; 4996 struct pt_regs *regs; 4997 unsigned long flags, dummy_flags; 4998 unsigned long ovfl_regs; 4999 unsigned int reason; 5000 int ret; 5001 5002 ctx = PFM_GET_CTX(current); 5003 if (ctx == NULL) { 5004 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", 5005 task_pid_nr(current)); 5006 return; 5007 } 5008 5009 PROTECT_CTX(ctx, flags); 5010 5011 PFM_SET_WORK_PENDING(current, 0); 5012 5013 regs = task_pt_regs(current); 5014 5015 /* 5016 * extract reason for being here and clear 5017 */ 5018 reason = ctx->ctx_fl_trap_reason; 5019 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 5020 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5021 5022 DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); 5023 5024 /* 5025 * must be done before we check for simple-reset mode 5026 */ 5027 if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) 5028 goto do_zombie; 5029 5030 //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; 5031 if (reason == PFM_TRAP_REASON_RESET) 5032 goto skip_blocking; 5033 5034 /* 5035 * restore interrupt mask to what it was on entry. 5036 * Could be enabled/diasbled. 5037 */ 5038 UNPROTECT_CTX(ctx, flags); 5039 5040 /* 5041 * force interrupt enable because of down_interruptible() 5042 */ 5043 local_irq_enable(); 5044 5045 DPRINT(("before block sleeping\n")); 5046 5047 /* 5048 * may go through without blocking on SMP systems 5049 * if restart has been received already by the time we call down() 5050 */ 5051 ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); 5052 5053 DPRINT(("after block sleeping ret=%d\n", ret)); 5054 5055 /* 5056 * lock context and mask interrupts again 5057 * We save flags into a dummy because we may have 5058 * altered interrupts mask compared to entry in this 5059 * function. 
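 * The flags value captured by the first PROTECT_CTX() is the one passed
 * to UNPROTECT_CTX() at the end of this function, so the interrupt state
 * the caller had on entry is what ultimately gets restored; dummy_flags
 * is simply discarded.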
5060 */ 5061 PROTECT_CTX(ctx, dummy_flags); 5062 5063 /* 5064 * we need to read the ovfl_regs only after wake-up 5065 * because we may have had pfm_write_pmds() in between 5066 * and that can changed PMD values and therefore 5067 * ovfl_regs is reset for these new PMD values. 5068 */ 5069 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5070 5071 if (ctx->ctx_fl_going_zombie) { 5072do_zombie: 5073 DPRINT(("context is zombie, bailing out\n")); 5074 pfm_context_force_terminate(ctx, regs); 5075 goto nothing_to_do; 5076 } 5077 /* 5078 * in case of interruption of down() we don't restart anything 5079 */ 5080 if (ret < 0) 5081 goto nothing_to_do; 5082 5083skip_blocking: 5084 pfm_resume_after_ovfl(ctx, ovfl_regs, regs); 5085 ctx->ctx_ovfl_regs[0] = 0UL; 5086 5087nothing_to_do: 5088 /* 5089 * restore flags as they were upon entry 5090 */ 5091 UNPROTECT_CTX(ctx, flags); 5092} 5093 5094static int 5095pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) 5096{ 5097 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5098 DPRINT(("ignoring overflow notification, owner is zombie\n")); 5099 return 0; 5100 } 5101 5102 DPRINT(("waking up somebody\n")); 5103 5104 if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); 5105 5106 /* 5107 * safe, we are not in intr handler, nor in ctxsw when 5108 * we come here 5109 */ 5110 kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); 5111 5112 return 0; 5113} 5114 5115static int 5116pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) 5117{ 5118 pfm_msg_t *msg = NULL; 5119 5120 if (ctx->ctx_fl_no_msg == 0) { 5121 msg = pfm_get_new_msg(ctx); 5122 if (msg == NULL) { 5123 printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); 5124 return -1; 5125 } 5126 5127 msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; 5128 msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; 5129 msg->pfm_ovfl_msg.msg_active_set = 0; 5130 msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; 5131 msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; 5132 msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; 5133 msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; 5134 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5135 } 5136 5137 DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", 5138 msg, 5139 ctx->ctx_fl_no_msg, 5140 ctx->ctx_fd, 5141 ovfl_pmds)); 5142 5143 return pfm_notify_user(ctx, msg); 5144} 5145 5146static int 5147pfm_end_notify_user(pfm_context_t *ctx) 5148{ 5149 pfm_msg_t *msg; 5150 5151 msg = pfm_get_new_msg(ctx); 5152 if (msg == NULL) { 5153 printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); 5154 return -1; 5155 } 5156 /* no leak */ 5157 memset(msg, 0, sizeof(*msg)); 5158 5159 msg->pfm_end_msg.msg_type = PFM_MSG_END; 5160 msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; 5161 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5162 5163 DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", 5164 msg, 5165 ctx->ctx_fl_no_msg, 5166 ctx->ctx_fd)); 5167 5168 return pfm_notify_user(ctx, msg); 5169} 5170 5171/* 5172 * main overflow processing routine. 
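 * pmc0 carries the frozen overflow status: bit 0 is the freeze bit
 * (pmc0.fr) and the per-counter overflow bits are shifted down by
 * PMU_FIRST_COUNTER before being walked.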
5173 * it can be called from the interrupt path or explicitly during the context switch code 5174 */ 5175static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, 5176 unsigned long pmc0, struct pt_regs *regs) 5177{ 5178 pfm_ovfl_arg_t *ovfl_arg; 5179 unsigned long mask; 5180 unsigned long old_val, ovfl_val, new_val; 5181 unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; 5182 unsigned long tstamp; 5183 pfm_ovfl_ctrl_t ovfl_ctrl; 5184 unsigned int i, has_smpl; 5185 int must_notify = 0; 5186 5187 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; 5188 5189 /* 5190 * sanity test. Should never happen 5191 */ 5192 if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; 5193 5194 tstamp = ia64_get_itc(); 5195 mask = pmc0 >> PMU_FIRST_COUNTER; 5196 ovfl_val = pmu_conf->ovfl_val; 5197 has_smpl = CTX_HAS_SMPL(ctx); 5198 5199 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " 5200 "used_pmds=0x%lx\n", 5201 pmc0, 5202 task ? task_pid_nr(task): -1, 5203 (regs ? regs->cr_iip : 0), 5204 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", 5205 ctx->ctx_used_pmds[0])); 5206 5207 5208 /* 5209 * first we update the virtual counters 5210 * assume there was a prior ia64_srlz_d() issued 5211 */ 5212 for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { 5213 5214 /* skip pmd which did not overflow */ 5215 if ((mask & 0x1) == 0) continue; 5216 5217 /* 5218 * Note that the pmd is not necessarily 0 at this point as qualified events 5219 * may have happened before the PMU was frozen. The residual count is not 5220 * taken into consideration here but will be with any read of the pmd via 5221 * pfm_read_pmds(). 5222 */ 5223 old_val = new_val = ctx->ctx_pmds[i].val; 5224 new_val += 1 + ovfl_val; 5225 ctx->ctx_pmds[i].val = new_val; 5226 5227 /* 5228 * check for overflow condition 5229 */ 5230 if (likely(old_val > new_val)) { 5231 ovfl_pmds |= 1UL << i; 5232 if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; 5233 } 5234 5235 DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", 5236 i, 5237 new_val, 5238 old_val, 5239 ia64_get_pmd(i) & ovfl_val, 5240 ovfl_pmds, 5241 ovfl_notify)); 5242 } 5243 5244 /* 5245 * there was no 64-bit overflow, nothing else to do 5246 */ 5247 if (ovfl_pmds == 0UL) return; 5248 5249 /* 5250 * reset all control bits 5251 */ 5252 ovfl_ctrl.val = 0; 5253 reset_pmds = 0UL; 5254 5255 /* 5256 * if a sampling format module exists, then we "cache" the overflow by 5257 * calling the module's handler() routine. 5258 */ 5259 if (has_smpl) { 5260 unsigned long start_cycles, end_cycles; 5261 unsigned long pmd_mask; 5262 int j, k, ret = 0; 5263 int this_cpu = smp_processor_id(); 5264 5265 pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; 5266 ovfl_arg = &ctx->ctx_ovfl_arg; 5267 5268 prefetch(ctx->ctx_smpl_hdr); 5269 5270 for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { 5271 5272 mask = 1UL << i; 5273 5274 if ((pmd_mask & 0x1) == 0) continue; 5275 5276 ovfl_arg->ovfl_pmd = (unsigned char )i; 5277 ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; 5278 ovfl_arg->active_set = 0; 5279 ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ 5280 ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; 5281 5282 ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; 5283 ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; 5284 ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; 5285 5286 /* 5287 * copy values of pmds of interest. Sampling format may copy them 5288 * into sampling buffer. 
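 * For instance, smpl_pmds = 0x30 selects PMD4 and PMD5; the loop below
 * stores their values in smpl_pmds_values[0] and smpl_pmds_values[1]
 * respectively.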
5289 */ 5290 if (smpl_pmds) { 5291 for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { 5292 if ((smpl_pmds & 0x1) == 0) continue; 5293 ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); 5294 DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); 5295 } 5296 } 5297 5298 pfm_stats[this_cpu].pfm_smpl_handler_calls++; 5299 5300 start_cycles = ia64_get_itc(); 5301 5302 /* 5303 * call custom buffer format record (handler) routine 5304 */ 5305 ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); 5306 5307 end_cycles = ia64_get_itc(); 5308 5309 /* 5310 * For those controls, we take the union because they have 5311 * an all or nothing behavior. 5312 */ 5313 ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; 5314 ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; 5315 ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; 5316 /* 5317 * build the bitmask of pmds to reset now 5318 */ 5319 if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; 5320 5321 pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; 5322 } 5323 /* 5324 * when the module cannot handle the rest of the overflows, we abort right here 5325 */ 5326 if (ret && pmd_mask) { 5327 DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", 5328 pmd_mask<<PMU_FIRST_COUNTER)); 5329 } 5330 /* 5331 * remove the pmds we reset now from the set of pmds to reset in pfm_restart() 5332 */ 5333 ovfl_pmds &= ~reset_pmds; 5334 } else { 5335 /* 5336 * when no sampling module is used, then the default 5337 * is to notify on overflow if requested by user 5338 */ 5339 ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; 5340 ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; 5341 ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ 5342 ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; 5343 /* 5344 * if needed, we reset all overflowed pmds 5345 */ 5346 if (ovfl_notify == 0) reset_pmds = ovfl_pmds; 5347 } 5348 5349 DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); 5350 5351 /* 5352 * reset the requested PMD registers using the short reset values 5353 */ 5354 if (reset_pmds) { 5355 unsigned long bm = reset_pmds; 5356 pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); 5357 } 5358 5359 if (ovfl_notify && ovfl_ctrl.bits.notify_user) { 5360 /* 5361 * keep track of what to reset when unblocking 5362 */ 5363 ctx->ctx_ovfl_regs[0] = ovfl_pmds; 5364 5365 /* 5366 * check for blocking context 5367 */ 5368 if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { 5369 5370 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; 5371 5372 /* 5373 * set the perfmon specific checking pending work for the task 5374 */ 5375 PFM_SET_WORK_PENDING(task, 1); 5376 5377 /* 5378 * when coming from ctxsw, current still points to the 5379 * previous task, therefore we must work with task and not current. 5380 */ 5381 set_notify_resume(task); 5382 } 5383 /* 5384 * defer until state is changed (shorten spin window). the context is locked 5385 * anyway, so the signal receiver would come spin for nothing. 5386 */ 5387 must_notify = 1; 5388 } 5389 5390 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", 5391 GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1, 5392 PFM_GET_WORK_PENDING(task), 5393 ctx->ctx_fl_trap_reason, 5394 ovfl_pmds, 5395 ovfl_notify, 5396 ovfl_ctrl.bits.mask_monitoring ? 
1 : 0));
5397 /*
5398 * in case monitoring must be stopped, we toggle the psr bits
5399 */
5400 if (ovfl_ctrl.bits.mask_monitoring) {
5401 pfm_mask_monitoring(task);
5402 ctx->ctx_state = PFM_CTX_MASKED;
5403 ctx->ctx_fl_can_restart = 1;
5404 }
5405
5406 /*
5407 * send notification now
5408 */
5409 if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
5410
5411 return;
5412
5413sanity_check:
5414 printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
5415 smp_processor_id(),
5416 task ? task_pid_nr(task) : -1,
5417 pmc0);
5418 return;
5419
5420stop_monitoring:
5421 /*
5422 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
5423 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
5424 * come here as zombie only if the task is the current task, in which case we
5425 * can access the PMU hardware directly.
5426 *
5427 * Note that zombies do have PM_VALID set. So here we do the minimum.
5428 *
5429 * In case the context was zombified it could not be reclaimed at the time
5430 * the monitoring program exited. At this point, the PMU reservation has been
5431 * returned, the sampling buffer has been freed. We must convert this call
5432 * into a spurious interrupt. However, we must also avoid infinite overflows
5433 * by stopping monitoring for this task. We can only come here for a per-task
5434 * context. All we need to do is to stop monitoring using the psr bits which
5435 * are always task private. By re-enabling secure monitoring, we ensure that
5436 * the monitored task will not be able to re-activate monitoring.
5437 * The task will eventually be context switched out, at which point the context
5438 * will be reclaimed (that includes releasing ownership of the PMU).
5439 *
5440 * So there might be a window of time where the number of per-task sessions is zero
5441 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
5442 * context. This is safe because if a per-task session comes in, it will push this one
5443 * out and by virtue of pfm_save_regs(), this one will disappear. If a system-wide
5444 * session is forced onto that CPU, given that we use task pinning, pfm_save_regs() will
5445 * also push our zombie context out.
5446 *
5447 * Overall pretty hairy stuff....
5448 */
5449 DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ?
task_pid_nr(task): -1)); 5450 pfm_clear_psr_up(); 5451 ia64_psr(regs)->up = 0; 5452 ia64_psr(regs)->sp = 1; 5453 return; 5454} 5455 5456static int 5457pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) 5458{ 5459 struct task_struct *task; 5460 pfm_context_t *ctx; 5461 unsigned long flags; 5462 u64 pmc0; 5463 int this_cpu = smp_processor_id(); 5464 int retval = 0; 5465 5466 pfm_stats[this_cpu].pfm_ovfl_intr_count++; 5467 5468 /* 5469 * srlz.d done before arriving here 5470 */ 5471 pmc0 = ia64_get_pmc(0); 5472 5473 task = GET_PMU_OWNER(); 5474 ctx = GET_PMU_CTX(); 5475 5476 /* 5477 * if we have some pending bits set 5478 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 5479 */ 5480 if (PMC0_HAS_OVFL(pmc0) && task) { 5481 /* 5482 * we assume that pmc0.fr is always set here 5483 */ 5484 5485 /* sanity check */ 5486 if (!ctx) goto report_spurious1; 5487 5488 if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 5489 goto report_spurious2; 5490 5491 PROTECT_CTX_NOPRINT(ctx, flags); 5492 5493 pfm_overflow_handler(task, ctx, pmc0, regs); 5494 5495 UNPROTECT_CTX_NOPRINT(ctx, flags); 5496 5497 } else { 5498 pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; 5499 retval = -1; 5500 } 5501 /* 5502 * keep it unfrozen at all times 5503 */ 5504 pfm_unfreeze_pmu(); 5505 5506 return retval; 5507 5508report_spurious1: 5509 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", 5510 this_cpu, task_pid_nr(task)); 5511 pfm_unfreeze_pmu(); 5512 return -1; 5513report_spurious2: 5514 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 5515 this_cpu, 5516 task_pid_nr(task)); 5517 pfm_unfreeze_pmu(); 5518 return -1; 5519} 5520 5521static irqreturn_t 5522pfm_interrupt_handler(int irq, void *arg) 5523{ 5524 unsigned long start_cycles, total_cycles; 5525 unsigned long min, max; 5526 int this_cpu; 5527 int ret; 5528 struct pt_regs *regs = get_irq_regs(); 5529 5530 this_cpu = get_cpu(); 5531 if (likely(!pfm_alt_intr_handler)) { 5532 min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; 5533 max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; 5534 5535 start_cycles = ia64_get_itc(); 5536 5537 ret = pfm_do_interrupt_handler(arg, regs); 5538 5539 total_cycles = ia64_get_itc(); 5540 5541 /* 5542 * don't measure spurious interrupts 5543 */ 5544 if (likely(ret == 0)) { 5545 total_cycles -= start_cycles; 5546 5547 if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; 5548 if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; 5549 5550 pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; 5551 } 5552 } 5553 else { 5554 (*pfm_alt_intr_handler->handler)(irq, arg, regs); 5555 } 5556 5557 put_cpu(); 5558 return IRQ_HANDLED; 5559} 5560 5561/* 5562 * /proc/perfmon interface, for debug only 5563 */ 5564 5565#define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) 5566 5567static void * 5568pfm_proc_start(struct seq_file *m, loff_t *pos) 5569{ 5570 if (*pos == 0) { 5571 return PFM_PROC_SHOW_HEADER; 5572 } 5573 5574 while (*pos <= nr_cpu_ids) { 5575 if (cpu_online(*pos - 1)) { 5576 return (void *)*pos; 5577 } 5578 ++*pos; 5579 } 5580 return NULL; 5581} 5582 5583static void * 5584pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) 5585{ 5586 ++*pos; 5587 return pfm_proc_start(m, pos); 5588} 5589 5590static void 5591pfm_proc_stop(struct seq_file *m, void *v) 5592{ 5593} 5594 5595static void 5596pfm_proc_show_header(struct seq_file *m) 5597{ 
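	/*
	 * Illustrative header output (values made up, field spacing elided):
	 *
	 *	perfmon version : 2.0
	 *	model : Itanium 2
	 *	fastctxsw : No
	 *	expert mode : No
	 *	ovfl_mask : 0xffffffffffff
	 *	PMU flags : 0x0
	 *
	 * followed by the session counts printed under the pfs lock below.
	 */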
5598 struct list_head * pos; 5599 pfm_buffer_fmt_t * entry; 5600 unsigned long flags; 5601 5602 seq_printf(m, 5603 "perfmon version : %u.%u\n" 5604 "model : %s\n" 5605 "fastctxsw : %s\n" 5606 "expert mode : %s\n" 5607 "ovfl_mask : 0x%lx\n" 5608 "PMU flags : 0x%x\n", 5609 PFM_VERSION_MAJ, PFM_VERSION_MIN, 5610 pmu_conf->pmu_name, 5611 pfm_sysctl.fastctxsw > 0 ? "Yes": "No", 5612 pfm_sysctl.expert_mode > 0 ? "Yes": "No", 5613 pmu_conf->ovfl_val, 5614 pmu_conf->flags); 5615 5616 LOCK_PFS(flags); 5617 5618 seq_printf(m, 5619 "proc_sessions : %u\n" 5620 "sys_sessions : %u\n" 5621 "sys_use_dbregs : %u\n" 5622 "ptrace_use_dbregs : %u\n", 5623 pfm_sessions.pfs_task_sessions, 5624 pfm_sessions.pfs_sys_sessions, 5625 pfm_sessions.pfs_sys_use_dbregs, 5626 pfm_sessions.pfs_ptrace_use_dbregs); 5627 5628 UNLOCK_PFS(flags); 5629 5630 spin_lock(&pfm_buffer_fmt_lock); 5631 5632 list_for_each(pos, &pfm_buffer_fmt_list) { 5633 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 5634 seq_printf(m, "format : %16phD %s\n", 5635 entry->fmt_uuid, entry->fmt_name); 5636 } 5637 spin_unlock(&pfm_buffer_fmt_lock); 5638 5639} 5640 5641static int 5642pfm_proc_show(struct seq_file *m, void *v) 5643{ 5644 unsigned long psr; 5645 unsigned int i; 5646 int cpu; 5647 5648 if (v == PFM_PROC_SHOW_HEADER) { 5649 pfm_proc_show_header(m); 5650 return 0; 5651 } 5652 5653 /* show info for CPU (v - 1) */ 5654 5655 cpu = (long)v - 1; 5656 seq_printf(m, 5657 "CPU%-2d overflow intrs : %lu\n" 5658 "CPU%-2d overflow cycles : %lu\n" 5659 "CPU%-2d overflow min : %lu\n" 5660 "CPU%-2d overflow max : %lu\n" 5661 "CPU%-2d smpl handler calls : %lu\n" 5662 "CPU%-2d smpl handler cycles : %lu\n" 5663 "CPU%-2d spurious intrs : %lu\n" 5664 "CPU%-2d replay intrs : %lu\n" 5665 "CPU%-2d syst_wide : %d\n" 5666 "CPU%-2d dcr_pp : %d\n" 5667 "CPU%-2d exclude idle : %d\n" 5668 "CPU%-2d owner : %d\n" 5669 "CPU%-2d context : %p\n" 5670 "CPU%-2d activations : %lu\n", 5671 cpu, pfm_stats[cpu].pfm_ovfl_intr_count, 5672 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, 5673 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, 5674 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, 5675 cpu, pfm_stats[cpu].pfm_smpl_handler_calls, 5676 cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, 5677 cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, 5678 cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, 5679 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, 5680 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, 5681 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, 5682 cpu, pfm_get_cpu_data(pmu_owner, cpu) ? 
pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, 5683 cpu, pfm_get_cpu_data(pmu_ctx, cpu), 5684 cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); 5685 5686 if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { 5687 5688 psr = pfm_get_psr(); 5689 5690 ia64_srlz_d(); 5691 5692 seq_printf(m, 5693 "CPU%-2d psr : 0x%lx\n" 5694 "CPU%-2d pmc0 : 0x%lx\n", 5695 cpu, psr, 5696 cpu, ia64_get_pmc(0)); 5697 5698 for (i=0; PMC_IS_LAST(i) == 0; i++) { 5699 if (PMC_IS_COUNTING(i) == 0) continue; 5700 seq_printf(m, 5701 "CPU%-2d pmc%u : 0x%lx\n" 5702 "CPU%-2d pmd%u : 0x%lx\n", 5703 cpu, i, ia64_get_pmc(i), 5704 cpu, i, ia64_get_pmd(i)); 5705 } 5706 } 5707 return 0; 5708} 5709 5710const struct seq_operations pfm_seq_ops = { 5711 .start = pfm_proc_start, 5712 .next = pfm_proc_next, 5713 .stop = pfm_proc_stop, 5714 .show = pfm_proc_show 5715}; 5716 5717static int 5718pfm_proc_open(struct inode *inode, struct file *file) 5719{ 5720 return seq_open(file, &pfm_seq_ops); 5721} 5722 5723 5724/* 5725 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens 5726 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring 5727 * is active or inactive based on mode. We must rely on the value in 5728 * local_cpu_data->pfm_syst_info 5729 */ 5730void 5731pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) 5732{ 5733 struct pt_regs *regs; 5734 unsigned long dcr; 5735 unsigned long dcr_pp; 5736 5737 dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; 5738 5739 /* 5740 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 5741 * on every CPU, so we can rely on the pid to identify the idle task. 5742 */ 5743 if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { 5744 regs = task_pt_regs(task); 5745 ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; 5746 return; 5747 } 5748 /* 5749 * if monitoring has started 5750 */ 5751 if (dcr_pp) { 5752 dcr = ia64_getreg(_IA64_REG_CR_DCR); 5753 /* 5754 * context switching in? 5755 */ 5756 if (is_ctxswin) { 5757 /* mask monitoring for the idle task */ 5758 ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); 5759 pfm_clear_psr_pp(); 5760 ia64_srlz_i(); 5761 return; 5762 } 5763 /* 5764 * context switching out 5765 * restore monitoring for next task 5766 * 5767 * Due to inlining this odd if-then-else construction generates 5768 * better code. 5769 */ 5770 ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); 5771 pfm_set_psr_pp(); 5772 ia64_srlz_i(); 5773 } 5774} 5775 5776#ifdef CONFIG_SMP 5777 5778static void 5779pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) 5780{ 5781 struct task_struct *task = ctx->ctx_task; 5782 5783 ia64_psr(regs)->up = 0; 5784 ia64_psr(regs)->sp = 1; 5785 5786 if (GET_PMU_OWNER() == task) { 5787 DPRINT(("cleared ownership for [%d]\n", 5788 task_pid_nr(ctx->ctx_task))); 5789 SET_PMU_OWNER(NULL, NULL); 5790 } 5791 5792 /* 5793 * disconnect the task from the context and vice-versa 5794 */ 5795 PFM_SET_WORK_PENDING(task, 0); 5796 5797 task->thread.pfm_context = NULL; 5798 task->thread.flags &= ~IA64_THREAD_PM_VALID; 5799 5800 DPRINT(("force cleanup for [%d]\n", task_pid_nr(task))); 5801} 5802 5803 5804/* 5805 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5806 */ 5807void 5808pfm_save_regs(struct task_struct *task) 5809{ 5810 pfm_context_t *ctx; 5811 unsigned long flags; 5812 u64 psr; 5813 5814 5815 ctx = PFM_GET_CTX(task); 5816 if (ctx == NULL) return; 5817 5818 /* 5819 * we always come here with interrupts ALREADY disabled by 5820 * the scheduler. 
So we simply need to protect against concurrent 5821 * access, not CPU concurrency. 5822 */ 5823 flags = pfm_protect_ctx_ctxsw(ctx); 5824 5825 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5826 struct pt_regs *regs = task_pt_regs(task); 5827 5828 pfm_clear_psr_up(); 5829 5830 pfm_force_cleanup(ctx, regs); 5831 5832 BUG_ON(ctx->ctx_smpl_hdr); 5833 5834 pfm_unprotect_ctx_ctxsw(ctx, flags); 5835 5836 pfm_context_free(ctx); 5837 return; 5838 } 5839 5840 /* 5841 * save current PSR: needed because we modify it 5842 */ 5843 ia64_srlz_d(); 5844 psr = pfm_get_psr(); 5845 5846 BUG_ON(psr & (IA64_PSR_I)); 5847 5848 /* 5849 * stop monitoring: 5850 * This is the last instruction which may generate an overflow 5851 * 5852 * We do not need to set psr.sp because, it is irrelevant in kernel. 5853 * It will be restored from ipsr when going back to user level 5854 */ 5855 pfm_clear_psr_up(); 5856 5857 /* 5858 * keep a copy of psr.up (for reload) 5859 */ 5860 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5861 5862 /* 5863 * release ownership of this PMU. 5864 * PM interrupts are masked, so nothing 5865 * can happen. 5866 */ 5867 SET_PMU_OWNER(NULL, NULL); 5868 5869 /* 5870 * we systematically save the PMD as we have no 5871 * guarantee we will be schedule at that same 5872 * CPU again. 5873 */ 5874 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5875 5876 /* 5877 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5878 * we will need it on the restore path to check 5879 * for pending overflow. 5880 */ 5881 ctx->th_pmcs[0] = ia64_get_pmc(0); 5882 5883 /* 5884 * unfreeze PMU if had pending overflows 5885 */ 5886 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5887 5888 /* 5889 * finally, allow context access. 5890 * interrupts will still be masked after this call. 5891 */ 5892 pfm_unprotect_ctx_ctxsw(ctx, flags); 5893} 5894 5895#else /* !CONFIG_SMP */ 5896void 5897pfm_save_regs(struct task_struct *task) 5898{ 5899 pfm_context_t *ctx; 5900 u64 psr; 5901 5902 ctx = PFM_GET_CTX(task); 5903 if (ctx == NULL) return; 5904 5905 /* 5906 * save current PSR: needed because we modify it 5907 */ 5908 psr = pfm_get_psr(); 5909 5910 BUG_ON(psr & (IA64_PSR_I)); 5911 5912 /* 5913 * stop monitoring: 5914 * This is the last instruction which may generate an overflow 5915 * 5916 * We do not need to set psr.sp because, it is irrelevant in kernel. 5917 * It will be restored from ipsr when going back to user level 5918 */ 5919 pfm_clear_psr_up(); 5920 5921 /* 5922 * keep a copy of psr.up (for reload) 5923 */ 5924 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5925} 5926 5927static void 5928pfm_lazy_save_regs (struct task_struct *task) 5929{ 5930 pfm_context_t *ctx; 5931 unsigned long flags; 5932 5933 { u64 psr = pfm_get_psr(); 5934 BUG_ON(psr & IA64_PSR_UP); 5935 } 5936 5937 ctx = PFM_GET_CTX(task); 5938 5939 /* 5940 * we need to mask PMU overflow here to 5941 * make sure that we maintain pmc0 until 5942 * we save it. overflow interrupts are 5943 * treated as spurious if there is no 5944 * owner. 5945 * 5946 * XXX: I don't think this is necessary 5947 */ 5948 PROTECT_CTX(ctx,flags); 5949 5950 /* 5951 * release ownership of this PMU. 5952 * must be done before we save the registers. 5953 * 5954 * after this call any PMU interrupt is treated 5955 * as spurious. 
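 * (pfm_do_interrupt_handler() only dispatches to the overflow handler
 * when GET_PMU_OWNER() returns a task; with no owner it just increments
 * pfm_spurious_ovfl_intr_count and unfreezes the PMU.)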
5956 */ 5957 SET_PMU_OWNER(NULL, NULL); 5958 5959 /* 5960 * save all the pmds we use 5961 */ 5962 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5963 5964 /* 5965 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5966 * it is needed to check for pended overflow 5967 * on the restore path 5968 */ 5969 ctx->th_pmcs[0] = ia64_get_pmc(0); 5970 5971 /* 5972 * unfreeze PMU if had pending overflows 5973 */ 5974 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5975 5976 /* 5977 * now get can unmask PMU interrupts, they will 5978 * be treated as purely spurious and we will not 5979 * lose any information 5980 */ 5981 UNPROTECT_CTX(ctx,flags); 5982} 5983#endif /* CONFIG_SMP */ 5984 5985#ifdef CONFIG_SMP 5986/* 5987 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5988 */ 5989void 5990pfm_load_regs (struct task_struct *task) 5991{ 5992 pfm_context_t *ctx; 5993 unsigned long pmc_mask = 0UL, pmd_mask = 0UL; 5994 unsigned long flags; 5995 u64 psr, psr_up; 5996 int need_irq_resend; 5997 5998 ctx = PFM_GET_CTX(task); 5999 if (unlikely(ctx == NULL)) return; 6000 6001 BUG_ON(GET_PMU_OWNER()); 6002 6003 /* 6004 * possible on unload 6005 */ 6006 if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; 6007 6008 /* 6009 * we always come here with interrupts ALREADY disabled by 6010 * the scheduler. So we simply need to protect against concurrent 6011 * access, not CPU concurrency. 6012 */ 6013 flags = pfm_protect_ctx_ctxsw(ctx); 6014 psr = pfm_get_psr(); 6015 6016 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6017 6018 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6019 BUG_ON(psr & IA64_PSR_I); 6020 6021 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { 6022 struct pt_regs *regs = task_pt_regs(task); 6023 6024 BUG_ON(ctx->ctx_smpl_hdr); 6025 6026 pfm_force_cleanup(ctx, regs); 6027 6028 pfm_unprotect_ctx_ctxsw(ctx, flags); 6029 6030 /* 6031 * this one (kmalloc'ed) is fine with interrupts disabled 6032 */ 6033 pfm_context_free(ctx); 6034 6035 return; 6036 } 6037 6038 /* 6039 * we restore ALL the debug registers to avoid picking up 6040 * stale state. 6041 */ 6042 if (ctx->ctx_fl_using_dbreg) { 6043 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6044 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6045 } 6046 /* 6047 * retrieve saved psr.up 6048 */ 6049 psr_up = ctx->ctx_saved_psr_up; 6050 6051 /* 6052 * if we were the last user of the PMU on that CPU, 6053 * then nothing to do except restore psr 6054 */ 6055 if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { 6056 6057 /* 6058 * retrieve partial reload masks (due to user modifications) 6059 */ 6060 pmc_mask = ctx->ctx_reload_pmcs[0]; 6061 pmd_mask = ctx->ctx_reload_pmds[0]; 6062 6063 } else { 6064 /* 6065 * To avoid leaking information to the user level when psr.sp=0, 6066 * we must reload ALL implemented pmds (even the ones we don't use). 6067 * In the kernel we only allow PFM_READ_PMDS on registers which 6068 * we initialized or requested (sampling) so there is no risk there. 6069 */ 6070 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6071 6072 /* 6073 * ALL accessible PMCs are systematically reloaded, unused registers 6074 * get their default (from pfm_reset_pmu_state()) values to avoid picking 6075 * up stale configuration. 6076 * 6077 * PMC0 is never in the mask. It is always restored separately. 
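 *
 * (PMC0 was saved into ctx->th_pmcs[0] by pfm_save_regs() at
 * context-switch-out time; it is only written back, further down, when
 * PMC0_HAS_OVFL() shows an overflow that must be replayed.)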
6078 */ 6079 pmc_mask = ctx->ctx_all_pmcs[0]; 6080 } 6081 /* 6082 * when context is MASKED, we will restore PMC with plm=0 6083 * and PMD with stale information, but that's ok, nothing 6084 * will be captured. 6085 * 6086 * XXX: optimize here 6087 */ 6088 if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6089 if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6090 6091 /* 6092 * check for pending overflow at the time the state 6093 * was saved. 6094 */ 6095 if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6096 /* 6097 * reload pmc0 with the overflow information 6098 * On McKinley PMU, this will trigger a PMU interrupt 6099 */ 6100 ia64_set_pmc(0, ctx->th_pmcs[0]); 6101 ia64_srlz_d(); 6102 ctx->th_pmcs[0] = 0UL; 6103 6104 /* 6105 * will replay the PMU interrupt 6106 */ 6107 if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6108 6109 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6110 } 6111 6112 /* 6113 * we just did a reload, so we reset the partial reload fields 6114 */ 6115 ctx->ctx_reload_pmcs[0] = 0UL; 6116 ctx->ctx_reload_pmds[0] = 0UL; 6117 6118 SET_LAST_CPU(ctx, smp_processor_id()); 6119 6120 /* 6121 * dump activation value for this PMU 6122 */ 6123 INC_ACTIVATION(); 6124 /* 6125 * record current activation for this context 6126 */ 6127 SET_ACTIVATION(ctx); 6128 6129 /* 6130 * establish new ownership. 6131 */ 6132 SET_PMU_OWNER(task, ctx); 6133 6134 /* 6135 * restore the psr.up bit. measurement 6136 * is active again. 6137 * no PMU interrupt can happen at this point 6138 * because we still have interrupts disabled. 6139 */ 6140 if (likely(psr_up)) pfm_set_psr_up(); 6141 6142 /* 6143 * allow concurrent access to context 6144 */ 6145 pfm_unprotect_ctx_ctxsw(ctx, flags); 6146} 6147#else /* !CONFIG_SMP */ 6148/* 6149 * reload PMU state for UP kernels 6150 * in 2.5 we come here with interrupts disabled 6151 */ 6152void 6153pfm_load_regs (struct task_struct *task) 6154{ 6155 pfm_context_t *ctx; 6156 struct task_struct *owner; 6157 unsigned long pmd_mask, pmc_mask; 6158 u64 psr, psr_up; 6159 int need_irq_resend; 6160 6161 owner = GET_PMU_OWNER(); 6162 ctx = PFM_GET_CTX(task); 6163 psr = pfm_get_psr(); 6164 6165 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6166 BUG_ON(psr & IA64_PSR_I); 6167 6168 /* 6169 * we restore ALL the debug registers to avoid picking up 6170 * stale state. 6171 * 6172 * This must be done even when the task is still the owner 6173 * as the registers may have been modified via ptrace() 6174 * (not perfmon) by the previous task. 6175 */ 6176 if (ctx->ctx_fl_using_dbreg) { 6177 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6178 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6179 } 6180 6181 /* 6182 * retrieved saved psr.up 6183 */ 6184 psr_up = ctx->ctx_saved_psr_up; 6185 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6186 6187 /* 6188 * short path, our state is still there, just 6189 * need to restore psr and we go 6190 * 6191 * we do not touch either PMC nor PMD. the psr is not touched 6192 * by the overflow_handler. So we are safe w.r.t. to interrupt 6193 * concurrency even without interrupt masking. 6194 */ 6195 if (likely(owner == task)) { 6196 if (likely(psr_up)) pfm_set_psr_up(); 6197 return; 6198 } 6199 6200 /* 6201 * someone else is still using the PMU, first push it out and 6202 * then we'll be able to install our stuff ! 
6203 * 6204 * Upon return, there will be no owner for the current PMU 6205 */ 6206 if (owner) pfm_lazy_save_regs(owner); 6207 6208 /* 6209 * To avoid leaking information to the user level when psr.sp=0, 6210 * we must reload ALL implemented pmds (even the ones we don't use). 6211 * In the kernel we only allow PFM_READ_PMDS on registers which 6212 * we initialized or requested (sampling) so there is no risk there. 6213 */ 6214 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6215 6216 /* 6217 * ALL accessible PMCs are systematically reloaded, unused registers 6218 * get their default (from pfm_reset_pmu_state()) values to avoid picking 6219 * up stale configuration. 6220 * 6221 * PMC0 is never in the mask. It is always restored separately 6222 */ 6223 pmc_mask = ctx->ctx_all_pmcs[0]; 6224 6225 pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6226 pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6227 6228 /* 6229 * check for pending overflow at the time the state 6230 * was saved. 6231 */ 6232 if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6233 /* 6234 * reload pmc0 with the overflow information 6235 * On McKinley PMU, this will trigger a PMU interrupt 6236 */ 6237 ia64_set_pmc(0, ctx->th_pmcs[0]); 6238 ia64_srlz_d(); 6239 6240 ctx->th_pmcs[0] = 0UL; 6241 6242 /* 6243 * will replay the PMU interrupt 6244 */ 6245 if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6246 6247 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6248 } 6249 6250 /* 6251 * establish new ownership. 6252 */ 6253 SET_PMU_OWNER(task, ctx); 6254 6255 /* 6256 * restore the psr.up bit. measurement 6257 * is active again. 6258 * no PMU interrupt can happen at this point 6259 * because we still have interrupts disabled. 6260 */ 6261 if (likely(psr_up)) pfm_set_psr_up(); 6262} 6263#endif /* CONFIG_SMP */ 6264 6265/* 6266 * this function assumes monitoring is stopped 6267 */ 6268static void 6269pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) 6270{ 6271 u64 pmc0; 6272 unsigned long mask2, val, pmd_val, ovfl_val; 6273 int i, can_access_pmu = 0; 6274 int is_self; 6275 6276 /* 6277 * is the caller the task being monitored (or which initiated the 6278 * session for system wide measurements) 6279 */ 6280 is_self = ctx->ctx_task == task ? 1 : 0; 6281 6282 /* 6283 * can access PMU is task is the owner of the PMU state on the current CPU 6284 * or if we are running on the CPU bound to the context in system-wide mode 6285 * (that is not necessarily the task the context is attached to in this mode). 6286 * In system-wide we always have can_access_pmu true because a task running on an 6287 * invalid processor is flagged earlier in the call stack (see pfm_stop). 6288 */ 6289 can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); 6290 if (can_access_pmu) { 6291 /* 6292 * Mark the PMU as not owned 6293 * This will cause the interrupt handler to do nothing in case an overflow 6294 * interrupt was in-flight 6295 * This also guarantees that pmc0 will contain the final state 6296 * It virtually gives us full control on overflow processing from that point 6297 * on. 
6298 */ 6299 SET_PMU_OWNER(NULL, NULL); 6300 DPRINT(("releasing ownership\n")); 6301 6302 /* 6303 * read current overflow status: 6304 * 6305 * we are guaranteed to read the final stable state 6306 */ 6307 ia64_srlz_d(); 6308 pmc0 = ia64_get_pmc(0); /* slow */ 6309 6310 /* 6311 * reset freeze bit, overflow status information destroyed 6312 */ 6313 pfm_unfreeze_pmu(); 6314 } else { 6315 pmc0 = ctx->th_pmcs[0]; 6316 /* 6317 * clear whatever overflow status bits there were 6318 */ 6319 ctx->th_pmcs[0] = 0; 6320 } 6321 ovfl_val = pmu_conf->ovfl_val; 6322 /* 6323 * we save all the used pmds 6324 * we take care of overflows for counting PMDs 6325 * 6326 * XXX: sampling situation is not taken into account here 6327 */ 6328 mask2 = ctx->ctx_used_pmds[0]; 6329 6330 DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); 6331 6332 for (i = 0; mask2; i++, mask2>>=1) { 6333 6334 /* skip non used pmds */ 6335 if ((mask2 & 0x1) == 0) continue; 6336 6337 /* 6338 * can access PMU always true in system wide mode 6339 */ 6340 val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; 6341 6342 if (PMD_IS_COUNTING(i)) { 6343 DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", 6344 task_pid_nr(task), 6345 i, 6346 ctx->ctx_pmds[i].val, 6347 val & ovfl_val)); 6348 6349 /* 6350 * we rebuild the full 64 bit value of the counter 6351 */ 6352 val = ctx->ctx_pmds[i].val + (val & ovfl_val); 6353 6354 /* 6355 * now everything is in ctx_pmds[] and we need 6356 * to clear the saved context from save_regs() such that 6357 * pfm_read_pmds() gets the correct value 6358 */ 6359 pmd_val = 0UL; 6360 6361 /* 6362 * take care of overflow inline 6363 */ 6364 if (pmc0 & (1UL << i)) { 6365 val += 1 + ovfl_val; 6366 DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i)); 6367 } 6368 } 6369 6370 DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val)); 6371 6372 if (is_self) ctx->th_pmds[i] = pmd_val; 6373 6374 ctx->ctx_pmds[i].val = val; 6375 } 6376} 6377 6378static struct irqaction perfmon_irqaction = { 6379 .handler = pfm_interrupt_handler, 6380 .name = "perfmon" 6381}; 6382 6383static void 6384pfm_alt_save_pmu_state(void *data) 6385{ 6386 struct pt_regs *regs; 6387 6388 regs = task_pt_regs(current); 6389 6390 DPRINT(("called\n")); 6391 6392 /* 6393 * should not be necessary but 6394 * let's take not risk 6395 */ 6396 pfm_clear_psr_up(); 6397 pfm_clear_psr_pp(); 6398 ia64_psr(regs)->pp = 0; 6399 6400 /* 6401 * This call is required 6402 * May cause a spurious interrupt on some processors 6403 */ 6404 pfm_freeze_pmu(); 6405 6406 ia64_srlz_d(); 6407} 6408 6409void 6410pfm_alt_restore_pmu_state(void *data) 6411{ 6412 struct pt_regs *regs; 6413 6414 regs = task_pt_regs(current); 6415 6416 DPRINT(("called\n")); 6417 6418 /* 6419 * put PMU back in state expected 6420 * by perfmon 6421 */ 6422 pfm_clear_psr_up(); 6423 pfm_clear_psr_pp(); 6424 ia64_psr(regs)->pp = 0; 6425 6426 /* 6427 * perfmon runs with PMU unfrozen at all times 6428 */ 6429 pfm_unfreeze_pmu(); 6430 6431 ia64_srlz_d(); 6432} 6433 6434int 6435pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6436{ 6437 int ret, i; 6438 int reserve_cpu; 6439 6440 /* some sanity checks */ 6441 if (hdl == NULL || hdl->handler == NULL) return -EINVAL; 6442 6443 /* do the easy test first */ 6444 if (pfm_alt_intr_handler) return -EBUSY; 6445 6446 /* one at a time in the install or remove, just fail the others */ 6447 if (!spin_trylock(&pfm_alt_install_check)) { 6448 return -EBUSY; 6449 } 6450 6451 /* reserve our 
int
pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
	int ret, i;
	int reserve_cpu;

	/* some sanity checks */
	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;

	/* do the easy test first */
	if (pfm_alt_intr_handler) return -EBUSY;

	/* one at a time in the install or remove, just fail the others */
	if (!spin_trylock(&pfm_alt_install_check)) {
		return -EBUSY;
	}

	/* reserve our session */
	for_each_online_cpu(reserve_cpu) {
		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
		if (ret) goto cleanup_reserve;
	}

	/* save the current system wide pmu states */
	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
	if (ret) {
		DPRINT(("on_each_cpu() failed: %d\n", ret));
		goto cleanup_reserve;
	}

	/* officially change to the alternate interrupt handler */
	pfm_alt_intr_handler = hdl;

	spin_unlock(&pfm_alt_install_check);

	return 0;

cleanup_reserve:
	for_each_online_cpu(i) {
		/* don't unreserve more than we reserved */
		if (i >= reserve_cpu) break;

		pfm_unreserve_session(NULL, 1, i);
	}

	spin_unlock(&pfm_alt_install_check);

	return ret;
}
EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);

int
pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
	int i;
	int ret;

	if (hdl == NULL) return -EINVAL;

	/* cannot remove someone else's handler! */
	if (pfm_alt_intr_handler != hdl) return -EINVAL;

	/* one at a time in the install or remove, just fail the others */
	if (!spin_trylock(&pfm_alt_install_check)) {
		return -EBUSY;
	}

	pfm_alt_intr_handler = NULL;

	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
	if (ret) {
		DPRINT(("on_each_cpu() failed: %d\n", ret));
	}

	for_each_online_cpu(i) {
		pfm_unreserve_session(NULL, 1, i);
	}

	spin_unlock(&pfm_alt_install_check);

	return 0;
}
EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);

/*
 * perfmon initialization routine, called from the initcall() table
 */
static int init_pfm_fs(void);

static int __init
pfm_probe_pmu(void)
{
	pmu_config_t **p;
	int family;

	family = local_cpu_data->family;
	p      = pmu_confs;

	while(*p) {
		if ((*p)->probe) {
			if ((*p)->probe() == 0) goto found;
		} else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
			goto found;
		}
		p++;
	}
	return -1;
found:
	pmu_conf = *p;
	return 0;
}

static const struct file_operations pfm_proc_fops = {
	.open		= pfm_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

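/*
 * pfm_init() probes for a matching PMU description, derives the bitmasks of
 * implemented PMCs/PMDs and the number of counting PMDs from the description
 * tables, sanity checks the debug-register and register-count limits, and
 * finally creates /proc/perfmon, the perfmon sysctl entries and the perfmon
 * filesystem.  (Summary of the code below.)
 */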
int __init
pfm_init(void)
{
	unsigned int n, n_counters, i;

	printk("perfmon: version %u.%u IRQ %u\n",
		PFM_VERSION_MAJ,
		PFM_VERSION_MIN,
		IA64_PERFMON_VECTOR);

	if (pfm_probe_pmu()) {
		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n",
		       local_cpu_data->family);
		return -ENODEV;
	}

	/*
	 * compute the number of implemented PMD/PMC from the
	 * description tables
	 */
	n = 0;
	for (i=0; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
		n++;
	}
	pmu_conf->num_pmcs = n;

	n = 0; n_counters = 0;
	for (i=0; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
		n++;
		if (PMD_IS_COUNTING(i)) n_counters++;
	}
	pmu_conf->num_pmds     = n;
	pmu_conf->num_counters = n_counters;

	/*
	 * sanity checks on the number of debug registers
	 */
	if (pmu_conf->use_rr_dbregs) {
		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
			pmu_conf = NULL;
			return -1;
		}
		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs);
			pmu_conf = NULL;
			return -1;
		}
	}

	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
	       pmu_conf->pmu_name,
	       pmu_conf->num_pmcs,
	       pmu_conf->num_pmds,
	       pmu_conf->num_counters,
	       ffz(pmu_conf->ovfl_val));

	/* sanity check */
	if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
		pmu_conf = NULL;
		return -1;
	}

	/*
	 * create /proc/perfmon (mostly for debugging purposes)
	 */
	perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
	if (perfmon_dir == NULL) {
		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
		pmu_conf = NULL;
		return -1;
	}

	/*
	 * create /proc/sys/kernel/perfmon (for debugging purposes)
	 */
	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root);

	/*
	 * initialize all our spinlocks
	 */
	spin_lock_init(&pfm_sessions.pfs_lock);
	spin_lock_init(&pfm_buffer_fmt_lock);

	init_pfm_fs();

	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;

	return 0;
}

__initcall(pfm_init);

/*
 * this function is called before pfm_init()
 */
void
pfm_init_percpu (void)
{
	static int first_time=1;
	/*
	 * make sure no measurement is active
	 * (may inherit programmed PMCs from EFI).
	 */
	pfm_clear_psr_pp();
	pfm_clear_psr_up();

	/*
	 * we run with the PMU not frozen at all times
	 */
	pfm_unfreeze_pmu();

	if (first_time) {
		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
		first_time=0;
	}

	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}

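/*
 * dump_pmu_state() prints the live PMC/PMD registers next to the copies
 * saved in the owning context (if any).  Note that, as a side effect, it
 * clears psr.up and psr.pp in the saved user-level pt_regs, and it prints
 * nothing when no monitoring appears to be active on this CPU.
 */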
/*
 * used for debug purposes only
 */
void
dump_pmu_state(const char *from)
{
	struct task_struct *task;
	struct pt_regs *regs;
	pfm_context_t *ctx;
	unsigned long psr, dcr, info, flags;
	int i, this_cpu;

	local_irq_save(flags);

	this_cpu = smp_processor_id();
	regs     = task_pt_regs(current);
	info     = PFM_CPUINFO_GET();
	dcr      = ia64_getreg(_IA64_REG_CR_DCR);

	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
		local_irq_restore(flags);
		return;
	}

	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
		this_cpu,
		from,
		task_pid_nr(current),
		regs->cr_iip,
		current->comm);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);

	psr = pfm_get_psr();

	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
		this_cpu,
		ia64_get_pmc(0),
		psr & IA64_PSR_PP ? 1 : 0,
		psr & IA64_PSR_UP ? 1 : 0,
		dcr & IA64_DCR_PP ? 1 : 0,
		info,
		ia64_psr(regs)->up,
		ia64_psr(regs)->pp);

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->pp = 0;

	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n",
			this_cpu, i, ia64_get_pmc(i), i, ctx ? ctx->th_pmcs[i] : 0UL);
	}

	for (i=1; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n",
			this_cpu, i, ia64_get_pmd(i), i, ctx ? ctx->th_pmds[i] : 0UL);
	}

	if (ctx) {
		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
			this_cpu,
			ctx->ctx_state,
			ctx->ctx_smpl_vaddr,
			ctx->ctx_smpl_hdr,
			ctx->ctx_msgq_head,
			ctx->ctx_msgq_tail,
			ctx->ctx_saved_psr_up);
	}
	local_irq_restore(flags);
}

/*
 * called from process.c:copy_thread(). task is new child.
 */
void
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
	struct thread_struct *thread;

	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));

	thread = &task->thread;

	/*
	 * cut links inherited from parent (current)
	 */
	thread->pfm_context = NULL;

	PFM_SET_WORK_PENDING(task, 0);

	/*
	 * the psr bits are already set properly in copy_thread()
	 */
}
#else  /* !CONFIG_PERFMON */
asmlinkage long
sys_perfmonctl (int fd, int cmd, void *arg, int count)
{
	return -ENOSYS;
}
#endif /* CONFIG_PERFMON */