This source file includes the following definitions:
- remote_function
- task_function_call
- cpu_function_call
- __get_cpu_context
- perf_ctx_lock
- perf_ctx_unlock
- is_kernel_event
- event_function
- event_function_call
- event_function_local
- update_perf_cpu_limits
- perf_proc_update_handler
- perf_cpu_time_max_percent_handler
- perf_duration_warn
- perf_sample_event_took
- perf_event_print_debug
- perf_pmu_name
- perf_clock
- perf_event_clock
- __perf_effective_state
- __perf_update_times
- perf_event_update_time
- perf_event_update_sibling_time
- perf_event_set_state
- perf_cgroup_match
- perf_detach_cgroup
- is_cgroup_event
- perf_cgroup_event_time
- __update_cgrp_time
- update_cgrp_time_from_cpuctx
- update_cgrp_time_from_event
- perf_cgroup_set_timestamp
- perf_cgroup_switch
- perf_cgroup_sched_out
- perf_cgroup_sched_in
- perf_cgroup_connect
- perf_cgroup_set_shadow_time
- list_update_cgroup_event
- perf_cgroup_match
- perf_detach_cgroup
- is_cgroup_event
- update_cgrp_time_from_event
- update_cgrp_time_from_cpuctx
- perf_cgroup_sched_out
- perf_cgroup_sched_in
- perf_cgroup_connect
- perf_cgroup_set_timestamp
- perf_cgroup_switch
- perf_cgroup_set_shadow_time
- perf_cgroup_event_time
- list_update_cgroup_event
- perf_mux_hrtimer_handler
- __perf_mux_hrtimer_init
- perf_mux_hrtimer_restart
- perf_pmu_disable
- perf_pmu_enable
- perf_event_ctx_activate
- perf_event_ctx_deactivate
- get_ctx
- free_ctx
- put_ctx
- perf_event_ctx_lock_nested
- perf_event_ctx_lock
- perf_event_ctx_unlock
- unclone_ctx
- perf_event_pid_type
- perf_event_pid
- perf_event_tid
- primary_event_id
- perf_lock_task_context
- perf_pin_task_context
- perf_unpin_context
- update_context_time
- perf_event_time
- get_event_type
- init_event_group
- get_event_groups
- perf_event_groups_init
- perf_event_groups_less
- perf_event_groups_insert
- add_event_to_groups
- perf_event_groups_delete
- del_event_from_groups
- perf_event_groups_first
- perf_event_groups_next
- list_add_event
- perf_event__state_init
- __perf_event_read_size
- __perf_event_header_size
- perf_event__header_size
- perf_event__id_header_size
- perf_event_validate_size
- perf_group_attach
- list_del_event
- perf_aux_output_match
- perf_put_aux_event
- perf_get_aux_event
- perf_group_detach
- is_orphaned_event
- __pmu_filter_match
- pmu_filter_match
- event_filter_match
- event_sched_out
- group_sched_out
- __perf_remove_from_context
- perf_remove_from_context
- __perf_event_disable
- _perf_event_disable
- perf_event_disable_local
- perf_event_disable
- perf_event_disable_inatomic
- perf_set_shadow_time
- event_sched_in
- group_sched_in
- group_can_go_on
- add_event_to_ctx
- task_ctx_sched_out
- perf_event_sched_in
- ctx_resched
- perf_pmu_resched
- __perf_install_in_context
- perf_install_in_context
- __perf_event_enable
- _perf_event_enable
- perf_event_enable
- __perf_event_stop
- perf_event_stop
- perf_event_addr_filters_sync
- _perf_event_refresh
- perf_event_refresh
- perf_event_modify_breakpoint
- perf_event_modify_attr
- ctx_sched_out
- context_equiv
- __perf_event_sync_stat
- perf_event_sync_stat
- perf_event_context_sched_out
- perf_sched_cb_dec
- perf_sched_cb_inc
- perf_pmu_sched_task
- __perf_event_task_sched_out
- cpu_ctx_sched_out
- visit_groups_merge
- pinned_sched_in
- flexible_sched_in
- ctx_pinned_sched_in
- ctx_flexible_sched_in
- ctx_sched_in
- cpu_ctx_sched_in
- perf_event_context_sched_in
- __perf_event_task_sched_in
- perf_calculate_period
- perf_adjust_period
- perf_adjust_freq_unthr_context
- rotate_ctx
- ctx_event_to_rotate
- perf_rotate_context
- perf_event_task_tick
- event_enable_on_exec
- perf_event_enable_on_exec
- __perf_event_read_cpu
- __perf_event_read
- perf_event_count
- perf_event_read_local
- perf_event_read
- __perf_event_init_context
- alloc_perf_context
- find_lively_task_by_vpid
- find_get_context
- free_event_rcu
- detach_sb_event
- is_sb_event
- unaccount_pmu_sb_event
- unaccount_event_cpu
- unaccount_freq_event_nohz
- unaccount_freq_event
- unaccount_event
- perf_sched_delayed
- exclusive_event_init
- exclusive_event_destroy
- exclusive_event_match
- exclusive_event_installable
- _free_event
- free_event
- perf_remove_from_owner
- put_event
- perf_event_release_kernel
- perf_release
- __perf_event_read_value
- perf_event_read_value
- __perf_read_group_add
- perf_read_group
- perf_read_one
- is_event_hup
- __perf_read
- perf_read
- perf_poll
- _perf_event_reset
- perf_event_for_each_child
- perf_event_for_each
- __perf_event_period
- perf_event_check_period
- perf_event_period
- perf_fget_light
- _perf_ioctl
- perf_ioctl
- perf_compat_ioctl
- perf_event_task_enable
- perf_event_task_disable
- perf_event_index
- calc_timer_values
- perf_event_init_userpage
- arch_perf_update_userpage
- perf_event_update_userpage
- perf_mmap_fault
- ring_buffer_attach
- ring_buffer_wakeup
- ring_buffer_get
- ring_buffer_put
- perf_mmap_open
- perf_mmap_close
- perf_mmap
- perf_fasync
- perf_event_fasync
- perf_event_wakeup
- perf_pending_event_disable
- perf_pending_event
- perf_register_guest_info_callbacks
- perf_unregister_guest_info_callbacks
- perf_output_sample_regs
- perf_sample_regs_user
- perf_sample_regs_intr
- perf_ustack_task_size
- perf_sample_ustack_size
- perf_output_sample_ustack
- __perf_event_header__init_id
- perf_event_header__init_id
- __perf_event__output_id_sample
- perf_event__output_id_sample
- perf_output_read_one
- perf_output_read_group
- perf_output_read
- perf_output_sample
- perf_virt_to_phys
- perf_callchain
- perf_prepare_sample
- __perf_event_output
- perf_event_output_forward
- perf_event_output_backward
- perf_event_output
- perf_event_read_event
- perf_iterate_ctx
- perf_iterate_sb_cpu
- perf_iterate_sb
- perf_event_addr_filters_exec
- perf_event_exec
- __perf_event_output_stop
- __perf_pmu_output_stop
- perf_pmu_output_stop
- perf_event_task_match
- perf_event_task_output
- perf_event_task
- perf_event_fork
- perf_event_comm_match
- perf_event_comm_output
- perf_event_comm_event
- perf_event_comm
- perf_event_namespaces_match
- perf_event_namespaces_output
- perf_fill_ns_link_info
- perf_event_namespaces
- perf_event_mmap_match
- perf_event_mmap_output
- perf_event_mmap_event
- perf_addr_filter_match
- perf_addr_filter_vma_adjust
- __perf_addr_filters_adjust
- perf_addr_filters_adjust
- perf_event_mmap
- perf_event_aux_event
- perf_log_lost_samples
- perf_event_switch_match
- perf_event_switch_output
- perf_event_switch
- perf_log_throttle
- perf_event_ksymbol_match
- perf_event_ksymbol_output
- perf_event_ksymbol
- perf_event_bpf_match
- perf_event_bpf_output
- perf_event_bpf_emit_ksymbols
- perf_event_bpf_event
- perf_event_itrace_started
- perf_log_itrace_start
- __perf_event_account_interrupt
- perf_event_account_interrupt
- __perf_event_overflow
- perf_event_overflow
- perf_swevent_set_period
- perf_swevent_overflow
- perf_swevent_event
- perf_exclude_event
- perf_swevent_match
- swevent_hash
- __find_swevent_head
- find_swevent_head_rcu
- find_swevent_head
- do_perf_sw_event
- perf_swevent_get_recursion_context
- perf_swevent_put_recursion_context
- ___perf_sw_event
- __perf_sw_event
- perf_swevent_read
- perf_swevent_add
- perf_swevent_del
- perf_swevent_start
- perf_swevent_stop
- swevent_hlist_deref
- swevent_hlist_release
- swevent_hlist_put_cpu
- swevent_hlist_put
- swevent_hlist_get_cpu
- swevent_hlist_get
- sw_perf_event_destroy
- perf_swevent_init
- perf_tp_filter_match
- perf_tp_event_match
- perf_trace_run_bpf_submit
- perf_tp_event
- tp_perf_event_destroy
- perf_tp_event_init
- perf_kprobe_event_init
- perf_uprobe_event_init
- perf_tp_register
- perf_event_free_filter
- bpf_overflow_handler
- perf_event_set_bpf_handler
- perf_event_free_bpf_handler
- perf_event_set_bpf_handler
- perf_event_free_bpf_handler
- perf_event_is_tracing
- perf_event_set_bpf_prog
- perf_event_free_bpf_prog
- perf_tp_register
- perf_event_free_filter
- perf_event_set_bpf_prog
- perf_event_free_bpf_prog
- perf_bp_event
- perf_addr_filter_new
- free_filters_list
- perf_addr_filters_splice
- perf_addr_filter_apply
- perf_event_addr_filters_apply
- perf_event_parse_addr_filter
- perf_event_set_addr_filter
- perf_event_set_filter
- perf_swevent_hrtimer
- perf_swevent_start_hrtimer
- perf_swevent_cancel_hrtimer
- perf_swevent_init_hrtimer
- cpu_clock_event_update
- cpu_clock_event_start
- cpu_clock_event_stop
- cpu_clock_event_add
- cpu_clock_event_del
- cpu_clock_event_read
- cpu_clock_event_init
- task_clock_event_update
- task_clock_event_start
- task_clock_event_stop
- task_clock_event_add
- task_clock_event_del
- task_clock_event_read
- task_clock_event_init
- perf_pmu_nop_void
- perf_pmu_nop_txn
- perf_pmu_nop_int
- perf_event_nop_int
- perf_pmu_start_txn
- perf_pmu_commit_txn
- perf_pmu_cancel_txn
- perf_event_idx_default
- find_pmu_context
- free_pmu_context
- nr_addr_filters_show
- type_show
- perf_event_mux_interval_ms_show
- perf_event_mux_interval_ms_store
- pmu_dev_release
- pmu_dev_alloc
- perf_pmu_register
- perf_pmu_unregister
- has_extended_regs
- perf_try_init_event
- perf_init_event
- attach_sb_event
- account_pmu_sb_event
- account_event_cpu
- account_freq_event_nohz
- account_freq_event
- account_event
- perf_event_alloc
- perf_copy_attr
- perf_event_set_output
- mutex_lock_double
- perf_event_set_clock
- __perf_event_ctx_lock_double
- SYSCALL_DEFINE5
- perf_event_create_kernel_counter
- perf_pmu_migrate_context
- sync_child_event
- perf_event_exit_event
- perf_event_exit_task_context
- perf_event_exit_task
- perf_free_event
- perf_event_free_task
- perf_event_delayed_put
- perf_event_get
- perf_get_event
- perf_event_attrs
- inherit_event
- inherit_group
- inherit_task_group
- perf_event_init_context
- perf_event_init_task
- perf_event_init_all_cpus
- perf_swevent_init_cpu
- __perf_event_exit_context
- perf_event_exit_cpu_context
- perf_event_exit_cpu_context
- perf_event_init_cpu
- perf_event_exit_cpu
- perf_reboot
- perf_event_init
- perf_event_sysfs_show
- perf_event_sysfs_init
- perf_cgroup_css_alloc
- perf_cgroup_css_free
- __perf_cgroup_move
- perf_cgroup_attach
1
2
3
4
5
6
7
8
9
10
11 #include <linux/fs.h>
12 #include <linux/mm.h>
13 #include <linux/cpu.h>
14 #include <linux/smp.h>
15 #include <linux/idr.h>
16 #include <linux/file.h>
17 #include <linux/poll.h>
18 #include <linux/slab.h>
19 #include <linux/hash.h>
20 #include <linux/tick.h>
21 #include <linux/sysfs.h>
22 #include <linux/dcache.h>
23 #include <linux/percpu.h>
24 #include <linux/ptrace.h>
25 #include <linux/reboot.h>
26 #include <linux/vmstat.h>
27 #include <linux/device.h>
28 #include <linux/export.h>
29 #include <linux/vmalloc.h>
30 #include <linux/hardirq.h>
31 #include <linux/rculist.h>
32 #include <linux/uaccess.h>
33 #include <linux/syscalls.h>
34 #include <linux/anon_inodes.h>
35 #include <linux/kernel_stat.h>
36 #include <linux/cgroup.h>
37 #include <linux/perf_event.h>
38 #include <linux/trace_events.h>
39 #include <linux/hw_breakpoint.h>
40 #include <linux/mm_types.h>
41 #include <linux/module.h>
42 #include <linux/mman.h>
43 #include <linux/compat.h>
44 #include <linux/bpf.h>
45 #include <linux/filter.h>
46 #include <linux/namei.h>
47 #include <linux/parser.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/mm.h>
50 #include <linux/proc_ns.h>
51 #include <linux/mount.h>
52
53 #include "internal.h"
54
55 #include <asm/irq_regs.h>
56
57 typedef int (*remote_function_f)(void *);
58
59 struct remote_function_call {
60 struct task_struct *p;
61 remote_function_f func;
62 void *info;
63 int ret;
64 };
65
66 static void remote_function(void *data)
67 {
68 struct remote_function_call *tfc = data;
69 struct task_struct *p = tfc->p;
70
71 if (p) {
72
73 if (task_cpu(p) != smp_processor_id())
74 return;
75
76
77
78
79
80
81 tfc->ret = -ESRCH;
82 if (p != current)
83 return;
84 }
85
86 tfc->ret = tfc->func(tfc->info);
87 }
88
89
90
91
92
93
94
95
96
97
98
99
100
101
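/*
 * Editor's note (not part of the original file): task_function_call()
 * below runs @func on the CPU where @p runs, via
 * smp_call_function_single(). remote_function() re-checks on that CPU
 * that @p is still the current task there; if @p has moved on, the call
 * reports -EAGAIN and this wrapper retries (with cond_resched() between
 * attempts). The result is @func's return value, or -ESRCH if @p is
 * found on the CPU but is not actually running.
 */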
102 static int
103 task_function_call(struct task_struct *p, remote_function_f func, void *info)
104 {
105 struct remote_function_call data = {
106 .p = p,
107 .func = func,
108 .info = info,
109 .ret = -EAGAIN,
110 };
111 int ret;
112
113 for (;;) {
114 ret = smp_call_function_single(task_cpu(p), remote_function,
115 &data, 1);
116 ret = !ret ? data.ret : -EAGAIN;
117
118 if (ret != -EAGAIN)
119 break;
120
121 cond_resched();
122 }
123
124 return ret;
125 }
126
127
128
129
130
131
132
133
134
135
136 static int cpu_function_call(int cpu, remote_function_f func, void *info)
137 {
138 struct remote_function_call data = {
139 .p = NULL,
140 .func = func,
141 .info = info,
142 .ret = -ENXIO,
143 };
144
145 smp_call_function_single(cpu, remote_function, &data, 1);
146
147 return data.ret;
148 }
149
150 static inline struct perf_cpu_context *
151 __get_cpu_context(struct perf_event_context *ctx)
152 {
153 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
154 }
155
156 static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
157 struct perf_event_context *ctx)
158 {
159 raw_spin_lock(&cpuctx->ctx.lock);
160 if (ctx)
161 raw_spin_lock(&ctx->lock);
162 }
163
164 static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
165 struct perf_event_context *ctx)
166 {
167 if (ctx)
168 raw_spin_unlock(&ctx->lock);
169 raw_spin_unlock(&cpuctx->ctx.lock);
170 }
171
172 #define TASK_TOMBSTONE ((void *)-1L)
173
174 static bool is_kernel_event(struct perf_event *event)
175 {
176 return READ_ONCE(event->owner) == TASK_TOMBSTONE;
177 }
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
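/*
 * Editor's note (not part of the original file): event_function() is the
 * cross-CPU callback used to change an event's state. It runs with IRQs
 * disabled on the event's CPU, takes the cpuctx lock (and the task
 * context lock, if any), and returns -ESRCH if the target task context
 * is no longer current on this CPU. event_function_call() below picks
 * the delivery mechanism: cpu_function_call() for CPU events,
 * task_function_call() for task events, or a plain locked call once the
 * context is no longer active.
 */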
198 typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
199 struct perf_event_context *, void *);
200
201 struct event_function_struct {
202 struct perf_event *event;
203 event_f func;
204 void *data;
205 };
206
207 static int event_function(void *info)
208 {
209 struct event_function_struct *efs = info;
210 struct perf_event *event = efs->event;
211 struct perf_event_context *ctx = event->ctx;
212 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
213 struct perf_event_context *task_ctx = cpuctx->task_ctx;
214 int ret = 0;
215
216 lockdep_assert_irqs_disabled();
217
218 perf_ctx_lock(cpuctx, task_ctx);
219
220
221
222
223 if (ctx->task) {
224 if (ctx->task != current) {
225 ret = -ESRCH;
226 goto unlock;
227 }
228
229
230
231
232
233
234
235
236 WARN_ON_ONCE(!ctx->is_active);
237
238
239
240
241 WARN_ON_ONCE(task_ctx != ctx);
242 } else {
243 WARN_ON_ONCE(&cpuctx->ctx != ctx);
244 }
245
246 efs->func(event, cpuctx, ctx, efs->data);
247 unlock:
248 perf_ctx_unlock(cpuctx, task_ctx);
249
250 return ret;
251 }
252
253 static void event_function_call(struct perf_event *event, event_f func, void *data)
254 {
255 struct perf_event_context *ctx = event->ctx;
256 struct task_struct *task = READ_ONCE(ctx->task);
257 struct event_function_struct efs = {
258 .event = event,
259 .func = func,
260 .data = data,
261 };
262
263 if (!event->parent) {
264
265
266
267
268
269 lockdep_assert_held(&ctx->mutex);
270 }
271
272 if (!task) {
273 cpu_function_call(event->cpu, event_function, &efs);
274 return;
275 }
276
277 if (task == TASK_TOMBSTONE)
278 return;
279
280 again:
281 if (!task_function_call(task, event_function, &efs))
282 return;
283
284 raw_spin_lock_irq(&ctx->lock);
285
286
287
288
289 task = ctx->task;
290 if (task == TASK_TOMBSTONE) {
291 raw_spin_unlock_irq(&ctx->lock);
292 return;
293 }
294 if (ctx->is_active) {
295 raw_spin_unlock_irq(&ctx->lock);
296 goto again;
297 }
298 func(event, NULL, ctx, data);
299 raw_spin_unlock_irq(&ctx->lock);
300 }
301
302
303
304
305
306 static void event_function_local(struct perf_event *event, event_f func, void *data)
307 {
308 struct perf_event_context *ctx = event->ctx;
309 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
310 struct task_struct *task = READ_ONCE(ctx->task);
311 struct perf_event_context *task_ctx = NULL;
312
313 lockdep_assert_irqs_disabled();
314
315 if (task) {
316 if (task == TASK_TOMBSTONE)
317 return;
318
319 task_ctx = ctx;
320 }
321
322 perf_ctx_lock(cpuctx, task_ctx);
323
324 task = ctx->task;
325 if (task == TASK_TOMBSTONE)
326 goto unlock;
327
328 if (task) {
329
330
331
332
333
334 if (ctx->is_active) {
335 if (WARN_ON_ONCE(task != current))
336 goto unlock;
337
338 if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
339 goto unlock;
340 }
341 } else {
342 WARN_ON_ONCE(&cpuctx->ctx != ctx);
343 }
344
345 func(event, cpuctx, ctx, data);
346 unlock:
347 perf_ctx_unlock(cpuctx, task_ctx);
348 }
349
350 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
351 PERF_FLAG_FD_OUTPUT |\
352 PERF_FLAG_PID_CGROUP |\
353 PERF_FLAG_FD_CLOEXEC)
354
355
356
357
358 #define PERF_SAMPLE_BRANCH_PERM_PLM \
359 (PERF_SAMPLE_BRANCH_KERNEL |\
360 PERF_SAMPLE_BRANCH_HV)
361
362 enum event_type_t {
363 EVENT_FLEXIBLE = 0x1,
364 EVENT_PINNED = 0x2,
365 EVENT_TIME = 0x4,
366
367 EVENT_CPU = 0x8,
368 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
369 };
370
371
372
373
374
375
376 static void perf_sched_delayed(struct work_struct *work);
377 DEFINE_STATIC_KEY_FALSE(perf_sched_events);
378 static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
379 static DEFINE_MUTEX(perf_sched_mutex);
380 static atomic_t perf_sched_count;
381
382 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
383 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
384 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
385
386 static atomic_t nr_mmap_events __read_mostly;
387 static atomic_t nr_comm_events __read_mostly;
388 static atomic_t nr_namespaces_events __read_mostly;
389 static atomic_t nr_task_events __read_mostly;
390 static atomic_t nr_freq_events __read_mostly;
391 static atomic_t nr_switch_events __read_mostly;
392 static atomic_t nr_ksymbol_events __read_mostly;
393 static atomic_t nr_bpf_events __read_mostly;
394
395 static LIST_HEAD(pmus);
396 static DEFINE_MUTEX(pmus_lock);
397 static struct srcu_struct pmus_srcu;
398 static cpumask_var_t perf_online_mask;
399
400
401
402
403
404
405
406
407 int sysctl_perf_event_paranoid __read_mostly = 2;
408
409
410 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
411
412
413
414
415 #define DEFAULT_MAX_SAMPLE_RATE 100000
416 #define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
417 #define DEFAULT_CPU_TIME_MAX_PERCENT 25
418
419 int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
420
421 static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
422 static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
423
424 static int perf_sample_allowed_ns __read_mostly =
425 DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
426
427 static void update_perf_cpu_limits(void)
428 {
429 u64 tmp = perf_sample_period_ns;
430
431 tmp *= sysctl_perf_cpu_time_max_percent;
432 tmp = div_u64(tmp, 100);
433 if (!tmp)
434 tmp = 1;
435
436 WRITE_ONCE(perf_sample_allowed_ns, tmp);
437 }
438
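/*
 * Editor's illustration (not part of the original file): a minimal
 * user-space sketch of the arithmetic in update_perf_cpu_limits() and
 * perf_proc_update_handler() above. From kernel.perf_event_max_sample_rate
 * and kernel.perf_cpu_time_max_percent the kernel derives the per-sample
 * time budget perf_sample_allowed_ns. The constants mirror the defines
 * above; everything else is illustrative only.
 */
#if 0	/* standalone example, not kernel code */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint64_t sample_rate = 100000;	/* DEFAULT_MAX_SAMPLE_RATE */
	uint64_t max_percent = 25;	/* DEFAULT_CPU_TIME_MAX_PERCENT */

	uint64_t period_ns  = NSEC_PER_SEC / sample_rate;
	uint64_t allowed_ns = period_ns * max_percent / 100;

	if (!allowed_ns)
		allowed_ns = 1;

	printf("sample period: %llu ns, allowed handler time: %llu ns\n",
	       (unsigned long long)period_ns,
	       (unsigned long long)allowed_ns);
	return 0;
}
#endif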
439 static bool perf_rotate_context(struct perf_cpu_context *cpuctx);
440
441 int perf_proc_update_handler(struct ctl_table *table, int write,
442 void __user *buffer, size_t *lenp,
443 loff_t *ppos)
444 {
445 int ret;
446 int perf_cpu = sysctl_perf_cpu_time_max_percent;
447
448
449
450 if (write && (perf_cpu == 100 || perf_cpu == 0))
451 return -EINVAL;
452
453 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
454 if (ret || !write)
455 return ret;
456
457 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
458 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
459 update_perf_cpu_limits();
460
461 return 0;
462 }
463
464 int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
465
466 int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
467 void __user *buffer, size_t *lenp,
468 loff_t *ppos)
469 {
470 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
471
472 if (ret || !write)
473 return ret;
474
475 if (sysctl_perf_cpu_time_max_percent == 100 ||
476 sysctl_perf_cpu_time_max_percent == 0) {
477 printk(KERN_WARNING
478 "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
479 WRITE_ONCE(perf_sample_allowed_ns, 0);
480 } else {
481 update_perf_cpu_limits();
482 }
483
484 return 0;
485 }
486
487
488
489
490
491
492
493 #define NR_ACCUMULATED_SAMPLES 128
494 static DEFINE_PER_CPU(u64, running_sample_length);
495
496 static u64 __report_avg;
497 static u64 __report_allowed;
498
499 static void perf_duration_warn(struct irq_work *w)
500 {
501 printk_ratelimited(KERN_INFO
502 "perf: interrupt took too long (%lld > %lld), lowering "
503 "kernel.perf_event_max_sample_rate to %d\n",
504 __report_avg, __report_allowed,
505 sysctl_perf_event_sample_rate);
506 }
507
508 static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
509
510 void perf_sample_event_took(u64 sample_len_ns)
511 {
512 u64 max_len = READ_ONCE(perf_sample_allowed_ns);
513 u64 running_len;
514 u64 avg_len;
515 u32 max;
516
517 if (max_len == 0)
518 return;
519
520
521 running_len = __this_cpu_read(running_sample_length);
522 running_len -= running_len/NR_ACCUMULATED_SAMPLES;
523 running_len += sample_len_ns;
524 __this_cpu_write(running_sample_length, running_len);
525
526
527
528
529
530
531 avg_len = running_len/NR_ACCUMULATED_SAMPLES;
532 if (avg_len <= max_len)
533 return;
534
535 __report_avg = avg_len;
536 __report_allowed = max_len;
537
538
539
540
541 avg_len += avg_len / 4;
542 max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
543 if (avg_len < max)
544 max /= (u32)avg_len;
545 else
546 max = 1;
547
548 WRITE_ONCE(perf_sample_allowed_ns, avg_len);
549 WRITE_ONCE(max_samples_per_tick, max);
550
551 sysctl_perf_event_sample_rate = max * HZ;
552 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
553
554 if (!irq_work_queue(&perf_duration_work)) {
555 early_printk("perf: interrupt took too long (%lld > %lld), lowering "
556 "kernel.perf_event_max_sample_rate to %d\n",
557 __report_avg, __report_allowed,
558 sysctl_perf_event_sample_rate);
559 }
560 }
561
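/*
 * Editor's illustration (not part of the original file): the running
 * average in perf_sample_event_took() above keeps a decaying sum rather
 * than a true window. Each new sample replaces 1/128th of the
 * accumulated value, so running_len converges to roughly
 * NR_ACCUMULATED_SAMPLES * avg_sample_len. A standalone sketch:
 */
#if 0	/* standalone example, not kernel code */
#include <stdio.h>
#include <stdint.h>

#define NR_ACCUMULATED_SAMPLES 128

int main(void)
{
	uint64_t running_len = 0;
	uint64_t sample_len_ns = 4000;	/* pretend every sample takes 4us */

	for (int i = 0; i < 2000; i++) {
		running_len -= running_len / NR_ACCUMULATED_SAMPLES;
		running_len += sample_len_ns;
	}

	/* avg_len as computed by the kernel: running_len / 128 */
	printf("estimated avg: %llu ns (true avg: %llu ns)\n",
	       (unsigned long long)(running_len / NR_ACCUMULATED_SAMPLES),
	       (unsigned long long)sample_len_ns);
	return 0;
}
#endif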
562 static atomic64_t perf_event_id;
563
564 static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
565 enum event_type_t event_type);
566
567 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
568 enum event_type_t event_type,
569 struct task_struct *task);
570
571 static void update_context_time(struct perf_event_context *ctx);
572 static u64 perf_event_time(struct perf_event *event);
573
574 void __weak perf_event_print_debug(void) { }
575
576 extern __weak const char *perf_pmu_name(void)
577 {
578 return "pmu";
579 }
580
581 static inline u64 perf_clock(void)
582 {
583 return local_clock();
584 }
585
586 static inline u64 perf_event_clock(struct perf_event *event)
587 {
588 return event->clock();
589 }
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
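/*
 * Editor's note (not part of the original file): the helpers below
 * implement state-based time accounting. event->tstamp records the time
 * of the last state change; total_time_enabled accrues while the
 * effective state is at least INACTIVE, and total_time_running accrues
 * only while it is ACTIVE. A sibling's effective state is capped by its
 * group leader's state, so disabling a leader also stops its siblings'
 * clocks.
 */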
613 static __always_inline enum perf_event_state
614 __perf_effective_state(struct perf_event *event)
615 {
616 struct perf_event *leader = event->group_leader;
617
618 if (leader->state <= PERF_EVENT_STATE_OFF)
619 return leader->state;
620
621 return event->state;
622 }
623
624 static __always_inline void
625 __perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
626 {
627 enum perf_event_state state = __perf_effective_state(event);
628 u64 delta = now - event->tstamp;
629
630 *enabled = event->total_time_enabled;
631 if (state >= PERF_EVENT_STATE_INACTIVE)
632 *enabled += delta;
633
634 *running = event->total_time_running;
635 if (state >= PERF_EVENT_STATE_ACTIVE)
636 *running += delta;
637 }
638
639 static void perf_event_update_time(struct perf_event *event)
640 {
641 u64 now = perf_event_time(event);
642
643 __perf_update_times(event, now, &event->total_time_enabled,
644 &event->total_time_running);
645 event->tstamp = now;
646 }
647
648 static void perf_event_update_sibling_time(struct perf_event *leader)
649 {
650 struct perf_event *sibling;
651
652 for_each_sibling_event(sibling, leader)
653 perf_event_update_time(sibling);
654 }
655
656 static void
657 perf_event_set_state(struct perf_event *event, enum perf_event_state state)
658 {
659 if (event->state == state)
660 return;
661
662 perf_event_update_time(event);
663
664
665
666
667 if ((event->state < 0) ^ (state < 0))
668 perf_event_update_sibling_time(event);
669
670 WRITE_ONCE(event->state, state);
671 }
672
673 #ifdef CONFIG_CGROUP_PERF
674
675 static inline bool
676 perf_cgroup_match(struct perf_event *event)
677 {
678 struct perf_event_context *ctx = event->ctx;
679 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
680
681
682 if (!event->cgrp)
683 return true;
684
685
686 if (!cpuctx->cgrp)
687 return false;
688
689
690
691
692
693
694
695 return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
696 event->cgrp->css.cgroup);
697 }
698
699 static inline void perf_detach_cgroup(struct perf_event *event)
700 {
701 css_put(&event->cgrp->css);
702 event->cgrp = NULL;
703 }
704
705 static inline int is_cgroup_event(struct perf_event *event)
706 {
707 return event->cgrp != NULL;
708 }
709
710 static inline u64 perf_cgroup_event_time(struct perf_event *event)
711 {
712 struct perf_cgroup_info *t;
713
714 t = per_cpu_ptr(event->cgrp->info, event->cpu);
715 return t->time;
716 }
717
718 static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
719 {
720 struct perf_cgroup_info *info;
721 u64 now;
722
723 now = perf_clock();
724
725 info = this_cpu_ptr(cgrp->info);
726
727 info->time += now - info->timestamp;
728 info->timestamp = now;
729 }
730
731 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
732 {
733 struct perf_cgroup *cgrp = cpuctx->cgrp;
734 struct cgroup_subsys_state *css;
735
736 if (cgrp) {
737 for (css = &cgrp->css; css; css = css->parent) {
738 cgrp = container_of(css, struct perf_cgroup, css);
739 __update_cgrp_time(cgrp);
740 }
741 }
742 }
743
744 static inline void update_cgrp_time_from_event(struct perf_event *event)
745 {
746 struct perf_cgroup *cgrp;
747
748
749
750
751
752 if (!is_cgroup_event(event))
753 return;
754
755 cgrp = perf_cgroup_from_task(current, event->ctx);
756
757
758
759 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
760 __update_cgrp_time(event->cgrp);
761 }
762
763 static inline void
764 perf_cgroup_set_timestamp(struct task_struct *task,
765 struct perf_event_context *ctx)
766 {
767 struct perf_cgroup *cgrp;
768 struct perf_cgroup_info *info;
769 struct cgroup_subsys_state *css;
770
771
772
773
774
775
776 if (!task || !ctx->nr_cgroups)
777 return;
778
779 cgrp = perf_cgroup_from_task(task, ctx);
780
781 for (css = &cgrp->css; css; css = css->parent) {
782 cgrp = container_of(css, struct perf_cgroup, css);
783 info = this_cpu_ptr(cgrp->info);
784 info->timestamp = ctx->timestamp;
785 }
786 }
787
788 static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
789
790 #define PERF_CGROUP_SWOUT 0x1
791 #define PERF_CGROUP_SWIN 0x2
792
793
794
795
796
797
798
799 static void perf_cgroup_switch(struct task_struct *task, int mode)
800 {
801 struct perf_cpu_context *cpuctx;
802 struct list_head *list;
803 unsigned long flags;
804
805
806
807
808
809 local_irq_save(flags);
810
811 list = this_cpu_ptr(&cgrp_cpuctx_list);
812 list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
813 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
814
815 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
816 perf_pmu_disable(cpuctx->ctx.pmu);
817
818 if (mode & PERF_CGROUP_SWOUT) {
819 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
820
821
822
823
824 cpuctx->cgrp = NULL;
825 }
826
827 if (mode & PERF_CGROUP_SWIN) {
828 WARN_ON_ONCE(cpuctx->cgrp);
829
830
831
832
833
834
835
836 cpuctx->cgrp = perf_cgroup_from_task(task,
837 &cpuctx->ctx);
838 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
839 }
840 perf_pmu_enable(cpuctx->ctx.pmu);
841 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
842 }
843
844 local_irq_restore(flags);
845 }
846
847 static inline void perf_cgroup_sched_out(struct task_struct *task,
848 struct task_struct *next)
849 {
850 struct perf_cgroup *cgrp1;
851 struct perf_cgroup *cgrp2 = NULL;
852
853 rcu_read_lock();
854
855
856
857
858
859 cgrp1 = perf_cgroup_from_task(task, NULL);
860 cgrp2 = perf_cgroup_from_task(next, NULL);
861
862
863
864
865
866
867 if (cgrp1 != cgrp2)
868 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
869
870 rcu_read_unlock();
871 }
872
873 static inline void perf_cgroup_sched_in(struct task_struct *prev,
874 struct task_struct *task)
875 {
876 struct perf_cgroup *cgrp1;
877 struct perf_cgroup *cgrp2 = NULL;
878
879 rcu_read_lock();
880
881
882
883
884
885 cgrp1 = perf_cgroup_from_task(task, NULL);
886 cgrp2 = perf_cgroup_from_task(prev, NULL);
887
888
889
890
891
892
893 if (cgrp1 != cgrp2)
894 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
895
896 rcu_read_unlock();
897 }
898
899 static inline int perf_cgroup_connect(int fd, struct perf_event *event,
900 struct perf_event_attr *attr,
901 struct perf_event *group_leader)
902 {
903 struct perf_cgroup *cgrp;
904 struct cgroup_subsys_state *css;
905 struct fd f = fdget(fd);
906 int ret = 0;
907
908 if (!f.file)
909 return -EBADF;
910
911 css = css_tryget_online_from_dir(f.file->f_path.dentry,
912 &perf_event_cgrp_subsys);
913 if (IS_ERR(css)) {
914 ret = PTR_ERR(css);
915 goto out;
916 }
917
918 cgrp = container_of(css, struct perf_cgroup, css);
919 event->cgrp = cgrp;
920
921
922
923
924
925
926 if (group_leader && group_leader->cgrp != cgrp) {
927 perf_detach_cgroup(event);
928 ret = -EINVAL;
929 }
930 out:
931 fdput(f);
932 return ret;
933 }
934
935 static inline void
936 perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
937 {
938 struct perf_cgroup_info *t;
939 t = per_cpu_ptr(event->cgrp->info, event->cpu);
940 event->shadow_ctx_time = now - t->timestamp;
941 }
942
943
944
945
946
947 static inline void
948 list_update_cgroup_event(struct perf_event *event,
949 struct perf_event_context *ctx, bool add)
950 {
951 struct perf_cpu_context *cpuctx;
952 struct list_head *cpuctx_entry;
953
954 if (!is_cgroup_event(event))
955 return;
956
957
958
959
960
961 cpuctx = __get_cpu_context(ctx);
962
963
964
965
966
967
968
969 if (add && !cpuctx->cgrp) {
970 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
971
972 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
973 cpuctx->cgrp = cgrp;
974 }
975
976 if (add && ctx->nr_cgroups++)
977 return;
978 else if (!add && --ctx->nr_cgroups)
979 return;
980
981
982 if (!add)
983 cpuctx->cgrp = NULL;
984
985 cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
986 if (add)
987 list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list));
988 else
989 list_del(cpuctx_entry);
990 }
991
992 #else
993
994 static inline bool
995 perf_cgroup_match(struct perf_event *event)
996 {
997 return true;
998 }
999
1000 static inline void perf_detach_cgroup(struct perf_event *event)
1001 {}
1002
1003 static inline int is_cgroup_event(struct perf_event *event)
1004 {
1005 return 0;
1006 }
1007
1008 static inline void update_cgrp_time_from_event(struct perf_event *event)
1009 {
1010 }
1011
1012 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
1013 {
1014 }
1015
1016 static inline void perf_cgroup_sched_out(struct task_struct *task,
1017 struct task_struct *next)
1018 {
1019 }
1020
1021 static inline void perf_cgroup_sched_in(struct task_struct *prev,
1022 struct task_struct *task)
1023 {
1024 }
1025
1026 static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
1027 struct perf_event_attr *attr,
1028 struct perf_event *group_leader)
1029 {
1030 return -EINVAL;
1031 }
1032
1033 static inline void
1034 perf_cgroup_set_timestamp(struct task_struct *task,
1035 struct perf_event_context *ctx)
1036 {
1037 }
1038
1039 static inline void
1040 perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
1041 {
1042 }
1043
1044 static inline void
1045 perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
1046 {
1047 }
1048
1049 static inline u64 perf_cgroup_event_time(struct perf_event *event)
1050 {
1051 return 0;
1052 }
1053
1054 static inline void
1055 list_update_cgroup_event(struct perf_event *event,
1056 struct perf_event_context *ctx, bool add)
1057 {
1058 }
1059
1060 #endif
1061
1062
1063
1064
1065
1066 #define PERF_CPU_HRTIMER (1000 / HZ)
1067
1068
1069
1070 static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
1071 {
1072 struct perf_cpu_context *cpuctx;
1073 bool rotations;
1074
1075 lockdep_assert_irqs_disabled();
1076
1077 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
1078 rotations = perf_rotate_context(cpuctx);
1079
1080 raw_spin_lock(&cpuctx->hrtimer_lock);
1081 if (rotations)
1082 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
1083 else
1084 cpuctx->hrtimer_active = 0;
1085 raw_spin_unlock(&cpuctx->hrtimer_lock);
1086
1087 return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
1088 }
1089
1090 static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
1091 {
1092 struct hrtimer *timer = &cpuctx->hrtimer;
1093 struct pmu *pmu = cpuctx->ctx.pmu;
1094 u64 interval;
1095
1096
1097 if (pmu->task_ctx_nr == perf_sw_context)
1098 return;
1099
1100
1101
1102
1103
1104 interval = pmu->hrtimer_interval_ms;
1105 if (interval < 1)
1106 interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
1107
1108 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
1109
1110 raw_spin_lock_init(&cpuctx->hrtimer_lock);
1111 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1112 timer->function = perf_mux_hrtimer_handler;
1113 }
1114
1115 static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
1116 {
1117 struct hrtimer *timer = &cpuctx->hrtimer;
1118 struct pmu *pmu = cpuctx->ctx.pmu;
1119 unsigned long flags;
1120
1121
1122 if (pmu->task_ctx_nr == perf_sw_context)
1123 return 0;
1124
1125 raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
1126 if (!cpuctx->hrtimer_active) {
1127 cpuctx->hrtimer_active = 1;
1128 hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
1129 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
1130 }
1131 raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
1132
1133 return 0;
1134 }
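/*
 * Editor's note (not part of the original file): this hrtimer drives
 * event multiplexing. perf_mux_hrtimer_handler() calls
 * perf_rotate_context() and re-arms itself at cpuctx->hrtimer_interval
 * only while rotations are still needed; perf_mux_hrtimer_restart()
 * re-arms it lazily when a context needs rotating again. The default
 * interval is PERF_CPU_HRTIMER milliseconds, overridable per PMU via
 * hrtimer_interval_ms (the perf_event_mux_interval_ms sysfs attribute
 * handled later in this file).
 */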
1135
1136 void perf_pmu_disable(struct pmu *pmu)
1137 {
1138 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1139 if (!(*count)++)
1140 pmu->pmu_disable(pmu);
1141 }
1142
1143 void perf_pmu_enable(struct pmu *pmu)
1144 {
1145 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1146 if (!--(*count))
1147 pmu->pmu_enable(pmu);
1148 }
1149
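/*
 * Editor's illustration (not part of the original file):
 * perf_pmu_disable()/perf_pmu_enable() above nest by keeping a per-CPU
 * disable count; the hardware is only touched on the 0->1 and 1->0
 * transitions. A minimal user-space sketch of the same pattern:
 */
#if 0	/* standalone example, not kernel code */
#include <stdio.h>

static int disable_count;

static void hw_disable(void) { printf("pmu->pmu_disable()\n"); }
static void hw_enable(void)  { printf("pmu->pmu_enable()\n"); }

static void pmu_disable(void)
{
	if (!disable_count++)	/* only the outermost caller disables */
		hw_disable();
}

static void pmu_enable(void)
{
	if (!--disable_count)	/* only the outermost caller re-enables */
		hw_enable();
}

int main(void)
{
	pmu_disable();		/* prints pmu->pmu_disable() */
	pmu_disable();		/* nested: no hardware access */
	pmu_enable();		/* nested: no hardware access */
	pmu_enable();		/* prints pmu->pmu_enable() */
	return 0;
}
#endif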
1150 static DEFINE_PER_CPU(struct list_head, active_ctx_list);
1151
1152
1153
1154
1155
1156
1157
1158 static void perf_event_ctx_activate(struct perf_event_context *ctx)
1159 {
1160 struct list_head *head = this_cpu_ptr(&active_ctx_list);
1161
1162 lockdep_assert_irqs_disabled();
1163
1164 WARN_ON(!list_empty(&ctx->active_ctx_list));
1165
1166 list_add(&ctx->active_ctx_list, head);
1167 }
1168
1169 static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
1170 {
1171 lockdep_assert_irqs_disabled();
1172
1173 WARN_ON(list_empty(&ctx->active_ctx_list));
1174
1175 list_del_init(&ctx->active_ctx_list);
1176 }
1177
1178 static void get_ctx(struct perf_event_context *ctx)
1179 {
1180 refcount_inc(&ctx->refcount);
1181 }
1182
1183 static void free_ctx(struct rcu_head *head)
1184 {
1185 struct perf_event_context *ctx;
1186
1187 ctx = container_of(head, struct perf_event_context, rcu_head);
1188 kfree(ctx->task_ctx_data);
1189 kfree(ctx);
1190 }
1191
1192 static void put_ctx(struct perf_event_context *ctx)
1193 {
1194 if (refcount_dec_and_test(&ctx->refcount)) {
1195 if (ctx->parent_ctx)
1196 put_ctx(ctx->parent_ctx);
1197 if (ctx->task && ctx->task != TASK_TOMBSTONE)
1198 put_task_struct(ctx->task);
1199 call_rcu(&ctx->rcu_head, free_ctx);
1200 }
1201 }
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
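/*
 * Editor's note (not part of the original file):
 * perf_event_ctx_lock_nested() pins event->ctx against concurrent
 * context changes. It takes a reference on the ctx it observes under
 * RCU, acquires ctx->mutex at the given lockdep nesting level, and
 * retries from scratch if the event was moved to a different context in
 * the meantime (e.g. by a group move in sys_perf_event_open()). Callers
 * must pair it with perf_event_ctx_unlock().
 */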
1269 static struct perf_event_context *
1270 perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
1271 {
1272 struct perf_event_context *ctx;
1273
1274 again:
1275 rcu_read_lock();
1276 ctx = READ_ONCE(event->ctx);
1277 if (!refcount_inc_not_zero(&ctx->refcount)) {
1278 rcu_read_unlock();
1279 goto again;
1280 }
1281 rcu_read_unlock();
1282
1283 mutex_lock_nested(&ctx->mutex, nesting);
1284 if (event->ctx != ctx) {
1285 mutex_unlock(&ctx->mutex);
1286 put_ctx(ctx);
1287 goto again;
1288 }
1289
1290 return ctx;
1291 }
1292
1293 static inline struct perf_event_context *
1294 perf_event_ctx_lock(struct perf_event *event)
1295 {
1296 return perf_event_ctx_lock_nested(event, 0);
1297 }
1298
1299 static void perf_event_ctx_unlock(struct perf_event *event,
1300 struct perf_event_context *ctx)
1301 {
1302 mutex_unlock(&ctx->mutex);
1303 put_ctx(ctx);
1304 }
1305
1306
1307
1308
1309
1310
1311 static __must_check struct perf_event_context *
1312 unclone_ctx(struct perf_event_context *ctx)
1313 {
1314 struct perf_event_context *parent_ctx = ctx->parent_ctx;
1315
1316 lockdep_assert_held(&ctx->lock);
1317
1318 if (parent_ctx)
1319 ctx->parent_ctx = NULL;
1320 ctx->generation++;
1321
1322 return parent_ctx;
1323 }
1324
1325 static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
1326 enum pid_type type)
1327 {
1328 u32 nr;
1329
1330
1331
1332 if (event->parent)
1333 event = event->parent;
1334
1335 nr = __task_pid_nr_ns(p, type, event->ns);
1336
1337 if (!nr && !pid_alive(p))
1338 nr = -1;
1339 return nr;
1340 }
1341
1342 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
1343 {
1344 return perf_event_pid_type(event, p, PIDTYPE_TGID);
1345 }
1346
1347 static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
1348 {
1349 return perf_event_pid_type(event, p, PIDTYPE_PID);
1350 }
1351
1352
1353
1354
1355
1356 static u64 primary_event_id(struct perf_event *event)
1357 {
1358 u64 id = event->id;
1359
1360 if (event->parent)
1361 id = event->parent->id;
1362
1363 return id;
1364 }
1365
1366
1367
1368
1369
1370
1371
1372 static struct perf_event_context *
1373 perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
1374 {
1375 struct perf_event_context *ctx;
1376
1377 retry:
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387 local_irq_save(*flags);
1388 rcu_read_lock();
1389 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
1390 if (ctx) {
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401 raw_spin_lock(&ctx->lock);
1402 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
1403 raw_spin_unlock(&ctx->lock);
1404 rcu_read_unlock();
1405 local_irq_restore(*flags);
1406 goto retry;
1407 }
1408
1409 if (ctx->task == TASK_TOMBSTONE ||
1410 !refcount_inc_not_zero(&ctx->refcount)) {
1411 raw_spin_unlock(&ctx->lock);
1412 ctx = NULL;
1413 } else {
1414 WARN_ON_ONCE(ctx->task != task);
1415 }
1416 }
1417 rcu_read_unlock();
1418 if (!ctx)
1419 local_irq_restore(*flags);
1420 return ctx;
1421 }
1422
1423
1424
1425
1426
1427
1428 static struct perf_event_context *
1429 perf_pin_task_context(struct task_struct *task, int ctxn)
1430 {
1431 struct perf_event_context *ctx;
1432 unsigned long flags;
1433
1434 ctx = perf_lock_task_context(task, ctxn, &flags);
1435 if (ctx) {
1436 ++ctx->pin_count;
1437 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1438 }
1439 return ctx;
1440 }
1441
1442 static void perf_unpin_context(struct perf_event_context *ctx)
1443 {
1444 unsigned long flags;
1445
1446 raw_spin_lock_irqsave(&ctx->lock, flags);
1447 --ctx->pin_count;
1448 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1449 }
1450
1451
1452
1453
1454 static void update_context_time(struct perf_event_context *ctx)
1455 {
1456 u64 now = perf_clock();
1457
1458 ctx->time += now - ctx->timestamp;
1459 ctx->timestamp = now;
1460 }
1461
1462 static u64 perf_event_time(struct perf_event *event)
1463 {
1464 struct perf_event_context *ctx = event->ctx;
1465
1466 if (is_cgroup_event(event))
1467 return perf_cgroup_event_time(event);
1468
1469 return ctx ? ctx->time : 0;
1470 }
1471
1472 static enum event_type_t get_event_type(struct perf_event *event)
1473 {
1474 struct perf_event_context *ctx = event->ctx;
1475 enum event_type_t event_type;
1476
1477 lockdep_assert_held(&ctx->lock);
1478
1479
1480
1481
1482
1483 if (event->group_leader != event)
1484 event = event->group_leader;
1485
1486 event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
1487 if (!ctx->task)
1488 event_type |= EVENT_CPU;
1489
1490 return event_type;
1491 }
1492
1493
1494
1495
1496 static void init_event_group(struct perf_event *event)
1497 {
1498 RB_CLEAR_NODE(&event->group_node);
1499 event->group_index = 0;
1500 }
1501
1502
1503
1504
1505
1506 static struct perf_event_groups *
1507 get_event_groups(struct perf_event *event, struct perf_event_context *ctx)
1508 {
1509 if (event->attr.pinned)
1510 return &ctx->pinned_groups;
1511 else
1512 return &ctx->flexible_groups;
1513 }
1514
1515
1516
1517
1518 static void perf_event_groups_init(struct perf_event_groups *groups)
1519 {
1520 groups->tree = RB_ROOT;
1521 groups->index = 0;
1522 }
1523
1524
1525
1526
1527
1528
1529
1530 static bool
1531 perf_event_groups_less(struct perf_event *left, struct perf_event *right)
1532 {
1533 if (left->cpu < right->cpu)
1534 return true;
1535 if (left->cpu > right->cpu)
1536 return false;
1537
1538 if (left->group_index < right->group_index)
1539 return true;
1540 if (left->group_index > right->group_index)
1541 return false;
1542
1543 return false;
1544 }
1545
1546
1547
1548
1549
1550
1551 static void
1552 perf_event_groups_insert(struct perf_event_groups *groups,
1553 struct perf_event *event)
1554 {
1555 struct perf_event *node_event;
1556 struct rb_node *parent;
1557 struct rb_node **node;
1558
1559 event->group_index = ++groups->index;
1560
1561 node = &groups->tree.rb_node;
1562 parent = *node;
1563
1564 while (*node) {
1565 parent = *node;
1566 node_event = container_of(*node, struct perf_event, group_node);
1567
1568 if (perf_event_groups_less(event, node_event))
1569 node = &parent->rb_left;
1570 else
1571 node = &parent->rb_right;
1572 }
1573
1574 rb_link_node(&event->group_node, parent, node);
1575 rb_insert_color(&event->group_node, &groups->tree);
1576 }
1577
1578
1579
1580
1581 static void
1582 add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx)
1583 {
1584 struct perf_event_groups *groups;
1585
1586 groups = get_event_groups(event, ctx);
1587 perf_event_groups_insert(groups, event);
1588 }
1589
1590
1591
1592
1593 static void
1594 perf_event_groups_delete(struct perf_event_groups *groups,
1595 struct perf_event *event)
1596 {
1597 WARN_ON_ONCE(RB_EMPTY_NODE(&event->group_node) ||
1598 RB_EMPTY_ROOT(&groups->tree));
1599
1600 rb_erase(&event->group_node, &groups->tree);
1601 init_event_group(event);
1602 }
1603
1604
1605
1606
1607 static void
1608 del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
1609 {
1610 struct perf_event_groups *groups;
1611
1612 groups = get_event_groups(event, ctx);
1613 perf_event_groups_delete(groups, event);
1614 }
1615
1616
1617
1618
1619 static struct perf_event *
1620 perf_event_groups_first(struct perf_event_groups *groups, int cpu)
1621 {
1622 struct perf_event *node_event = NULL, *match = NULL;
1623 struct rb_node *node = groups->tree.rb_node;
1624
1625 while (node) {
1626 node_event = container_of(node, struct perf_event, group_node);
1627
1628 if (cpu < node_event->cpu) {
1629 node = node->rb_left;
1630 } else if (cpu > node_event->cpu) {
1631 node = node->rb_right;
1632 } else {
1633 match = node_event;
1634 node = node->rb_left;
1635 }
1636 }
1637
1638 return match;
1639 }
1640
1641
1642
1643
1644 static struct perf_event *
1645 perf_event_groups_next(struct perf_event *event)
1646 {
1647 struct perf_event *next;
1648
1649 next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
1650 if (next && next->cpu == event->cpu)
1651 return next;
1652
1653 return NULL;
1654 }
1655
1656
1657
1658
1659 #define perf_event_groups_for_each(event, groups) \
1660 for (event = rb_entry_safe(rb_first(&((groups)->tree)), \
1661 typeof(*event), group_node); event; \
1662 event = rb_entry_safe(rb_next(&event->group_node), \
1663 typeof(*event), group_node))
1664
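/*
 * Editor's illustration (not part of the original file): the groups
 * rb-tree above keys events by {cpu, group_index}, where group_index is
 * a monotonically increasing insertion counter. Iteration therefore
 * visits events clustered per CPU and, within one CPU, in insertion
 * order -- exactly what perf_event_groups_first()/_next() rely on. A
 * standalone sketch of the same ordering using qsort():
 */
#if 0	/* standalone example, not kernel code */
#include <stdio.h>
#include <stdlib.h>

struct ev { int cpu; unsigned long group_index; };

static int ev_cmp(const void *a, const void *b)
{
	const struct ev *l = a, *r = b;

	if (l->cpu != r->cpu)
		return l->cpu < r->cpu ? -1 : 1;
	if (l->group_index != r->group_index)
		return l->group_index < r->group_index ? -1 : 1;
	return 0;
}

int main(void)
{
	struct ev evs[] = {
		{ 1, 3 }, { -1, 1 }, { 0, 4 }, { 1, 2 }, { 0, 5 },
	};
	size_t n = sizeof(evs) / sizeof(evs[0]);

	/* task (cpu == -1) events sort first, then per-CPU in FIFO order */
	qsort(evs, n, sizeof(evs[0]), ev_cmp);

	for (size_t i = 0; i < n; i++)
		printf("cpu=%d index=%lu\n", evs[i].cpu, evs[i].group_index);
	return 0;
}
#endif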
1665
1666
1667
1668
1669 static void
1670 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1671 {
1672 lockdep_assert_held(&ctx->lock);
1673
1674 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
1675 event->attach_state |= PERF_ATTACH_CONTEXT;
1676
1677 event->tstamp = perf_event_time(event);
1678
1679
1680
1681
1682
1683
1684 if (event->group_leader == event) {
1685 event->group_caps = event->event_caps;
1686 add_event_to_groups(event, ctx);
1687 }
1688
1689 list_update_cgroup_event(event, ctx, true);
1690
1691 list_add_rcu(&event->event_entry, &ctx->event_list);
1692 ctx->nr_events++;
1693 if (event->attr.inherit_stat)
1694 ctx->nr_stat++;
1695
1696 ctx->generation++;
1697 }
1698
1699
1700
1701
1702 static inline void perf_event__state_init(struct perf_event *event)
1703 {
1704 event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
1705 PERF_EVENT_STATE_INACTIVE;
1706 }
1707
1708 static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
1709 {
1710 int entry = sizeof(u64);
1711 int size = 0;
1712 int nr = 1;
1713
1714 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1715 size += sizeof(u64);
1716
1717 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1718 size += sizeof(u64);
1719
1720 if (event->attr.read_format & PERF_FORMAT_ID)
1721 entry += sizeof(u64);
1722
1723 if (event->attr.read_format & PERF_FORMAT_GROUP) {
1724 nr += nr_siblings;
1725 size += sizeof(u64);
1726 }
1727
1728 size += entry * nr;
1729 event->read_size = size;
1730 }
1731
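/*
 * Editor's illustration (not part of the original file): a user-space
 * mirror of __perf_event_read_size() above, using the PERF_FORMAT_* bits
 * from the UAPI header. perf_event_validate_size() further down rejects
 * configurations whose combined read/header/id sizes would reach 16KB.
 */
#if 0	/* standalone example, not kernel code */
#include <stdio.h>
#include <linux/perf_event.h>	/* PERF_FORMAT_* */

static int read_size(unsigned long long read_format, int nr_siblings)
{
	int entry = sizeof(unsigned long long);	/* one u64 counter value */
	int size = 0;
	int nr = 1;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(unsigned long long);
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(unsigned long long);
	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(unsigned long long);
	if (read_format & PERF_FORMAT_GROUP) {
		nr += nr_siblings;
		size += sizeof(unsigned long long);	/* the nr field */
	}

	return size + entry * nr;
}

int main(void)
{
	unsigned long long fmt = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
				 PERF_FORMAT_TOTAL_TIME_ENABLED;

	/* leader + 3 siblings: nr + time_enabled + 4 * (value, id) = 80 bytes */
	printf("read() buffer for a 4-member group: %d bytes\n",
	       read_size(fmt, 3));
	return 0;
}
#endif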
1732 static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
1733 {
1734 struct perf_sample_data *data;
1735 u16 size = 0;
1736
1737 if (sample_type & PERF_SAMPLE_IP)
1738 size += sizeof(data->ip);
1739
1740 if (sample_type & PERF_SAMPLE_ADDR)
1741 size += sizeof(data->addr);
1742
1743 if (sample_type & PERF_SAMPLE_PERIOD)
1744 size += sizeof(data->period);
1745
1746 if (sample_type & PERF_SAMPLE_WEIGHT)
1747 size += sizeof(data->weight);
1748
1749 if (sample_type & PERF_SAMPLE_READ)
1750 size += event->read_size;
1751
1752 if (sample_type & PERF_SAMPLE_DATA_SRC)
1753 size += sizeof(data->data_src.val);
1754
1755 if (sample_type & PERF_SAMPLE_TRANSACTION)
1756 size += sizeof(data->txn);
1757
1758 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
1759 size += sizeof(data->phys_addr);
1760
1761 event->header_size = size;
1762 }
1763
1764
1765
1766
1767
1768 static void perf_event__header_size(struct perf_event *event)
1769 {
1770 __perf_event_read_size(event,
1771 event->group_leader->nr_siblings);
1772 __perf_event_header_size(event, event->attr.sample_type);
1773 }
1774
1775 static void perf_event__id_header_size(struct perf_event *event)
1776 {
1777 struct perf_sample_data *data;
1778 u64 sample_type = event->attr.sample_type;
1779 u16 size = 0;
1780
1781 if (sample_type & PERF_SAMPLE_TID)
1782 size += sizeof(data->tid_entry);
1783
1784 if (sample_type & PERF_SAMPLE_TIME)
1785 size += sizeof(data->time);
1786
1787 if (sample_type & PERF_SAMPLE_IDENTIFIER)
1788 size += sizeof(data->id);
1789
1790 if (sample_type & PERF_SAMPLE_ID)
1791 size += sizeof(data->id);
1792
1793 if (sample_type & PERF_SAMPLE_STREAM_ID)
1794 size += sizeof(data->stream_id);
1795
1796 if (sample_type & PERF_SAMPLE_CPU)
1797 size += sizeof(data->cpu_entry);
1798
1799 event->id_header_size = size;
1800 }
1801
1802 static bool perf_event_validate_size(struct perf_event *event)
1803 {
1804
1805
1806
1807
1808 __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
1809 __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
1810 perf_event__id_header_size(event);
1811
1812
1813
1814
1815
1816 if (event->read_size + event->header_size +
1817 event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
1818 return false;
1819
1820 return true;
1821 }
1822
1823 static void perf_group_attach(struct perf_event *event)
1824 {
1825 struct perf_event *group_leader = event->group_leader, *pos;
1826
1827 lockdep_assert_held(&event->ctx->lock);
1828
1829
1830
1831
1832 if (event->attach_state & PERF_ATTACH_GROUP)
1833 return;
1834
1835 event->attach_state |= PERF_ATTACH_GROUP;
1836
1837 if (group_leader == event)
1838 return;
1839
1840 WARN_ON_ONCE(group_leader->ctx != event->ctx);
1841
1842 group_leader->group_caps &= event->event_caps;
1843
1844 list_add_tail(&event->sibling_list, &group_leader->sibling_list);
1845 group_leader->nr_siblings++;
1846
1847 perf_event__header_size(group_leader);
1848
1849 for_each_sibling_event(pos, group_leader)
1850 perf_event__header_size(pos);
1851 }
1852
1853
1854
1855
1856
1857 static void
1858 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1859 {
1860 WARN_ON_ONCE(event->ctx != ctx);
1861 lockdep_assert_held(&ctx->lock);
1862
1863
1864
1865
1866 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
1867 return;
1868
1869 event->attach_state &= ~PERF_ATTACH_CONTEXT;
1870
1871 list_update_cgroup_event(event, ctx, false);
1872
1873 ctx->nr_events--;
1874 if (event->attr.inherit_stat)
1875 ctx->nr_stat--;
1876
1877 list_del_rcu(&event->event_entry);
1878
1879 if (event->group_leader == event)
1880 del_event_from_groups(event, ctx);
1881
1882
1883
1884
1885
1886
1887
1888
1889 if (event->state > PERF_EVENT_STATE_OFF)
1890 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
1891
1892 ctx->generation++;
1893 }
1894
1895 static int
1896 perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
1897 {
1898 if (!has_aux(aux_event))
1899 return 0;
1900
1901 if (!event->pmu->aux_output_match)
1902 return 0;
1903
1904 return event->pmu->aux_output_match(aux_event);
1905 }
1906
1907 static void put_event(struct perf_event *event);
1908 static void event_sched_out(struct perf_event *event,
1909 struct perf_cpu_context *cpuctx,
1910 struct perf_event_context *ctx);
1911
1912 static void perf_put_aux_event(struct perf_event *event)
1913 {
1914 struct perf_event_context *ctx = event->ctx;
1915 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1916 struct perf_event *iter;
1917
1918
1919
1920
1921 if (event->aux_event) {
1922 iter = event->aux_event;
1923 event->aux_event = NULL;
1924 put_event(iter);
1925 return;
1926 }
1927
1928
1929
1930
1931
1932 for_each_sibling_event(iter, event->group_leader) {
1933 if (iter->aux_event != event)
1934 continue;
1935
1936 iter->aux_event = NULL;
1937 put_event(event);
1938
1939
1940
1941
1942
1943
1944 event_sched_out(iter, cpuctx, ctx);
1945 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
1946 }
1947 }
1948
1949 static int perf_get_aux_event(struct perf_event *event,
1950 struct perf_event *group_leader)
1951 {
1952
1953
1954
1955
1956
1957
1958 if (!group_leader)
1959 return 0;
1960
1961 if (!perf_aux_output_match(event, group_leader))
1962 return 0;
1963
1964 if (!atomic_long_inc_not_zero(&group_leader->refcount))
1965 return 0;
1966
1967
1968
1969
1970
1971
1972
1973 event->aux_event = group_leader;
1974
1975 return 1;
1976 }
1977
1978 static void perf_group_detach(struct perf_event *event)
1979 {
1980 struct perf_event *sibling, *tmp;
1981 struct perf_event_context *ctx = event->ctx;
1982
1983 lockdep_assert_held(&ctx->lock);
1984
1985
1986
1987
1988 if (!(event->attach_state & PERF_ATTACH_GROUP))
1989 return;
1990
1991 event->attach_state &= ~PERF_ATTACH_GROUP;
1992
1993 perf_put_aux_event(event);
1994
1995
1996
1997
1998 if (event->group_leader != event) {
1999 list_del_init(&event->sibling_list);
2000 event->group_leader->nr_siblings--;
2001 goto out;
2002 }
2003
2004
2005
2006
2007
2008
2009 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
2010
2011 sibling->group_leader = sibling;
2012 list_del_init(&sibling->sibling_list);
2013
2014
2015 sibling->group_caps = event->group_caps;
2016
2017 if (!RB_EMPTY_NODE(&event->group_node)) {
2018 add_event_to_groups(sibling, event->ctx);
2019
2020 if (sibling->state == PERF_EVENT_STATE_ACTIVE) {
2021 struct list_head *list = sibling->attr.pinned ?
2022 &ctx->pinned_active : &ctx->flexible_active;
2023
2024 list_add_tail(&sibling->active_list, list);
2025 }
2026 }
2027
2028 WARN_ON_ONCE(sibling->ctx != event->ctx);
2029 }
2030
2031 out:
2032 perf_event__header_size(event->group_leader);
2033
2034 for_each_sibling_event(tmp, event->group_leader)
2035 perf_event__header_size(tmp);
2036 }
2037
2038 static bool is_orphaned_event(struct perf_event *event)
2039 {
2040 return event->state == PERF_EVENT_STATE_DEAD;
2041 }
2042
2043 static inline int __pmu_filter_match(struct perf_event *event)
2044 {
2045 struct pmu *pmu = event->pmu;
2046 return pmu->filter_match ? pmu->filter_match(event) : 1;
2047 }
2048
2049
2050
2051
2052
2053
2054
2055 static inline int pmu_filter_match(struct perf_event *event)
2056 {
2057 struct perf_event *sibling;
2058
2059 if (!__pmu_filter_match(event))
2060 return 0;
2061
2062 for_each_sibling_event(sibling, event) {
2063 if (!__pmu_filter_match(sibling))
2064 return 0;
2065 }
2066
2067 return 1;
2068 }
2069
2070 static inline int
2071 event_filter_match(struct perf_event *event)
2072 {
2073 return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
2074 perf_cgroup_match(event) && pmu_filter_match(event);
2075 }
2076
2077 static void
2078 event_sched_out(struct perf_event *event,
2079 struct perf_cpu_context *cpuctx,
2080 struct perf_event_context *ctx)
2081 {
2082 enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
2083
2084 WARN_ON_ONCE(event->ctx != ctx);
2085 lockdep_assert_held(&ctx->lock);
2086
2087 if (event->state != PERF_EVENT_STATE_ACTIVE)
2088 return;
2089
2090
2091
2092
2093
2094
2095 list_del_init(&event->active_list);
2096
2097 perf_pmu_disable(event->pmu);
2098
2099 event->pmu->del(event, 0);
2100 event->oncpu = -1;
2101
2102 if (READ_ONCE(event->pending_disable) >= 0) {
2103 WRITE_ONCE(event->pending_disable, -1);
2104 state = PERF_EVENT_STATE_OFF;
2105 }
2106 perf_event_set_state(event, state);
2107
2108 if (!is_software_event(event))
2109 cpuctx->active_oncpu--;
2110 if (!--ctx->nr_active)
2111 perf_event_ctx_deactivate(ctx);
2112 if (event->attr.freq && event->attr.sample_freq)
2113 ctx->nr_freq--;
2114 if (event->attr.exclusive || !cpuctx->active_oncpu)
2115 cpuctx->exclusive = 0;
2116
2117 perf_pmu_enable(event->pmu);
2118 }
2119
2120 static void
2121 group_sched_out(struct perf_event *group_event,
2122 struct perf_cpu_context *cpuctx,
2123 struct perf_event_context *ctx)
2124 {
2125 struct perf_event *event;
2126
2127 if (group_event->state != PERF_EVENT_STATE_ACTIVE)
2128 return;
2129
2130 perf_pmu_disable(ctx->pmu);
2131
2132 event_sched_out(group_event, cpuctx, ctx);
2133
2134
2135
2136
2137 for_each_sibling_event(event, group_event)
2138 event_sched_out(event, cpuctx, ctx);
2139
2140 perf_pmu_enable(ctx->pmu);
2141
2142 if (group_event->attr.exclusive)
2143 cpuctx->exclusive = 0;
2144 }
2145
2146 #define DETACH_GROUP 0x01UL
2147
2148
2149
2150
2151
2152
2153
2154 static void
2155 __perf_remove_from_context(struct perf_event *event,
2156 struct perf_cpu_context *cpuctx,
2157 struct perf_event_context *ctx,
2158 void *info)
2159 {
2160 unsigned long flags = (unsigned long)info;
2161
2162 if (ctx->is_active & EVENT_TIME) {
2163 update_context_time(ctx);
2164 update_cgrp_time_from_cpuctx(cpuctx);
2165 }
2166
2167 event_sched_out(event, cpuctx, ctx);
2168 if (flags & DETACH_GROUP)
2169 perf_group_detach(event);
2170 list_del_event(event, ctx);
2171
2172 if (!ctx->nr_events && ctx->is_active) {
2173 ctx->is_active = 0;
2174 if (ctx->task) {
2175 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
2176 cpuctx->task_ctx = NULL;
2177 }
2178 }
2179 }
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191 static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
2192 {
2193 struct perf_event_context *ctx = event->ctx;
2194
2195 lockdep_assert_held(&ctx->mutex);
2196
2197 event_function_call(event, __perf_remove_from_context, (void *)flags);
2198
2199
2200
2201
2202
2203
2204
2205 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
2206 if ((flags & DETACH_GROUP) &&
2207 (event->attach_state & PERF_ATTACH_GROUP)) {
2208
2209
2210
2211
2212 raw_spin_lock_irq(&ctx->lock);
2213 perf_group_detach(event);
2214 raw_spin_unlock_irq(&ctx->lock);
2215 }
2216 }
2217
2218
2219
2220
2221 static void __perf_event_disable(struct perf_event *event,
2222 struct perf_cpu_context *cpuctx,
2223 struct perf_event_context *ctx,
2224 void *info)
2225 {
2226 if (event->state < PERF_EVENT_STATE_INACTIVE)
2227 return;
2228
2229 if (ctx->is_active & EVENT_TIME) {
2230 update_context_time(ctx);
2231 update_cgrp_time_from_event(event);
2232 }
2233
2234 if (event == event->group_leader)
2235 group_sched_out(event, cpuctx, ctx);
2236 else
2237 event_sched_out(event, cpuctx, ctx);
2238
2239 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2240 }
2241
2242
2243 /*
2244  * Disable an event.
2245  *
2246  * If event->ctx is a cloned context, callers must make sure that
2247  * every task can no longer be on the receiving end of this event.
2248  * When called from perf_pending_event it's OK because event->ctx
2249  * is the current context on this CPU and preemption is disabled,
2250  * hence we can't get into perf_event_task_sched_out for this context.
2251  */
2252
2253
2254
2255
2256 static void _perf_event_disable(struct perf_event *event)
2257 {
2258 struct perf_event_context *ctx = event->ctx;
2259
2260 raw_spin_lock_irq(&ctx->lock);
2261 if (event->state <= PERF_EVENT_STATE_OFF) {
2262 raw_spin_unlock_irq(&ctx->lock);
2263 return;
2264 }
2265 raw_spin_unlock_irq(&ctx->lock);
2266
2267 event_function_call(event, __perf_event_disable, NULL);
2268 }
2269
2270 void perf_event_disable_local(struct perf_event *event)
2271 {
2272 event_function_local(event, __perf_event_disable, NULL);
2273 }
2274
2275 /*
2276  * Strictly speaking kernel users cannot create groups and therefore this
2277  * interface does not need the perf_event_ctx_lock() magic.
2278  */
2279 void perf_event_disable(struct perf_event *event)
2280 {
2281 struct perf_event_context *ctx;
2282
2283 ctx = perf_event_ctx_lock(event);
2284 _perf_event_disable(event);
2285 perf_event_ctx_unlock(event, ctx);
2286 }
2287 EXPORT_SYMBOL_GPL(perf_event_disable);
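/*
 * Usage sketch (hypothetical in-kernel caller, not part of this file): a
 * driver that created an event with perf_event_create_kernel_counter()
 * could temporarily quiesce it with:
 *
 *	perf_event_disable(event);
 *	... reconfigure or read out state ...
 *	perf_event_enable(event);
 *
 * Both calls take the event's ctx mutex via perf_event_ctx_lock() and so
 * must not be used from IRQ/NMI context; perf_event_disable_local() and
 * perf_event_disable_inatomic() below exist for those cases.
 */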
2288
2289 void perf_event_disable_inatomic(struct perf_event *event)
2290 {
2291 WRITE_ONCE(event->pending_disable, smp_processor_id());
2292
2293 irq_work_queue(&event->pending);
2294 }
2295
2296 static void perf_set_shadow_time(struct perf_event *event,
2297 struct perf_event_context *ctx)
2298 {
2299 /*
2300  * Use the correct time source for the shadow timestamp.
2301  *
2302  * The shadow time lets enabled/running times be computed later without
2303  * taking ctx->lock, e.g. from NMI context or through the mmap'ed user
2304  * page.
2305  *
2306  * Cgroup events only accumulate time while their cgroup is active on a
2307  * CPU and therefore use the cgroup's own clock; all other events are
2308  * relative to the context timestamp.
2309  */
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324 if (is_cgroup_event(event))
2325 perf_cgroup_set_shadow_time(event, event->tstamp);
2326 else
2327 event->shadow_ctx_time = event->tstamp - ctx->timestamp;
2328 }
2329
2330 #define MAX_INTERRUPTS (~0ULL)
2331
2332 static void perf_log_throttle(struct perf_event *event, int enable);
2333 static void perf_log_itrace_start(struct perf_event *event);
2334
2335 static int
2336 event_sched_in(struct perf_event *event,
2337 struct perf_cpu_context *cpuctx,
2338 struct perf_event_context *ctx)
2339 {
2340 int ret = 0;
2341
2342 lockdep_assert_held(&ctx->lock);
2343
2344 if (event->state <= PERF_EVENT_STATE_OFF)
2345 return 0;
2346
2347 WRITE_ONCE(event->oncpu, smp_processor_id());
2348
2349 /*
2350  * Order the event::oncpu write before the ACTIVE state becomes visible,
2351  * so that perf_event_{stop,read}() observe a valid ->oncpu on ACTIVE.
2352  */
2353 smp_wmb();
2354 perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
2355
2356 /*
2357  * Unthrottle events, since we scheduled we might have missed several
2358  * ticks; clear the interrupt count and log the unthrottle.
2359  */
2360
2361 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
2362 perf_log_throttle(event, 1);
2363 event->hw.interrupts = 0;
2364 }
2365
2366 perf_pmu_disable(event->pmu);
2367
2368 perf_set_shadow_time(event, ctx);
2369
2370 perf_log_itrace_start(event);
2371
2372 if (event->pmu->add(event, PERF_EF_START)) {
2373 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2374 event->oncpu = -1;
2375 ret = -EAGAIN;
2376 goto out;
2377 }
2378
2379 if (!is_software_event(event))
2380 cpuctx->active_oncpu++;
2381 if (!ctx->nr_active++)
2382 perf_event_ctx_activate(ctx);
2383 if (event->attr.freq && event->attr.sample_freq)
2384 ctx->nr_freq++;
2385
2386 if (event->attr.exclusive)
2387 cpuctx->exclusive = 1;
2388
2389 out:
2390 perf_pmu_enable(event->pmu);
2391
2392 return ret;
2393 }
2394
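/*
 * group_sched_in() below schedules a whole group atomically through the
 * PMU transaction interface: start_txn(PERF_PMU_TXN_ADD), event_sched_in()
 * for the leader and each sibling, then commit_txn(). If any member fails,
 * the members already added are scheduled out again, the transaction is
 * cancelled and the multiplexing hrtimer is restarted so the group gets
 * another chance on a later rotation.
 */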
2395 static int
2396 group_sched_in(struct perf_event *group_event,
2397 struct perf_cpu_context *cpuctx,
2398 struct perf_event_context *ctx)
2399 {
2400 struct perf_event *event, *partial_group = NULL;
2401 struct pmu *pmu = ctx->pmu;
2402
2403 if (group_event->state == PERF_EVENT_STATE_OFF)
2404 return 0;
2405
2406 pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
2407
2408 if (event_sched_in(group_event, cpuctx, ctx)) {
2409 pmu->cancel_txn(pmu);
2410 perf_mux_hrtimer_restart(cpuctx);
2411 return -EAGAIN;
2412 }
2413
2414 /*
2415  * Schedule in the siblings as one group (if any):
2416  */
2417 for_each_sibling_event(event, group_event) {
2418 if (event_sched_in(event, cpuctx, ctx)) {
2419 partial_group = event;
2420 goto group_error;
2421 }
2422 }
2423
2424 if (!pmu->commit_txn(pmu))
2425 return 0;
2426
2427 group_error:
2428 /*
2429  * Groups can be scheduled in as one unit only, so undo any
2430  * partial group before returning: the events up to (but not
2431  * including) the failed event are scheduled out normally.
2432  */
2433 for_each_sibling_event(event, group_event) {
2434 if (event == partial_group)
2435 break;
2436
2437 event_sched_out(event, cpuctx, ctx);
2438 }
2439 event_sched_out(group_event, cpuctx, ctx);
2440
2441 pmu->cancel_txn(pmu);
2442
2443 perf_mux_hrtimer_restart(cpuctx);
2444
2445 return -EAGAIN;
2446 }
2447
2448
2449
2450
2451 static int group_can_go_on(struct perf_event *event,
2452 struct perf_cpu_context *cpuctx,
2453 int can_add_hw)
2454 {
2455
2456
2457
2458 if (event->group_caps & PERF_EV_CAP_SOFTWARE)
2459 return 1;
2460
2461
2462
2463
2464 if (cpuctx->exclusive)
2465 return 0;
2466
2467
2468
2469
2470 if (event->attr.exclusive && cpuctx->active_oncpu)
2471 return 0;
2472
2473
2474
2475
2476 return can_add_hw;
2477 }
2478
2479 static void add_event_to_ctx(struct perf_event *event,
2480 struct perf_event_context *ctx)
2481 {
2482 list_add_event(event, ctx);
2483 perf_group_attach(event);
2484 }
2485
2486 static void ctx_sched_out(struct perf_event_context *ctx,
2487 struct perf_cpu_context *cpuctx,
2488 enum event_type_t event_type);
2489 static void
2490 ctx_sched_in(struct perf_event_context *ctx,
2491 struct perf_cpu_context *cpuctx,
2492 enum event_type_t event_type,
2493 struct task_struct *task);
2494
2495 static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
2496 struct perf_event_context *ctx,
2497 enum event_type_t event_type)
2498 {
2499 if (!cpuctx->task_ctx)
2500 return;
2501
2502 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
2503 return;
2504
2505 ctx_sched_out(ctx, cpuctx, event_type);
2506 }
2507
2508 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
2509 struct perf_event_context *ctx,
2510 struct task_struct *task)
2511 {
2512 cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
2513 if (ctx)
2514 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
2515 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
2516 if (ctx)
2517 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
2518 }
2519
2520 /*
2521  * We want to maintain the following priority of scheduling:
2522  *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
2523  *  - task pinned (EVENT_PINNED)
2524  *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
2525  *  - task flexible (EVENT_FLEXIBLE).
2526  *
2527  * In order to avoid unscheduling and scheduling back in everything every
2528  * time an event is added, only do it for the groups of equal priority
2529  * and below.
2530  *
2531  * This can be called after a batch operation on task events, in which
2532  * case event_type is a bit mask of the types of events involved. For
2533  * CPU events, event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
2534  */
2535 static void ctx_resched(struct perf_cpu_context *cpuctx,
2536 struct perf_event_context *task_ctx,
2537 enum event_type_t event_type)
2538 {
2539 enum event_type_t ctx_event_type;
2540 bool cpu_event = !!(event_type & EVENT_CPU);
2541
2542 /*
2543  * If pinned groups are involved, flexible groups also need to be
2544  * scheduled out.
2545  */
2546 if (event_type & EVENT_PINNED)
2547 event_type |= EVENT_FLEXIBLE;
2548
2549 ctx_event_type = event_type & EVENT_ALL;
2550
2551 perf_pmu_disable(cpuctx->ctx.pmu);
2552 if (task_ctx)
2553 task_ctx_sched_out(cpuctx, task_ctx, event_type);
2554
2555 /*
2556  * Decide which cpu ctx groups to schedule out based on the types
2557  * of events:
2558  *  - EVENT_CPU: schedule out corresponding groups;
2559  *  - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups;
2560  *  - otherwise, do nothing more.
2561  */
2562 if (cpu_event)
2563 cpu_ctx_sched_out(cpuctx, ctx_event_type);
2564 else if (ctx_event_type & EVENT_PINNED)
2565 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2566
2567 perf_event_sched_in(cpuctx, task_ctx, current);
2568 perf_pmu_enable(cpuctx->ctx.pmu);
2569 }
2570
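/*
 * perf_pmu_resched() forces a full reschedule of the CPU context and the
 * current task context on this CPU. It is intended for PMU code that has
 * changed something which invalidates the current event placement (an
 * assumption about its callers, not spelled out in this file).
 */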
2571 void perf_pmu_resched(struct pmu *pmu)
2572 {
2573 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
2574 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2575
2576 perf_ctx_lock(cpuctx, task_ctx);
2577 ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
2578 perf_ctx_unlock(cpuctx, task_ctx);
2579 }
2580
2581
2582
2583
2584
2585
2586
2587 static int __perf_install_in_context(void *info)
2588 {
2589 struct perf_event *event = info;
2590 struct perf_event_context *ctx = event->ctx;
2591 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2592 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2593 bool reprogram = true;
2594 int ret = 0;
2595
2596 raw_spin_lock(&cpuctx->ctx.lock);
2597 if (ctx->task) {
2598 raw_spin_lock(&ctx->lock);
2599 task_ctx = ctx;
2600
2601 reprogram = (ctx->task == current);
2602
2603 /*
2604  * If the task is running, it must be running on this CPU,
2605  * otherwise we cannot reprogram things.
2606  *
2607  * If it's not running, we don't care; ctx->lock will
2608  * serialize against it becoming runnable.
2609  */
2610 if (task_curr(ctx->task) && !reprogram) {
2611 ret = -ESRCH;
2612 goto unlock;
2613 }
2614
2615 WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
2616 } else if (task_ctx) {
2617 raw_spin_lock(&task_ctx->lock);
2618 }
2619
2620 #ifdef CONFIG_CGROUP_PERF
2621 if (is_cgroup_event(event)) {
2622 /*
2623  * If the current cgroup doesn't match the event's
2624  * cgroup, we should not try to schedule it.
2625  */
2626 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
2627 reprogram = cgroup_is_descendant(cgrp->css.cgroup,
2628 event->cgrp->css.cgroup);
2629 }
2630 #endif
2631
2632 if (reprogram) {
2633 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2634 add_event_to_ctx(event, ctx);
2635 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2636 } else {
2637 add_event_to_ctx(event, ctx);
2638 }
2639
2640 unlock:
2641 perf_ctx_unlock(cpuctx, task_ctx);
2642
2643 return ret;
2644 }
2645
2646 static bool exclusive_event_installable(struct perf_event *event,
2647 struct perf_event_context *ctx);
2648
2649 /*
2650  * Attach a performance event to a context.
2651  *
2652  * Very similar to event_function_call(); see the comment there.
2653  */
2654 static void
2655 perf_install_in_context(struct perf_event_context *ctx,
2656 struct perf_event *event,
2657 int cpu)
2658 {
2659 struct task_struct *task = READ_ONCE(ctx->task);
2660
2661 lockdep_assert_held(&ctx->mutex);
2662
2663 WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
2664
2665 if (event->cpu != -1)
2666 event->cpu = cpu;
2667
2668
2669
2670
2671
2672 smp_store_release(&event->ctx, ctx);
2673
2674 if (!task) {
2675 cpu_function_call(cpu, __perf_install_in_context, event);
2676 return;
2677 }
2678
2679 /*
2680  * Should not happen, we validate the ctx is still alive before calling.
2681  */
2682 if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
2683 return;
2684
2685 /*
2686  * Installing events is tricky because we cannot rely on ctx->is_active
2687  * to be set in case this is the nr_events 0 -> 1 transition.
2688  *
2689  * Instead we use task_curr(), which tells us if the task is running.
2690  * However, since we use task_curr() outside of rq::lock, we can race
2691  * against the actual state, so the result can be wrong:
2692  *
2693  *  - a false positive (the task is reported running) is harmless,
2694  *    we simply retry;
2695  *  - a false negative is only safe if the remote context switch is
2696  *    guaranteed to observe our perf_event_ctxp[] store and then take
2697  *    ctx->lock in perf_event_context_sched_in().
2698  *
2699  * The smp_mb() below, together with task_function_call(), provides
2700  * that ordering: if the IPI fails to hit the task, any later context
2701  * switch of that task must see the updated context pointer.
2702  */
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715 smp_mb();
2716 again:
2717 if (!task_function_call(task, __perf_install_in_context, event))
2718 return;
2719
2720 raw_spin_lock_irq(&ctx->lock);
2721 task = ctx->task;
2722 if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
2723 /*
2724  * Cannot happen because we already checked above (which also
2725  * cannot happen), and we hold ctx->mutex, which serializes us
2726  * against perf_event_exit_task_context().
2727  */
2728 raw_spin_unlock_irq(&ctx->lock);
2729 return;
2730 }
2731 /*
2732  * If the task is not running, ctx->lock will avoid it becoming so,
2733  * thus we can safely install the event.
2734  */
2735 if (task_curr(task)) {
2736 raw_spin_unlock_irq(&ctx->lock);
2737 goto again;
2738 }
2739 add_event_to_ctx(event, ctx);
2740 raw_spin_unlock_irq(&ctx->lock);
2741 }
2742
2743 /*
2744  * Cross-CPU call to enable a performance event.
2745  */
2746 static void __perf_event_enable(struct perf_event *event,
2747 struct perf_cpu_context *cpuctx,
2748 struct perf_event_context *ctx,
2749 void *info)
2750 {
2751 struct perf_event *leader = event->group_leader;
2752 struct perf_event_context *task_ctx;
2753
2754 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2755 event->state <= PERF_EVENT_STATE_ERROR)
2756 return;
2757
2758 if (ctx->is_active)
2759 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2760
2761 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2762
2763 if (!ctx->is_active)
2764 return;
2765
2766 if (!event_filter_match(event)) {
2767 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2768 return;
2769 }
2770
2771 /*
2772  * If the event is in a group and isn't the group leader,
2773  * then don't put it on unless the group is on.
2774  */
2775 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
2776 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2777 return;
2778 }
2779
2780 task_ctx = cpuctx->task_ctx;
2781 if (ctx->task)
2782 WARN_ON_ONCE(task_ctx != ctx);
2783
2784 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2785 }
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796 static void _perf_event_enable(struct perf_event *event)
2797 {
2798 struct perf_event_context *ctx = event->ctx;
2799
2800 raw_spin_lock_irq(&ctx->lock);
2801 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2802 event->state < PERF_EVENT_STATE_ERROR) {
2803 raw_spin_unlock_irq(&ctx->lock);
2804 return;
2805 }
2806
2807
2808
2809
2810
2811
2812
2813
2814 if (event->state == PERF_EVENT_STATE_ERROR)
2815 event->state = PERF_EVENT_STATE_OFF;
2816 raw_spin_unlock_irq(&ctx->lock);
2817
2818 event_function_call(event, __perf_event_enable, NULL);
2819 }
2820
2821 /*
2822  * See perf_event_disable().
2823  */
2824 void perf_event_enable(struct perf_event *event)
2825 {
2826 struct perf_event_context *ctx;
2827
2828 ctx = perf_event_ctx_lock(event);
2829 _perf_event_enable(event);
2830 perf_event_ctx_unlock(event, ctx);
2831 }
2832 EXPORT_SYMBOL_GPL(perf_event_enable);
2833
2834 struct stop_event_data {
2835 struct perf_event *event;
2836 unsigned int restart;
2837 };
2838
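/*
 * __perf_event_stop() runs on the event's CPU via cpu_function_call() from
 * perf_event_stop() below. It can observe a stale ->oncpu if the event was
 * just migrated; in that case it returns -EAGAIN and the caller retries on
 * the new CPU. When sd->restart is set, the event is restarted right after
 * the PMU has stopped and updated it.
 */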
2839 static int __perf_event_stop(void *info)
2840 {
2841 struct stop_event_data *sd = info;
2842 struct perf_event *event = sd->event;
2843
2844
2845 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
2846 return 0;
2847
2848
2849 smp_rmb();
2850
2851
2852
2853
2854
2855 if (READ_ONCE(event->oncpu) != smp_processor_id())
2856 return -EAGAIN;
2857
2858 event->pmu->stop(event, PERF_EF_UPDATE);
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869 if (sd->restart)
2870 event->pmu->start(event, 0);
2871
2872 return 0;
2873 }
2874
2875 static int perf_event_stop(struct perf_event *event, int restart)
2876 {
2877 struct stop_event_data sd = {
2878 .event = event,
2879 .restart = restart,
2880 };
2881 int ret = 0;
2882
2883 do {
2884 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
2885 return 0;
2886
2887
2888 smp_rmb();
2889
2890
2891
2892
2893
2894
2895 ret = cpu_function_call(READ_ONCE(event->oncpu),
2896 __perf_event_stop, &sd);
2897 } while (ret == -EAGAIN);
2898
2899 return ret;
2900 }
2901
2902 /*
2903  * Address filter management is split in two parts:
2904  *
2905  *  (p1) when userspace mappings change (new filters via the SET_FILTER
2906  *       ioctl, new mmap()s, or exec), the addresses in
2907  *       event::addr_filter_ranges are updated and
2908  *       event::addr_filters_gen is bumped;
2909  *  (p2) when the event is scheduled in, perf_event_addr_filters_sync()
2910  *       notices the generation change and lets the PMU reprogram the
2911  *       hardware filters via pmu::addr_filters_sync().
2912  *
2913  * If (p1) happens while the event is active, the event is restarted to
2914  * force (p2).
2915  */
2916
2917
2918
2919
2920
2921
2922
2923
2924 void perf_event_addr_filters_sync(struct perf_event *event)
2925 {
2926 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
2927
2928 if (!has_addr_filter(event))
2929 return;
2930
2931 raw_spin_lock(&ifh->lock);
2932 if (event->addr_filters_gen != event->hw.addr_filters_gen) {
2933 event->pmu->addr_filters_sync(event);
2934 event->hw.addr_filters_gen = event->addr_filters_gen;
2935 }
2936 raw_spin_unlock(&ifh->lock);
2937 }
2938 EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
2939
2940 static int _perf_event_refresh(struct perf_event *event, int refresh)
2941 {
2942 /*
2943  * Not supported on inherited events, and only meaningful for sampling events.
2944  */
2945 if (event->attr.inherit || !is_sampling_event(event))
2946 return -EINVAL;
2947
2948 atomic_add(refresh, &event->event_limit);
2949 _perf_event_enable(event);
2950
2951 return 0;
2952 }
2953
2954 /*
2955  * See perf_event_disable().
2956  */
2957 int perf_event_refresh(struct perf_event *event, int refresh)
2958 {
2959 struct perf_event_context *ctx;
2960 int ret;
2961
2962 ctx = perf_event_ctx_lock(event);
2963 ret = _perf_event_refresh(event, refresh);
2964 perf_event_ctx_unlock(event, ctx);
2965
2966 return ret;
2967 }
2968 EXPORT_SYMBOL_GPL(perf_event_refresh);
2969
2970 static int perf_event_modify_breakpoint(struct perf_event *bp,
2971 struct perf_event_attr *attr)
2972 {
2973 int err;
2974
2975 _perf_event_disable(bp);
2976
2977 err = modify_user_hw_breakpoint_check(bp, attr, true);
2978
2979 if (!bp->attr.disabled)
2980 _perf_event_enable(bp);
2981
2982 return err;
2983 }
2984
2985 static int perf_event_modify_attr(struct perf_event *event,
2986 struct perf_event_attr *attr)
2987 {
2988 if (event->attr.type != attr->type)
2989 return -EINVAL;
2990
2991 switch (event->attr.type) {
2992 case PERF_TYPE_BREAKPOINT:
2993 return perf_event_modify_breakpoint(event, attr);
2994 default:
2995
2996 return -EOPNOTSUPP;
2997 }
2998 }
2999
3000 static void ctx_sched_out(struct perf_event_context *ctx,
3001 struct perf_cpu_context *cpuctx,
3002 enum event_type_t event_type)
3003 {
3004 struct perf_event *event, *tmp;
3005 int is_active = ctx->is_active;
3006
3007 lockdep_assert_held(&ctx->lock);
3008
3009 if (likely(!ctx->nr_events)) {
3010
3011
3012
3013 WARN_ON_ONCE(ctx->is_active);
3014 if (ctx->task)
3015 WARN_ON_ONCE(cpuctx->task_ctx);
3016 return;
3017 }
3018
3019 ctx->is_active &= ~event_type;
3020 if (!(ctx->is_active & EVENT_ALL))
3021 ctx->is_active = 0;
3022
3023 if (ctx->task) {
3024 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3025 if (!ctx->is_active)
3026 cpuctx->task_ctx = NULL;
3027 }
3028
3029 /*
3030  * Always update time if it was set; not only when it changes.
3031  * Otherwise we can 'forget' to update time for any but the last
3032  * context we sched out. For example:
3033  *
3034  *   ctx_sched_out(.event_type = EVENT_FLEXIBLE)
3035  *   ctx_sched_out(.event_type = EVENT_ALL)
3036  *
3037  * would only update time for the pinned events.
3038  */
3039 if (is_active & EVENT_TIME) {
3040
3041 update_context_time(ctx);
3042 update_cgrp_time_from_cpuctx(cpuctx);
3043 }
3044
3045 is_active ^= ctx->is_active;
3046
3047 if (!ctx->nr_active || !(is_active & EVENT_ALL))
3048 return;
3049
3050 /*
3051  * If we had been multiplexing, no rotations are necessary now that
3052  * no events are active.
3053  */
3054 ctx->rotate_necessary = 0;
3055
3056 perf_pmu_disable(ctx->pmu);
3057 if (is_active & EVENT_PINNED) {
3058 list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
3059 group_sched_out(event, cpuctx, ctx);
3060 }
3061
3062 if (is_active & EVENT_FLEXIBLE) {
3063 list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
3064 group_sched_out(event, cpuctx, ctx);
3065 }
3066 perf_pmu_enable(ctx->pmu);
3067 }
3068
3069 /*
3070  * Test whether two contexts are equivalent, i.e. whether they have both
3071  * been cloned from the same version of the same context.
3072  *
3073  * Equivalence is measured using a generation number in the context that
3074  * is incremented on each modification to it; see unclone_ctx(),
3075  * list_add_event() and list_del_event().
3076  */
3077 static int context_equiv(struct perf_event_context *ctx1,
3078 struct perf_event_context *ctx2)
3079 {
3080 lockdep_assert_held(&ctx1->lock);
3081 lockdep_assert_held(&ctx2->lock);
3082
3083
3084 if (ctx1->pin_count || ctx2->pin_count)
3085 return 0;
3086
3087
3088 if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
3089 return 1;
3090
3091
3092 if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
3093 return 1;
3094
3095
3096
3097
3098
3099 if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
3100 ctx1->parent_gen == ctx2->parent_gen)
3101 return 1;
3102
3103
3104 return 0;
3105 }
3106
3107 static void __perf_event_sync_stat(struct perf_event *event,
3108 struct perf_event *next_event)
3109 {
3110 u64 value;
3111
3112 if (!event->attr.inherit_stat)
3113 return;
3114
3115
3116
3117
3118
3119
3120
3121
3122 if (event->state == PERF_EVENT_STATE_ACTIVE)
3123 event->pmu->read(event);
3124
3125 perf_event_update_time(event);
3126
3127
3128
3129
3130
3131 value = local64_read(&next_event->count);
3132 value = local64_xchg(&event->count, value);
3133 local64_set(&next_event->count, value);
3134
3135 swap(event->total_time_enabled, next_event->total_time_enabled);
3136 swap(event->total_time_running, next_event->total_time_running);
3137
3138
3139
3140
3141 perf_event_update_userpage(event);
3142 perf_event_update_userpage(next_event);
3143 }
3144
3145 static void perf_event_sync_stat(struct perf_event_context *ctx,
3146 struct perf_event_context *next_ctx)
3147 {
3148 struct perf_event *event, *next_event;
3149
3150 if (!ctx->nr_stat)
3151 return;
3152
3153 update_context_time(ctx);
3154
3155 event = list_first_entry(&ctx->event_list,
3156 struct perf_event, event_entry);
3157
3158 next_event = list_first_entry(&next_ctx->event_list,
3159 struct perf_event, event_entry);
3160
3161 while (&event->event_entry != &ctx->event_list &&
3162 &next_event->event_entry != &next_ctx->event_list) {
3163
3164 __perf_event_sync_stat(event, next_event);
3165
3166 event = list_next_entry(event, event_entry);
3167 next_event = list_next_entry(next_event, event_entry);
3168 }
3169 }
3170
3171 static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
3172 struct task_struct *next)
3173 {
3174 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
3175 struct perf_event_context *next_ctx;
3176 struct perf_event_context *parent, *next_parent;
3177 struct perf_cpu_context *cpuctx;
3178 int do_switch = 1;
3179
3180 if (likely(!ctx))
3181 return;
3182
3183 cpuctx = __get_cpu_context(ctx);
3184 if (!cpuctx->task_ctx)
3185 return;
3186
3187 rcu_read_lock();
3188 next_ctx = next->perf_event_ctxp[ctxn];
3189 if (!next_ctx)
3190 goto unlock;
3191
3192 parent = rcu_dereference(ctx->parent_ctx);
3193 next_parent = rcu_dereference(next_ctx->parent_ctx);
3194
3195 /* If neither context has a parent context, they cannot be clones. */
3196 if (!parent && !next_parent)
3197 goto unlock;
3198
3199 if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209 raw_spin_lock(&ctx->lock);
3210 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
3211 if (context_equiv(ctx, next_ctx)) {
3212 WRITE_ONCE(ctx->task, next);
3213 WRITE_ONCE(next_ctx->task, task);
3214
3215 swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
3216
3217
3218
3219
3220
3221
3222
3223
3224 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
3225 RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
3226
3227 do_switch = 0;
3228
3229 perf_event_sync_stat(ctx, next_ctx);
3230 }
3231 raw_spin_unlock(&next_ctx->lock);
3232 raw_spin_unlock(&ctx->lock);
3233 }
3234 unlock:
3235 rcu_read_unlock();
3236
3237 if (do_switch) {
3238 raw_spin_lock(&ctx->lock);
3239 task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
3240 raw_spin_unlock(&ctx->lock);
3241 }
3242 }
3243
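/*
 * PMUs that need a callback on every context switch (e.g. to save/restore
 * hardware state such as LBR stacks) register with perf_sched_cb_inc() and
 * unregister with perf_sched_cb_dec(); their cpu contexts are kept on the
 * per-CPU sched_cb_list below and perf_pmu_sched_task() invokes
 * pmu->sched_task() for each of them around a task switch.
 */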
3244 static DEFINE_PER_CPU(struct list_head, sched_cb_list);
3245
3246 void perf_sched_cb_dec(struct pmu *pmu)
3247 {
3248 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3249
3250 this_cpu_dec(perf_sched_cb_usages);
3251
3252 if (!--cpuctx->sched_cb_usage)
3253 list_del(&cpuctx->sched_cb_entry);
3254 }
3255
3256
3257 void perf_sched_cb_inc(struct pmu *pmu)
3258 {
3259 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3260
3261 if (!cpuctx->sched_cb_usage++)
3262 list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
3263
3264 this_cpu_inc(perf_sched_cb_usages);
3265 }
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275 static void perf_pmu_sched_task(struct task_struct *prev,
3276 struct task_struct *next,
3277 bool sched_in)
3278 {
3279 struct perf_cpu_context *cpuctx;
3280 struct pmu *pmu;
3281
3282 if (prev == next)
3283 return;
3284
3285 list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
3286 pmu = cpuctx->ctx.pmu;
3287
3288 if (WARN_ON_ONCE(!pmu->sched_task))
3289 continue;
3290
3291 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
3292 perf_pmu_disable(pmu);
3293
3294 pmu->sched_task(cpuctx->task_ctx, sched_in);
3295
3296 perf_pmu_enable(pmu);
3297 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
3298 }
3299 }
3300
3301 static void perf_event_switch(struct task_struct *task,
3302 struct task_struct *next_prev, bool sched_in);
3303
3304 #define for_each_task_context_nr(ctxn) \
3305 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
3306
3307 /*
3308  * Called from scheduler to remove the events of the current task,
3309  * with interrupts disabled.
3310  *
3311  * We stop each event and update the event value in event->count.
3312  *
3313  * This does not protect us against NMI, but disable()
3314  * sets the disabled bit in the control field of event _before_
3315  * accessing the event control register. If a NMI hits, then it will
3316  * not restart the event.
3317  */
3318 void __perf_event_task_sched_out(struct task_struct *task,
3319 struct task_struct *next)
3320 {
3321 int ctxn;
3322
3323 if (__this_cpu_read(perf_sched_cb_usages))
3324 perf_pmu_sched_task(task, next, false);
3325
3326 if (atomic_read(&nr_switch_events))
3327 perf_event_switch(task, next, false);
3328
3329 for_each_task_context_nr(ctxn)
3330 perf_event_context_sched_out(task, ctxn, next);
3331
3332
3333
3334
3335
3336
3337 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3338 perf_cgroup_sched_out(task, next);
3339 }
3340
3341
3342
3343
3344 static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
3345 enum event_type_t event_type)
3346 {
3347 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
3348 }
3349
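/*
 * visit_groups_merge() walks two subtrees of a group rbtree -- the
 * CPU-agnostic events (cpu == -1) and the events bound to this CPU -- in
 * ascending group_index order, like a two-way merge, calling func() on each
 * event until it returns non-zero.
 */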
3350 static int visit_groups_merge(struct perf_event_groups *groups, int cpu,
3351 int (*func)(struct perf_event *, void *), void *data)
3352 {
3353 struct perf_event **evt, *evt1, *evt2;
3354 int ret;
3355
3356 evt1 = perf_event_groups_first(groups, -1);
3357 evt2 = perf_event_groups_first(groups, cpu);
3358
3359 while (evt1 || evt2) {
3360 if (evt1 && evt2) {
3361 if (evt1->group_index < evt2->group_index)
3362 evt = &evt1;
3363 else
3364 evt = &evt2;
3365 } else if (evt1) {
3366 evt = &evt1;
3367 } else {
3368 evt = &evt2;
3369 }
3370
3371 ret = func(*evt, data);
3372 if (ret)
3373 return ret;
3374
3375 *evt = perf_event_groups_next(*evt);
3376 }
3377
3378 return 0;
3379 }
3380
3381 struct sched_in_data {
3382 struct perf_event_context *ctx;
3383 struct perf_cpu_context *cpuctx;
3384 int can_add_hw;
3385 };
3386
3387 static int pinned_sched_in(struct perf_event *event, void *data)
3388 {
3389 struct sched_in_data *sid = data;
3390
3391 if (event->state <= PERF_EVENT_STATE_OFF)
3392 return 0;
3393
3394 if (!event_filter_match(event))
3395 return 0;
3396
3397 if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
3398 if (!group_sched_in(event, sid->cpuctx, sid->ctx))
3399 list_add_tail(&event->active_list, &sid->ctx->pinned_active);
3400 }
3401
3402
3403
3404
3405
3406 if (event->state == PERF_EVENT_STATE_INACTIVE)
3407 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
3408
3409 return 0;
3410 }
3411
3412 static int flexible_sched_in(struct perf_event *event, void *data)
3413 {
3414 struct sched_in_data *sid = data;
3415
3416 if (event->state <= PERF_EVENT_STATE_OFF)
3417 return 0;
3418
3419 if (!event_filter_match(event))
3420 return 0;
3421
3422 if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
3423 int ret = group_sched_in(event, sid->cpuctx, sid->ctx);
3424 if (ret) {
3425 sid->can_add_hw = 0;
3426 sid->ctx->rotate_necessary = 1;
3427 return 0;
3428 }
3429 list_add_tail(&event->active_list, &sid->ctx->flexible_active);
3430 }
3431
3432 return 0;
3433 }
3434
3435 static void
3436 ctx_pinned_sched_in(struct perf_event_context *ctx,
3437 struct perf_cpu_context *cpuctx)
3438 {
3439 struct sched_in_data sid = {
3440 .ctx = ctx,
3441 .cpuctx = cpuctx,
3442 .can_add_hw = 1,
3443 };
3444
3445 visit_groups_merge(&ctx->pinned_groups,
3446 smp_processor_id(),
3447 pinned_sched_in, &sid);
3448 }
3449
3450 static void
3451 ctx_flexible_sched_in(struct perf_event_context *ctx,
3452 struct perf_cpu_context *cpuctx)
3453 {
3454 struct sched_in_data sid = {
3455 .ctx = ctx,
3456 .cpuctx = cpuctx,
3457 .can_add_hw = 1,
3458 };
3459
3460 visit_groups_merge(&ctx->flexible_groups,
3461 smp_processor_id(),
3462 flexible_sched_in, &sid);
3463 }
3464
3465 static void
3466 ctx_sched_in(struct perf_event_context *ctx,
3467 struct perf_cpu_context *cpuctx,
3468 enum event_type_t event_type,
3469 struct task_struct *task)
3470 {
3471 int is_active = ctx->is_active;
3472 u64 now;
3473
3474 lockdep_assert_held(&ctx->lock);
3475
3476 if (likely(!ctx->nr_events))
3477 return;
3478
3479 ctx->is_active |= (event_type | EVENT_TIME);
3480 if (ctx->task) {
3481 if (!is_active)
3482 cpuctx->task_ctx = ctx;
3483 else
3484 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3485 }
3486
3487 is_active ^= ctx->is_active;
3488
3489 if (is_active & EVENT_TIME) {
3490
3491 now = perf_clock();
3492 ctx->timestamp = now;
3493 perf_cgroup_set_timestamp(task, ctx);
3494 }
3495
3496
3497
3498
3499
3500 if (is_active & EVENT_PINNED)
3501 ctx_pinned_sched_in(ctx, cpuctx);
3502
3503
3504 if (is_active & EVENT_FLEXIBLE)
3505 ctx_flexible_sched_in(ctx, cpuctx);
3506 }
3507
3508 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
3509 enum event_type_t event_type,
3510 struct task_struct *task)
3511 {
3512 struct perf_event_context *ctx = &cpuctx->ctx;
3513
3514 ctx_sched_in(ctx, cpuctx, event_type, task);
3515 }
3516
3517 static void perf_event_context_sched_in(struct perf_event_context *ctx,
3518 struct task_struct *task)
3519 {
3520 struct perf_cpu_context *cpuctx;
3521
3522 cpuctx = __get_cpu_context(ctx);
3523 if (cpuctx->task_ctx == ctx)
3524 return;
3525
3526 perf_ctx_lock(cpuctx, ctx);
3527
3528
3529
3530
3531 if (!ctx->nr_events)
3532 goto unlock;
3533
3534 perf_pmu_disable(ctx->pmu);
3535
3536
3537
3538
3539
3540
3541
3542
3543 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
3544 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
3545 perf_event_sched_in(cpuctx, ctx, task);
3546 perf_pmu_enable(ctx->pmu);
3547
3548 unlock:
3549 perf_ctx_unlock(cpuctx, ctx);
3550 }
3551
3552 /*
3553  * Called from scheduler to add the events of the current task
3554  * with interrupts disabled.
3555  *
3556  * We restore the event value and then enable it.
3557  *
3558  * This does not protect us against NMI, but enable()
3559  * sets the enabled bit in the control field of event _before_
3560  * accessing the event control register. If a NMI hits, then it will
3561  * keep the event running.
3562  */
3563 void __perf_event_task_sched_in(struct task_struct *prev,
3564 struct task_struct *task)
3565 {
3566 struct perf_event_context *ctx;
3567 int ctxn;
3568
3569
3570
3571
3572
3573
3574
3575
3576 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3577 perf_cgroup_sched_in(prev, task);
3578
3579 for_each_task_context_nr(ctxn) {
3580 ctx = task->perf_event_ctxp[ctxn];
3581 if (likely(!ctx))
3582 continue;
3583
3584 perf_event_context_sched_in(ctx, task);
3585 }
3586
3587 if (atomic_read(&nr_switch_events))
3588 perf_event_switch(task, prev, true);
3589
3590 if (__this_cpu_read(perf_sched_cb_usages))
3591 perf_pmu_sched_task(prev, task, true);
3592 }
3593
3594 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
3595 {
3596 u64 frequency = event->attr.sample_freq;
3597 u64 sec = NSEC_PER_SEC;
3598 u64 divisor, dividend;
3599
3600 int count_fls, nsec_fls, frequency_fls, sec_fls;
3601
3602 count_fls = fls64(count);
3603 nsec_fls = fls64(nsec);
3604 frequency_fls = fls64(frequency);
3605 sec_fls = 30;
3606
3607 /*
3608  * We got @count in @nsec, with a target of sample_freq HZ
3609  * the target period becomes:
3610  *
3611  *             @count * 10^9
3612  * period = -------------------
3613  *          @nsec * sample_freq
3614  *
3615  * To avoid 64-bit overflow in the multiplications, reduce the terms
3616  * (tracked via their fls() bit widths) until both products fit in
3617  * 64 bits, then do a single 64-bit division.
3618  */
3619
3620
3621 #define REDUCE_FLS(a, b) \
3622 do { \
3623 if (a##_fls > b##_fls) { \
3624 a >>= 1; \
3625 a##_fls--; \
3626 } else { \
3627 b >>= 1; \
3628 b##_fls--; \
3629 } \
3630 } while (0)
3631
3632
3633
3634
3635
3636 while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
3637 REDUCE_FLS(nsec, frequency);
3638 REDUCE_FLS(sec, count);
3639 }
3640
3641 if (count_fls + sec_fls > 64) {
3642 divisor = nsec * frequency;
3643
3644 while (count_fls + sec_fls > 64) {
3645 REDUCE_FLS(count, sec);
3646 divisor >>= 1;
3647 }
3648
3649 dividend = count * sec;
3650 } else {
3651 dividend = count * sec;
3652
3653 while (nsec_fls + frequency_fls > 64) {
3654 REDUCE_FLS(nsec, frequency);
3655 dividend >>= 1;
3656 }
3657
3658 divisor = nsec * frequency;
3659 }
3660
3661 if (!divisor)
3662 return dividend;
3663
3664 return div64_u64(dividend, divisor);
3665 }
3666
3667 static DEFINE_PER_CPU(int, perf_throttled_count);
3668 static DEFINE_PER_CPU(u64, perf_throttled_seq);
3669
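/*
 * perf_adjust_period() nudges hwc->sample_period towards the value computed
 * by perf_calculate_period(), moving only 1/8th of the difference per call
 * so the period converges smoothly instead of oscillating.
 *
 * Worked example (illustrative numbers only): with sample_period == 10000
 * and a computed target of 18000, delta = (18000 - 10000 + 7) / 8 = 1000,
 * so the new sample_period is 11000. If more than 8 new periods worth of
 * counts are still pending in period_left, the event is stopped and
 * restarted so the new period takes effect immediately.
 */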
3670 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
3671 {
3672 struct hw_perf_event *hwc = &event->hw;
3673 s64 period, sample_period;
3674 s64 delta;
3675
3676 period = perf_calculate_period(event, nsec, count);
3677
3678 delta = (s64)(period - hwc->sample_period);
3679 delta = (delta + 7) / 8;
3680
3681 sample_period = hwc->sample_period + delta;
3682
3683 if (!sample_period)
3684 sample_period = 1;
3685
3686 hwc->sample_period = sample_period;
3687
3688 if (local64_read(&hwc->period_left) > 8*sample_period) {
3689 if (disable)
3690 event->pmu->stop(event, PERF_EF_UPDATE);
3691
3692 local64_set(&hwc->period_left, 0);
3693
3694 if (disable)
3695 event->pmu->start(event, PERF_EF_RELOAD);
3696 }
3697 }
3698
3699 /*
3700  * Combine freq adjustment with unthrottling to avoid two passes over
3701  * the event list.
3702  */
3703
3704 static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
3705 int needs_unthr)
3706 {
3707 struct perf_event *event;
3708 struct hw_perf_event *hwc;
3709 u64 now, period = TICK_NSEC;
3710 s64 delta;
3711
3712
3713
3714
3715
3716
3717 if (!(ctx->nr_freq || needs_unthr))
3718 return;
3719
3720 raw_spin_lock(&ctx->lock);
3721 perf_pmu_disable(ctx->pmu);
3722
3723 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3724 if (event->state != PERF_EVENT_STATE_ACTIVE)
3725 continue;
3726
3727 if (!event_filter_match(event))
3728 continue;
3729
3730 perf_pmu_disable(event->pmu);
3731
3732 hwc = &event->hw;
3733
3734 if (hwc->interrupts == MAX_INTERRUPTS) {
3735 hwc->interrupts = 0;
3736 perf_log_throttle(event, 1);
3737 event->pmu->start(event, 0);
3738 }
3739
3740 if (!event->attr.freq || !event->attr.sample_freq)
3741 goto next;
3742
3743
3744
3745
3746 event->pmu->stop(event, PERF_EF_UPDATE);
3747
3748 now = local64_read(&event->count);
3749 delta = now - hwc->freq_count_stamp;
3750 hwc->freq_count_stamp = now;
3751
3752
3753
3754
3755
3756
3757
3758
3759 if (delta > 0)
3760 perf_adjust_period(event, period, delta, false);
3761
3762 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
3763 next:
3764 perf_pmu_enable(event->pmu);
3765 }
3766
3767 perf_pmu_enable(ctx->pmu);
3768 raw_spin_unlock(&ctx->lock);
3769 }
3770
3771
3772
3773
3774 static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
3775 {
3776
3777
3778
3779
3780 if (ctx->rotate_disable)
3781 return;
3782
3783 perf_event_groups_delete(&ctx->flexible_groups, event);
3784 perf_event_groups_insert(&ctx->flexible_groups, event);
3785 }
3786
3787
3788 static inline struct perf_event *
3789 ctx_event_to_rotate(struct perf_event_context *ctx)
3790 {
3791 struct perf_event *event;
3792
3793
3794 event = list_first_entry_or_null(&ctx->flexible_active,
3795 struct perf_event, active_list);
3796
3797
3798 if (!event) {
3799 event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
3800 typeof(*event), group_node);
3801 }
3802
3803 return event;
3804 }
3805
3806 static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
3807 {
3808 struct perf_event *cpu_event = NULL, *task_event = NULL;
3809 struct perf_event_context *task_ctx = NULL;
3810 int cpu_rotate, task_rotate;
3811
3812 /*
3813  * Only rotate a context when its flexible groups could not all be
3814  * scheduled in on the last attempt, i.e. when rotate_necessary was
3815  * set by ctx_flexible_sched_in().
3816  */
3817 cpu_rotate = cpuctx->ctx.rotate_necessary;
3818 task_ctx = cpuctx->task_ctx;
3819 task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
3820
3821 if (!(cpu_rotate || task_rotate))
3822 return false;
3823
3824 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
3825 perf_pmu_disable(cpuctx->ctx.pmu);
3826
3827 if (task_rotate)
3828 task_event = ctx_event_to_rotate(task_ctx);
3829 if (cpu_rotate)
3830 cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
3831
3832
3833
3834
3835
3836 if (task_event || (task_ctx && cpu_event))
3837 ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
3838 if (cpu_event)
3839 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
3840
3841 if (task_event)
3842 rotate_ctx(task_ctx, task_event);
3843 if (cpu_event)
3844 rotate_ctx(&cpuctx->ctx, cpu_event);
3845
3846 perf_event_sched_in(cpuctx, task_ctx, current);
3847
3848 perf_pmu_enable(cpuctx->ctx.pmu);
3849 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
3850
3851 return true;
3852 }
3853
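/*
 * perf_event_task_tick() runs from the scheduler tick with IRQs disabled:
 * it unthrottles events that were throttled during the previous tick and
 * re-adjusts the period of freq-based sampling events on every active
 * context of this CPU.
 */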
3854 void perf_event_task_tick(void)
3855 {
3856 struct list_head *head = this_cpu_ptr(&active_ctx_list);
3857 struct perf_event_context *ctx, *tmp;
3858 int throttled;
3859
3860 lockdep_assert_irqs_disabled();
3861
3862 __this_cpu_inc(perf_throttled_seq);
3863 throttled = __this_cpu_xchg(perf_throttled_count, 0);
3864 tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
3865
3866 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
3867 perf_adjust_freq_unthr_context(ctx, throttled);
3868 }
3869
3870 static int event_enable_on_exec(struct perf_event *event,
3871 struct perf_event_context *ctx)
3872 {
3873 if (!event->attr.enable_on_exec)
3874 return 0;
3875
3876 event->attr.enable_on_exec = 0;
3877 if (event->state >= PERF_EVENT_STATE_INACTIVE)
3878 return 0;
3879
3880 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
3881
3882 return 1;
3883 }
3884
3885
3886
3887
3888
3889 static void perf_event_enable_on_exec(int ctxn)
3890 {
3891 struct perf_event_context *ctx, *clone_ctx = NULL;
3892 enum event_type_t event_type = 0;
3893 struct perf_cpu_context *cpuctx;
3894 struct perf_event *event;
3895 unsigned long flags;
3896 int enabled = 0;
3897
3898 local_irq_save(flags);
3899 ctx = current->perf_event_ctxp[ctxn];
3900 if (!ctx || !ctx->nr_events)
3901 goto out;
3902
3903 cpuctx = __get_cpu_context(ctx);
3904 perf_ctx_lock(cpuctx, ctx);
3905 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
3906 list_for_each_entry(event, &ctx->event_list, event_entry) {
3907 enabled |= event_enable_on_exec(event, ctx);
3908 event_type |= get_event_type(event);
3909 }
3910
3911
3912
3913
3914 if (enabled) {
3915 clone_ctx = unclone_ctx(ctx);
3916 ctx_resched(cpuctx, ctx, event_type);
3917 } else {
3918 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
3919 }
3920 perf_ctx_unlock(cpuctx, ctx);
3921
3922 out:
3923 local_irq_restore(flags);
3924
3925 if (clone_ctx)
3926 put_ctx(clone_ctx);
3927 }
3928
3929 struct perf_read_data {
3930 struct perf_event *event;
3931 bool group;
3932 int ret;
3933 };
3934
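/*
 * __perf_event_read_cpu() picks the CPU to read an event on: events that
 * advertise PERF_EV_CAP_READ_ACTIVE_PKG (typically uncore/package-wide
 * counters) can be read from any CPU in the same package, so prefer the
 * local CPU and avoid a cross-package IPI where possible.
 */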
3935 static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
3936 {
3937 u16 local_pkg, event_pkg;
3938
3939 if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
3940 int local_cpu = smp_processor_id();
3941
3942 event_pkg = topology_physical_package_id(event_cpu);
3943 local_pkg = topology_physical_package_id(local_cpu);
3944
3945 if (event_pkg == local_pkg)
3946 return local_cpu;
3947 }
3948
3949 return event_cpu;
3950 }
3951
3952
3953
3954
3955 static void __perf_event_read(void *info)
3956 {
3957 struct perf_read_data *data = info;
3958 struct perf_event *sub, *event = data->event;
3959 struct perf_event_context *ctx = event->ctx;
3960 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
3961 struct pmu *pmu = event->pmu;
3962
3963 /*
3964  * If this is a task context, we need to check whether it is
3965  * the current task context of this cpu.  If not it has been
3966  * scheduled out before the smp call arrived.  In that case
3967  * event->count would have been updated to a recent sample
3968  * when the event was scheduled out.
3969  */
3970 if (ctx->task && cpuctx->task_ctx != ctx)
3971 return;
3972
3973 raw_spin_lock(&ctx->lock);
3974 if (ctx->is_active & EVENT_TIME) {
3975 update_context_time(ctx);
3976 update_cgrp_time_from_event(event);
3977 }
3978
3979 perf_event_update_time(event);
3980 if (data->group)
3981 perf_event_update_sibling_time(event);
3982
3983 if (event->state != PERF_EVENT_STATE_ACTIVE)
3984 goto unlock;
3985
3986 if (!data->group) {
3987 pmu->read(event);
3988 data->ret = 0;
3989 goto unlock;
3990 }
3991
3992 pmu->start_txn(pmu, PERF_PMU_TXN_READ);
3993
3994 pmu->read(event);
3995
3996 for_each_sibling_event(sub, event) {
3997 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
3998
3999
4000
4001
4002 sub->pmu->read(sub);
4003 }
4004 }
4005
4006 data->ret = pmu->commit_txn(pmu);
4007
4008 unlock:
4009 raw_spin_unlock(&ctx->lock);
4010 }
4011
4012 static inline u64 perf_event_count(struct perf_event *event)
4013 {
4014 return local64_read(&event->count) + atomic64_read(&event->child_count);
4015 }
4016
4017 /*
4018  * NMI-safe method to read a local event, that is an event that is:
4019  *   - either for the current task, or for this CPU;
4020  *   - not an inherited event, since those will not be local and we
4021  *     cannot read them atomically;
4022  *   - if pinned, currently scheduled on this CPU.
4023  */
4024
4025 int perf_event_read_local(struct perf_event *event, u64 *value,
4026 u64 *enabled, u64 *running)
4027 {
4028 unsigned long flags;
4029 int ret = 0;
4030
4031 /*
4032  * Disabling interrupts avoids all counter scheduling (context
4033  * switches, timer based rotation and IPIs).
4034  */
4035 local_irq_save(flags);
4036
4037
4038
4039
4040
4041 if (event->attr.inherit) {
4042 ret = -EOPNOTSUPP;
4043 goto out;
4044 }
4045
4046
4047 if ((event->attach_state & PERF_ATTACH_TASK) &&
4048 event->hw.target != current) {
4049 ret = -EINVAL;
4050 goto out;
4051 }
4052
4053
4054 if (!(event->attach_state & PERF_ATTACH_TASK) &&
4055 event->cpu != smp_processor_id()) {
4056 ret = -EINVAL;
4057 goto out;
4058 }
4059
4060
4061 if (event->attr.pinned && event->oncpu != smp_processor_id()) {
4062 ret = -EBUSY;
4063 goto out;
4064 }
4065
4066
4067
4068
4069
4070
4071 if (event->oncpu == smp_processor_id())
4072 event->pmu->read(event);
4073
4074 *value = local64_read(&event->count);
4075 if (enabled || running) {
4076 u64 now = event->shadow_ctx_time + perf_clock();
4077 u64 __enabled, __running;
4078
4079 __perf_update_times(event, now, &__enabled, &__running);
4080 if (enabled)
4081 *enabled = __enabled;
4082 if (running)
4083 *running = __running;
4084 }
4085 out:
4086 local_irq_restore(flags);
4087
4088 return ret;
4089 }
4090
4091 static int perf_event_read(struct perf_event *event, bool group)
4092 {
4093 enum perf_event_state state = READ_ONCE(event->state);
4094 int event_cpu, ret = 0;
4095
4096 /*
4097  * If event is enabled and currently active on a CPU, update the
4098  * value in the event structure:
4099  */
4100 again:
4101 if (state == PERF_EVENT_STATE_ACTIVE) {
4102 struct perf_read_data data;
4103
4104
4105
4106
4107
4108
4109
4110 smp_rmb();
4111
4112 event_cpu = READ_ONCE(event->oncpu);
4113 if ((unsigned)event_cpu >= nr_cpu_ids)
4114 return 0;
4115
4116 data = (struct perf_read_data){
4117 .event = event,
4118 .group = group,
4119 .ret = 0,
4120 };
4121
4122 preempt_disable();
4123 event_cpu = __perf_event_read_cpu(event, event_cpu);
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135 (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
4136 preempt_enable();
4137 ret = data.ret;
4138
4139 } else if (state == PERF_EVENT_STATE_INACTIVE) {
4140 struct perf_event_context *ctx = event->ctx;
4141 unsigned long flags;
4142
4143 raw_spin_lock_irqsave(&ctx->lock, flags);
4144 state = event->state;
4145 if (state != PERF_EVENT_STATE_INACTIVE) {
4146 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4147 goto again;
4148 }
4149
4150
4151
4152
4153
4154 if (ctx->is_active & EVENT_TIME) {
4155 update_context_time(ctx);
4156 update_cgrp_time_from_event(event);
4157 }
4158
4159 perf_event_update_time(event);
4160 if (group)
4161 perf_event_update_sibling_time(event);
4162 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4163 }
4164
4165 return ret;
4166 }
4167
4168
4169
4170
4171 static void __perf_event_init_context(struct perf_event_context *ctx)
4172 {
4173 raw_spin_lock_init(&ctx->lock);
4174 mutex_init(&ctx->mutex);
4175 INIT_LIST_HEAD(&ctx->active_ctx_list);
4176 perf_event_groups_init(&ctx->pinned_groups);
4177 perf_event_groups_init(&ctx->flexible_groups);
4178 INIT_LIST_HEAD(&ctx->event_list);
4179 INIT_LIST_HEAD(&ctx->pinned_active);
4180 INIT_LIST_HEAD(&ctx->flexible_active);
4181 refcount_set(&ctx->refcount, 1);
4182 }
4183
4184 static struct perf_event_context *
4185 alloc_perf_context(struct pmu *pmu, struct task_struct *task)
4186 {
4187 struct perf_event_context *ctx;
4188
4189 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4190 if (!ctx)
4191 return NULL;
4192
4193 __perf_event_init_context(ctx);
4194 if (task)
4195 ctx->task = get_task_struct(task);
4196 ctx->pmu = pmu;
4197
4198 return ctx;
4199 }
4200
4201 static struct task_struct *
4202 find_lively_task_by_vpid(pid_t vpid)
4203 {
4204 struct task_struct *task;
4205
4206 rcu_read_lock();
4207 if (!vpid)
4208 task = current;
4209 else
4210 task = find_task_by_vpid(vpid);
4211 if (task)
4212 get_task_struct(task);
4213 rcu_read_unlock();
4214
4215 if (!task)
4216 return ERR_PTR(-ESRCH);
4217
4218 return task;
4219 }
4220
4221
4222
4223
4224 static struct perf_event_context *
4225 find_get_context(struct pmu *pmu, struct task_struct *task,
4226 struct perf_event *event)
4227 {
4228 struct perf_event_context *ctx, *clone_ctx = NULL;
4229 struct perf_cpu_context *cpuctx;
4230 void *task_ctx_data = NULL;
4231 unsigned long flags;
4232 int ctxn, err;
4233 int cpu = event->cpu;
4234
4235 if (!task) {
4236
4237 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
4238 return ERR_PTR(-EACCES);
4239
4240 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
4241 ctx = &cpuctx->ctx;
4242 get_ctx(ctx);
4243 ++ctx->pin_count;
4244
4245 return ctx;
4246 }
4247
4248 err = -EINVAL;
4249 ctxn = pmu->task_ctx_nr;
4250 if (ctxn < 0)
4251 goto errout;
4252
4253 if (event->attach_state & PERF_ATTACH_TASK_DATA) {
4254 task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
4255 if (!task_ctx_data) {
4256 err = -ENOMEM;
4257 goto errout;
4258 }
4259 }
4260
4261 retry:
4262 ctx = perf_lock_task_context(task, ctxn, &flags);
4263 if (ctx) {
4264 clone_ctx = unclone_ctx(ctx);
4265 ++ctx->pin_count;
4266
4267 if (task_ctx_data && !ctx->task_ctx_data) {
4268 ctx->task_ctx_data = task_ctx_data;
4269 task_ctx_data = NULL;
4270 }
4271 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4272
4273 if (clone_ctx)
4274 put_ctx(clone_ctx);
4275 } else {
4276 ctx = alloc_perf_context(pmu, task);
4277 err = -ENOMEM;
4278 if (!ctx)
4279 goto errout;
4280
4281 if (task_ctx_data) {
4282 ctx->task_ctx_data = task_ctx_data;
4283 task_ctx_data = NULL;
4284 }
4285
4286 err = 0;
4287 mutex_lock(&task->perf_event_mutex);
4288
4289
4290
4291
4292 if (task->flags & PF_EXITING)
4293 err = -ESRCH;
4294 else if (task->perf_event_ctxp[ctxn])
4295 err = -EAGAIN;
4296 else {
4297 get_ctx(ctx);
4298 ++ctx->pin_count;
4299 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
4300 }
4301 mutex_unlock(&task->perf_event_mutex);
4302
4303 if (unlikely(err)) {
4304 put_ctx(ctx);
4305
4306 if (err == -EAGAIN)
4307 goto retry;
4308 goto errout;
4309 }
4310 }
4311
4312 kfree(task_ctx_data);
4313 return ctx;
4314
4315 errout:
4316 kfree(task_ctx_data);
4317 return ERR_PTR(err);
4318 }
4319
4320 static void perf_event_free_filter(struct perf_event *event);
4321 static void perf_event_free_bpf_prog(struct perf_event *event);
4322
4323 static void free_event_rcu(struct rcu_head *head)
4324 {
4325 struct perf_event *event;
4326
4327 event = container_of(head, struct perf_event, rcu_head);
4328 if (event->ns)
4329 put_pid_ns(event->ns);
4330 perf_event_free_filter(event);
4331 kfree(event);
4332 }
4333
4334 static void ring_buffer_attach(struct perf_event *event,
4335 struct ring_buffer *rb);
4336
4337 static void detach_sb_event(struct perf_event *event)
4338 {
4339 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
4340
4341 raw_spin_lock(&pel->lock);
4342 list_del_rcu(&event->sb_list);
4343 raw_spin_unlock(&pel->lock);
4344 }
4345
4346 static bool is_sb_event(struct perf_event *event)
4347 {
4348 struct perf_event_attr *attr = &event->attr;
4349
4350 if (event->parent)
4351 return false;
4352
4353 if (event->attach_state & PERF_ATTACH_TASK)
4354 return false;
4355
4356 if (attr->mmap || attr->mmap_data || attr->mmap2 ||
4357 attr->comm || attr->comm_exec ||
4358 attr->task || attr->ksymbol ||
4359 attr->context_switch ||
4360 attr->bpf_event)
4361 return true;
4362 return false;
4363 }
4364
4365 static void unaccount_pmu_sb_event(struct perf_event *event)
4366 {
4367 if (is_sb_event(event))
4368 detach_sb_event(event);
4369 }
4370
4371 static void unaccount_event_cpu(struct perf_event *event, int cpu)
4372 {
4373 if (event->parent)
4374 return;
4375
4376 if (is_cgroup_event(event))
4377 atomic_dec(&per_cpu(perf_cgroup_events, cpu));
4378 }
4379
4380 #ifdef CONFIG_NO_HZ_FULL
4381 static DEFINE_SPINLOCK(nr_freq_lock);
4382 #endif
4383
4384 static void unaccount_freq_event_nohz(void)
4385 {
4386 #ifdef CONFIG_NO_HZ_FULL
4387 spin_lock(&nr_freq_lock);
4388 if (atomic_dec_and_test(&nr_freq_events))
4389 tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
4390 spin_unlock(&nr_freq_lock);
4391 #endif
4392 }
4393
4394 static void unaccount_freq_event(void)
4395 {
4396 if (tick_nohz_full_enabled())
4397 unaccount_freq_event_nohz();
4398 else
4399 atomic_dec(&nr_freq_events);
4400 }
4401
4402 static void unaccount_event(struct perf_event *event)
4403 {
4404 bool dec = false;
4405
4406 if (event->parent)
4407 return;
4408
4409 if (event->attach_state & PERF_ATTACH_TASK)
4410 dec = true;
4411 if (event->attr.mmap || event->attr.mmap_data)
4412 atomic_dec(&nr_mmap_events);
4413 if (event->attr.comm)
4414 atomic_dec(&nr_comm_events);
4415 if (event->attr.namespaces)
4416 atomic_dec(&nr_namespaces_events);
4417 if (event->attr.task)
4418 atomic_dec(&nr_task_events);
4419 if (event->attr.freq)
4420 unaccount_freq_event();
4421 if (event->attr.context_switch) {
4422 dec = true;
4423 atomic_dec(&nr_switch_events);
4424 }
4425 if (is_cgroup_event(event))
4426 dec = true;
4427 if (has_branch_stack(event))
4428 dec = true;
4429 if (event->attr.ksymbol)
4430 atomic_dec(&nr_ksymbol_events);
4431 if (event->attr.bpf_event)
4432 atomic_dec(&nr_bpf_events);
4433
4434 if (dec) {
4435 if (!atomic_add_unless(&perf_sched_count, -1, 1))
4436 schedule_delayed_work(&perf_sched_work, HZ);
4437 }
4438
4439 unaccount_event_cpu(event, event->cpu);
4440
4441 unaccount_pmu_sb_event(event);
4442 }
4443
4444 static void perf_sched_delayed(struct work_struct *work)
4445 {
4446 mutex_lock(&perf_sched_mutex);
4447 if (atomic_dec_and_test(&perf_sched_count))
4448 static_branch_disable(&perf_sched_events);
4449 mutex_unlock(&perf_sched_mutex);
4450 }
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464 static int exclusive_event_init(struct perf_event *event)
4465 {
4466 struct pmu *pmu = event->pmu;
4467
4468 if (!is_exclusive_pmu(pmu))
4469 return 0;
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484 if (event->attach_state & PERF_ATTACH_TASK) {
4485 if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
4486 return -EBUSY;
4487 } else {
4488 if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
4489 return -EBUSY;
4490 }
4491
4492 return 0;
4493 }
4494
4495 static void exclusive_event_destroy(struct perf_event *event)
4496 {
4497 struct pmu *pmu = event->pmu;
4498
4499 if (!is_exclusive_pmu(pmu))
4500 return;
4501
4502
4503 if (event->attach_state & PERF_ATTACH_TASK)
4504 atomic_dec(&pmu->exclusive_cnt);
4505 else
4506 atomic_inc(&pmu->exclusive_cnt);
4507 }
4508
4509 static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
4510 {
4511 if ((e1->pmu == e2->pmu) &&
4512 (e1->cpu == e2->cpu ||
4513 e1->cpu == -1 ||
4514 e2->cpu == -1))
4515 return true;
4516 return false;
4517 }
4518
4519 static bool exclusive_event_installable(struct perf_event *event,
4520 struct perf_event_context *ctx)
4521 {
4522 struct perf_event *iter_event;
4523 struct pmu *pmu = event->pmu;
4524
4525 lockdep_assert_held(&ctx->mutex);
4526
4527 if (!is_exclusive_pmu(pmu))
4528 return true;
4529
4530 list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
4531 if (exclusive_event_match(iter_event, event))
4532 return false;
4533 }
4534
4535 return true;
4536 }
4537
4538 static void perf_addr_filters_splice(struct perf_event *event,
4539 struct list_head *head);
4540
4541 static void _free_event(struct perf_event *event)
4542 {
4543 irq_work_sync(&event->pending);
4544
4545 unaccount_event(event);
4546
4547 if (event->rb) {
4548
4549
4550
4551
4552
4553
4554 mutex_lock(&event->mmap_mutex);
4555 ring_buffer_attach(event, NULL);
4556 mutex_unlock(&event->mmap_mutex);
4557 }
4558
4559 if (is_cgroup_event(event))
4560 perf_detach_cgroup(event);
4561
4562 if (!event->parent) {
4563 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
4564 put_callchain_buffers();
4565 }
4566
4567 perf_event_free_bpf_prog(event);
4568 perf_addr_filters_splice(event, NULL);
4569 kfree(event->addr_filter_ranges);
4570
4571 if (event->destroy)
4572 event->destroy(event);
4573
4574
4575
4576
4577
4578 if (event->hw.target)
4579 put_task_struct(event->hw.target);
4580
4581
4582
4583
4584
4585 if (event->ctx)
4586 put_ctx(event->ctx);
4587
4588 exclusive_event_destroy(event);
4589 module_put(event->pmu->module);
4590
4591 call_rcu(&event->rcu_head, free_event_rcu);
4592 }
4593
4594
4595
4596
4597
4598 static void free_event(struct perf_event *event)
4599 {
4600 if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
4601 "unexpected event refcount: %ld; ptr=%p\n",
4602 atomic_long_read(&event->refcount), event)) {
4603
4604 return;
4605 }
4606
4607 _free_event(event);
4608 }
4609
4610
4611
4612
4613 static void perf_remove_from_owner(struct perf_event *event)
4614 {
4615 struct task_struct *owner;
4616
4617 rcu_read_lock();
4618
4619
4620
4621
4622
4623
4624 owner = READ_ONCE(event->owner);
4625 if (owner) {
4626
4627
4628
4629
4630
4631 get_task_struct(owner);
4632 }
4633 rcu_read_unlock();
4634
4635 if (owner) {
4636
4637
4638
4639
4640
4641
4642
4643
4644 mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
4645
4646
4647
4648
4649
4650
4651
4652 if (event->owner) {
4653 list_del_init(&event->owner_entry);
4654 smp_store_release(&event->owner, NULL);
4655 }
4656 mutex_unlock(&owner->perf_event_mutex);
4657 put_task_struct(owner);
4658 }
4659 }
4660
4661 static void put_event(struct perf_event *event)
4662 {
4663 if (!atomic_long_dec_and_test(&event->refcount))
4664 return;
4665
4666 _free_event(event);
4667 }
4668
4669 /*
4670  * Kill an event dead; while event::refcount will preserve the event
4671  * object, it will not preserve its functionality. Once the last 'user'
4672  * gives up the object, we'll destroy the thing.
4673  */
4674 int perf_event_release_kernel(struct perf_event *event)
4675 {
4676 struct perf_event_context *ctx = event->ctx;
4677 struct perf_event *child, *tmp;
4678 LIST_HEAD(free_list);
4679
4680
4681
4682
4683
4684 if (!ctx) {
4685 WARN_ON_ONCE(event->attach_state &
4686 (PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
4687 goto no_ctx;
4688 }
4689
4690 if (!is_kernel_event(event))
4691 perf_remove_from_owner(event);
4692
4693 ctx = perf_event_ctx_lock(event);
4694 WARN_ON_ONCE(ctx->parent_ctx);
4695 perf_remove_from_context(event, DETACH_GROUP);
4696
4697 raw_spin_lock_irq(&ctx->lock);
4698 /*
4699  * Mark this event as STATE_DEAD, there is no external reference to it
4700  * anymore.
4701  *
4702  * Anybody acquiring event->child_mutex after the below loop _must_
4703  * also see this, most importantly inherit_event() which will avoid
4704  * placing more children on the list.
4705  *
4706  * Thus this guarantees that we will in fact observe and kill _ALL_
4707  * child events.
4708  */
4709 event->state = PERF_EVENT_STATE_DEAD;
4710 raw_spin_unlock_irq(&ctx->lock);
4711
4712 perf_event_ctx_unlock(event, ctx);
4713
4714 again:
4715 mutex_lock(&event->child_mutex);
4716 list_for_each_entry(child, &event->child_list, child_list) {
4717
4718
4719
4720
4721
4722 ctx = READ_ONCE(child->ctx);
4723
4724
4725
4726
4727
4728
4729
4730
4731 get_ctx(ctx);
4732
4733
4734
4735
4736
4737
4738 mutex_unlock(&event->child_mutex);
4739 mutex_lock(&ctx->mutex);
4740 mutex_lock(&event->child_mutex);
4741
4742
4743
4744
4745
4746
4747 tmp = list_first_entry_or_null(&event->child_list,
4748 struct perf_event, child_list);
4749 if (tmp == child) {
4750 perf_remove_from_context(child, DETACH_GROUP);
4751 list_move(&child->child_list, &free_list);
4752
4753
4754
4755
4756 put_event(event);
4757 }
4758
4759 mutex_unlock(&event->child_mutex);
4760 mutex_unlock(&ctx->mutex);
4761 put_ctx(ctx);
4762 goto again;
4763 }
4764 mutex_unlock(&event->child_mutex);
4765
4766 list_for_each_entry_safe(child, tmp, &free_list, child_list) {
4767 void *var = &child->ctx->refcount;
4768
4769 list_del(&child->child_list);
4770 free_event(child);
4771
4772
4773
4774
4775
4776 smp_mb();
4777 wake_up_var(var);
4778 }
4779
4780 no_ctx:
4781 put_event(event);
4782 return 0;
4783 }
4784 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
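/*
 * Usage note (about callers outside this file): in-kernel users that
 * created an event with perf_event_create_kernel_counter() are expected to
 * dispose of it through perf_event_release_kernel(); events owned by
 * userspace reach it through the fd release path, perf_release() below.
 */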
4785
4786
4787
4788
4789 static int perf_release(struct inode *inode, struct file *file)
4790 {
4791 perf_event_release_kernel(file->private_data);
4792 return 0;
4793 }
4794
4795 static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
4796 {
4797 struct perf_event *child;
4798 u64 total = 0;
4799
4800 *enabled = 0;
4801 *running = 0;
4802
4803 mutex_lock(&event->child_mutex);
4804
4805 (void)perf_event_read(event, false);
4806 total += perf_event_count(event);
4807
4808 *enabled += event->total_time_enabled +
4809 atomic64_read(&event->child_total_time_enabled);
4810 *running += event->total_time_running +
4811 atomic64_read(&event->child_total_time_running);
4812
4813 list_for_each_entry(child, &event->child_list, child_list) {
4814 (void)perf_event_read(child, false);
4815 total += perf_event_count(child);
4816 *enabled += child->total_time_enabled;
4817 *running += child->total_time_running;
4818 }
4819 mutex_unlock(&event->child_mutex);
4820
4821 return total;
4822 }
4823
4824 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
4825 {
4826 struct perf_event_context *ctx;
4827 u64 count;
4828
4829 ctx = perf_event_ctx_lock(event);
4830 count = __perf_event_read_value(event, enabled, running);
4831 perf_event_ctx_unlock(event, ctx);
4832
4833 return count;
4834 }
4835 EXPORT_SYMBOL_GPL(perf_event_read_value);
4836
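/*
 * With PERF_FORMAT_GROUP, the buffer assembled by the two helpers below has
 * the layout (fields in [] only when the matching read_format bit is set):
 *
 *	{ u64 nr;
 *	  [ u64 time_enabled; ]
 *	  [ u64 time_running; ]
 *	  { u64 value; [ u64 id; ] } cnt[nr];
 *	}
 *
 * __perf_read_group_add() accumulates one (inherited) leader and its
 * siblings into @values; perf_read_group() sums the leader and all of its
 * children before copying the result to userspace.
 */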
4837 static int __perf_read_group_add(struct perf_event *leader,
4838 u64 read_format, u64 *values)
4839 {
4840 struct perf_event_context *ctx = leader->ctx;
4841 struct perf_event *sub;
4842 unsigned long flags;
4843 int n = 1;
4844 int ret;
4845
4846 ret = perf_event_read(leader, true);
4847 if (ret)
4848 return ret;
4849
4850 raw_spin_lock_irqsave(&ctx->lock, flags);
4851
4852
4853
4854
4855
4856
4857 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
4858 values[n++] += leader->total_time_enabled +
4859 atomic64_read(&leader->child_total_time_enabled);
4860 }
4861
4862 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
4863 values[n++] += leader->total_time_running +
4864 atomic64_read(&leader->child_total_time_running);
4865 }
4866
4867
4868
4869
4870 values[n++] += perf_event_count(leader);
4871 if (read_format & PERF_FORMAT_ID)
4872 values[n++] = primary_event_id(leader);
4873
4874 for_each_sibling_event(sub, leader) {
4875 values[n++] += perf_event_count(sub);
4876 if (read_format & PERF_FORMAT_ID)
4877 values[n++] = primary_event_id(sub);
4878 }
4879
4880 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4881 return 0;
4882 }
4883
4884 static int perf_read_group(struct perf_event *event,
4885 u64 read_format, char __user *buf)
4886 {
4887 struct perf_event *leader = event->group_leader, *child;
4888 struct perf_event_context *ctx = leader->ctx;
4889 int ret;
4890 u64 *values;
4891
4892 lockdep_assert_held(&ctx->mutex);
4893
4894 values = kzalloc(event->read_size, GFP_KERNEL);
4895 if (!values)
4896 return -ENOMEM;
4897
4898 values[0] = 1 + leader->nr_siblings;
4899
4900
4901
4902
4903
4904 mutex_lock(&leader->child_mutex);
4905
4906 ret = __perf_read_group_add(leader, read_format, values);
4907 if (ret)
4908 goto unlock;
4909
4910 list_for_each_entry(child, &leader->child_list, child_list) {
4911 ret = __perf_read_group_add(child, read_format, values);
4912 if (ret)
4913 goto unlock;
4914 }
4915
4916 mutex_unlock(&leader->child_mutex);
4917
4918 ret = event->read_size;
4919 if (copy_to_user(buf, values, event->read_size))
4920 ret = -EFAULT;
4921 goto out;
4922
4923 unlock:
4924 mutex_unlock(&leader->child_mutex);
4925 out:
4926 kfree(values);
4927 return ret;
4928 }
4929
4930 static int perf_read_one(struct perf_event *event,
4931 u64 read_format, char __user *buf)
4932 {
4933 u64 enabled, running;
4934 u64 values[4];
4935 int n = 0;
4936
4937 values[n++] = __perf_event_read_value(event, &enabled, &running);
4938 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
4939 values[n++] = enabled;
4940 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
4941 values[n++] = running;
4942 if (read_format & PERF_FORMAT_ID)
4943 values[n++] = primary_event_id(event);
4944
4945 if (copy_to_user(buf, values, n * sizeof(u64)))
4946 return -EFAULT;
4947
4948 return n * sizeof(u64);
4949 }
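For the non-group case the payload is just the u64s written above, in that order. A userspace sketch, assuming `fd' came from perf_event_open() with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING and the usual <unistd.h>/<stdint.h> includes; the last line applies the conventional multiplexing scale-up:

	struct { uint64_t value, time_enabled, time_running; } rf;
	double scaled = 0.0;

	if (read(fd, &rf, sizeof(rf)) == sizeof(rf) && rf.time_running)
		scaled = (double)rf.value * rf.time_enabled / rf.time_running;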
4950
4951 static bool is_event_hup(struct perf_event *event)
4952 {
4953 bool no_children;
4954
4955 if (event->state > PERF_EVENT_STATE_EXIT)
4956 return false;
4957
4958 mutex_lock(&event->child_mutex);
4959 no_children = list_empty(&event->child_list);
4960 mutex_unlock(&event->child_mutex);
4961 return no_children;
4962 }
4963
4964
4965
4966
4967 static ssize_t
4968 __perf_read(struct perf_event *event, char __user *buf, size_t count)
4969 {
4970 u64 read_format = event->attr.read_format;
4971 int ret;
4972
4973 /*
4974 * Return end-of-file for a read on an event that is in
4975 * error state, i.e. because it was pinned but could not be
4976 * scheduled on to the PMU at some point.
4977 */
4978 if (event->state == PERF_EVENT_STATE_ERROR)
4979 return 0;
4980
4981 if (count < event->read_size)
4982 return -ENOSPC;
4983
4984 WARN_ON_ONCE(event->ctx->parent_ctx);
4985 if (read_format & PERF_FORMAT_GROUP)
4986 ret = perf_read_group(event, read_format, buf);
4987 else
4988 ret = perf_read_one(event, read_format, buf);
4989
4990 return ret;
4991 }
4992
4993 static ssize_t
4994 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
4995 {
4996 struct perf_event *event = file->private_data;
4997 struct perf_event_context *ctx;
4998 int ret;
4999
5000 ctx = perf_event_ctx_lock(event);
5001 ret = __perf_read(event, buf, count);
5002 perf_event_ctx_unlock(event, ctx);
5003
5004 return ret;
5005 }
5006
5007 static __poll_t perf_poll(struct file *file, poll_table *wait)
5008 {
5009 struct perf_event *event = file->private_data;
5010 struct ring_buffer *rb;
5011 __poll_t events = EPOLLHUP;
5012
5013 poll_wait(file, &event->waitq, wait);
5014
5015 if (is_event_hup(event))
5016 return events;
5017
5018 /*
5019 * Pin the event->rb by taking event->mmap_mutex; otherwise
5020 * perf_event_set_output() can swizzle our rb and make us miss wakeups.
5021 */
5022 mutex_lock(&event->mmap_mutex);
5023 rb = event->rb;
5024 if (rb)
5025 events = atomic_xchg(&rb->poll, 0);
5026 mutex_unlock(&event->mmap_mutex);
5027 return events;
5028 }
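Userspace drives this through an ordinary poll()/epoll loop on the event fd; EPOLLHUP is only reported once is_event_hup() is true, i.e. the event has exited and has no children left. A sketch, assuming `fd' is a sampling event with a mapped ring buffer and drain_ring_buffer() is a hypothetical consumer:

	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	while (poll(&pfd, 1, -1) > 0) {
		if (pfd.revents & POLLIN)
			drain_ring_buffer();	/* hypothetical helper */
		if (pfd.revents & POLLHUP)
			break;			/* event is done */
	}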
5029
5030 static void _perf_event_reset(struct perf_event *event)
5031 {
5032 (void)perf_event_read(event, false);
5033 local64_set(&event->count, 0);
5034 perf_event_update_userpage(event);
5035 }
5036
5037
5038
5039
5040
5041
5042
5043 static void perf_event_for_each_child(struct perf_event *event,
5044 void (*func)(struct perf_event *))
5045 {
5046 struct perf_event *child;
5047
5048 WARN_ON_ONCE(event->ctx->parent_ctx);
5049
5050 mutex_lock(&event->child_mutex);
5051 func(event);
5052 list_for_each_entry(child, &event->child_list, child_list)
5053 func(child);
5054 mutex_unlock(&event->child_mutex);
5055 }
5056
5057 static void perf_event_for_each(struct perf_event *event,
5058 void (*func)(struct perf_event *))
5059 {
5060 struct perf_event_context *ctx = event->ctx;
5061 struct perf_event *sibling;
5062
5063 lockdep_assert_held(&ctx->mutex);
5064
5065 event = event->group_leader;
5066
5067 perf_event_for_each_child(event, func);
5068 for_each_sibling_event(sibling, event)
5069 perf_event_for_each_child(sibling, func);
5070 }
5071
5072 static void __perf_event_period(struct perf_event *event,
5073 struct perf_cpu_context *cpuctx,
5074 struct perf_event_context *ctx,
5075 void *info)
5076 {
5077 u64 value = *((u64 *)info);
5078 bool active;
5079
5080 if (event->attr.freq) {
5081 event->attr.sample_freq = value;
5082 } else {
5083 event->attr.sample_period = value;
5084 event->hw.sample_period = value;
5085 }
5086
5087 active = (event->state == PERF_EVENT_STATE_ACTIVE);
5088 if (active) {
5089 perf_pmu_disable(ctx->pmu);
5090 /*
5091 * We could be throttled; unthrottle now to avoid the tick
5092 * trying to unthrottle while we already re-started the event.
5093 */
5094 if (event->hw.interrupts == MAX_INTERRUPTS) {
5095 event->hw.interrupts = 0;
5096 perf_log_throttle(event, 1);
5097 }
5098 event->pmu->stop(event, PERF_EF_UPDATE);
5099 }
5100
5101 local64_set(&event->hw.period_left, 0);
5102
5103 if (active) {
5104 event->pmu->start(event, PERF_EF_RELOAD);
5105 perf_pmu_enable(ctx->pmu);
5106 }
5107 }
5108
5109 static int perf_event_check_period(struct perf_event *event, u64 value)
5110 {
5111 return event->pmu->check_period(event, value);
5112 }
5113
5114 static int perf_event_period(struct perf_event *event, u64 __user *arg)
5115 {
5116 u64 value;
5117
5118 if (!is_sampling_event(event))
5119 return -EINVAL;
5120
5121 if (copy_from_user(&value, arg, sizeof(value)))
5122 return -EFAULT;
5123
5124 if (!value)
5125 return -EINVAL;
5126
5127 if (event->attr.freq && value > sysctl_perf_event_sample_rate)
5128 return -EINVAL;
5129
5130 if (perf_event_check_period(event, value))
5131 return -EINVAL;
5132
5133 if (!event->attr.freq && (value & (1ULL << 63)))
5134 return -EINVAL;
5135
5136 event_function_call(event, __perf_event_period, &value);
5137
5138 return 0;
5139 }
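Userspace reaches this path via PERF_EVENT_IOC_PERIOD, which passes a pointer to the new u64 period (or frequency, when the event was opened with attr.freq). A sketch, assuming `fd' came from perf_event_open() for a sampling event:

	uint64_t period = 100000;	/* one sample per 100000 occurrences */

	if (ioctl(fd, PERF_EVENT_IOC_PERIOD, &period))
		perror("PERF_EVENT_IOC_PERIOD");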
5140
5141 static const struct file_operations perf_fops;
5142
5143 static inline int perf_fget_light(int fd, struct fd *p)
5144 {
5145 struct fd f = fdget(fd);
5146 if (!f.file)
5147 return -EBADF;
5148
5149 if (f.file->f_op != &perf_fops) {
5150 fdput(f);
5151 return -EBADF;
5152 }
5153 *p = f;
5154 return 0;
5155 }
5156
5157 static int perf_event_set_output(struct perf_event *event,
5158 struct perf_event *output_event);
5159 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
5160 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
5161 static int perf_copy_attr(struct perf_event_attr __user *uattr,
5162 struct perf_event_attr *attr);
5163
5164 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
5165 {
5166 void (*func)(struct perf_event *);
5167 u32 flags = arg;
5168
5169 switch (cmd) {
5170 case PERF_EVENT_IOC_ENABLE:
5171 func = _perf_event_enable;
5172 break;
5173 case PERF_EVENT_IOC_DISABLE:
5174 func = _perf_event_disable;
5175 break;
5176 case PERF_EVENT_IOC_RESET:
5177 func = _perf_event_reset;
5178 break;
5179
5180 case PERF_EVENT_IOC_REFRESH:
5181 return _perf_event_refresh(event, arg);
5182
5183 case PERF_EVENT_IOC_PERIOD:
5184 return perf_event_period(event, (u64 __user *)arg);
5185
5186 case PERF_EVENT_IOC_ID:
5187 {
5188 u64 id = primary_event_id(event);
5189
5190 if (copy_to_user((void __user *)arg, &id, sizeof(id)))
5191 return -EFAULT;
5192 return 0;
5193 }
5194
5195 case PERF_EVENT_IOC_SET_OUTPUT:
5196 {
5197 int ret;
5198 if (arg != -1) {
5199 struct perf_event *output_event;
5200 struct fd output;
5201 ret = perf_fget_light(arg, &output);
5202 if (ret)
5203 return ret;
5204 output_event = output.file->private_data;
5205 ret = perf_event_set_output(event, output_event);
5206 fdput(output);
5207 } else {
5208 ret = perf_event_set_output(event, NULL);
5209 }
5210 return ret;
5211 }
5212
5213 case PERF_EVENT_IOC_SET_FILTER:
5214 return perf_event_set_filter(event, (void __user *)arg);
5215
5216 case PERF_EVENT_IOC_SET_BPF:
5217 return perf_event_set_bpf_prog(event, arg);
5218
5219 case PERF_EVENT_IOC_PAUSE_OUTPUT: {
5220 struct ring_buffer *rb;
5221
5222 rcu_read_lock();
5223 rb = rcu_dereference(event->rb);
5224 if (!rb || !rb->nr_pages) {
5225 rcu_read_unlock();
5226 return -EINVAL;
5227 }
5228 rb_toggle_paused(rb, !!arg);
5229 rcu_read_unlock();
5230 return 0;
5231 }
5232
5233 case PERF_EVENT_IOC_QUERY_BPF:
5234 return perf_event_query_prog_array(event, (void __user *)arg);
5235
5236 case PERF_EVENT_IOC_MODIFY_ATTRIBUTES: {
5237 struct perf_event_attr new_attr;
5238 int err = perf_copy_attr((struct perf_event_attr __user *)arg,
5239 &new_attr);
5240
5241 if (err)
5242 return err;
5243
5244 return perf_event_modify_attr(event, &new_attr);
5245 }
5246 default:
5247 return -ENOTTY;
5248 }
5249
5250 if (flags & PERF_IOC_FLAG_GROUP)
5251 perf_event_for_each(event, func);
5252 else
5253 perf_event_for_each_child(event, func);
5254
5255 return 0;
5256 }
5257
5258 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5259 {
5260 struct perf_event *event = file->private_data;
5261 struct perf_event_context *ctx;
5262 long ret;
5263
5264 ctx = perf_event_ctx_lock(event);
5265 ret = _perf_ioctl(event, cmd, arg);
5266 perf_event_ctx_unlock(event, ctx);
5267
5268 return ret;
5269 }
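The PERF_IOC_FLAG_GROUP handling at the end of _perf_ioctl() is what lets one ioctl on the group leader act on every member. A userspace sketch, assuming `group_fd' is the leader fd returned by perf_event_open():

	ioctl(group_fd, PERF_EVENT_IOC_RESET,   PERF_IOC_FLAG_GROUP);
	ioctl(group_fd, PERF_EVENT_IOC_ENABLE,  PERF_IOC_FLAG_GROUP);
	/* ... run the workload being measured ... */
	ioctl(group_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);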
5270
5271 #ifdef CONFIG_COMPAT
5272 static long perf_compat_ioctl(struct file *file, unsigned int cmd,
5273 unsigned long arg)
5274 {
5275 switch (_IOC_NR(cmd)) {
5276 case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
5277 case _IOC_NR(PERF_EVENT_IOC_ID):
5278 case _IOC_NR(PERF_EVENT_IOC_QUERY_BPF):
5279 case _IOC_NR(PERF_EVENT_IOC_MODIFY_ATTRIBUTES):
5280
5281 if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
5282 cmd &= ~IOCSIZE_MASK;
5283 cmd |= sizeof(void *) << IOCSIZE_SHIFT;
5284 }
5285 break;
5286 }
5287 return perf_ioctl(file, cmd, arg);
5288 }
5289 #else
5290 # define perf_compat_ioctl NULL
5291 #endif
5292
5293 int perf_event_task_enable(void)
5294 {
5295 struct perf_event_context *ctx;
5296 struct perf_event *event;
5297
5298 mutex_lock(&current->perf_event_mutex);
5299 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
5300 ctx = perf_event_ctx_lock(event);
5301 perf_event_for_each_child(event, _perf_event_enable);
5302 perf_event_ctx_unlock(event, ctx);
5303 }
5304 mutex_unlock(&current->perf_event_mutex);
5305
5306 return 0;
5307 }
5308
5309 int perf_event_task_disable(void)
5310 {
5311 struct perf_event_context *ctx;
5312 struct perf_event *event;
5313
5314 mutex_lock(&current->perf_event_mutex);
5315 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
5316 ctx = perf_event_ctx_lock(event);
5317 perf_event_for_each_child(event, _perf_event_disable);
5318 perf_event_ctx_unlock(event, ctx);
5319 }
5320 mutex_unlock(&current->perf_event_mutex);
5321
5322 return 0;
5323 }
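These two helpers back the prctl() pair that toggles every counter attached to the calling task, which is a convenient way to exclude a setup phase from measurement:

	prctl(PR_TASK_PERF_EVENTS_DISABLE);
	/* ... work that should not be counted ... */
	prctl(PR_TASK_PERF_EVENTS_ENABLE);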
5324
5325 static int perf_event_index(struct perf_event *event)
5326 {
5327 if (event->hw.state & PERF_HES_STOPPED)
5328 return 0;
5329
5330 if (event->state != PERF_EVENT_STATE_ACTIVE)
5331 return 0;
5332
5333 return event->pmu->event_idx(event);
5334 }
5335
5336 static void calc_timer_values(struct perf_event *event,
5337 u64 *now,
5338 u64 *enabled,
5339 u64 *running)
5340 {
5341 u64 ctx_time;
5342
5343 *now = perf_clock();
5344 ctx_time = event->shadow_ctx_time + *now;
5345 __perf_update_times(event, ctx_time, enabled, running);
5346 }
5347
5348 static void perf_event_init_userpage(struct perf_event *event)
5349 {
5350 struct perf_event_mmap_page *userpg;
5351 struct ring_buffer *rb;
5352
5353 rcu_read_lock();
5354 rb = rcu_dereference(event->rb);
5355 if (!rb)
5356 goto unlock;
5357
5358 userpg = rb->user_page;
5359
5360
5361 userpg->cap_bit0_is_deprecated = 1;
5362 userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
5363 userpg->data_offset = PAGE_SIZE;
5364 userpg->data_size = perf_data_size(rb);
5365
5366 unlock:
5367 rcu_read_unlock();
5368 }
5369
5370 void __weak arch_perf_update_userpage(
5371 struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
5372 {
5373 }
5374
5375
5376
5377
5378
5379
5380 void perf_event_update_userpage(struct perf_event *event)
5381 {
5382 struct perf_event_mmap_page *userpg;
5383 struct ring_buffer *rb;
5384 u64 enabled, running, now;
5385
5386 rcu_read_lock();
5387 rb = rcu_dereference(event->rb);
5388 if (!rb)
5389 goto unlock;
5390
5391 /*
5392 * Compute total_time_enabled and total_time_running from the
5393 * timestamps snapshotted when the event was last scheduled in,
5394 * rather than calling update_context_time(): this path can be
5395 * reached from NMI context, where taking ctx->lock is not an
5396 * option. calc_timer_values() instead derives both times from
5397 * shadow_ctx_time plus the current clock, which is safe to do
5398 * locklessly.
5399 */
5400 calc_timer_values(event, &now, &enabled, &running);
5401
5402 userpg = rb->user_page;
5403
5404
5405
5406
5407 preempt_disable();
5408 ++userpg->lock;
5409 barrier();
5410 userpg->index = perf_event_index(event);
5411 userpg->offset = perf_event_count(event);
5412 if (userpg->index)
5413 userpg->offset -= local64_read(&event->hw.prev_count);
5414
5415 userpg->time_enabled = enabled +
5416 atomic64_read(&event->child_total_time_enabled);
5417
5418 userpg->time_running = running +
5419 atomic64_read(&event->child_total_time_running);
5420
5421 arch_perf_update_userpage(event, userpg, now);
5422
5423 barrier();
5424 ++userpg->lock;
5425 preempt_enable();
5426 unlock:
5427 rcu_read_unlock();
5428 }
5429 EXPORT_SYMBOL_GPL(perf_event_update_userpage);
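The ++userpg->lock / barrier() pairs above form a seqcount that userspace must honour when reading the mmap'ed control page. A sketch of the matching read side, assuming `pc' points at the first mapped page; the full recipe (including the rdpmc() step when pc->index is non-zero and the cap bits allow it) is documented in include/uapi/linux/perf_event.h:

	uint32_t seq;
	int64_t count;

	do {
		seq = pc->lock;
		__sync_synchronize();	/* read side of the barrier() above */
		count = pc->offset;	/* add rdpmc(pc->index - 1) here when
					   pc->index != 0 and rdpmc is allowed */
		__sync_synchronize();
	} while (pc->lock != seq);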
5430
5431 static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
5432 {
5433 struct perf_event *event = vmf->vma->vm_file->private_data;
5434 struct ring_buffer *rb;
5435 vm_fault_t ret = VM_FAULT_SIGBUS;
5436
5437 if (vmf->flags & FAULT_FLAG_MKWRITE) {
5438 if (vmf->pgoff == 0)
5439 ret = 0;
5440 return ret;
5441 }
5442
5443 rcu_read_lock();
5444 rb = rcu_dereference(event->rb);
5445 if (!rb)
5446 goto unlock;
5447
5448 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
5449 goto unlock;
5450
5451 vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
5452 if (!vmf->page)
5453 goto unlock;
5454
5455 get_page(vmf->page);
5456 vmf->page->mapping = vmf->vma->vm_file->f_mapping;
5457 vmf->page->index = vmf->pgoff;
5458
5459 ret = 0;
5460 unlock:
5461 rcu_read_unlock();
5462
5463 return ret;
5464 }
5465
5466 static void ring_buffer_attach(struct perf_event *event,
5467 struct ring_buffer *rb)
5468 {
5469 struct ring_buffer *old_rb = NULL;
5470 unsigned long flags;
5471
5472 if (event->rb) {
5473
5474
5475
5476
5477 WARN_ON_ONCE(event->rcu_pending);
5478
5479 old_rb = event->rb;
5480 spin_lock_irqsave(&old_rb->event_lock, flags);
5481 list_del_rcu(&event->rb_entry);
5482 spin_unlock_irqrestore(&old_rb->event_lock, flags);
5483
5484 event->rcu_batches = get_state_synchronize_rcu();
5485 event->rcu_pending = 1;
5486 }
5487
5488 if (rb) {
5489 if (event->rcu_pending) {
5490 cond_synchronize_rcu(event->rcu_batches);
5491 event->rcu_pending = 0;
5492 }
5493
5494 spin_lock_irqsave(&rb->event_lock, flags);
5495 list_add_rcu(&event->rb_entry, &rb->event_list);
5496 spin_unlock_irqrestore(&rb->event_lock, flags);
5497 }
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509 if (has_aux(event))
5510 perf_event_stop(event, 0);
5511
5512 rcu_assign_pointer(event->rb, rb);
5513
5514 if (old_rb) {
5515 ring_buffer_put(old_rb);
5516
5517
5518
5519
5520
5521 wake_up_all(&event->waitq);
5522 }
5523 }
5524
5525 static void ring_buffer_wakeup(struct perf_event *event)
5526 {
5527 struct ring_buffer *rb;
5528
5529 rcu_read_lock();
5530 rb = rcu_dereference(event->rb);
5531 if (rb) {
5532 list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
5533 wake_up_all(&event->waitq);
5534 }
5535 rcu_read_unlock();
5536 }
5537
5538 struct ring_buffer *ring_buffer_get(struct perf_event *event)
5539 {
5540 struct ring_buffer *rb;
5541
5542 rcu_read_lock();
5543 rb = rcu_dereference(event->rb);
5544 if (rb) {
5545 if (!refcount_inc_not_zero(&rb->refcount))
5546 rb = NULL;
5547 }
5548 rcu_read_unlock();
5549
5550 return rb;
5551 }
5552
5553 void ring_buffer_put(struct ring_buffer *rb)
5554 {
5555 if (!refcount_dec_and_test(&rb->refcount))
5556 return;
5557
5558 WARN_ON_ONCE(!list_empty(&rb->event_list));
5559
5560 call_rcu(&rb->rcu_head, rb_free_rcu);
5561 }
5562
5563 static void perf_mmap_open(struct vm_area_struct *vma)
5564 {
5565 struct perf_event *event = vma->vm_file->private_data;
5566
5567 atomic_inc(&event->mmap_count);
5568 atomic_inc(&event->rb->mmap_count);
5569
5570 if (vma->vm_pgoff)
5571 atomic_inc(&event->rb->aux_mmap_count);
5572
5573 if (event->pmu->event_mapped)
5574 event->pmu->event_mapped(event, vma->vm_mm);
5575 }
5576
5577 static void perf_pmu_output_stop(struct perf_event *event);
5578
5579
5580
5581
5582
5583
5584
5585
5586
5587 static void perf_mmap_close(struct vm_area_struct *vma)
5588 {
5589 struct perf_event *event = vma->vm_file->private_data;
5590
5591 struct ring_buffer *rb = ring_buffer_get(event);
5592 struct user_struct *mmap_user = rb->mmap_user;
5593 int mmap_locked = rb->mmap_locked;
5594 unsigned long size = perf_data_size(rb);
5595
5596 if (event->pmu->event_unmapped)
5597 event->pmu->event_unmapped(event, vma->vm_mm);
5598
5599
5600
5601
5602
5603
5604 if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
5605 atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
5606
5607
5608
5609
5610
5611
5612 perf_pmu_output_stop(event);
5613
5614
5615 atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm);
5616 atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
5617
5618
5619 rb_free_aux(rb);
5620 WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
5621
5622 mutex_unlock(&event->mmap_mutex);
5623 }
5624
5625 atomic_dec(&rb->mmap_count);
5626
5627 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
5628 goto out_put;
5629
5630 ring_buffer_attach(event, NULL);
5631 mutex_unlock(&event->mmap_mutex);
5632
5633
5634 if (atomic_read(&rb->mmap_count))
5635 goto out_put;
5636
5637
5638
5639
5640
5641
5642 again:
5643 rcu_read_lock();
5644 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
5645 if (!atomic_long_inc_not_zero(&event->refcount)) {
5646
5647
5648
5649
5650 continue;
5651 }
5652 rcu_read_unlock();
5653
5654 mutex_lock(&event->mmap_mutex);
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665 if (event->rb == rb)
5666 ring_buffer_attach(event, NULL);
5667
5668 mutex_unlock(&event->mmap_mutex);
5669 put_event(event);
5670
5671
5672
5673
5674
5675 goto again;
5676 }
5677 rcu_read_unlock();
5678
5679
5680
5681
5682
5683
5684
5685
5686
5687
5688 atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
5689 &mmap_user->locked_vm);
5690 atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
5691 free_uid(mmap_user);
5692
5693 out_put:
5694 ring_buffer_put(rb);
5695 }
5696
5697 static const struct vm_operations_struct perf_mmap_vmops = {
5698 .open = perf_mmap_open,
5699 .close = perf_mmap_close,
5700 .fault = perf_mmap_fault,
5701 .page_mkwrite = perf_mmap_fault,
5702 };
5703
5704 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
5705 {
5706 struct perf_event *event = file->private_data;
5707 unsigned long user_locked, user_lock_limit;
5708 struct user_struct *user = current_user();
5709 unsigned long locked, lock_limit;
5710 struct ring_buffer *rb = NULL;
5711 unsigned long vma_size;
5712 unsigned long nr_pages;
5713 long user_extra = 0, extra = 0;
5714 int ret = 0, flags = 0;
5715
5716
5717
5718
5719
5720
5721 if (event->cpu == -1 && event->attr.inherit)
5722 return -EINVAL;
5723
5724 if (!(vma->vm_flags & VM_SHARED))
5725 return -EINVAL;
5726
5727 vma_size = vma->vm_end - vma->vm_start;
5728
5729 if (vma->vm_pgoff == 0) {
5730 nr_pages = (vma_size / PAGE_SIZE) - 1;
5731 } else {
5732
5733
5734
5735
5736
5737 u64 aux_offset, aux_size;
5738
5739 if (!event->rb)
5740 return -EINVAL;
5741
5742 nr_pages = vma_size / PAGE_SIZE;
5743
5744 mutex_lock(&event->mmap_mutex);
5745 ret = -EINVAL;
5746
5747 rb = event->rb;
5748 if (!rb)
5749 goto aux_unlock;
5750
5751 aux_offset = READ_ONCE(rb->user_page->aux_offset);
5752 aux_size = READ_ONCE(rb->user_page->aux_size);
5753
5754 if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
5755 goto aux_unlock;
5756
5757 if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
5758 goto aux_unlock;
5759
5760
5761 if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
5762 goto aux_unlock;
5763
5764 if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
5765 goto aux_unlock;
5766
5767
5768 if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
5769 goto aux_unlock;
5770
5771 if (!is_power_of_2(nr_pages))
5772 goto aux_unlock;
5773
5774 if (!atomic_inc_not_zero(&rb->mmap_count))
5775 goto aux_unlock;
5776
5777 if (rb_has_aux(rb)) {
5778 atomic_inc(&rb->aux_mmap_count);
5779 ret = 0;
5780 goto unlock;
5781 }
5782
5783 atomic_set(&rb->aux_mmap_count, 1);
5784 user_extra = nr_pages;
5785
5786 goto accounting;
5787 }
5788
5789
5790
5791
5792
5793 if (nr_pages != 0 && !is_power_of_2(nr_pages))
5794 return -EINVAL;
5795
5796 if (vma_size != PAGE_SIZE * (1 + nr_pages))
5797 return -EINVAL;
5798
5799 WARN_ON_ONCE(event->ctx->parent_ctx);
5800 again:
5801 mutex_lock(&event->mmap_mutex);
5802 if (event->rb) {
5803 if (event->rb->nr_pages != nr_pages) {
5804 ret = -EINVAL;
5805 goto unlock;
5806 }
5807
5808 if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
5809
5810
5811
5812
5813
5814 mutex_unlock(&event->mmap_mutex);
5815 goto again;
5816 }
5817
5818 goto unlock;
5819 }
5820
5821 user_extra = nr_pages + 1;
5822
5823 accounting:
5824 user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
5825
5826
5827
5828
5829 user_lock_limit *= num_online_cpus();
5830
5831 user_locked = atomic_long_read(&user->locked_vm);
5832
5833
5834
5835
5836
5837 if (user_locked > user_lock_limit)
5838 user_locked = user_lock_limit;
5839 user_locked += user_extra;
5840
5841 if (user_locked <= user_lock_limit) {
5842
5843 } else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
5844
5845 extra = user_extra;
5846 user_extra = 0;
5847 } else {
5848
5849
5850
5851
5852 extra = user_locked - user_lock_limit;
5853 user_extra -= extra;
5854 }
5855
5856 lock_limit = rlimit(RLIMIT_MEMLOCK);
5857 lock_limit >>= PAGE_SHIFT;
5858 locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
5859
5860 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
5861 !capable(CAP_IPC_LOCK)) {
5862 ret = -EPERM;
5863 goto unlock;
5864 }
5865
5866 WARN_ON(!rb && event->rb);
5867
5868 if (vma->vm_flags & VM_WRITE)
5869 flags |= RING_BUFFER_WRITABLE;
5870
5871 if (!rb) {
5872 rb = rb_alloc(nr_pages,
5873 event->attr.watermark ? event->attr.wakeup_watermark : 0,
5874 event->cpu, flags);
5875
5876 if (!rb) {
5877 ret = -ENOMEM;
5878 goto unlock;
5879 }
5880
5881 atomic_set(&rb->mmap_count, 1);
5882 rb->mmap_user = get_current_user();
5883 rb->mmap_locked = extra;
5884
5885 ring_buffer_attach(event, rb);
5886
5887 perf_event_init_userpage(event);
5888 perf_event_update_userpage(event);
5889 } else {
5890 ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
5891 event->attr.aux_watermark, flags);
5892 if (!ret)
5893 rb->aux_mmap_locked = extra;
5894 }
5895
5896 unlock:
5897 if (!ret) {
5898 atomic_long_add(user_extra, &user->locked_vm);
5899 atomic64_add(extra, &vma->vm_mm->pinned_vm);
5900
5901 atomic_inc(&event->mmap_count);
5902 } else if (rb) {
5903 atomic_dec(&rb->mmap_count);
5904 }
5905 aux_unlock:
5906 mutex_unlock(&event->mmap_mutex);
5907
5908
5909
5910
5911
5912 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
5913 vma->vm_ops = &perf_mmap_vmops;
5914
5915 if (event->pmu->event_mapped)
5916 event->pmu->event_mapped(event, vma->vm_mm);
5917
5918 return ret;
5919 }
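The size checks above define the userspace contract: the first mapping covers the metadata page plus a power-of-two number of data pages, and an optional AUX area is mapped separately at the offset advertised in the user page. A sketch of the common data-buffer mapping, assuming `fd' is an open perf event:

	size_t page = sysconf(_SC_PAGESIZE);
	size_t n_data = 8;			/* must be a power of two */
	void *base = mmap(NULL, (1 + n_data) * page,
			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (base == MAP_FAILED)
		perror("mmap perf buffer");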
5920
5921 static int perf_fasync(int fd, struct file *filp, int on)
5922 {
5923 struct inode *inode = file_inode(filp);
5924 struct perf_event *event = filp->private_data;
5925 int retval;
5926
5927 inode_lock(inode);
5928 retval = fasync_helper(fd, filp, on, &event->fasync);
5929 inode_unlock(inode);
5930
5931 if (retval < 0)
5932 return retval;
5933
5934 return 0;
5935 }
5936
5937 static const struct file_operations perf_fops = {
5938 .llseek = no_llseek,
5939 .release = perf_release,
5940 .read = perf_read,
5941 .poll = perf_poll,
5942 .unlocked_ioctl = perf_ioctl,
5943 .compat_ioctl = perf_compat_ioctl,
5944 .mmap = perf_mmap,
5945 .fasync = perf_fasync,
5946 };
5947
5948
5949
5950
5951
5952
5953
5954
5955 static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
5956 {
5957
5958 if (event->parent)
5959 event = event->parent;
5960 return &event->fasync;
5961 }
5962
5963 void perf_event_wakeup(struct perf_event *event)
5964 {
5965 ring_buffer_wakeup(event);
5966
5967 if (event->pending_kill) {
5968 kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
5969 event->pending_kill = 0;
5970 }
5971 }
5972
5973 static void perf_pending_event_disable(struct perf_event *event)
5974 {
5975 int cpu = READ_ONCE(event->pending_disable);
5976
5977 if (cpu < 0)
5978 return;
5979
5980 if (cpu == smp_processor_id()) {
5981 WRITE_ONCE(event->pending_disable, -1);
5982 perf_event_disable_local(event);
5983 return;
5984 }
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006 irq_work_queue_on(&event->pending, cpu);
6007 }
6008
6009 static void perf_pending_event(struct irq_work *entry)
6010 {
6011 struct perf_event *event = container_of(entry, struct perf_event, pending);
6012 int rctx;
6013
6014 rctx = perf_swevent_get_recursion_context();
6015
6016
6017
6018
6019
6020 perf_pending_event_disable(event);
6021
6022 if (event->pending_wakeup) {
6023 event->pending_wakeup = 0;
6024 perf_event_wakeup(event);
6025 }
6026
6027 if (rctx >= 0)
6028 perf_swevent_put_recursion_context(rctx);
6029 }
6030
6031
6032
6033
6034
6035
6036 struct perf_guest_info_callbacks *perf_guest_cbs;
6037
6038 int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
6039 {
6040 perf_guest_cbs = cbs;
6041 return 0;
6042 }
6043 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
6044
6045 int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
6046 {
6047 perf_guest_cbs = NULL;
6048 return 0;
6049 }
6050 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
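A hypervisor (KVM is the in-tree user) registers callbacks here so that PMIs which land while a guest is running can be attributed to guest context. A minimal sketch, assuming the classic is_in_guest/is_user_mode/get_guest_ip members of struct perf_guest_info_callbacks and hypothetical my_*() helpers provided by the hypervisor module:

	static struct perf_guest_info_callbacks my_guest_cbs = {
		.is_in_guest	= my_is_in_guest,
		.is_user_mode	= my_is_guest_user_mode,
		.get_guest_ip	= my_get_guest_ip,
	};

	perf_register_guest_info_callbacks(&my_guest_cbs);
	/* ... and on teardown: */
	perf_unregister_guest_info_callbacks(&my_guest_cbs);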
6051
6052 static void
6053 perf_output_sample_regs(struct perf_output_handle *handle,
6054 struct pt_regs *regs, u64 mask)
6055 {
6056 int bit;
6057 DECLARE_BITMAP(_mask, 64);
6058
6059 bitmap_from_u64(_mask, mask);
6060 for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
6061 u64 val;
6062
6063 val = perf_reg_value(regs, bit);
6064 perf_output_put(handle, val);
6065 }
6066 }
6067
6068 static void perf_sample_regs_user(struct perf_regs *regs_user,
6069 struct pt_regs *regs,
6070 struct pt_regs *regs_user_copy)
6071 {
6072 if (user_mode(regs)) {
6073 regs_user->abi = perf_reg_abi(current);
6074 regs_user->regs = regs;
6075 } else if (!(current->flags & PF_KTHREAD)) {
6076 perf_get_regs_user(regs_user, regs, regs_user_copy);
6077 } else {
6078 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
6079 regs_user->regs = NULL;
6080 }
6081 }
6082
6083 static void perf_sample_regs_intr(struct perf_regs *regs_intr,
6084 struct pt_regs *regs)
6085 {
6086 regs_intr->regs = regs;
6087 regs_intr->abi = perf_reg_abi(current);
6088 }
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098 static u64 perf_ustack_task_size(struct pt_regs *regs)
6099 {
6100 unsigned long addr = perf_user_stack_pointer(regs);
6101
6102 if (!addr || addr >= TASK_SIZE)
6103 return 0;
6104
6105 return TASK_SIZE - addr;
6106 }
6107
6108 static u16
6109 perf_sample_ustack_size(u16 stack_size, u16 header_size,
6110 struct pt_regs *regs)
6111 {
6112 u64 task_size;
6113
6114
6115 if (!regs)
6116 return 0;
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128 task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
6129 stack_size = min(stack_size, (u16) task_size);
6130
6131
6132 header_size += 2 * sizeof(u64);
6133
6134
6135 if ((u16) (header_size + stack_size) < header_size) {
6136
6137
6138
6139
6140 stack_size = USHRT_MAX - header_size - sizeof(u64);
6141 stack_size = round_up(stack_size, sizeof(u64));
6142 }
6143
6144 return stack_size;
6145 }
6146
6147 static void
6148 perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
6149 struct pt_regs *regs)
6150 {
6151
6152 if (!regs) {
6153 u64 size = 0;
6154 perf_output_put(handle, size);
6155 } else {
6156 unsigned long sp;
6157 unsigned int rem;
6158 u64 dyn_size;
6159 mm_segment_t fs;
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173 perf_output_put(handle, dump_size);
6174
6175
6176 sp = perf_user_stack_pointer(regs);
6177 fs = get_fs();
6178 set_fs(USER_DS);
6179 rem = __output_copy_user(handle, (void *) sp, dump_size);
6180 set_fs(fs);
6181 dyn_size = dump_size - rem;
6182
6183 perf_output_skip(handle, rem);
6184
6185
6186 perf_output_put(handle, dyn_size);
6187 }
6188 }
6189
6190 static void __perf_event_header__init_id(struct perf_event_header *header,
6191 struct perf_sample_data *data,
6192 struct perf_event *event)
6193 {
6194 u64 sample_type = event->attr.sample_type;
6195
6196 data->type = sample_type;
6197 header->size += event->id_header_size;
6198
6199 if (sample_type & PERF_SAMPLE_TID) {
6200
6201 data->tid_entry.pid = perf_event_pid(event, current);
6202 data->tid_entry.tid = perf_event_tid(event, current);
6203 }
6204
6205 if (sample_type & PERF_SAMPLE_TIME)
6206 data->time = perf_event_clock(event);
6207
6208 if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
6209 data->id = primary_event_id(event);
6210
6211 if (sample_type & PERF_SAMPLE_STREAM_ID)
6212 data->stream_id = event->id;
6213
6214 if (sample_type & PERF_SAMPLE_CPU) {
6215 data->cpu_entry.cpu = raw_smp_processor_id();
6216 data->cpu_entry.reserved = 0;
6217 }
6218 }
6219
6220 void perf_event_header__init_id(struct perf_event_header *header,
6221 struct perf_sample_data *data,
6222 struct perf_event *event)
6223 {
6224 if (event->attr.sample_id_all)
6225 __perf_event_header__init_id(header, data, event);
6226 }
6227
6228 static void __perf_event__output_id_sample(struct perf_output_handle *handle,
6229 struct perf_sample_data *data)
6230 {
6231 u64 sample_type = data->type;
6232
6233 if (sample_type & PERF_SAMPLE_TID)
6234 perf_output_put(handle, data->tid_entry);
6235
6236 if (sample_type & PERF_SAMPLE_TIME)
6237 perf_output_put(handle, data->time);
6238
6239 if (sample_type & PERF_SAMPLE_ID)
6240 perf_output_put(handle, data->id);
6241
6242 if (sample_type & PERF_SAMPLE_STREAM_ID)
6243 perf_output_put(handle, data->stream_id);
6244
6245 if (sample_type & PERF_SAMPLE_CPU)
6246 perf_output_put(handle, data->cpu_entry);
6247
6248 if (sample_type & PERF_SAMPLE_IDENTIFIER)
6249 perf_output_put(handle, data->id);
6250 }
6251
6252 void perf_event__output_id_sample(struct perf_event *event,
6253 struct perf_output_handle *handle,
6254 struct perf_sample_data *sample)
6255 {
6256 if (event->attr.sample_id_all)
6257 __perf_event__output_id_sample(handle, sample);
6258 }
6259
6260 static void perf_output_read_one(struct perf_output_handle *handle,
6261 struct perf_event *event,
6262 u64 enabled, u64 running)
6263 {
6264 u64 read_format = event->attr.read_format;
6265 u64 values[4];
6266 int n = 0;
6267
6268 values[n++] = perf_event_count(event);
6269 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
6270 values[n++] = enabled +
6271 atomic64_read(&event->child_total_time_enabled);
6272 }
6273 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
6274 values[n++] = running +
6275 atomic64_read(&event->child_total_time_running);
6276 }
6277 if (read_format & PERF_FORMAT_ID)
6278 values[n++] = primary_event_id(event);
6279
6280 __output_copy(handle, values, n * sizeof(u64));
6281 }
6282
6283 static void perf_output_read_group(struct perf_output_handle *handle,
6284 struct perf_event *event,
6285 u64 enabled, u64 running)
6286 {
6287 struct perf_event *leader = event->group_leader, *sub;
6288 u64 read_format = event->attr.read_format;
6289 u64 values[5];
6290 int n = 0;
6291
6292 values[n++] = 1 + leader->nr_siblings;
6293
6294 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
6295 values[n++] = enabled;
6296
6297 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
6298 values[n++] = running;
6299
6300 if ((leader != event) &&
6301 (leader->state == PERF_EVENT_STATE_ACTIVE))
6302 leader->pmu->read(leader);
6303
6304 values[n++] = perf_event_count(leader);
6305 if (read_format & PERF_FORMAT_ID)
6306 values[n++] = primary_event_id(leader);
6307
6308 __output_copy(handle, values, n * sizeof(u64));
6309
6310 for_each_sibling_event(sub, leader) {
6311 n = 0;
6312
6313 if ((sub != event) &&
6314 (sub->state == PERF_EVENT_STATE_ACTIVE))
6315 sub->pmu->read(sub);
6316
6317 values[n++] = perf_event_count(sub);
6318 if (read_format & PERF_FORMAT_ID)
6319 values[n++] = primary_event_id(sub);
6320
6321 __output_copy(handle, values, n * sizeof(u64));
6322 }
6323 }
6324
6325 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
6326 PERF_FORMAT_TOTAL_TIME_RUNNING)
6327
6328
6329
6330
6331
6332
6333
6334
6335 static void perf_output_read(struct perf_output_handle *handle,
6336 struct perf_event *event)
6337 {
6338 u64 enabled = 0, running = 0, now;
6339 u64 read_format = event->attr.read_format;
6340
6341
6342
6343
6344
6345
6346
6347
6348
6349
6350 if (read_format & PERF_FORMAT_TOTAL_TIMES)
6351 calc_timer_values(event, &now, &enabled, &running);
6352
6353 if (event->attr.read_format & PERF_FORMAT_GROUP)
6354 perf_output_read_group(handle, event, enabled, running);
6355 else
6356 perf_output_read_one(handle, event, enabled, running);
6357 }
6358
6359 void perf_output_sample(struct perf_output_handle *handle,
6360 struct perf_event_header *header,
6361 struct perf_sample_data *data,
6362 struct perf_event *event)
6363 {
6364 u64 sample_type = data->type;
6365
6366 perf_output_put(handle, *header);
6367
6368 if (sample_type & PERF_SAMPLE_IDENTIFIER)
6369 perf_output_put(handle, data->id);
6370
6371 if (sample_type & PERF_SAMPLE_IP)
6372 perf_output_put(handle, data->ip);
6373
6374 if (sample_type & PERF_SAMPLE_TID)
6375 perf_output_put(handle, data->tid_entry);
6376
6377 if (sample_type & PERF_SAMPLE_TIME)
6378 perf_output_put(handle, data->time);
6379
6380 if (sample_type & PERF_SAMPLE_ADDR)
6381 perf_output_put(handle, data->addr);
6382
6383 if (sample_type & PERF_SAMPLE_ID)
6384 perf_output_put(handle, data->id);
6385
6386 if (sample_type & PERF_SAMPLE_STREAM_ID)
6387 perf_output_put(handle, data->stream_id);
6388
6389 if (sample_type & PERF_SAMPLE_CPU)
6390 perf_output_put(handle, data->cpu_entry);
6391
6392 if (sample_type & PERF_SAMPLE_PERIOD)
6393 perf_output_put(handle, data->period);
6394
6395 if (sample_type & PERF_SAMPLE_READ)
6396 perf_output_read(handle, event);
6397
6398 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
6399 int size = 1;
6400
6401 size += data->callchain->nr;
6402 size *= sizeof(u64);
6403 __output_copy(handle, data->callchain, size);
6404 }
6405
6406 if (sample_type & PERF_SAMPLE_RAW) {
6407 struct perf_raw_record *raw = data->raw;
6408
6409 if (raw) {
6410 struct perf_raw_frag *frag = &raw->frag;
6411
6412 perf_output_put(handle, raw->size);
6413 do {
6414 if (frag->copy) {
6415 __output_custom(handle, frag->copy,
6416 frag->data, frag->size);
6417 } else {
6418 __output_copy(handle, frag->data,
6419 frag->size);
6420 }
6421 if (perf_raw_frag_last(frag))
6422 break;
6423 frag = frag->next;
6424 } while (1);
6425 if (frag->pad)
6426 __output_skip(handle, NULL, frag->pad);
6427 } else {
6428 struct {
6429 u32 size;
6430 u32 data;
6431 } raw = {
6432 .size = sizeof(u32),
6433 .data = 0,
6434 };
6435 perf_output_put(handle, raw);
6436 }
6437 }
6438
6439 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
6440 if (data->br_stack) {
6441 size_t size;
6442
6443 size = data->br_stack->nr
6444 * sizeof(struct perf_branch_entry);
6445
6446 perf_output_put(handle, data->br_stack->nr);
6447 perf_output_copy(handle, data->br_stack->entries, size);
6448 } else {
6449
6450
6451
6452 u64 nr = 0;
6453 perf_output_put(handle, nr);
6454 }
6455 }
6456
6457 if (sample_type & PERF_SAMPLE_REGS_USER) {
6458 u64 abi = data->regs_user.abi;
6459
6460
6461
6462
6463
6464 perf_output_put(handle, abi);
6465
6466 if (abi) {
6467 u64 mask = event->attr.sample_regs_user;
6468 perf_output_sample_regs(handle,
6469 data->regs_user.regs,
6470 mask);
6471 }
6472 }
6473
6474 if (sample_type & PERF_SAMPLE_STACK_USER) {
6475 perf_output_sample_ustack(handle,
6476 data->stack_user_size,
6477 data->regs_user.regs);
6478 }
6479
6480 if (sample_type & PERF_SAMPLE_WEIGHT)
6481 perf_output_put(handle, data->weight);
6482
6483 if (sample_type & PERF_SAMPLE_DATA_SRC)
6484 perf_output_put(handle, data->data_src.val);
6485
6486 if (sample_type & PERF_SAMPLE_TRANSACTION)
6487 perf_output_put(handle, data->txn);
6488
6489 if (sample_type & PERF_SAMPLE_REGS_INTR) {
6490 u64 abi = data->regs_intr.abi;
6491
6492
6493
6494
6495 perf_output_put(handle, abi);
6496
6497 if (abi) {
6498 u64 mask = event->attr.sample_regs_intr;
6499
6500 perf_output_sample_regs(handle,
6501 data->regs_intr.regs,
6502 mask);
6503 }
6504 }
6505
6506 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
6507 perf_output_put(handle, data->phys_addr);
6508
6509 if (!event->attr.watermark) {
6510 int wakeup_events = event->attr.wakeup_events;
6511
6512 if (wakeup_events) {
6513 struct ring_buffer *rb = handle->rb;
6514 int events = local_inc_return(&rb->events);
6515
6516 if (events >= wakeup_events) {
6517 local_sub(wakeup_events, &rb->events);
6518 local_inc(&rb->wakeup);
6519 }
6520 }
6521 }
6522 }
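The order of perf_output_put() calls above is exactly the order a consumer must parse a PERF_RECORD_SAMPLE in; only fields whose bit is set in attr.sample_type are present. For the common sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD case the record is laid out as:

	struct {
		struct perf_event_header header;	/* PERF_RECORD_SAMPLE */
		uint64_t ip;
		uint32_t pid, tid;
		uint64_t time;
		uint64_t period;
	};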
6523
6524 static u64 perf_virt_to_phys(u64 virt)
6525 {
6526 u64 phys_addr = 0;
6527 struct page *p = NULL;
6528
6529 if (!virt)
6530 return 0;
6531
6532 if (virt >= TASK_SIZE) {
6533
6534 if (virt_addr_valid((void *)(uintptr_t)virt) &&
6535 !(virt >= VMALLOC_START && virt < VMALLOC_END))
6536 phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
6537 } else {
6538
6539
6540
6541
6542
6543
6544
6545 if (current->mm != NULL) {
6546 pagefault_disable();
6547 if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
6548 phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
6549 pagefault_enable();
6550 }
6551
6552 if (p)
6553 put_page(p);
6554 }
6555
6556 return phys_addr;
6557 }
6558
6559 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
6560
6561 struct perf_callchain_entry *
6562 perf_callchain(struct perf_event *event, struct pt_regs *regs)
6563 {
6564 bool kernel = !event->attr.exclude_callchain_kernel;
6565 bool user = !event->attr.exclude_callchain_user;
6566
6567 bool crosstask = event->ctx->task && event->ctx->task != current;
6568 const u32 max_stack = event->attr.sample_max_stack;
6569 struct perf_callchain_entry *callchain;
6570
6571 if (!kernel && !user)
6572 return &__empty_callchain;
6573
6574 callchain = get_perf_callchain(regs, 0, kernel, user,
6575 max_stack, crosstask, true);
6576 return callchain ?: &__empty_callchain;
6577 }
6578
6579 void perf_prepare_sample(struct perf_event_header *header,
6580 struct perf_sample_data *data,
6581 struct perf_event *event,
6582 struct pt_regs *regs)
6583 {
6584 u64 sample_type = event->attr.sample_type;
6585
6586 header->type = PERF_RECORD_SAMPLE;
6587 header->size = sizeof(*header) + event->header_size;
6588
6589 header->misc = 0;
6590 header->misc |= perf_misc_flags(regs);
6591
6592 __perf_event_header__init_id(header, data, event);
6593
6594 if (sample_type & PERF_SAMPLE_IP)
6595 data->ip = perf_instruction_pointer(regs);
6596
6597 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
6598 int size = 1;
6599
6600 if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
6601 data->callchain = perf_callchain(event, regs);
6602
6603 size += data->callchain->nr;
6604
6605 header->size += size * sizeof(u64);
6606 }
6607
6608 if (sample_type & PERF_SAMPLE_RAW) {
6609 struct perf_raw_record *raw = data->raw;
6610 int size;
6611
6612 if (raw) {
6613 struct perf_raw_frag *frag = &raw->frag;
6614 u32 sum = 0;
6615
6616 do {
6617 sum += frag->size;
6618 if (perf_raw_frag_last(frag))
6619 break;
6620 frag = frag->next;
6621 } while (1);
6622
6623 size = round_up(sum + sizeof(u32), sizeof(u64));
6624 raw->size = size - sizeof(u32);
6625 frag->pad = raw->size - sum;
6626 } else {
6627 size = sizeof(u64);
6628 }
6629
6630 header->size += size;
6631 }
6632
6633 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
6634 int size = sizeof(u64);
6635 if (data->br_stack) {
6636 size += data->br_stack->nr
6637 * sizeof(struct perf_branch_entry);
6638 }
6639 header->size += size;
6640 }
6641
6642 if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
6643 perf_sample_regs_user(&data->regs_user, regs,
6644 &data->regs_user_copy);
6645
6646 if (sample_type & PERF_SAMPLE_REGS_USER) {
6647
6648 int size = sizeof(u64);
6649
6650 if (data->regs_user.regs) {
6651 u64 mask = event->attr.sample_regs_user;
6652 size += hweight64(mask) * sizeof(u64);
6653 }
6654
6655 header->size += size;
6656 }
6657
6658 if (sample_type & PERF_SAMPLE_STACK_USER) {
6659
6660
6661
6662
6663
6664
6665 u16 stack_size = event->attr.sample_stack_user;
6666 u16 size = sizeof(u64);
6667
6668 stack_size = perf_sample_ustack_size(stack_size, header->size,
6669 data->regs_user.regs);
6670
6671
6672
6673
6674
6675
6676 if (stack_size)
6677 size += sizeof(u64) + stack_size;
6678
6679 data->stack_user_size = stack_size;
6680 header->size += size;
6681 }
6682
6683 if (sample_type & PERF_SAMPLE_REGS_INTR) {
6684
6685 int size = sizeof(u64);
6686
6687 perf_sample_regs_intr(&data->regs_intr, regs);
6688
6689 if (data->regs_intr.regs) {
6690 u64 mask = event->attr.sample_regs_intr;
6691
6692 size += hweight64(mask) * sizeof(u64);
6693 }
6694
6695 header->size += size;
6696 }
6697
6698 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
6699 data->phys_addr = perf_virt_to_phys(data->addr);
6700 }
6701
6702 static __always_inline int
6703 __perf_event_output(struct perf_event *event,
6704 struct perf_sample_data *data,
6705 struct pt_regs *regs,
6706 int (*output_begin)(struct perf_output_handle *,
6707 struct perf_event *,
6708 unsigned int))
6709 {
6710 struct perf_output_handle handle;
6711 struct perf_event_header header;
6712 int err;
6713
6714
6715 rcu_read_lock();
6716
6717 perf_prepare_sample(&header, data, event, regs);
6718
6719 err = output_begin(&handle, event, header.size);
6720 if (err)
6721 goto exit;
6722
6723 perf_output_sample(&handle, &header, data, event);
6724
6725 perf_output_end(&handle);
6726
6727 exit:
6728 rcu_read_unlock();
6729 return err;
6730 }
6731
6732 void
6733 perf_event_output_forward(struct perf_event *event,
6734 struct perf_sample_data *data,
6735 struct pt_regs *regs)
6736 {
6737 __perf_event_output(event, data, regs, perf_output_begin_forward);
6738 }
6739
6740 void
6741 perf_event_output_backward(struct perf_event *event,
6742 struct perf_sample_data *data,
6743 struct pt_regs *regs)
6744 {
6745 __perf_event_output(event, data, regs, perf_output_begin_backward);
6746 }
6747
6748 int
6749 perf_event_output(struct perf_event *event,
6750 struct perf_sample_data *data,
6751 struct pt_regs *regs)
6752 {
6753 return __perf_event_output(event, data, regs, perf_output_begin);
6754 }
6755
6756
6757
6758
6759
6760 struct perf_read_event {
6761 struct perf_event_header header;
6762
6763 u32 pid;
6764 u32 tid;
6765 };
6766
6767 static void
6768 perf_event_read_event(struct perf_event *event,
6769 struct task_struct *task)
6770 {
6771 struct perf_output_handle handle;
6772 struct perf_sample_data sample;
6773 struct perf_read_event read_event = {
6774 .header = {
6775 .type = PERF_RECORD_READ,
6776 .misc = 0,
6777 .size = sizeof(read_event) + event->read_size,
6778 },
6779 .pid = perf_event_pid(event, task),
6780 .tid = perf_event_tid(event, task),
6781 };
6782 int ret;
6783
6784 perf_event_header__init_id(&read_event.header, &sample, event);
6785 ret = perf_output_begin(&handle, event, read_event.header.size);
6786 if (ret)
6787 return;
6788
6789 perf_output_put(&handle, read_event);
6790 perf_output_read(&handle, event);
6791 perf_event__output_id_sample(event, &handle, &sample);
6792
6793 perf_output_end(&handle);
6794 }
6795
6796 typedef void (perf_iterate_f)(struct perf_event *event, void *data);
6797
6798 static void
6799 perf_iterate_ctx(struct perf_event_context *ctx,
6800 perf_iterate_f output,
6801 void *data, bool all)
6802 {
6803 struct perf_event *event;
6804
6805 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
6806 if (!all) {
6807 if (event->state < PERF_EVENT_STATE_INACTIVE)
6808 continue;
6809 if (!event_filter_match(event))
6810 continue;
6811 }
6812
6813 output(event, data);
6814 }
6815 }
6816
6817 static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
6818 {
6819 struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
6820 struct perf_event *event;
6821
6822 list_for_each_entry_rcu(event, &pel->list, sb_list) {
6823
6824
6825
6826
6827
6828 if (!smp_load_acquire(&event->ctx))
6829 continue;
6830
6831 if (event->state < PERF_EVENT_STATE_INACTIVE)
6832 continue;
6833 if (!event_filter_match(event))
6834 continue;
6835 output(event, data);
6836 }
6837 }
6838
6839
6840
6841
6842
6843
6844
6845 static void
6846 perf_iterate_sb(perf_iterate_f output, void *data,
6847 struct perf_event_context *task_ctx)
6848 {
6849 struct perf_event_context *ctx;
6850 int ctxn;
6851
6852 rcu_read_lock();
6853 preempt_disable();
6854
6855
6856
6857
6858
6859
6860 if (task_ctx) {
6861 perf_iterate_ctx(task_ctx, output, data, false);
6862 goto done;
6863 }
6864
6865 perf_iterate_sb_cpu(output, data);
6866
6867 for_each_task_context_nr(ctxn) {
6868 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
6869 if (ctx)
6870 perf_iterate_ctx(ctx, output, data, false);
6871 }
6872 done:
6873 preempt_enable();
6874 rcu_read_unlock();
6875 }
6876
6877
6878
6879
6880
6881 static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
6882 {
6883 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
6884 struct perf_addr_filter *filter;
6885 unsigned int restart = 0, count = 0;
6886 unsigned long flags;
6887
6888 if (!has_addr_filter(event))
6889 return;
6890
6891 raw_spin_lock_irqsave(&ifh->lock, flags);
6892 list_for_each_entry(filter, &ifh->list, entry) {
6893 if (filter->path.dentry) {
6894 event->addr_filter_ranges[count].start = 0;
6895 event->addr_filter_ranges[count].size = 0;
6896 restart++;
6897 }
6898
6899 count++;
6900 }
6901
6902 if (restart)
6903 event->addr_filters_gen++;
6904 raw_spin_unlock_irqrestore(&ifh->lock, flags);
6905
6906 if (restart)
6907 perf_event_stop(event, 1);
6908 }
6909
6910 void perf_event_exec(void)
6911 {
6912 struct perf_event_context *ctx;
6913 int ctxn;
6914
6915 rcu_read_lock();
6916 for_each_task_context_nr(ctxn) {
6917 ctx = current->perf_event_ctxp[ctxn];
6918 if (!ctx)
6919 continue;
6920
6921 perf_event_enable_on_exec(ctxn);
6922
6923 perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
6924 true);
6925 }
6926 rcu_read_unlock();
6927 }
6928
6929 struct remote_output {
6930 struct ring_buffer *rb;
6931 int err;
6932 };
6933
6934 static void __perf_event_output_stop(struct perf_event *event, void *data)
6935 {
6936 struct perf_event *parent = event->parent;
6937 struct remote_output *ro = data;
6938 struct ring_buffer *rb = ro->rb;
6939 struct stop_event_data sd = {
6940 .event = event,
6941 };
6942
6943 if (!has_aux(event))
6944 return;
6945
6946 if (!parent)
6947 parent = event;
6948
6949
6950
6951
6952
6953
6954
6955
6956
6957
6958
6959 if (rcu_dereference(parent->rb) == rb)
6960 ro->err = __perf_event_stop(&sd);
6961 }
6962
6963 static int __perf_pmu_output_stop(void *info)
6964 {
6965 struct perf_event *event = info;
6966 struct pmu *pmu = event->ctx->pmu;
6967 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
6968 struct remote_output ro = {
6969 .rb = event->rb,
6970 };
6971
6972 rcu_read_lock();
6973 perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
6974 if (cpuctx->task_ctx)
6975 perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
6976 &ro, false);
6977 rcu_read_unlock();
6978
6979 return ro.err;
6980 }
6981
6982 static void perf_pmu_output_stop(struct perf_event *event)
6983 {
6984 struct perf_event *iter;
6985 int err, cpu;
6986
6987 restart:
6988 rcu_read_lock();
6989 list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
6990
6991
6992
6993
6994
6995
6996 cpu = iter->cpu;
6997 if (cpu == -1)
6998 cpu = READ_ONCE(iter->oncpu);
6999
7000 if (cpu == -1)
7001 continue;
7002
7003 err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
7004 if (err == -EAGAIN) {
7005 rcu_read_unlock();
7006 goto restart;
7007 }
7008 }
7009 rcu_read_unlock();
7010 }
7011
7012
7013
7014
7015
7016
7017
7018 struct perf_task_event {
7019 struct task_struct *task;
7020 struct perf_event_context *task_ctx;
7021
7022 struct {
7023 struct perf_event_header header;
7024
7025 u32 pid;
7026 u32 ppid;
7027 u32 tid;
7028 u32 ptid;
7029 u64 time;
7030 } event_id;
7031 };
7032
7033 static int perf_event_task_match(struct perf_event *event)
7034 {
7035 return event->attr.comm || event->attr.mmap ||
7036 event->attr.mmap2 || event->attr.mmap_data ||
7037 event->attr.task;
7038 }
7039
7040 static void perf_event_task_output(struct perf_event *event,
7041 void *data)
7042 {
7043 struct perf_task_event *task_event = data;
7044 struct perf_output_handle handle;
7045 struct perf_sample_data sample;
7046 struct task_struct *task = task_event->task;
7047 int ret, size = task_event->event_id.header.size;
7048
7049 if (!perf_event_task_match(event))
7050 return;
7051
7052 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
7053
7054 ret = perf_output_begin(&handle, event,
7055 task_event->event_id.header.size);
7056 if (ret)
7057 goto out;
7058
7059 task_event->event_id.pid = perf_event_pid(event, task);
7060 task_event->event_id.tid = perf_event_tid(event, task);
7061
7062 if (task_event->event_id.header.type == PERF_RECORD_EXIT) {
7063 task_event->event_id.ppid = perf_event_pid(event,
7064 task->real_parent);
7065 task_event->event_id.ptid = perf_event_pid(event,
7066 task->real_parent);
7067 } else {
7068 task_event->event_id.ppid = perf_event_pid(event, current);
7069 task_event->event_id.ptid = perf_event_tid(event, current);
7070 }
7071
7072 task_event->event_id.time = perf_event_clock(event);
7073
7074 perf_output_put(&handle, task_event->event_id);
7075
7076 perf_event__output_id_sample(event, &handle, &sample);
7077
7078 perf_output_end(&handle);
7079 out:
7080 task_event->event_id.header.size = size;
7081 }
7082
7083 static void perf_event_task(struct task_struct *task,
7084 struct perf_event_context *task_ctx,
7085 int new)
7086 {
7087 struct perf_task_event task_event;
7088
7089 if (!atomic_read(&nr_comm_events) &&
7090 !atomic_read(&nr_mmap_events) &&
7091 !atomic_read(&nr_task_events))
7092 return;
7093
7094 task_event = (struct perf_task_event){
7095 .task = task,
7096 .task_ctx = task_ctx,
7097 .event_id = {
7098 .header = {
7099 .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
7100 .misc = 0,
7101 .size = sizeof(task_event.event_id),
7102 },
7103
7104
7105
7106
7107
7108 },
7109 };
7110
7111 perf_iterate_sb(perf_event_task_output,
7112 &task_event,
7113 task_ctx);
7114 }
7115
7116 void perf_event_fork(struct task_struct *task)
7117 {
7118 perf_event_task(task, NULL, 1);
7119 perf_event_namespaces(task);
7120 }
7121
7122
7123
7124
7125
7126 struct perf_comm_event {
7127 struct task_struct *task;
7128 char *comm;
7129 int comm_size;
7130
7131 struct {
7132 struct perf_event_header header;
7133
7134 u32 pid;
7135 u32 tid;
7136 } event_id;
7137 };
7138
7139 static int perf_event_comm_match(struct perf_event *event)
7140 {
7141 return event->attr.comm;
7142 }
7143
7144 static void perf_event_comm_output(struct perf_event *event,
7145 void *data)
7146 {
7147 struct perf_comm_event *comm_event = data;
7148 struct perf_output_handle handle;
7149 struct perf_sample_data sample;
7150 int size = comm_event->event_id.header.size;
7151 int ret;
7152
7153 if (!perf_event_comm_match(event))
7154 return;
7155
7156 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
7157 ret = perf_output_begin(&handle, event,
7158 comm_event->event_id.header.size);
7159
7160 if (ret)
7161 goto out;
7162
7163 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
7164 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
7165
7166 perf_output_put(&handle, comm_event->event_id);
7167 __output_copy(&handle, comm_event->comm,
7168 comm_event->comm_size);
7169
7170 perf_event__output_id_sample(event, &handle, &sample);
7171
7172 perf_output_end(&handle);
7173 out:
7174 comm_event->event_id.header.size = size;
7175 }
7176
7177 static void perf_event_comm_event(struct perf_comm_event *comm_event)
7178 {
7179 char comm[TASK_COMM_LEN];
7180 unsigned int size;
7181
7182 memset(comm, 0, sizeof(comm));
7183 strlcpy(comm, comm_event->task->comm, sizeof(comm));
7184 size = ALIGN(strlen(comm)+1, sizeof(u64));
7185
7186 comm_event->comm = comm;
7187 comm_event->comm_size = size;
7188
7189 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
7190
7191 perf_iterate_sb(perf_event_comm_output,
7192 comm_event,
7193 NULL);
7194 }
7195
7196 void perf_event_comm(struct task_struct *task, bool exec)
7197 {
7198 struct perf_comm_event comm_event;
7199
7200 if (!atomic_read(&nr_comm_events))
7201 return;
7202
7203 comm_event = (struct perf_comm_event){
7204 .task = task,
7205
7206
7207 .event_id = {
7208 .header = {
7209 .type = PERF_RECORD_COMM,
7210 .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
7211
7212 },
7213
7214
7215 },
7216 };
7217
7218 perf_event_comm_event(&comm_event);
7219 }
7220
7221
7222
7223
7224
7225 struct perf_namespaces_event {
7226 struct task_struct *task;
7227
7228 struct {
7229 struct perf_event_header header;
7230
7231 u32 pid;
7232 u32 tid;
7233 u64 nr_namespaces;
7234 struct perf_ns_link_info link_info[NR_NAMESPACES];
7235 } event_id;
7236 };
7237
7238 static int perf_event_namespaces_match(struct perf_event *event)
7239 {
7240 return event->attr.namespaces;
7241 }
7242
7243 static void perf_event_namespaces_output(struct perf_event *event,
7244 void *data)
7245 {
7246 struct perf_namespaces_event *namespaces_event = data;
7247 struct perf_output_handle handle;
7248 struct perf_sample_data sample;
7249 u16 header_size = namespaces_event->event_id.header.size;
7250 int ret;
7251
7252 if (!perf_event_namespaces_match(event))
7253 return;
7254
7255 perf_event_header__init_id(&namespaces_event->event_id.header,
7256 &sample, event);
7257 ret = perf_output_begin(&handle, event,
7258 namespaces_event->event_id.header.size);
7259 if (ret)
7260 goto out;
7261
7262 namespaces_event->event_id.pid = perf_event_pid(event,
7263 namespaces_event->task);
7264 namespaces_event->event_id.tid = perf_event_tid(event,
7265 namespaces_event->task);
7266
7267 perf_output_put(&handle, namespaces_event->event_id);
7268
7269 perf_event__output_id_sample(event, &handle, &sample);
7270
7271 perf_output_end(&handle);
7272 out:
7273 namespaces_event->event_id.header.size = header_size;
7274 }
7275
7276 static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
7277 struct task_struct *task,
7278 const struct proc_ns_operations *ns_ops)
7279 {
7280 struct path ns_path;
7281 struct inode *ns_inode;
7282 void *error;
7283
7284 error = ns_get_path(&ns_path, task, ns_ops);
7285 if (!error) {
7286 ns_inode = ns_path.dentry->d_inode;
7287 ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
7288 ns_link_info->ino = ns_inode->i_ino;
7289 path_put(&ns_path);
7290 }
7291 }
7292
7293 void perf_event_namespaces(struct task_struct *task)
7294 {
7295 struct perf_namespaces_event namespaces_event;
7296 struct perf_ns_link_info *ns_link_info;
7297
7298 if (!atomic_read(&nr_namespaces_events))
7299 return;
7300
7301 namespaces_event = (struct perf_namespaces_event){
7302 .task = task,
7303 .event_id = {
7304 .header = {
7305 .type = PERF_RECORD_NAMESPACES,
7306 .misc = 0,
7307 .size = sizeof(namespaces_event.event_id),
7308 },
7309
7310
7311 .nr_namespaces = NR_NAMESPACES,
7312
7313 },
7314 };
7315
7316 ns_link_info = namespaces_event.event_id.link_info;
7317
7318 perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
7319 task, &mntns_operations);
7320
7321 #ifdef CONFIG_USER_NS
7322 perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
7323 task, &userns_operations);
7324 #endif
7325 #ifdef CONFIG_NET_NS
7326 perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
7327 task, &netns_operations);
7328 #endif
7329 #ifdef CONFIG_UTS_NS
7330 perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
7331 task, &utsns_operations);
7332 #endif
7333 #ifdef CONFIG_IPC_NS
7334 perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
7335 task, &ipcns_operations);
7336 #endif
7337 #ifdef CONFIG_PID_NS
7338 perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
7339 task, &pidns_operations);
7340 #endif
7341 #ifdef CONFIG_CGROUPS
7342 perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
7343 task, &cgroupns_operations);
7344 #endif
7345
7346 perf_iterate_sb(perf_event_namespaces_output,
7347 &namespaces_event,
7348 NULL);
7349 }
7350
7351
7352
7353
7354
7355 struct perf_mmap_event {
7356 struct vm_area_struct *vma;
7357
7358 const char *file_name;
7359 int file_size;
7360 int maj, min;
7361 u64 ino;
7362 u64 ino_generation;
7363 u32 prot, flags;
7364
7365 struct {
7366 struct perf_event_header header;
7367
7368 u32 pid;
7369 u32 tid;
7370 u64 start;
7371 u64 len;
7372 u64 pgoff;
7373 } event_id;
7374 };
7375
7376 static int perf_event_mmap_match(struct perf_event *event,
7377 void *data)
7378 {
7379 struct perf_mmap_event *mmap_event = data;
7380 struct vm_area_struct *vma = mmap_event->vma;
7381 int executable = vma->vm_flags & VM_EXEC;
7382
7383 return (!executable && event->attr.mmap_data) ||
7384 (executable && (event->attr.mmap || event->attr.mmap2));
7385 }
7386
7387 static void perf_event_mmap_output(struct perf_event *event,
7388 void *data)
7389 {
7390 struct perf_mmap_event *mmap_event = data;
7391 struct perf_output_handle handle;
7392 struct perf_sample_data sample;
7393 int size = mmap_event->event_id.header.size;
7394 u32 type = mmap_event->event_id.header.type;
7395 int ret;
7396
7397 if (!perf_event_mmap_match(event, data))
7398 return;
7399
7400 if (event->attr.mmap2) {
7401 mmap_event->event_id.header.type = PERF_RECORD_MMAP2;
7402 mmap_event->event_id.header.size += sizeof(mmap_event->maj);
7403 mmap_event->event_id.header.size += sizeof(mmap_event->min);
7404 mmap_event->event_id.header.size += sizeof(mmap_event->ino);
7405 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
7406 mmap_event->event_id.header.size += sizeof(mmap_event->prot);
7407 mmap_event->event_id.header.size += sizeof(mmap_event->flags);
7408 }
7409
7410 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
7411 ret = perf_output_begin(&handle, event,
7412 mmap_event->event_id.header.size);
7413 if (ret)
7414 goto out;
7415
7416 mmap_event->event_id.pid = perf_event_pid(event, current);
7417 mmap_event->event_id.tid = perf_event_tid(event, current);
7418
7419 perf_output_put(&handle, mmap_event->event_id);
7420
7421 if (event->attr.mmap2) {
7422 perf_output_put(&handle, mmap_event->maj);
7423 perf_output_put(&handle, mmap_event->min);
7424 perf_output_put(&handle, mmap_event->ino);
7425 perf_output_put(&handle, mmap_event->ino_generation);
7426 perf_output_put(&handle, mmap_event->prot);
7427 perf_output_put(&handle, mmap_event->flags);
7428 }
7429
7430 __output_copy(&handle, mmap_event->file_name,
7431 mmap_event->file_size);
7432
7433 perf_event__output_id_sample(event, &handle, &sample);
7434
7435 perf_output_end(&handle);
7436 out:
7437 mmap_event->event_id.header.size = size;
7438 mmap_event->event_id.header.type = type;
7439 }
7440
7441 static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
7442 {
7443 struct vm_area_struct *vma = mmap_event->vma;
7444 struct file *file = vma->vm_file;
7445 int maj = 0, min = 0;
7446 u64 ino = 0, gen = 0;
7447 u32 prot = 0, flags = 0;
7448 unsigned int size;
7449 char tmp[16];
7450 char *buf = NULL;
7451 char *name;
7452
7453 if (vma->vm_flags & VM_READ)
7454 prot |= PROT_READ;
7455 if (vma->vm_flags & VM_WRITE)
7456 prot |= PROT_WRITE;
7457 if (vma->vm_flags & VM_EXEC)
7458 prot |= PROT_EXEC;
7459
7460 if (vma->vm_flags & VM_MAYSHARE)
7461 flags = MAP_SHARED;
7462 else
7463 flags = MAP_PRIVATE;
7464
7465 if (vma->vm_flags & VM_DENYWRITE)
7466 flags |= MAP_DENYWRITE;
7467 if (vma->vm_flags & VM_MAYEXEC)
7468 flags |= MAP_EXECUTABLE;
7469 if (vma->vm_flags & VM_LOCKED)
7470 flags |= MAP_LOCKED;
7471 if (vma->vm_flags & VM_HUGETLB)
7472 flags |= MAP_HUGETLB;
7473
7474 if (file) {
7475 struct inode *inode;
7476 dev_t dev;
7477
7478 buf = kmalloc(PATH_MAX, GFP_KERNEL);
7479 if (!buf) {
7480 name = "//enomem";
7481 goto cpy_name;
7482 }
7483
7484 /*
7485  * file_path() fills the buffer from the end, so leave room
7486  * (sizeof(u64)) for the zero padding added at got_name below.
7487  */
7488 name = file_path(file, buf, PATH_MAX - sizeof(u64));
7489 if (IS_ERR(name)) {
7490 name = "//toolong";
7491 goto cpy_name;
7492 }
7493 inode = file_inode(vma->vm_file);
7494 dev = inode->i_sb->s_dev;
7495 ino = inode->i_ino;
7496 gen = inode->i_generation;
7497 maj = MAJOR(dev);
7498 min = MINOR(dev);
7499
7500 goto got_name;
7501 } else {
7502 if (vma->vm_ops && vma->vm_ops->name) {
7503 name = (char *) vma->vm_ops->name(vma);
7504 if (name)
7505 goto cpy_name;
7506 }
7507
7508 name = (char *)arch_vma_name(vma);
7509 if (name)
7510 goto cpy_name;
7511
7512 if (vma->vm_start <= vma->vm_mm->start_brk &&
7513 vma->vm_end >= vma->vm_mm->brk) {
7514 name = "[heap]";
7515 goto cpy_name;
7516 }
7517 if (vma->vm_start <= vma->vm_mm->start_stack &&
7518 vma->vm_end >= vma->vm_mm->start_stack) {
7519 name = "[stack]";
7520 goto cpy_name;
7521 }
7522
7523 name = "//anon";
7524 goto cpy_name;
7525 }
7526
7527 cpy_name:
7528 strlcpy(tmp, name, sizeof(tmp));
7529 name = tmp;
7530 got_name:
7531 /*
7532  * The record is copied out in u64 chunks, so pad the name with
7533  * NUL bytes up to the next 8-byte boundary; this also avoids
7534  * leaking stale buffer contents to userspace.
7535  */
7536 size = strlen(name)+1;
7537 while (!IS_ALIGNED(size, sizeof(u64)))
7538 name[size++] = '\0';
7539
7540 mmap_event->file_name = name;
7541 mmap_event->file_size = size;
7542 mmap_event->maj = maj;
7543 mmap_event->min = min;
7544 mmap_event->ino = ino;
7545 mmap_event->ino_generation = gen;
7546 mmap_event->prot = prot;
7547 mmap_event->flags = flags;
7548
7549 if (!(vma->vm_flags & VM_EXEC))
7550 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
7551
7552 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
7553
7554 perf_iterate_sb(perf_event_mmap_output,
7555 mmap_event,
7556 NULL);
7557
7558 kfree(buf);
7559 }
7560
7561 /*
7562  * Check whether the filter's inode and offset range match a file mapping.
7563  */
7564 static bool perf_addr_filter_match(struct perf_addr_filter *filter,
7565 struct file *file, unsigned long offset,
7566 unsigned long size)
7567 {
7568 /* A filter without a path (kernel filter) never matches a file mapping. */
7569 if (!filter->path.dentry)
7570 return false;
7571
7572 if (d_inode(filter->path.dentry) != file_inode(file))
7573 return false;
7574
7575 if (filter->offset > offset + size)
7576 return false;
7577
7578 if (filter->offset + filter->size < offset)
7579 return false;
7580
7581 return true;
7582 }
7583
7584 static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
7585 struct vm_area_struct *vma,
7586 struct perf_addr_filter_range *fr)
7587 {
7588 unsigned long vma_size = vma->vm_end - vma->vm_start;
7589 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
7590 struct file *file = vma->vm_file;
7591
7592 if (!perf_addr_filter_match(filter, file, off, vma_size))
7593 return false;
7594
7595 if (filter->offset < off) {
7596 fr->start = vma->vm_start;
7597 fr->size = min(vma_size, filter->size - (off - filter->offset));
7598 } else {
7599 fr->start = vma->vm_start + filter->offset - off;
7600 fr->size = min(vma->vm_end - fr->start, filter->size);
7601 }
7602
7603 return true;
7604 }
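/*
 * Worked example (made-up numbers): a vma that maps file offsets
 * [0x2000, 0x4000) starting at vm_start, combined with a filter covering
 * file offsets [0x1000, 0x3000), overlaps in [0x2000, 0x3000); the code
 * above then reports fr->start = vm_start and fr->size = 0x1000.
 */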
7605
7606 static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
7607 {
7608 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
7609 struct vm_area_struct *vma = data;
7610 struct perf_addr_filter *filter;
7611 unsigned int restart = 0, count = 0;
7612 unsigned long flags;
7613
7614 if (!has_addr_filter(event))
7615 return;
7616
7617 if (!vma->vm_file)
7618 return;
7619
7620 raw_spin_lock_irqsave(&ifh->lock, flags);
7621 list_for_each_entry(filter, &ifh->list, entry) {
7622 if (perf_addr_filter_vma_adjust(filter, vma,
7623 &event->addr_filter_ranges[count]))
7624 restart++;
7625
7626 count++;
7627 }
7628
7629 if (restart)
7630 event->addr_filters_gen++;
7631 raw_spin_unlock_irqrestore(&ifh->lock, flags);
7632
7633 if (restart)
7634 perf_event_stop(event, 1);
7635 }
7636
7637 /*
7638  * Adjust all of the current task's events' address filters to a newly mapped vma.
7639  */
7640 static void perf_addr_filters_adjust(struct vm_area_struct *vma)
7641 {
7642 struct perf_event_context *ctx;
7643 int ctxn;
7644
7645 /*
7646  * Data tracing isn't supported yet, so there is no need to track
7647  * anything that isn't related to executable code:
7648  */
7649 if (!(vma->vm_flags & VM_EXEC))
7650 return;
7651
7652 rcu_read_lock();
7653 for_each_task_context_nr(ctxn) {
7654 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
7655 if (!ctx)
7656 continue;
7657
7658 perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
7659 }
7660 rcu_read_unlock();
7661 }
7662
7663 void perf_event_mmap(struct vm_area_struct *vma)
7664 {
7665 struct perf_mmap_event mmap_event;
7666
7667 if (!atomic_read(&nr_mmap_events))
7668 return;
7669
7670 mmap_event = (struct perf_mmap_event){
7671 .vma = vma,
7672
7673
7674 .event_id = {
7675 .header = {
7676 .type = PERF_RECORD_MMAP,
7677 .misc = PERF_RECORD_MISC_USER,
7678
7679 },
7680
7681
7682 .start = vma->vm_start,
7683 .len = vma->vm_end - vma->vm_start,
7684 .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT,
7685 },
7686 /*
7687  * The remaining fields (file_name, file_size, maj, min, ino,
7688  * ino_generation, prot, flags) and the final header size are
7689  * filled in by perf_event_mmap_event() below; maj/min/ino/
7690  * ino_generation/prot/flags are only emitted for attr.mmap2 events.
7691  */
7692 };
7693
7694 perf_addr_filters_adjust(vma);
7695 perf_event_mmap_event(&mmap_event);
7696 }
7697
7698 void perf_event_aux_event(struct perf_event *event, unsigned long head,
7699 unsigned long size, u64 flags)
7700 {
7701 struct perf_output_handle handle;
7702 struct perf_sample_data sample;
7703 struct perf_aux_event {
7704 struct perf_event_header header;
7705 u64 offset;
7706 u64 size;
7707 u64 flags;
7708 } rec = {
7709 .header = {
7710 .type = PERF_RECORD_AUX,
7711 .misc = 0,
7712 .size = sizeof(rec),
7713 },
7714 .offset = head,
7715 .size = size,
7716 .flags = flags,
7717 };
7718 int ret;
7719
7720 perf_event_header__init_id(&rec.header, &sample, event);
7721 ret = perf_output_begin(&handle, event, rec.header.size);
7722
7723 if (ret)
7724 return;
7725
7726 perf_output_put(&handle, rec);
7727 perf_event__output_id_sample(event, &handle, &sample);
7728
7729 perf_output_end(&handle);
7730 }
7731
7732 /*
7733  * Lost/dropped samples logging (PERF_RECORD_LOST_SAMPLES)
7734  */
7735 void perf_log_lost_samples(struct perf_event *event, u64 lost)
7736 {
7737 struct perf_output_handle handle;
7738 struct perf_sample_data sample;
7739 int ret;
7740
7741 struct {
7742 struct perf_event_header header;
7743 u64 lost;
7744 } lost_samples_event = {
7745 .header = {
7746 .type = PERF_RECORD_LOST_SAMPLES,
7747 .misc = 0,
7748 .size = sizeof(lost_samples_event),
7749 },
7750 .lost = lost,
7751 };
7752
7753 perf_event_header__init_id(&lost_samples_event.header, &sample, event);
7754
7755 ret = perf_output_begin(&handle, event,
7756 lost_samples_event.header.size);
7757 if (ret)
7758 return;
7759
7760 perf_output_put(&handle, lost_samples_event);
7761 perf_event__output_id_sample(event, &handle, &sample);
7762 perf_output_end(&handle);
7763 }
7764
7765 /*
7766  * context_switch tracking:
7767  * PERF_RECORD_SWITCH / PERF_RECORD_SWITCH_CPU_WIDE
7768  */
7769 struct perf_switch_event {
7770 struct task_struct *task;
7771 struct task_struct *next_prev;
7772
7773 struct {
7774 struct perf_event_header header;
7775 u32 next_prev_pid;
7776 u32 next_prev_tid;
7777 } event_id;
7778 };
7779
7780 static int perf_event_switch_match(struct perf_event *event)
7781 {
7782 return event->attr.context_switch;
7783 }
7784
7785 static void perf_event_switch_output(struct perf_event *event, void *data)
7786 {
7787 struct perf_switch_event *se = data;
7788 struct perf_output_handle handle;
7789 struct perf_sample_data sample;
7790 int ret;
7791
7792 if (!perf_event_switch_match(event))
7793 return;
7794
7795
7796 if (event->ctx->task) {
7797 se->event_id.header.type = PERF_RECORD_SWITCH;
7798 se->event_id.header.size = sizeof(se->event_id.header);
7799 } else {
7800 se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
7801 se->event_id.header.size = sizeof(se->event_id);
7802 se->event_id.next_prev_pid =
7803 perf_event_pid(event, se->next_prev);
7804 se->event_id.next_prev_tid =
7805 perf_event_tid(event, se->next_prev);
7806 }
7807
7808 perf_event_header__init_id(&se->event_id.header, &sample, event);
7809
7810 ret = perf_output_begin(&handle, event, se->event_id.header.size);
7811 if (ret)
7812 return;
7813
7814 if (event->ctx->task)
7815 perf_output_put(&handle, se->event_id.header);
7816 else
7817 perf_output_put(&handle, se->event_id);
7818
7819 perf_event__output_id_sample(event, &handle, &sample);
7820
7821 perf_output_end(&handle);
7822 }
7823
7824 static void perf_event_switch(struct task_struct *task,
7825 struct task_struct *next_prev, bool sched_in)
7826 {
7827 struct perf_switch_event switch_event;
7828
7829
7830 /* N.B. caller checks nr_switch_events != 0 */
7831 switch_event = (struct perf_switch_event){
7832 .task = task,
7833 .next_prev = next_prev,
7834 .event_id = {
7835 .header = {
7836
7837 .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
7838
7839 },
7840
7841
7842 },
7843 };
7844
7845 if (!sched_in && task->state == TASK_RUNNING)
7846 switch_event.event_id.header.misc |=
7847 PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
7848
7849 perf_iterate_sb(perf_event_switch_output,
7850 &switch_event,
7851 NULL);
7852 }
7853
7854 /*
7855  * IRQ throttle logging:
7856  * PERF_RECORD_THROTTLE / PERF_RECORD_UNTHROTTLE
7857  */
7858 static void perf_log_throttle(struct perf_event *event, int enable)
7859 {
7860 struct perf_output_handle handle;
7861 struct perf_sample_data sample;
7862 int ret;
7863
7864 struct {
7865 struct perf_event_header header;
7866 u64 time;
7867 u64 id;
7868 u64 stream_id;
7869 } throttle_event = {
7870 .header = {
7871 .type = PERF_RECORD_THROTTLE,
7872 .misc = 0,
7873 .size = sizeof(throttle_event),
7874 },
7875 .time = perf_event_clock(event),
7876 .id = primary_event_id(event),
7877 .stream_id = event->id,
7878 };
7879
7880 if (enable)
7881 throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
7882
7883 perf_event_header__init_id(&throttle_event.header, &sample, event);
7884
7885 ret = perf_output_begin(&handle, event,
7886 throttle_event.header.size);
7887 if (ret)
7888 return;
7889
7890 perf_output_put(&handle, throttle_event);
7891 perf_event__output_id_sample(event, &handle, &sample);
7892 perf_output_end(&handle);
7893 }
7894
7895 /*
7896  * ksymbol register/unregister tracking (PERF_RECORD_KSYMBOL)
7897  */
7898
7899 struct perf_ksymbol_event {
7900 const char *name;
7901 int name_len;
7902 struct {
7903 struct perf_event_header header;
7904 u64 addr;
7905 u32 len;
7906 u16 ksym_type;
7907 u16 flags;
7908 } event_id;
7909 };
7910
7911 static int perf_event_ksymbol_match(struct perf_event *event)
7912 {
7913 return event->attr.ksymbol;
7914 }
7915
7916 static void perf_event_ksymbol_output(struct perf_event *event, void *data)
7917 {
7918 struct perf_ksymbol_event *ksymbol_event = data;
7919 struct perf_output_handle handle;
7920 struct perf_sample_data sample;
7921 int ret;
7922
7923 if (!perf_event_ksymbol_match(event))
7924 return;
7925
7926 perf_event_header__init_id(&ksymbol_event->event_id.header,
7927 &sample, event);
7928 ret = perf_output_begin(&handle, event,
7929 ksymbol_event->event_id.header.size);
7930 if (ret)
7931 return;
7932
7933 perf_output_put(&handle, ksymbol_event->event_id);
7934 __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
7935 perf_event__output_id_sample(event, &handle, &sample);
7936
7937 perf_output_end(&handle);
7938 }
7939
7940 void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
7941 const char *sym)
7942 {
7943 struct perf_ksymbol_event ksymbol_event;
7944 char name[KSYM_NAME_LEN];
7945 u16 flags = 0;
7946 int name_len;
7947
7948 if (!atomic_read(&nr_ksymbol_events))
7949 return;
7950
7951 if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
7952 ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
7953 goto err;
7954
7955 strlcpy(name, sym, KSYM_NAME_LEN);
7956 name_len = strlen(name) + 1;
7957 while (!IS_ALIGNED(name_len, sizeof(u64)))
7958 name[name_len++] = '\0';
7959 BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
7960
7961 if (unregister)
7962 flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
7963
7964 ksymbol_event = (struct perf_ksymbol_event){
7965 .name = name,
7966 .name_len = name_len,
7967 .event_id = {
7968 .header = {
7969 .type = PERF_RECORD_KSYMBOL,
7970 .size = sizeof(ksymbol_event.event_id) +
7971 name_len,
7972 },
7973 .addr = addr,
7974 .len = len,
7975 .ksym_type = ksym_type,
7976 .flags = flags,
7977 },
7978 };
7979
7980 perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
7981 return;
7982 err:
7983 WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
7984 }
7985
7986 /*
7987  * bpf program load/unload tracking (PERF_RECORD_BPF_EVENT)
7988  */
7989
7990 struct perf_bpf_event {
7991 struct bpf_prog *prog;
7992 struct {
7993 struct perf_event_header header;
7994 u16 type;
7995 u16 flags;
7996 u32 id;
7997 u8 tag[BPF_TAG_SIZE];
7998 } event_id;
7999 };
8000
8001 static int perf_event_bpf_match(struct perf_event *event)
8002 {
8003 return event->attr.bpf_event;
8004 }
8005
8006 static void perf_event_bpf_output(struct perf_event *event, void *data)
8007 {
8008 struct perf_bpf_event *bpf_event = data;
8009 struct perf_output_handle handle;
8010 struct perf_sample_data sample;
8011 int ret;
8012
8013 if (!perf_event_bpf_match(event))
8014 return;
8015
8016 perf_event_header__init_id(&bpf_event->event_id.header,
8017 &sample, event);
8018 ret = perf_output_begin(&handle, event,
8019 bpf_event->event_id.header.size);
8020 if (ret)
8021 return;
8022
8023 perf_output_put(&handle, bpf_event->event_id);
8024 perf_event__output_id_sample(event, &handle, &sample);
8025
8026 perf_output_end(&handle);
8027 }
8028
8029 static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
8030 enum perf_bpf_event_type type)
8031 {
8032 bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
8033 char sym[KSYM_NAME_LEN];
8034 int i;
8035
8036 if (prog->aux->func_cnt == 0) {
8037 bpf_get_prog_name(prog, sym);
8038 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
8039 (u64)(unsigned long)prog->bpf_func,
8040 prog->jited_len, unregister, sym);
8041 } else {
8042 for (i = 0; i < prog->aux->func_cnt; i++) {
8043 struct bpf_prog *subprog = prog->aux->func[i];
8044
8045 bpf_get_prog_name(subprog, sym);
8046 perf_event_ksymbol(
8047 PERF_RECORD_KSYMBOL_TYPE_BPF,
8048 (u64)(unsigned long)subprog->bpf_func,
8049 subprog->jited_len, unregister, sym);
8050 }
8051 }
8052 }
8053
8054 void perf_event_bpf_event(struct bpf_prog *prog,
8055 enum perf_bpf_event_type type,
8056 u16 flags)
8057 {
8058 struct perf_bpf_event bpf_event;
8059
8060 if (type <= PERF_BPF_EVENT_UNKNOWN ||
8061 type >= PERF_BPF_EVENT_MAX)
8062 return;
8063
8064 switch (type) {
8065 case PERF_BPF_EVENT_PROG_LOAD:
8066 case PERF_BPF_EVENT_PROG_UNLOAD:
8067 if (atomic_read(&nr_ksymbol_events))
8068 perf_event_bpf_emit_ksymbols(prog, type);
8069 break;
8070 default:
8071 break;
8072 }
8073
8074 if (!atomic_read(&nr_bpf_events))
8075 return;
8076
8077 bpf_event = (struct perf_bpf_event){
8078 .prog = prog,
8079 .event_id = {
8080 .header = {
8081 .type = PERF_RECORD_BPF_EVENT,
8082 .size = sizeof(bpf_event.event_id),
8083 },
8084 .type = type,
8085 .flags = flags,
8086 .id = prog->aux->id,
8087 },
8088 };
8089
8090 BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
8091
8092 memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
8093 perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
8094 }
8095
8096 void perf_event_itrace_started(struct perf_event *event)
8097 {
8098 event->attach_state |= PERF_ATTACH_ITRACE;
8099 }
8100
8101 static void perf_log_itrace_start(struct perf_event *event)
8102 {
8103 struct perf_output_handle handle;
8104 struct perf_sample_data sample;
8105 struct perf_aux_event {
8106 struct perf_event_header header;
8107 u32 pid;
8108 u32 tid;
8109 } rec;
8110 int ret;
8111
8112 if (event->parent)
8113 event = event->parent;
8114
8115 if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) ||
8116 event->attach_state & PERF_ATTACH_ITRACE)
8117 return;
8118
8119 rec.header.type = PERF_RECORD_ITRACE_START;
8120 rec.header.misc = 0;
8121 rec.header.size = sizeof(rec);
8122 rec.pid = perf_event_pid(event, current);
8123 rec.tid = perf_event_tid(event, current);
8124
8125 perf_event_header__init_id(&rec.header, &sample, event);
8126 ret = perf_output_begin(&handle, event, rec.header.size);
8127
8128 if (ret)
8129 return;
8130
8131 perf_output_put(&handle, rec);
8132 perf_event__output_id_sample(event, &handle, &sample);
8133
8134 perf_output_end(&handle);
8135 }
8136
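/*
 * hwc->interrupts counts overflows within the current perf_throttled_seq
 * "tick"; once it reaches max_samples_per_tick (and throttling was
 * requested) the event is marked MAX_INTERRUPTS, a PERF_RECORD_THROTTLE
 * record is emitted and 1 is returned to tell the caller the event is now
 * throttled.  For freq-based events the sampling period is also
 * re-adjusted from the time elapsed since the previous overflow.
 */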
8137 static int
8138 __perf_event_account_interrupt(struct perf_event *event, int throttle)
8139 {
8140 struct hw_perf_event *hwc = &event->hw;
8141 int ret = 0;
8142 u64 seq;
8143
8144 seq = __this_cpu_read(perf_throttled_seq);
8145 if (seq != hwc->interrupts_seq) {
8146 hwc->interrupts_seq = seq;
8147 hwc->interrupts = 1;
8148 } else {
8149 hwc->interrupts++;
8150 if (unlikely(throttle
8151 && hwc->interrupts >= max_samples_per_tick)) {
8152 __this_cpu_inc(perf_throttled_count);
8153 tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
8154 hwc->interrupts = MAX_INTERRUPTS;
8155 perf_log_throttle(event, 0);
8156 ret = 1;
8157 }
8158 }
8159
8160 if (event->attr.freq) {
8161 u64 now = perf_clock();
8162 s64 delta = now - hwc->freq_time_stamp;
8163
8164 hwc->freq_time_stamp = now;
8165
8166 if (delta > 0 && delta < 2*TICK_NSEC)
8167 perf_adjust_period(event, delta, hwc->last_period, true);
8168 }
8169
8170 return ret;
8171 }
8172
8173 int perf_event_account_interrupt(struct perf_event *event)
8174 {
8175 return __perf_event_account_interrupt(event, 1);
8176 }
8177
8178 /*
8179  * Generic event overflow handling, sampling.
8180  */
8181
8182 static int __perf_event_overflow(struct perf_event *event,
8183 int throttle, struct perf_sample_data *data,
8184 struct pt_regs *regs)
8185 {
8186 int events = atomic_read(&event->event_limit);
8187 int ret = 0;
8188
8189 /*
8190  * Non-sampling counters might still use the PMI to fold short
8191  * hardware counters, ignore those.
8192  */
8193 if (unlikely(!is_sampling_event(event)))
8194 return 0;
8195
8196 ret = __perf_event_account_interrupt(event, throttle);
8197
8198 /*
8199  * If a refresh/event limit is in effect (see perf_event_refresh()),
8200  * count it down and disable the event once the limit is
8201  * exhausted.
8202  */
8203 event->pending_kill = POLL_IN;
8204 if (events && atomic_dec_and_test(&event->event_limit)) {
8205 ret = 1;
8206 event->pending_kill = POLL_HUP;
8207
8208 perf_event_disable_inatomic(event);
8209 }
8210
8211 READ_ONCE(event->overflow_handler)(event, data, regs);
8212
8213 if (*perf_event_fasync(event) && event->pending_kill) {
8214 event->pending_wakeup = 1;
8215 irq_work_queue(&event->pending);
8216 }
8217
8218 return ret;
8219 }
8220
8221 int perf_event_overflow(struct perf_event *event,
8222 struct perf_sample_data *data,
8223 struct pt_regs *regs)
8224 {
8225 return __perf_event_overflow(event, 1, data, regs);
8226 }
8227
8228 /*
8229  * Generic software event infrastructure
8230  */
8231
8232 struct swevent_htable {
8233 struct swevent_hlist *swevent_hlist;
8234 struct mutex hlist_mutex;
8235 int hlist_refcount;
8236
8237 /* Recursion avoidance in each context */
8238 int recursion[PERF_NR_CONTEXTS];
8239 };
8240
8241 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
8242
8243 /*
8244  * We directly increment event->count and keep a second value in
8245  * event->hw.period_left to count intervals. This period event
8246  * is kept in the range [-sample_period, 0] so that we can use the
8247  * sign as trigger.
8248  */
8249
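/*
 * For example (hypothetical values): with a period of 100 and
 * period_left == 30, one period boundary has been crossed, so the
 * function below returns 1 and resets period_left to -70, i.e. 70
 * more counts until the next sample.
 */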
8250 u64 perf_swevent_set_period(struct perf_event *event)
8251 {
8252 struct hw_perf_event *hwc = &event->hw;
8253 u64 period = hwc->last_period;
8254 u64 nr, offset;
8255 s64 old, val;
8256
8257 hwc->last_period = hwc->sample_period;
8258
8259 again:
8260 old = val = local64_read(&hwc->period_left);
8261 if (val < 0)
8262 return 0;
8263
8264 nr = div64_u64(period + val, period);
8265 offset = nr * period;
8266 val -= offset;
8267 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
8268 goto again;
8269
8270 return nr;
8271 }
8272
8273 static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
8274 struct perf_sample_data *data,
8275 struct pt_regs *regs)
8276 {
8277 struct hw_perf_event *hwc = &event->hw;
8278 int throttle = 0;
8279
8280 if (!overflow)
8281 overflow = perf_swevent_set_period(event);
8282
8283 if (hwc->interrupts == MAX_INTERRUPTS)
8284 return;
8285
8286 for (; overflow; overflow--) {
8287 if (__perf_event_overflow(event, throttle,
8288 data, regs)) {
8289 /*
8290  * Non-zero return: the event got throttled or hit its
8291  * limit; stop processing further overflows for now.
8292  */
8293 break;
8294 }
8295 throttle = 1;
8296 }
8297 }
8298
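/*
 * perf_swevent_event() adds @nr to the count and decides whether this
 * constitutes a sample: events that request PERF_SAMPLE_PERIOD with a
 * fixed period, or that use sample_period == 1, overflow on every hit;
 * otherwise @nr is accumulated into period_left and overflows are only
 * generated once the period has been consumed.
 */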
8299 static void perf_swevent_event(struct perf_event *event, u64 nr,
8300 struct perf_sample_data *data,
8301 struct pt_regs *regs)
8302 {
8303 struct hw_perf_event *hwc = &event->hw;
8304
8305 local64_add(nr, &event->count);
8306
8307 if (!regs)
8308 return;
8309
8310 if (!is_sampling_event(event))
8311 return;
8312
8313 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
8314 data->period = nr;
8315 return perf_swevent_overflow(event, 1, data, regs);
8316 } else
8317 data->period = event->hw.last_period;
8318
8319 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
8320 return perf_swevent_overflow(event, 1, data, regs);
8321
8322 if (local64_add_negative(nr, &hwc->period_left))
8323 return;
8324
8325 perf_swevent_overflow(event, 0, data, regs);
8326 }
8327
8328 static int perf_exclude_event(struct perf_event *event,
8329 struct pt_regs *regs)
8330 {
8331 if (event->hw.state & PERF_HES_STOPPED)
8332 return 1;
8333
8334 if (regs) {
8335 if (event->attr.exclude_user && user_mode(regs))
8336 return 1;
8337
8338 if (event->attr.exclude_kernel && !user_mode(regs))
8339 return 1;
8340 }
8341
8342 return 0;
8343 }
8344
8345 static int perf_swevent_match(struct perf_event *event,
8346 enum perf_type_id type,
8347 u32 event_id,
8348 struct perf_sample_data *data,
8349 struct pt_regs *regs)
8350 {
8351 if (event->attr.type != type)
8352 return 0;
8353
8354 if (event->attr.config != event_id)
8355 return 0;
8356
8357 if (perf_exclude_event(event, regs))
8358 return 0;
8359
8360 return 1;
8361 }
8362
8363 static inline u64 swevent_hash(u64 type, u32 event_id)
8364 {
8365 u64 val = event_id | (type << 32);
8366
8367 return hash_64(val, SWEVENT_HLIST_BITS);
8368 }
8369
8370 static inline struct hlist_head *
8371 __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
8372 {
8373 u64 hash = swevent_hash(type, event_id);
8374
8375 return &hlist->heads[hash];
8376 }
8377
8378
8379 static inline struct hlist_head *
8380 find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
8381 {
8382 struct swevent_hlist *hlist;
8383
8384 hlist = rcu_dereference(swhash->swevent_hlist);
8385 if (!hlist)
8386 return NULL;
8387
8388 return __find_swevent_head(hlist, type, event_id);
8389 }
8390
8391
8392 static inline struct hlist_head *
8393 find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
8394 {
8395 struct swevent_hlist *hlist;
8396 u32 event_id = event->attr.config;
8397 u64 type = event->attr.type;
8398
8399 /*
8400  * Event scheduling is always serialized against hlist allocation
8401  * and release; the protected dereference below is therefore safe,
8402  * as guaranteed by holding the context lock.
8403  */
8404 hlist = rcu_dereference_protected(swhash->swevent_hlist,
8405 lockdep_is_held(&event->ctx->lock));
8406 if (!hlist)
8407 return NULL;
8408
8409 return __find_swevent_head(hlist, type, event_id);
8410 }
8411
8412 static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
8413 u64 nr,
8414 struct perf_sample_data *data,
8415 struct pt_regs *regs)
8416 {
8417 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
8418 struct perf_event *event;
8419 struct hlist_head *head;
8420
8421 rcu_read_lock();
8422 head = find_swevent_head_rcu(swhash, type, event_id);
8423 if (!head)
8424 goto end;
8425
8426 hlist_for_each_entry_rcu(event, head, hlist_entry) {
8427 if (perf_swevent_match(event, type, event_id, data, regs))
8428 perf_swevent_event(event, nr, data, regs);
8429 }
8430 end:
8431 rcu_read_unlock();
8432 }
8433
8434 DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
8435
8436 int perf_swevent_get_recursion_context(void)
8437 {
8438 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
8439
8440 return get_recursion_context(swhash->recursion);
8441 }
8442 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
8443
8444 void perf_swevent_put_recursion_context(int rctx)
8445 {
8446 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
8447
8448 put_recursion_context(swhash->recursion, rctx);
8449 }
8450
8451 void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
8452 {
8453 struct perf_sample_data data;
8454
8455 if (WARN_ON_ONCE(!regs))
8456 return;
8457
8458 perf_sample_data_init(&data, addr, 0);
8459 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
8460 }
8461
8462 void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
8463 {
8464 int rctx;
8465
8466 preempt_disable_notrace();
8467 rctx = perf_swevent_get_recursion_context();
8468 if (unlikely(rctx < 0))
8469 goto fail;
8470
8471 ___perf_sw_event(event_id, nr, regs, addr);
8472
8473 perf_swevent_put_recursion_context(rctx);
8474 fail:
8475 preempt_enable_notrace();
8476 }
8477
8478 static void perf_swevent_read(struct perf_event *event)
8479 {
8480 }
8481
8482 static int perf_swevent_add(struct perf_event *event, int flags)
8483 {
8484 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
8485 struct hw_perf_event *hwc = &event->hw;
8486 struct hlist_head *head;
8487
8488 if (is_sampling_event(event)) {
8489 hwc->last_period = hwc->sample_period;
8490 perf_swevent_set_period(event);
8491 }
8492
8493 hwc->state = !(flags & PERF_EF_START);
8494
8495 head = find_swevent_head(swhash, event);
8496 if (WARN_ON_ONCE(!head))
8497 return -EINVAL;
8498
8499 hlist_add_head_rcu(&event->hlist_entry, head);
8500 perf_event_update_userpage(event);
8501
8502 return 0;
8503 }
8504
8505 static void perf_swevent_del(struct perf_event *event, int flags)
8506 {
8507 hlist_del_rcu(&event->hlist_entry);
8508 }
8509
8510 static void perf_swevent_start(struct perf_event *event, int flags)
8511 {
8512 event->hw.state = 0;
8513 }
8514
8515 static void perf_swevent_stop(struct perf_event *event, int flags)
8516 {
8517 event->hw.state = PERF_HES_STOPPED;
8518 }
8519
8520
8521 static inline struct swevent_hlist *
8522 swevent_hlist_deref(struct swevent_htable *swhash)
8523 {
8524 return rcu_dereference_protected(swhash->swevent_hlist,
8525 lockdep_is_held(&swhash->hlist_mutex));
8526 }
8527
8528 static void swevent_hlist_release(struct swevent_htable *swhash)
8529 {
8530 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
8531
8532 if (!hlist)
8533 return;
8534
8535 RCU_INIT_POINTER(swhash->swevent_hlist, NULL);
8536 kfree_rcu(hlist, rcu_head);
8537 }
8538
8539 static void swevent_hlist_put_cpu(int cpu)
8540 {
8541 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
8542
8543 mutex_lock(&swhash->hlist_mutex);
8544
8545 if (!--swhash->hlist_refcount)
8546 swevent_hlist_release(swhash);
8547
8548 mutex_unlock(&swhash->hlist_mutex);
8549 }
8550
8551 static void swevent_hlist_put(void)
8552 {
8553 int cpu;
8554
8555 for_each_possible_cpu(cpu)
8556 swevent_hlist_put_cpu(cpu);
8557 }
8558
8559 static int swevent_hlist_get_cpu(int cpu)
8560 {
8561 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
8562 int err = 0;
8563
8564 mutex_lock(&swhash->hlist_mutex);
8565 if (!swevent_hlist_deref(swhash) &&
8566 cpumask_test_cpu(cpu, perf_online_mask)) {
8567 struct swevent_hlist *hlist;
8568
8569 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
8570 if (!hlist) {
8571 err = -ENOMEM;
8572 goto exit;
8573 }
8574 rcu_assign_pointer(swhash->swevent_hlist, hlist);
8575 }
8576 swhash->hlist_refcount++;
8577 exit:
8578 mutex_unlock(&swhash->hlist_mutex);
8579
8580 return err;
8581 }
8582
8583 static int swevent_hlist_get(void)
8584 {
8585 int err, cpu, failed_cpu;
8586
8587 mutex_lock(&pmus_lock);
8588 for_each_possible_cpu(cpu) {
8589 err = swevent_hlist_get_cpu(cpu);
8590 if (err) {
8591 failed_cpu = cpu;
8592 goto fail;
8593 }
8594 }
8595 mutex_unlock(&pmus_lock);
8596 return 0;
8597 fail:
8598 for_each_possible_cpu(cpu) {
8599 if (cpu == failed_cpu)
8600 break;
8601 swevent_hlist_put_cpu(cpu);
8602 }
8603 mutex_unlock(&pmus_lock);
8604 return err;
8605 }
8606
8607 struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
8608
8609 static void sw_perf_event_destroy(struct perf_event *event)
8610 {
8611 u64 event_id = event->attr.config;
8612
8613 WARN_ON(event->parent);
8614
8615 static_key_slow_dec(&perf_swevent_enabled[event_id]);
8616 swevent_hlist_put();
8617 }
8618
8619 static int perf_swevent_init(struct perf_event *event)
8620 {
8621 u64 event_id = event->attr.config;
8622
8623 if (event->attr.type != PERF_TYPE_SOFTWARE)
8624 return -ENOENT;
8625
8626 /*
8627  * no branch sampling for software events
8628  */
8629 if (has_branch_stack(event))
8630 return -EOPNOTSUPP;
8631
8632 switch (event_id) {
8633 case PERF_COUNT_SW_CPU_CLOCK:
8634 case PERF_COUNT_SW_TASK_CLOCK:
8635 return -ENOENT;
8636
8637 default:
8638 break;
8639 }
8640
8641 if (event_id >= PERF_COUNT_SW_MAX)
8642 return -ENOENT;
8643
8644 if (!event->parent) {
8645 int err;
8646
8647 err = swevent_hlist_get();
8648 if (err)
8649 return err;
8650
8651 static_key_slow_inc(&perf_swevent_enabled[event_id]);
8652 event->destroy = sw_perf_event_destroy;
8653 }
8654
8655 return 0;
8656 }
8657
8658 static struct pmu perf_swevent = {
8659 .task_ctx_nr = perf_sw_context,
8660
8661 .capabilities = PERF_PMU_CAP_NO_NMI,
8662
8663 .event_init = perf_swevent_init,
8664 .add = perf_swevent_add,
8665 .del = perf_swevent_del,
8666 .start = perf_swevent_start,
8667 .stop = perf_swevent_stop,
8668 .read = perf_swevent_read,
8669 };
8670
8671 #ifdef CONFIG_EVENT_TRACING
8672
8673 static int perf_tp_filter_match(struct perf_event *event,
8674 struct perf_sample_data *data)
8675 {
8676 void *record = data->raw->frag.data;
8677
8678 /* only top level events have filters set */
8679 if (event->parent)
8680 event = event->parent;
8681
8682 if (likely(!event->filter) || filter_match_preds(event->filter, record))
8683 return 1;
8684 return 0;
8685 }
8686
8687 static int perf_tp_event_match(struct perf_event *event,
8688 struct perf_sample_data *data,
8689 struct pt_regs *regs)
8690 {
8691 if (event->hw.state & PERF_HES_STOPPED)
8692 return 0;
8693
8694
8695
8696 if (event->attr.exclude_kernel && !user_mode(regs))
8697 return 0;
8698
8699 if (!perf_tp_filter_match(event, data))
8700 return 0;
8701
8702 return 1;
8703 }
8704
8705 void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
8706 struct trace_event_call *call, u64 count,
8707 struct pt_regs *regs, struct hlist_head *head,
8708 struct task_struct *task)
8709 {
8710 if (bpf_prog_array_valid(call)) {
8711 *(struct pt_regs **)raw_data = regs;
8712 if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
8713 perf_swevent_put_recursion_context(rctx);
8714 return;
8715 }
8716 }
8717 perf_tp_event(call->event.type, count, raw_data, size, regs, head,
8718 rctx, task);
8719 }
8720 EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
8721
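/*
 * perf_tp_event() delivers a tracepoint hit to every matching event on
 * @head and, when a target @task is given, also to matching tracepoint
 * events in that task's software context.  The swevent recursion context
 * acquired by the caller is released here via
 * perf_swevent_put_recursion_context(rctx).
 */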
8722 void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
8723 struct pt_regs *regs, struct hlist_head *head, int rctx,
8724 struct task_struct *task)
8725 {
8726 struct perf_sample_data data;
8727 struct perf_event *event;
8728
8729 struct perf_raw_record raw = {
8730 .frag = {
8731 .size = entry_size,
8732 .data = record,
8733 },
8734 };
8735
8736 perf_sample_data_init(&data, 0, 0);
8737 data.raw = &raw;
8738
8739 perf_trace_buf_update(record, event_type);
8740
8741 hlist_for_each_entry_rcu(event, head, hlist_entry) {
8742 if (perf_tp_event_match(event, &data, regs))
8743 perf_swevent_event(event, count, &data, regs);
8744 }
8745
8746 /*
8747  * If a target task was specified, also iterate that task's
8748  * context and deliver the event there too.
8749  */
8750 if (task && task != current) {
8751 struct perf_event_context *ctx;
8752 struct trace_entry *entry = record;
8753
8754 rcu_read_lock();
8755 ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
8756 if (!ctx)
8757 goto unlock;
8758
8759 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
8760 if (event->cpu != smp_processor_id())
8761 continue;
8762 if (event->attr.type != PERF_TYPE_TRACEPOINT)
8763 continue;
8764 if (event->attr.config != entry->type)
8765 continue;
8766 if (perf_tp_event_match(event, &data, regs))
8767 perf_swevent_event(event, count, &data, regs);
8768 }
8769 unlock:
8770 rcu_read_unlock();
8771 }
8772
8773 perf_swevent_put_recursion_context(rctx);
8774 }
8775 EXPORT_SYMBOL_GPL(perf_tp_event);
8776
8777 static void tp_perf_event_destroy(struct perf_event *event)
8778 {
8779 perf_trace_destroy(event);
8780 }
8781
8782 static int perf_tp_event_init(struct perf_event *event)
8783 {
8784 int err;
8785
8786 if (event->attr.type != PERF_TYPE_TRACEPOINT)
8787 return -ENOENT;
8788
8789 /*
8790  * no branch sampling for tracepoint events
8791  */
8792 if (has_branch_stack(event))
8793 return -EOPNOTSUPP;
8794
8795 err = perf_trace_init(event);
8796 if (err)
8797 return err;
8798
8799 event->destroy = tp_perf_event_destroy;
8800
8801 return 0;
8802 }
8803
8804 static struct pmu perf_tracepoint = {
8805 .task_ctx_nr = perf_sw_context,
8806
8807 .event_init = perf_tp_event_init,
8808 .add = perf_trace_add,
8809 .del = perf_trace_del,
8810 .start = perf_swevent_start,
8811 .stop = perf_swevent_stop,
8812 .read = perf_swevent_read,
8813 };
8814
8815 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
8816 /*
8817  * The dynamic kprobe/uprobe PMUs encode their parameters in
8818  * attr::config; the layout matches the PMU_FORMAT_ATTR()
8819  * definitions below:
8820  *
8821  *   bit 0        PERF_PROBE_CONFIG_IS_RETPROBE
8822  *                if set, create a kretprobe/uretprobe instead of a
8823  *                kprobe/uprobe
8824  *
8825  *   bits 32-63   PERF_UPROBE_REF_CTR_OFFSET (uprobes only)
8826  *                offset of the reference counter (USDT semaphore)
8827  *                within the probed object
8828  */
8829
8830 enum perf_probe_config {
8831 PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,
8832 PERF_UPROBE_REF_CTR_OFFSET_BITS = 32,
8833 PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
8834 };
8835
8836 PMU_FORMAT_ATTR(retprobe, "config:0");
8837 #endif
8838
8839 #ifdef CONFIG_KPROBE_EVENTS
8840 static struct attribute *kprobe_attrs[] = {
8841 &format_attr_retprobe.attr,
8842 NULL,
8843 };
8844
8845 static struct attribute_group kprobe_format_group = {
8846 .name = "format",
8847 .attrs = kprobe_attrs,
8848 };
8849
8850 static const struct attribute_group *kprobe_attr_groups[] = {
8851 &kprobe_format_group,
8852 NULL,
8853 };
8854
8855 static int perf_kprobe_event_init(struct perf_event *event);
8856 static struct pmu perf_kprobe = {
8857 .task_ctx_nr = perf_sw_context,
8858 .event_init = perf_kprobe_event_init,
8859 .add = perf_trace_add,
8860 .del = perf_trace_del,
8861 .start = perf_swevent_start,
8862 .stop = perf_swevent_stop,
8863 .read = perf_swevent_read,
8864 .attr_groups = kprobe_attr_groups,
8865 };
8866
8867 static int perf_kprobe_event_init(struct perf_event *event)
8868 {
8869 int err;
8870 bool is_retprobe;
8871
8872 if (event->attr.type != perf_kprobe.type)
8873 return -ENOENT;
8874
8875 if (!capable(CAP_SYS_ADMIN))
8876 return -EACCES;
8877
8878
8879
8880
8881 if (has_branch_stack(event))
8882 return -EOPNOTSUPP;
8883
8884 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
8885 err = perf_kprobe_init(event, is_retprobe);
8886 if (err)
8887 return err;
8888
8889 event->destroy = perf_kprobe_destroy;
8890
8891 return 0;
8892 }
8893 #endif
8894
8895 #ifdef CONFIG_UPROBE_EVENTS
8896 PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
8897
8898 static struct attribute *uprobe_attrs[] = {
8899 &format_attr_retprobe.attr,
8900 &format_attr_ref_ctr_offset.attr,
8901 NULL,
8902 };
8903
8904 static struct attribute_group uprobe_format_group = {
8905 .name = "format",
8906 .attrs = uprobe_attrs,
8907 };
8908
8909 static const struct attribute_group *uprobe_attr_groups[] = {
8910 &uprobe_format_group,
8911 NULL,
8912 };
8913
8914 static int perf_uprobe_event_init(struct perf_event *event);
8915 static struct pmu perf_uprobe = {
8916 .task_ctx_nr = perf_sw_context,
8917 .event_init = perf_uprobe_event_init,
8918 .add = perf_trace_add,
8919 .del = perf_trace_del,
8920 .start = perf_swevent_start,
8921 .stop = perf_swevent_stop,
8922 .read = perf_swevent_read,
8923 .attr_groups = uprobe_attr_groups,
8924 };
8925
8926 static int perf_uprobe_event_init(struct perf_event *event)
8927 {
8928 int err;
8929 unsigned long ref_ctr_offset;
8930 bool is_retprobe;
8931
8932 if (event->attr.type != perf_uprobe.type)
8933 return -ENOENT;
8934
8935 if (!capable(CAP_SYS_ADMIN))
8936 return -EACCES;
8937
8938
8939
8940
8941 if (has_branch_stack(event))
8942 return -EOPNOTSUPP;
8943
8944 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
8945 ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
8946 err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
8947 if (err)
8948 return err;
8949
8950 event->destroy = perf_uprobe_destroy;
8951
8952 return 0;
8953 }
8954 #endif
8955
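/*
 * The tracepoint PMU registers with the fixed PERF_TYPE_TRACEPOINT type;
 * the kprobe/uprobe PMUs pass -1, i.e. a dynamically allocated type that
 * userspace discovers through the PMU's sysfs "type" attribute (see
 * type_show() further down).
 */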
8956 static inline void perf_tp_register(void)
8957 {
8958 perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
8959 #ifdef CONFIG_KPROBE_EVENTS
8960 perf_pmu_register(&perf_kprobe, "kprobe", -1);
8961 #endif
8962 #ifdef CONFIG_UPROBE_EVENTS
8963 perf_pmu_register(&perf_uprobe, "uprobe", -1);
8964 #endif
8965 }
8966
8967 static void perf_event_free_filter(struct perf_event *event)
8968 {
8969 ftrace_profile_free_filter(event);
8970 }
8971
8972 #ifdef CONFIG_BPF_SYSCALL
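/*
 * A BPF program attached as overflow handler acts as a filter: the
 * original handler (event->orig_overflow_handler) is only invoked when
 * the program returns a non-zero value.
 */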
8973 static void bpf_overflow_handler(struct perf_event *event,
8974 struct perf_sample_data *data,
8975 struct pt_regs *regs)
8976 {
8977 struct bpf_perf_event_data_kern ctx = {
8978 .data = data,
8979 .event = event,
8980 };
8981 int ret = 0;
8982
8983 ctx.regs = perf_arch_bpf_user_pt_regs(regs);
8984 preempt_disable();
8985 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
8986 goto out;
8987 rcu_read_lock();
8988 ret = BPF_PROG_RUN(event->prog, &ctx);
8989 rcu_read_unlock();
8990 out:
8991 __this_cpu_dec(bpf_prog_active);
8992 preempt_enable();
8993 if (!ret)
8994 return;
8995
8996 event->orig_overflow_handler(event, data, regs);
8997 }
8998
8999 static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
9000 {
9001 struct bpf_prog *prog;
9002
9003 if (event->overflow_handler_context)
9004 /* hw breakpoint or kernel counter */
9005 return -EINVAL;
9006
9007 if (event->prog)
9008 return -EEXIST;
9009
9010 prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
9011 if (IS_ERR(prog))
9012 return PTR_ERR(prog);
9013
9014 event->prog = prog;
9015 event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
9016 WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
9017 return 0;
9018 }
9019
9020 static void perf_event_free_bpf_handler(struct perf_event *event)
9021 {
9022 struct bpf_prog *prog = event->prog;
9023
9024 if (!prog)
9025 return;
9026
9027 WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
9028 event->prog = NULL;
9029 bpf_prog_put(prog);
9030 }
9031 #else
9032 static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
9033 {
9034 return -EOPNOTSUPP;
9035 }
9036 static void perf_event_free_bpf_handler(struct perf_event *event)
9037 {
9038 }
9039 #endif
9040
9041 /*
9042  * Returns true if the event is a tracepoint, or a kprobe/uprobe
9043  * created via perf_event_open().
9044  */
9045 static inline bool perf_event_is_tracing(struct perf_event *event)
9046 {
9047 if (event->pmu == &perf_tracepoint)
9048 return true;
9049 #ifdef CONFIG_KPROBE_EVENTS
9050 if (event->pmu == &perf_kprobe)
9051 return true;
9052 #endif
9053 #ifdef CONFIG_UPROBE_EVENTS
9054 if (event->pmu == &perf_uprobe)
9055 return true;
9056 #endif
9057 return false;
9058 }
9059
9060 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
9061 {
9062 bool is_kprobe, is_tracepoint, is_syscall_tp;
9063 struct bpf_prog *prog;
9064 int ret;
9065
9066 if (!perf_event_is_tracing(event))
9067 return perf_event_set_bpf_handler(event, prog_fd);
9068
9069 is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
9070 is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
9071 is_syscall_tp = is_syscall_trace_event(event->tp_event);
9072 if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
9073 /* bpf programs can only be attached to u/kprobes or tracepoints */
9074 return -EINVAL;
9075
9076 prog = bpf_prog_get(prog_fd);
9077 if (IS_ERR(prog))
9078 return PTR_ERR(prog);
9079
9080 if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
9081 (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
9082 (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
9083 /* valid fd, but wrong bpf program type for this event */
9084 bpf_prog_put(prog);
9085 return -EINVAL;
9086 }
9087
9088 /* Kprobe override only works for kprobes, not uprobes. */
9089 if (prog->kprobe_override &&
9090 !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
9091 bpf_prog_put(prog);
9092 return -EINVAL;
9093 }
9094
9095 if (is_tracepoint || is_syscall_tp) {
9096 int off = trace_event_get_offsets(event->tp_event);
9097
9098 if (prog->aux->max_ctx_offset > off) {
9099 bpf_prog_put(prog);
9100 return -EACCES;
9101 }
9102 }
9103
9104 ret = perf_event_attach_bpf_prog(event, prog);
9105 if (ret)
9106 bpf_prog_put(prog);
9107 return ret;
9108 }
9109
9110 static void perf_event_free_bpf_prog(struct perf_event *event)
9111 {
9112 if (!perf_event_is_tracing(event)) {
9113 perf_event_free_bpf_handler(event);
9114 return;
9115 }
9116 perf_event_detach_bpf_prog(event);
9117 }
9118
9119 #else
9120
9121 static inline void perf_tp_register(void)
9122 {
9123 }
9124
9125 static void perf_event_free_filter(struct perf_event *event)
9126 {
9127 }
9128
9129 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
9130 {
9131 return -ENOENT;
9132 }
9133
9134 static void perf_event_free_bpf_prog(struct perf_event *event)
9135 {
9136 }
9137 #endif
9138
9139 #ifdef CONFIG_HAVE_HW_BREAKPOINT
9140 void perf_bp_event(struct perf_event *bp, void *data)
9141 {
9142 struct perf_sample_data sample;
9143 struct pt_regs *regs = data;
9144
9145 perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
9146
9147 if (!bp->hw.state && !perf_exclude_event(bp, regs))
9148 perf_swevent_event(bp, 1, &sample, regs);
9149 }
9150 #endif
9151
9152 /*
9153  * Allocate a new address filter and link it onto @filters.
9154  */
9155 static struct perf_addr_filter *
9156 perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
9157 {
9158 int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu);
9159 struct perf_addr_filter *filter;
9160
9161 filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node);
9162 if (!filter)
9163 return NULL;
9164
9165 INIT_LIST_HEAD(&filter->entry);
9166 list_add_tail(&filter->entry, filters);
9167
9168 return filter;
9169 }
9170
9171 static void free_filters_list(struct list_head *filters)
9172 {
9173 struct perf_addr_filter *filter, *iter;
9174
9175 list_for_each_entry_safe(filter, iter, filters, entry) {
9176 path_put(&filter->path);
9177 list_del(&filter->entry);
9178 kfree(filter);
9179 }
9180 }
9181
9182 /*
9183  * Free existing address filters and optionally install new ones.
9184  */
9185 static void perf_addr_filters_splice(struct perf_event *event,
9186 struct list_head *head)
9187 {
9188 unsigned long flags;
9189 LIST_HEAD(list);
9190
9191 if (!has_addr_filter(event))
9192 return;
9193
9194 /* don't bother with children, they don't have their own filters */
9195 if (event->parent)
9196 return;
9197
9198 raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
9199
9200 list_splice_init(&event->addr_filters.list, &list);
9201 if (head)
9202 list_splice(head, &event->addr_filters.list);
9203
9204 raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
9205
9206 free_filters_list(&list);
9207 }
9208
9209 /*
9210  * Scan through mm's vmas and see if one of them matches @filter;
9211  * if so, adjust the filter's address range.
9212  * Called with mm->mmap_sem held for reading.
9213  */
9214 static void perf_addr_filter_apply(struct perf_addr_filter *filter,
9215 struct mm_struct *mm,
9216 struct perf_addr_filter_range *fr)
9217 {
9218 struct vm_area_struct *vma;
9219
9220 for (vma = mm->mmap; vma; vma = vma->vm_next) {
9221 if (!vma->vm_file)
9222 continue;
9223
9224 if (perf_addr_filter_vma_adjust(filter, vma, fr))
9225 return;
9226 }
9227 }
9228
9229 /*
9230  * Update the event's address range filters based on the
9231  * task's existing mappings, if any.
9232  */
9233 static void perf_event_addr_filters_apply(struct perf_event *event)
9234 {
9235 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
9236 struct task_struct *task = READ_ONCE(event->ctx->task);
9237 struct perf_addr_filter *filter;
9238 struct mm_struct *mm = NULL;
9239 unsigned int count = 0;
9240 unsigned long flags;
9241
9242 /*
9243  * A TASK_TOMBSTONE task means the event is being torn down;
9244  * there is nothing left to do in that case.
9245  */
9246 if (task == TASK_TOMBSTONE)
9247 return;
9248
9249 if (ifh->nr_file_filters) {
9250 mm = get_task_mm(event->ctx->task);
9251 if (!mm)
9252 goto restart;
9253
9254 down_read(&mm->mmap_sem);
9255 }
9256
9257 raw_spin_lock_irqsave(&ifh->lock, flags);
9258 list_for_each_entry(filter, &ifh->list, entry) {
9259 if (filter->path.dentry) {
9260 /*
9261  * File-based filter: reset the range, then recompute it
9262  * from the task's current mapping of the object, if any.
9263  */
9264 event->addr_filter_ranges[count].start = 0;
9265 event->addr_filter_ranges[count].size = 0;
9266
9267 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
9268 } else {
9269 event->addr_filter_ranges[count].start = filter->offset;
9270 event->addr_filter_ranges[count].size = filter->size;
9271 }
9272
9273 count++;
9274 }
9275
9276 event->addr_filters_gen++;
9277 raw_spin_unlock_irqrestore(&ifh->lock, flags);
9278
9279 if (ifh->nr_file_filters) {
9280 up_read(&mm->mmap_sem);
9281
9282 mmput(mm);
9283 }
9284
9285 restart:
9286 perf_event_stop(event, 1);
9287 }
9288
9289 /*
9290  * Address range filtering: limiting the data to certain
9291  * instruction address ranges. Filters are ioctl()ed to us from
9292  * userspace as ascii strings.
9293  *
9294  * Filter string format:
9295  *
9296  * ACTION RANGE_SPEC
9297  * where ACTION is one of the
9298  *  * "filter": limit the trace to this region
9299  *  * "start": start tracing from this address
9300  *  * "stop": stop tracing at this address/region;
9301  * RANGE_SPEC is
9302  *  * for kernel addresses: <start address>[/<size>]
9303  *  * for object files:     <start address>[/<size>]@</path/to/object/file>
9304  *
9305  * if <size> is not specified or is zero, the range is treated as a single
9306  * address; not valid for ACTION=="filter".
9307  */
9308 enum {
9309 IF_ACT_NONE = -1,
9310 IF_ACT_FILTER,
9311 IF_ACT_START,
9312 IF_ACT_STOP,
9313 IF_SRC_FILE,
9314 IF_SRC_KERNEL,
9315 IF_SRC_FILEADDR,
9316 IF_SRC_KERNELADDR,
9317 };
9318
9319 enum {
9320 IF_STATE_ACTION = 0,
9321 IF_STATE_SOURCE,
9322 IF_STATE_END,
9323 };
9324
9325 static const match_table_t if_tokens = {
9326 { IF_ACT_FILTER, "filter" },
9327 { IF_ACT_START, "start" },
9328 { IF_ACT_STOP, "stop" },
9329 { IF_SRC_FILE, "%u/%u@%s" },
9330 { IF_SRC_KERNEL, "%u/%u" },
9331 { IF_SRC_FILEADDR, "%u@%s" },
9332 { IF_SRC_KERNELADDR, "%u" },
9333 { IF_ACT_NONE, NULL },
9334 };
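/*
 * Example filter strings matching the patterns above (addresses and the
 * object path are illustrative only):
 *
 *   filter 0x1000/0x200@/usr/lib/libfoo.so   - trace 0x200 bytes of the object
 *   start 0xffffffff81000000                 - start tracing at a kernel address
 *   stop 0xffffffff81001000/0x100            - stop tracing in a kernel range
 */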
9335
9336 /*
9337  * Address filter string parser
9338  */
9339 static int
9340 perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
9341 struct list_head *filters)
9342 {
9343 struct perf_addr_filter *filter = NULL;
9344 char *start, *orig, *filename = NULL;
9345 substring_t args[MAX_OPT_ARGS];
9346 int state = IF_STATE_ACTION, token;
9347 unsigned int kernel = 0;
9348 int ret = -EINVAL;
9349
9350 orig = fstr = kstrdup(fstr, GFP_KERNEL);
9351 if (!fstr)
9352 return -ENOMEM;
9353
9354 while ((start = strsep(&fstr, " ,\n")) != NULL) {
9355 static const enum perf_addr_filter_action_t actions[] = {
9356 [IF_ACT_FILTER] = PERF_ADDR_FILTER_ACTION_FILTER,
9357 [IF_ACT_START] = PERF_ADDR_FILTER_ACTION_START,
9358 [IF_ACT_STOP] = PERF_ADDR_FILTER_ACTION_STOP,
9359 };
9360 ret = -EINVAL;
9361
9362 if (!*start)
9363 continue;
9364
9365
9366 if (state == IF_STATE_ACTION) {
9367 filter = perf_addr_filter_new(event, filters);
9368 if (!filter)
9369 goto fail;
9370 }
9371
9372 token = match_token(start, if_tokens, args);
9373 switch (token) {
9374 case IF_ACT_FILTER:
9375 case IF_ACT_START:
9376 case IF_ACT_STOP:
9377 if (state != IF_STATE_ACTION)
9378 goto fail;
9379
9380 filter->action = actions[token];
9381 state = IF_STATE_SOURCE;
9382 break;
9383
9384 case IF_SRC_KERNELADDR:
9385 case IF_SRC_KERNEL:
9386 kernel = 1;
9387 /* fall through */
9388
9389 case IF_SRC_FILEADDR:
9390 case IF_SRC_FILE:
9391 if (state != IF_STATE_SOURCE)
9392 goto fail;
9393
9394 *args[0].to = 0;
9395 ret = kstrtoul(args[0].from, 0, &filter->offset);
9396 if (ret)
9397 goto fail;
9398
9399 if (token == IF_SRC_KERNEL || token == IF_SRC_FILE) {
9400 *args[1].to = 0;
9401 ret = kstrtoul(args[1].from, 0, &filter->size);
9402 if (ret)
9403 goto fail;
9404 }
9405
9406 if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
9407 int fpos = token == IF_SRC_FILE ? 2 : 1;
9408
9409 filename = match_strdup(&args[fpos]);
9410 if (!filename) {
9411 ret = -ENOMEM;
9412 goto fail;
9413 }
9414 }
9415
9416 state = IF_STATE_END;
9417 break;
9418
9419 default:
9420 goto fail;
9421 }
9422
9423 /*
9424  * Filter definition is fully parsed, validate and install it.
9425  * Make sure that it doesn't contradict itself or the event's
9426  * attribute.
9427  */
9428 if (state == IF_STATE_END) {
9429 ret = -EINVAL;
9430 if (kernel && event->attr.exclude_kernel)
9431 goto fail;
9432
9433 /*
9434  * ACTION "filter" must have a non-zero address range
9435  * specification.
9436  */
9437 if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER &&
9438 !filter->size)
9439 goto fail;
9440
9441 if (!kernel) {
9442 if (!filename)
9443 goto fail;
9444
9445 /*
9446  * For now, file-based filters are only supported for
9447  * per-task events; supporting them for CPU-wide events
9448  * would require extra context-switch trickery, since the
9449  * same object code may be mapped at different virtual
9450  * addresses in different processes.
9451  */
9452
9453 ret = -EOPNOTSUPP;
9454 if (!event->ctx->task)
9455 goto fail_free_name;
9456
9457
9458 ret = kern_path(filename, LOOKUP_FOLLOW,
9459 &filter->path);
9460 if (ret)
9461 goto fail_free_name;
9462
9463 kfree(filename);
9464 filename = NULL;
9465
9466 ret = -EINVAL;
9467 if (!filter->path.dentry ||
9468 !S_ISREG(d_inode(filter->path.dentry)
9469 ->i_mode))
9470 goto fail;
9471
9472 event->addr_filters.nr_file_filters++;
9473 }
9474
9475
9476 state = IF_STATE_ACTION;
9477 filter = NULL;
9478 }
9479 }
9480
9481 if (state != IF_STATE_ACTION)
9482 goto fail;
9483
9484 kfree(orig);
9485
9486 return 0;
9487
9488 fail_free_name:
9489 kfree(filename);
9490 fail:
9491 free_filters_list(filters);
9492 kfree(orig);
9493
9494 return ret;
9495 }
9496
9497 static int
9498 perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
9499 {
9500 LIST_HEAD(filters);
9501 int ret;
9502
9503 /*
9504  * Callers (the perf_ioctl() path) already hold ctx::mutex,
9505  * as asserted below.
9506  */
9507 lockdep_assert_held(&event->ctx->mutex);
9508
9509 if (WARN_ON_ONCE(event->parent))
9510 return -EINVAL;
9511
9512 ret = perf_event_parse_addr_filter(event, filter_str, &filters);
9513 if (ret)
9514 goto fail_clear_files;
9515
9516 ret = event->pmu->addr_filters_validate(&filters);
9517 if (ret)
9518 goto fail_free_filters;
9519
9520
9521 perf_addr_filters_splice(event, &filters);
9522
9523
9524 perf_event_for_each_child(event, perf_event_addr_filters_apply);
9525
9526 return ret;
9527
9528 fail_free_filters:
9529 free_filters_list(&filters);
9530
9531 fail_clear_files:
9532 event->addr_filters.nr_file_filters = 0;
9533
9534 return ret;
9535 }
9536
9537 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
9538 {
9539 int ret = -EINVAL;
9540 char *filter_str;
9541
9542 filter_str = strndup_user(arg, PAGE_SIZE);
9543 if (IS_ERR(filter_str))
9544 return PTR_ERR(filter_str);
9545
9546 #ifdef CONFIG_EVENT_TRACING
9547 if (perf_event_is_tracing(event)) {
9548 struct perf_event_context *ctx = event->ctx;
9549
9550 /*
9551  * Beware, here be dragons!!
9552  *
9553  * The tracepoint muck will deadlock against ctx->mutex, but
9554  * the tracepoint stuff does not actually need it. So
9555  * temporarily drop ctx->mutex. As per perf_event_ctx_lock() we
9556  * already have a reference on ctx.
9557  *
9558  * This can result in the event getting moved to a different taskctx,
9559  * but that does not affect the tracepoint state.
9560  */
9561 mutex_unlock(&ctx->mutex);
9562 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
9563 mutex_lock(&ctx->mutex);
9564 } else
9565 #endif
9566 if (has_addr_filter(event))
9567 ret = perf_event_set_addr_filter(event, filter_str);
9568
9569 kfree(filter_str);
9570 return ret;
9571 }
9572
9573 /*
9574  * hrtimer based swevent callback
9575  */
9576
9577 static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
9578 {
9579 enum hrtimer_restart ret = HRTIMER_RESTART;
9580 struct perf_sample_data data;
9581 struct pt_regs *regs;
9582 struct perf_event *event;
9583 u64 period;
9584
9585 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
9586
9587 if (event->state != PERF_EVENT_STATE_ACTIVE)
9588 return HRTIMER_NORESTART;
9589
9590 event->pmu->read(event);
9591
9592 perf_sample_data_init(&data, 0, event->hw.last_period);
9593 regs = get_irq_regs();
9594
9595 if (regs && !perf_exclude_event(event, regs)) {
9596 if (!(event->attr.exclude_idle && is_idle_task(current)))
9597 if (__perf_event_overflow(event, 1, &data, regs))
9598 ret = HRTIMER_NORESTART;
9599 }
9600
9601 period = max_t(u64, 10000, event->hw.sample_period);
9602 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
9603
9604 return ret;
9605 }
9606
9607 static void perf_swevent_start_hrtimer(struct perf_event *event)
9608 {
9609 struct hw_perf_event *hwc = &event->hw;
9610 s64 period;
9611
9612 if (!is_sampling_event(event))
9613 return;
9614
9615 period = local64_read(&hwc->period_left);
9616 if (period) {
9617 if (period < 0)
9618 period = 10000;
9619
9620 local64_set(&hwc->period_left, 0);
9621 } else {
9622 period = max_t(u64, 10000, hwc->sample_period);
9623 }
9624 hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
9625 HRTIMER_MODE_REL_PINNED_HARD);
9626 }
9627
9628 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
9629 {
9630 struct hw_perf_event *hwc = &event->hw;
9631
9632 if (is_sampling_event(event)) {
9633 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
9634 local64_set(&hwc->period_left, ktime_to_ns(remaining));
9635
9636 hrtimer_cancel(&hwc->hrtimer);
9637 }
9638 }
9639
9640 static void perf_swevent_init_hrtimer(struct perf_event *event)
9641 {
9642 struct hw_perf_event *hwc = &event->hw;
9643
9644 if (!is_sampling_event(event))
9645 return;
9646
9647 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
9648 hwc->hrtimer.function = perf_swevent_hrtimer;
9649
9650 /*
9651  * Since hrtimers have a fixed rate, we can do a static freq->period
9652  * mapping and avoid the whole period adjust feedback stuff.
9653  */
9654 if (event->attr.freq) {
9655 long freq = event->attr.sample_freq;
9656
9657 event->attr.sample_period = NSEC_PER_SEC / freq;
9658 hwc->sample_period = event->attr.sample_period;
9659 local64_set(&hwc->period_left, hwc->sample_period);
9660 hwc->last_period = hwc->sample_period;
9661 event->attr.freq = 0;
9662 }
9663 }
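/*
 * E.g. (hypothetical attribute): attr.sample_freq = 1000 is converted
 * above into a fixed sample_period of NSEC_PER_SEC / 1000 = 1,000,000 ns.
 */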
9664
9665
9666 /*
9667  * Software event: cpu wall time clock
9668  */
9669 static void cpu_clock_event_update(struct perf_event *event)
9670 {
9671 s64 prev;
9672 u64 now;
9673
9674 now = local_clock();
9675 prev = local64_xchg(&event->hw.prev_count, now);
9676 local64_add(now - prev, &event->count);
9677 }
9678
9679 static void cpu_clock_event_start(struct perf_event *event, int flags)
9680 {
9681 local64_set(&event->hw.prev_count, local_clock());
9682 perf_swevent_start_hrtimer(event);
9683 }
9684
9685 static void cpu_clock_event_stop(struct perf_event *event, int flags)
9686 {
9687 perf_swevent_cancel_hrtimer(event);
9688 cpu_clock_event_update(event);
9689 }
9690
9691 static int cpu_clock_event_add(struct perf_event *event, int flags)
9692 {
9693 if (flags & PERF_EF_START)
9694 cpu_clock_event_start(event, flags);
9695 perf_event_update_userpage(event);
9696
9697 return 0;
9698 }
9699
9700 static void cpu_clock_event_del(struct perf_event *event, int flags)
9701 {
9702 cpu_clock_event_stop(event, flags);
9703 }
9704
9705 static void cpu_clock_event_read(struct perf_event *event)
9706 {
9707 cpu_clock_event_update(event);
9708 }
9709
9710 static int cpu_clock_event_init(struct perf_event *event)
9711 {
9712 if (event->attr.type != PERF_TYPE_SOFTWARE)
9713 return -ENOENT;
9714
9715 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
9716 return -ENOENT;
9717
9718 /*
9719  * no branch sampling for software events
9720  */
9721 if (has_branch_stack(event))
9722 return -EOPNOTSUPP;
9723
9724 perf_swevent_init_hrtimer(event);
9725
9726 return 0;
9727 }
9728
9729 static struct pmu perf_cpu_clock = {
9730 .task_ctx_nr = perf_sw_context,
9731
9732 .capabilities = PERF_PMU_CAP_NO_NMI,
9733
9734 .event_init = cpu_clock_event_init,
9735 .add = cpu_clock_event_add,
9736 .del = cpu_clock_event_del,
9737 .start = cpu_clock_event_start,
9738 .stop = cpu_clock_event_stop,
9739 .read = cpu_clock_event_read,
9740 };
9741
9742 /*
9743  * Software event: task time clock
9744  */
9745
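/*
 * Unlike the cpu-clock event above, which is based on local_clock(),
 * the task-clock event counts against ctx->time, which only advances
 * while the monitored context is scheduled in.
 */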
9746 static void task_clock_event_update(struct perf_event *event, u64 now)
9747 {
9748 u64 prev;
9749 s64 delta;
9750
9751 prev = local64_xchg(&event->hw.prev_count, now);
9752 delta = now - prev;
9753 local64_add(delta, &event->count);
9754 }
9755
9756 static void task_clock_event_start(struct perf_event *event, int flags)
9757 {
9758 local64_set(&event->hw.prev_count, event->ctx->time);
9759 perf_swevent_start_hrtimer(event);
9760 }
9761
9762 static void task_clock_event_stop(struct perf_event *event, int flags)
9763 {
9764 perf_swevent_cancel_hrtimer(event);
9765 task_clock_event_update(event, event->ctx->time);
9766 }
9767
9768 static int task_clock_event_add(struct perf_event *event, int flags)
9769 {
9770 if (flags & PERF_EF_START)
9771 task_clock_event_start(event, flags);
9772 perf_event_update_userpage(event);
9773
9774 return 0;
9775 }
9776
9777 static void task_clock_event_del(struct perf_event *event, int flags)
9778 {
9779 task_clock_event_stop(event, PERF_EF_UPDATE);
9780 }
9781
9782 static void task_clock_event_read(struct perf_event *event)
9783 {
9784 u64 now = perf_clock();
9785 u64 delta = now - event->ctx->timestamp;
9786 u64 time = event->ctx->time + delta;
9787
9788 task_clock_event_update(event, time);
9789 }
9790
9791 static int task_clock_event_init(struct perf_event *event)
9792 {
9793 if (event->attr.type != PERF_TYPE_SOFTWARE)
9794 return -ENOENT;
9795
9796 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
9797 return -ENOENT;
9798
9799
9800
9801
9802 if (has_branch_stack(event))
9803 return -EOPNOTSUPP;
9804
9805 perf_swevent_init_hrtimer(event);
9806
9807 return 0;
9808 }
9809
9810 static struct pmu perf_task_clock = {
9811 .task_ctx_nr = perf_sw_context,
9812
9813 .capabilities = PERF_PMU_CAP_NO_NMI,
9814
9815 .event_init = task_clock_event_init,
9816 .add = task_clock_event_add,
9817 .del = task_clock_event_del,
9818 .start = task_clock_event_start,
9819 .stop = task_clock_event_stop,
9820 .read = task_clock_event_read,
9821 };
9822
9823 static void perf_pmu_nop_void(struct pmu *pmu)
9824 {
9825 }
9826
9827 static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags)
9828 {
9829 }
9830
9831 static int perf_pmu_nop_int(struct pmu *pmu)
9832 {
9833 return 0;
9834 }
9835
9836 static int perf_event_nop_int(struct perf_event *event, u64 value)
9837 {
9838 return 0;
9839 }
9840
9841 static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
9842
9843 static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
9844 {
9845 __this_cpu_write(nop_txn_flags, flags);
9846
9847 if (flags & ~PERF_PMU_TXN_ADD)
9848 return;
9849
9850 perf_pmu_disable(pmu);
9851 }
9852
9853 static int perf_pmu_commit_txn(struct pmu *pmu)
9854 {
9855 unsigned int flags = __this_cpu_read(nop_txn_flags);
9856
9857 __this_cpu_write(nop_txn_flags, 0);
9858
9859 if (flags & ~PERF_PMU_TXN_ADD)
9860 return 0;
9861
9862 perf_pmu_enable(pmu);
9863 return 0;
9864 }
9865
9866 static void perf_pmu_cancel_txn(struct pmu *pmu)
9867 {
9868 unsigned int flags = __this_cpu_read(nop_txn_flags);
9869
9870 __this_cpu_write(nop_txn_flags, 0);
9871
9872 if (flags & ~PERF_PMU_TXN_ADD)
9873 return;
9874
9875 perf_pmu_enable(pmu);
9876 }
9877
9878 static int perf_event_idx_default(struct perf_event *event)
9879 {
9880 return 0;
9881 }
9882
9883
9884
9885
9886
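/*
 * PMUs that share a task context number also share the per-cpu context;
 * look up an already-allocated one for this ctxn.
 */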
9887 static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
9888 {
9889 struct pmu *pmu;
9890
9891 if (ctxn < 0)
9892 return NULL;
9893
9894 list_for_each_entry(pmu, &pmus, entry) {
9895 if (pmu->task_ctx_nr == ctxn)
9896 return pmu->pmu_cpu_context;
9897 }
9898
9899 return NULL;
9900 }
9901
9902 static void free_pmu_context(struct pmu *pmu)
9903 {
9904
9905
9906
9907
9908
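/*
 * Shared (software/hardware) task contexts are never freed here; only a
 * PMU that allocated its own per-cpu context (perf_invalid_context)
 * releases it.
 */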
9909 if (pmu->task_ctx_nr > perf_invalid_context)
9910 return;
9911
9912 free_percpu(pmu->pmu_cpu_context);
9913 }
9914
9915
9916
9917
9918 static ssize_t nr_addr_filters_show(struct device *dev,
9919 struct device_attribute *attr,
9920 char *page)
9921 {
9922 struct pmu *pmu = dev_get_drvdata(dev);
9923
9924 return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
9925 }
9926 DEVICE_ATTR_RO(nr_addr_filters);
9927
9928 static struct idr pmu_idr;
9929
9930 static ssize_t
9931 type_show(struct device *dev, struct device_attribute *attr, char *page)
9932 {
9933 struct pmu *pmu = dev_get_drvdata(dev);
9934
9935 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
9936 }
9937 static DEVICE_ATTR_RO(type);
9938
9939 static ssize_t
9940 perf_event_mux_interval_ms_show(struct device *dev,
9941 struct device_attribute *attr,
9942 char *page)
9943 {
9944 struct pmu *pmu = dev_get_drvdata(dev);
9945
9946 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
9947 }
9948
9949 static DEFINE_MUTEX(mux_interval_mutex);
9950
9951 static ssize_t
9952 perf_event_mux_interval_ms_store(struct device *dev,
9953 struct device_attribute *attr,
9954 const char *buf, size_t count)
9955 {
9956 struct pmu *pmu = dev_get_drvdata(dev);
9957 int timer, cpu, ret;
9958
9959 ret = kstrtoint(buf, 0, &timer);
9960 if (ret)
9961 return ret;
9962
9963 if (timer < 1)
9964 return -EINVAL;
9965
9966
9967 if (timer == pmu->hrtimer_interval_ms)
9968 return count;
9969
9970 mutex_lock(&mux_interval_mutex);
9971 pmu->hrtimer_interval_ms = timer;
9972
9973
9974 cpus_read_lock();
9975 for_each_online_cpu(cpu) {
9976 struct perf_cpu_context *cpuctx;
9977 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
9978 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
9979
9980 cpu_function_call(cpu,
9981 (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
9982 }
9983 cpus_read_unlock();
9984 mutex_unlock(&mux_interval_mutex);
9985
9986 return count;
9987 }
9988 static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
9989
9990 static struct attribute *pmu_dev_attrs[] = {
9991 &dev_attr_type.attr,
9992 &dev_attr_perf_event_mux_interval_ms.attr,
9993 NULL,
9994 };
9995 ATTRIBUTE_GROUPS(pmu_dev);
9996
9997 static int pmu_bus_running;
9998 static struct bus_type pmu_bus = {
9999 .name = "event_source",
10000 .dev_groups = pmu_dev_groups,
10001 };
10002
10003 static void pmu_dev_release(struct device *dev)
10004 {
10005 kfree(dev);
10006 }
10007
10008 static int pmu_dev_alloc(struct pmu *pmu)
10009 {
10010 int ret = -ENOMEM;
10011
10012 pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
10013 if (!pmu->dev)
10014 goto out;
10015
10016 pmu->dev->groups = pmu->attr_groups;
10017 device_initialize(pmu->dev);
10018 ret = dev_set_name(pmu->dev, "%s", pmu->name);
10019 if (ret)
10020 goto free_dev;
10021
10022 dev_set_drvdata(pmu->dev, pmu);
10023 pmu->dev->bus = &pmu_bus;
10024 pmu->dev->release = pmu_dev_release;
10025 ret = device_add(pmu->dev);
10026 if (ret)
10027 goto free_dev;
10028
10029
10030 if (pmu->nr_addr_filters)
10031 ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
10032
10033 if (ret)
10034 goto del_dev;
10035
10036 if (pmu->attr_update)
10037 ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
10038
10039 if (ret)
10040 goto del_dev;
10041
10042 out:
10043 return ret;
10044
10045 del_dev:
10046 device_del(pmu->dev);
10047
10048 free_dev:
10049 put_device(pmu->dev);
10050 goto out;
10051 }
10052
10053 static struct lock_class_key cpuctx_mutex;
10054 static struct lock_class_key cpuctx_lock;
10055
10056 int perf_pmu_register(struct pmu *pmu, const char *name, int type)
10057 {
10058 int cpu, ret;
10059
10060 mutex_lock(&pmus_lock);
10061 ret = -ENOMEM;
10062 pmu->pmu_disable_count = alloc_percpu(int);
10063 if (!pmu->pmu_disable_count)
10064 goto unlock;
10065
10066 pmu->type = -1;
10067 if (!name)
10068 goto skip_type;
10069 pmu->name = name;
10070
10071 if (type < 0) {
10072 type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL);
10073 if (type < 0) {
10074 ret = type;
10075 goto free_pdc;
10076 }
10077 }
10078 pmu->type = type;
10079
10080 if (pmu_bus_running) {
10081 ret = pmu_dev_alloc(pmu);
10082 if (ret)
10083 goto free_idr;
10084 }
10085
10086 skip_type:
10087 if (pmu->task_ctx_nr == perf_hw_context) {
10088 static int hw_context_taken = 0;
10089
10090
10091
10092
10093
10094
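/*
 * Only one PMU may claim perf_hw_context, except on systems with
 * heterogeneous CPUs; any other claimant is demoted to
 * perf_invalid_context.
 */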
10095 if (WARN_ON_ONCE(hw_context_taken &&
10096 !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
10097 pmu->task_ctx_nr = perf_invalid_context;
10098
10099 hw_context_taken = 1;
10100 }
10101
10102 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
10103 if (pmu->pmu_cpu_context)
10104 goto got_cpu_context;
10105
10106 ret = -ENOMEM;
10107 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
10108 if (!pmu->pmu_cpu_context)
10109 goto free_dev;
10110
10111 for_each_possible_cpu(cpu) {
10112 struct perf_cpu_context *cpuctx;
10113
10114 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
10115 __perf_event_init_context(&cpuctx->ctx);
10116 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
10117 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
10118 cpuctx->ctx.pmu = pmu;
10119 cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
10120
10121 __perf_mux_hrtimer_init(cpuctx, cpu);
10122 }
10123
10124 got_cpu_context:
10125 if (!pmu->start_txn) {
10126 if (pmu->pmu_enable) {
10127
10128
10129
10130
10131
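/*
 * If the PMU provides pmu_enable/pmu_disable, back the default
 * transaction stubs with them so ->add() calls are batched between
 * start_txn and commit_txn.
 */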
10132 pmu->start_txn = perf_pmu_start_txn;
10133 pmu->commit_txn = perf_pmu_commit_txn;
10134 pmu->cancel_txn = perf_pmu_cancel_txn;
10135 } else {
10136 pmu->start_txn = perf_pmu_nop_txn;
10137 pmu->commit_txn = perf_pmu_nop_int;
10138 pmu->cancel_txn = perf_pmu_nop_void;
10139 }
10140 }
10141
10142 if (!pmu->pmu_enable) {
10143 pmu->pmu_enable = perf_pmu_nop_void;
10144 pmu->pmu_disable = perf_pmu_nop_void;
10145 }
10146
10147 if (!pmu->check_period)
10148 pmu->check_period = perf_event_nop_int;
10149
10150 if (!pmu->event_idx)
10151 pmu->event_idx = perf_event_idx_default;
10152
10153 list_add_rcu(&pmu->entry, &pmus);
10154 atomic_set(&pmu->exclusive_cnt, 0);
10155 ret = 0;
10156 unlock:
10157 mutex_unlock(&pmus_lock);
10158
10159 return ret;
10160
10161 free_dev:
10162 device_del(pmu->dev);
10163 put_device(pmu->dev);
10164
10165 free_idr:
10166 if (pmu->type >= PERF_TYPE_MAX)
10167 idr_remove(&pmu_idr, pmu->type);
10168
10169 free_pdc:
10170 free_percpu(pmu->pmu_disable_count);
10171 goto unlock;
10172 }
10173 EXPORT_SYMBOL_GPL(perf_pmu_register);
10174
10175 void perf_pmu_unregister(struct pmu *pmu)
10176 {
10177 mutex_lock(&pmus_lock);
10178 list_del_rcu(&pmu->entry);
10179
10180
10181
10182
10183
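/*
 * The pmus list is walked under both SRCU and plain RCU read sections;
 * wait for both grace periods before tearing anything down.
 */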
10184 synchronize_srcu(&pmus_srcu);
10185 synchronize_rcu();
10186
10187 free_percpu(pmu->pmu_disable_count);
10188 if (pmu->type >= PERF_TYPE_MAX)
10189 idr_remove(&pmu_idr, pmu->type);
10190 if (pmu_bus_running) {
10191 if (pmu->nr_addr_filters)
10192 device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
10193 device_del(pmu->dev);
10194 put_device(pmu->dev);
10195 }
10196 free_pmu_context(pmu);
10197 mutex_unlock(&pmus_lock);
10198 }
10199 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
10200
10201 static inline bool has_extended_regs(struct perf_event *event)
10202 {
10203 return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) ||
10204 (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
10205 }
10206
10207 static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
10208 {
10209 struct perf_event_context *ctx = NULL;
10210 int ret;
10211
10212 if (!try_module_get(pmu->module))
10213 return -ENODEV;
10214
10215
10216
10217
10218
10219
10220
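/*
 * Some pmu->event_init() implementations walk the group leader's
 * sibling_list (e.g. to validate group scheduling); take the leader's
 * ctx->mutex for sibling events so that list is stable.
 */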
10221 if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
10222
10223
10224
10225
10226 ctx = perf_event_ctx_lock_nested(event->group_leader,
10227 SINGLE_DEPTH_NESTING);
10228 BUG_ON(!ctx);
10229 }
10230
10231 event->pmu = pmu;
10232 ret = pmu->event_init(event);
10233
10234 if (ctx)
10235 perf_event_ctx_unlock(event->group_leader, ctx);
10236
10237 if (!ret) {
10238 if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
10239 has_extended_regs(event))
10240 ret = -EOPNOTSUPP;
10241
10242 if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
10243 event_has_any_exclude_flag(event))
10244 ret = -EINVAL;
10245
10246 if (ret && event->destroy)
10247 event->destroy(event);
10248 }
10249
10250 if (ret)
10251 module_put(pmu->module);
10252
10253 return ret;
10254 }
10255
10256 static struct pmu *perf_init_event(struct perf_event *event)
10257 {
10258 struct pmu *pmu;
10259 int idx;
10260 int ret;
10261
10262 idx = srcu_read_lock(&pmus_srcu);
10263
10264
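/* Try the PMU used by the parent event first (inherited events). */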
10265 if (event->parent && event->parent->pmu) {
10266 pmu = event->parent->pmu;
10267 ret = perf_try_init_event(pmu, event);
10268 if (!ret)
10269 goto unlock;
10270 }
10271
10272 rcu_read_lock();
10273 pmu = idr_find(&pmu_idr, event->attr.type);
10274 rcu_read_unlock();
10275 if (pmu) {
10276 ret = perf_try_init_event(pmu, event);
10277 if (ret)
10278 pmu = ERR_PTR(ret);
10279 goto unlock;
10280 }
10281
10282 list_for_each_entry_rcu(pmu, &pmus, entry) {
10283 ret = perf_try_init_event(pmu, event);
10284 if (!ret)
10285 goto unlock;
10286
10287 if (ret != -ENOENT) {
10288 pmu = ERR_PTR(ret);
10289 goto unlock;
10290 }
10291 }
10292 pmu = ERR_PTR(-ENOENT);
10293 unlock:
10294 srcu_read_unlock(&pmus_srcu, idx);
10295
10296 return pmu;
10297 }
10298
10299 static void attach_sb_event(struct perf_event *event)
10300 {
10301 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
10302
10303 raw_spin_lock(&pel->lock);
10304 list_add_rcu(&event->sb_list, &pel->list);
10305 raw_spin_unlock(&pel->lock);
10306 }
10307
10308
10309
10310
10311
10312
10313
10314
10315 static void account_pmu_sb_event(struct perf_event *event)
10316 {
10317 if (is_sb_event(event))
10318 attach_sb_event(event);
10319 }
10320
10321 static void account_event_cpu(struct perf_event *event, int cpu)
10322 {
10323 if (event->parent)
10324 return;
10325
10326 if (is_cgroup_event(event))
10327 atomic_inc(&per_cpu(perf_cgroup_events, cpu));
10328 }
10329
10330
10331 static void account_freq_event_nohz(void)
10332 {
10333 #ifdef CONFIG_NO_HZ_FULL
10334
10335 spin_lock(&nr_freq_lock);
10336 if (atomic_inc_return(&nr_freq_events) == 1)
10337 tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
10338 spin_unlock(&nr_freq_lock);
10339 #endif
10340 }
10341
10342 static void account_freq_event(void)
10343 {
10344 if (tick_nohz_full_enabled())
10345 account_freq_event_nohz();
10346 else
10347 atomic_inc(&nr_freq_events);
10348 }
10349
10350
10351 static void account_event(struct perf_event *event)
10352 {
10353 bool inc = false;
10354
10355 if (event->parent)
10356 return;
10357
10358 if (event->attach_state & PERF_ATTACH_TASK)
10359 inc = true;
10360 if (event->attr.mmap || event->attr.mmap_data)
10361 atomic_inc(&nr_mmap_events);
10362 if (event->attr.comm)
10363 atomic_inc(&nr_comm_events);
10364 if (event->attr.namespaces)
10365 atomic_inc(&nr_namespaces_events);
10366 if (event->attr.task)
10367 atomic_inc(&nr_task_events);
10368 if (event->attr.freq)
10369 account_freq_event();
10370 if (event->attr.context_switch) {
10371 atomic_inc(&nr_switch_events);
10372 inc = true;
10373 }
10374 if (has_branch_stack(event))
10375 inc = true;
10376 if (is_cgroup_event(event))
10377 inc = true;
10378 if (event->attr.ksymbol)
10379 atomic_inc(&nr_ksymbol_events);
10380 if (event->attr.bpf_event)
10381 atomic_inc(&nr_bpf_events);
10382
10383 if (inc) {
10384
10385
10386
10387
10388
10389 if (atomic_inc_not_zero(&perf_sched_count))
10390 goto enabled;
10391
10392 mutex_lock(&perf_sched_mutex);
10393 if (!atomic_read(&perf_sched_count)) {
10394 static_branch_enable(&perf_sched_events);
10395
10396
10397
10398
10399
10400 synchronize_rcu();
10401 }
10402
10403
10404
10405
10406 atomic_inc(&perf_sched_count);
10407 mutex_unlock(&perf_sched_mutex);
10408 }
10409 enabled:
10410
10411 account_event_cpu(event, event->cpu);
10412
10413 account_pmu_sb_event(event);
10414 }
10415
10416
10417
10418
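/*
 * Allocate and initialize an event structure.
 */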
10419 static struct perf_event *
10420 perf_event_alloc(struct perf_event_attr *attr, int cpu,
10421 struct task_struct *task,
10422 struct perf_event *group_leader,
10423 struct perf_event *parent_event,
10424 perf_overflow_handler_t overflow_handler,
10425 void *context, int cgroup_fd)
10426 {
10427 struct pmu *pmu;
10428 struct perf_event *event;
10429 struct hw_perf_event *hwc;
10430 long err = -EINVAL;
10431
10432 if ((unsigned)cpu >= nr_cpu_ids) {
10433 if (!task || cpu != -1)
10434 return ERR_PTR(-EINVAL);
10435 }
10436
10437 event = kzalloc(sizeof(*event), GFP_KERNEL);
10438 if (!event)
10439 return ERR_PTR(-ENOMEM);
10440
10441
10442
10443
10444
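/*
 * Single events are their own group leaders, with an empty sibling list.
 */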
10445 if (!group_leader)
10446 group_leader = event;
10447
10448 mutex_init(&event->child_mutex);
10449 INIT_LIST_HEAD(&event->child_list);
10450
10451 INIT_LIST_HEAD(&event->event_entry);
10452 INIT_LIST_HEAD(&event->sibling_list);
10453 INIT_LIST_HEAD(&event->active_list);
10454 init_event_group(event);
10455 INIT_LIST_HEAD(&event->rb_entry);
10456 INIT_LIST_HEAD(&event->active_entry);
10457 INIT_LIST_HEAD(&event->addr_filters.list);
10458 INIT_HLIST_NODE(&event->hlist_entry);
10459
10460
10461 init_waitqueue_head(&event->waitq);
10462 event->pending_disable = -1;
10463 init_irq_work(&event->pending, perf_pending_event);
10464
10465 mutex_init(&event->mmap_mutex);
10466 raw_spin_lock_init(&event->addr_filters.lock);
10467
10468 atomic_long_set(&event->refcount, 1);
10469 event->cpu = cpu;
10470 event->attr = *attr;
10471 event->group_leader = group_leader;
10472 event->pmu = NULL;
10473 event->oncpu = -1;
10474
10475 event->parent = parent_event;
10476
10477 event->ns = get_pid_ns(task_active_pid_ns(current));
10478 event->id = atomic64_inc_return(&perf_event_id);
10479
10480 event->state = PERF_EVENT_STATE_INACTIVE;
10481
10482 if (task) {
10483 event->attach_state = PERF_ATTACH_TASK;
10484
10485
10486
10487
10488
10489 event->hw.target = get_task_struct(task);
10490 }
10491
10492 event->clock = &local_clock;
10493 if (parent_event)
10494 event->clock = parent_event->clock;
10495
10496 if (!overflow_handler && parent_event) {
10497 overflow_handler = parent_event->overflow_handler;
10498 context = parent_event->overflow_handler_context;
10499 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
10500 if (overflow_handler == bpf_overflow_handler) {
10501 struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
10502
10503 if (IS_ERR(prog)) {
10504 err = PTR_ERR(prog);
10505 goto err_ns;
10506 }
10507 event->prog = prog;
10508 event->orig_overflow_handler =
10509 parent_event->orig_overflow_handler;
10510 }
10511 #endif
10512 }
10513
10514 if (overflow_handler) {
10515 event->overflow_handler = overflow_handler;
10516 event->overflow_handler_context = context;
10517 } else if (is_write_backward(event)) {
10518 event->overflow_handler = perf_event_output_backward;
10519 event->overflow_handler_context = NULL;
10520 } else {
10521 event->overflow_handler = perf_event_output_forward;
10522 event->overflow_handler_context = NULL;
10523 }
10524
10525 perf_event__state_init(event);
10526
10527 pmu = NULL;
10528
10529 hwc = &event->hw;
10530 hwc->sample_period = attr->sample_period;
10531 if (attr->freq && attr->sample_freq)
10532 hwc->sample_period = 1;
10533 hwc->last_period = hwc->sample_period;
10534
10535 local64_set(&hwc->period_left, hwc->sample_period);
10536
10537
10538
10539
10540
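/*
 * PERF_SAMPLE_READ cannot be used together with inherited events;
 * reject the combination.
 */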
10541 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
10542 goto err_ns;
10543
10544 if (!has_branch_stack(event))
10545 event->attr.branch_sample_type = 0;
10546
10547 if (cgroup_fd != -1) {
10548 err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
10549 if (err)
10550 goto err_ns;
10551 }
10552
10553 pmu = perf_init_event(event);
10554 if (IS_ERR(pmu)) {
10555 err = PTR_ERR(pmu);
10556 goto err_ns;
10557 }
10558
10559
10560
10561
10562
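/*
 * PMUs without a task context (perf_invalid_context, e.g. uncore) cannot
 * attribute counts to a cgroup; reject cgroup events on them.
 */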
10563 if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
10564 err = -EINVAL;
10565 goto err_pmu;
10566 }
10567
10568 if (event->attr.aux_output &&
10569 !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
10570 err = -EOPNOTSUPP;
10571 goto err_pmu;
10572 }
10573
10574 err = exclusive_event_init(event);
10575 if (err)
10576 goto err_pmu;
10577
10578 if (has_addr_filter(event)) {
10579 event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
10580 sizeof(struct perf_addr_filter_range),
10581 GFP_KERNEL);
10582 if (!event->addr_filter_ranges) {
10583 err = -ENOMEM;
10584 goto err_per_task;
10585 }
10586
10587
10588
10589
10590
10591 if (event->parent) {
10592 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
10593
10594 raw_spin_lock_irq(&ifh->lock);
10595 memcpy(event->addr_filter_ranges,
10596 event->parent->addr_filter_ranges,
10597 pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
10598 raw_spin_unlock_irq(&ifh->lock);
10599 }
10600
10601
10602 event->addr_filters_gen = 1;
10603 }
10604
10605 if (!event->parent) {
10606 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
10607 err = get_callchain_buffers(attr->sample_max_stack);
10608 if (err)
10609 goto err_addr_filters;
10610 }
10611 }
10612
10613
10614 account_event(event);
10615
10616 return event;
10617
10618 err_addr_filters:
10619 kfree(event->addr_filter_ranges);
10620
10621 err_per_task:
10622 exclusive_event_destroy(event);
10623
10624 err_pmu:
10625 if (event->destroy)
10626 event->destroy(event);
10627 module_put(pmu->module);
10628 err_ns:
10629 if (is_cgroup_event(event))
10630 perf_detach_cgroup(event);
10631 if (event->ns)
10632 put_pid_ns(event->ns);
10633 if (event->hw.target)
10634 put_task_struct(event->hw.target);
10635 kfree(event);
10636
10637 return ERR_PTR(err);
10638 }
10639
10640 static int perf_copy_attr(struct perf_event_attr __user *uattr,
10641 struct perf_event_attr *attr)
10642 {
10643 u32 size;
10644 int ret;
10645
10646
10647 memset(attr, 0, sizeof(*attr));
10648
10649 ret = get_user(size, &uattr->size);
10650 if (ret)
10651 return ret;
10652
10653
10654 if (!size)
10655 size = PERF_ATTR_SIZE_VER0;
10656 if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE)
10657 goto err_size;
10658
10659 ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
10660 if (ret) {
10661 if (ret == -E2BIG)
10662 goto err_size;
10663 return ret;
10664 }
10665
10666 attr->size = size;
10667
10668 if (attr->__reserved_1 || attr->__reserved_2)
10669 return -EINVAL;
10670
10671 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
10672 return -EINVAL;
10673
10674 if (attr->read_format & ~(PERF_FORMAT_MAX-1))
10675 return -EINVAL;
10676
10677 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
10678 u64 mask = attr->branch_sample_type;
10679
10680
10681 if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
10682 return -EINVAL;
10683
10684
10685 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
10686 return -EINVAL;
10687
10688
10689 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
10690
10691
10692 if (!attr->exclude_kernel)
10693 mask |= PERF_SAMPLE_BRANCH_KERNEL;
10694
10695 if (!attr->exclude_user)
10696 mask |= PERF_SAMPLE_BRANCH_USER;
10697
10698 if (!attr->exclude_hv)
10699 mask |= PERF_SAMPLE_BRANCH_HV;
10700
10701
10702
10703 attr->branch_sample_type = mask;
10704 }
10705
10706 if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
10707 && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
10708 return -EACCES;
10709 }
10710
10711 if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
10712 ret = perf_reg_validate(attr->sample_regs_user);
10713 if (ret)
10714 return ret;
10715 }
10716
10717 if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
10718 if (!arch_perf_have_user_stack_dump())
10719 return -ENOSYS;
10720
10721
10722
10723
10724
10725
10726 if (attr->sample_stack_user >= USHRT_MAX)
10727 return -EINVAL;
10728 else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
10729 return -EINVAL;
10730 }
10731
10732 if (!attr->sample_max_stack)
10733 attr->sample_max_stack = sysctl_perf_event_max_stack;
10734
10735 if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
10736 ret = perf_reg_validate(attr->sample_regs_intr);
10737 out:
10738 return ret;
10739
10740 err_size:
10741 put_user(sizeof(*attr), &uattr->size);
10742 ret = -E2BIG;
10743 goto out;
10744 }
10745
10746 static int
10747 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
10748 {
10749 struct ring_buffer *rb = NULL;
10750 int ret = -EINVAL;
10751
10752 if (!output_event)
10753 goto set;
10754
10755
10756 if (event == output_event)
10757 goto out;
10758
10759
10760
10761
10762 if (output_event->cpu != event->cpu)
10763 goto out;
10764
10765
10766
10767
10768 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
10769 goto out;
10770
10771
10772
10773
10774 if (output_event->clock != event->clock)
10775 goto out;
10776
10777
10778
10779
10780
10781 if (is_write_backward(output_event) != is_write_backward(event))
10782 goto out;
10783
10784
10785
10786
10787 if (has_aux(event) && has_aux(output_event) &&
10788 event->pmu != output_event->pmu)
10789 goto out;
10790
10791 set:
10792 mutex_lock(&event->mmap_mutex);
10793
10794 if (atomic_read(&event->mmap_count))
10795 goto unlock;
10796
10797 if (output_event) {
10798
10799 rb = ring_buffer_get(output_event);
10800 if (!rb)
10801 goto unlock;
10802 }
10803
10804 ring_buffer_attach(event, rb);
10805
10806 ret = 0;
10807 unlock:
10808 mutex_unlock(&event->mmap_mutex);
10809
10810 out:
10811 return ret;
10812 }
10813
10814 static void mutex_lock_double(struct mutex *a, struct mutex *b)
10815 {
10816 if (b < a)
10817 swap(a, b);
10818
10819 mutex_lock(a);
10820 mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
10821 }
10822
10823 static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
10824 {
10825 bool nmi_safe = false;
10826
10827 switch (clk_id) {
10828 case CLOCK_MONOTONIC:
10829 event->clock = &ktime_get_mono_fast_ns;
10830 nmi_safe = true;
10831 break;
10832
10833 case CLOCK_MONOTONIC_RAW:
10834 event->clock = &ktime_get_raw_fast_ns;
10835 nmi_safe = true;
10836 break;
10837
10838 case CLOCK_REALTIME:
10839 event->clock = &ktime_get_real_ns;
10840 break;
10841
10842 case CLOCK_BOOTTIME:
10843 event->clock = &ktime_get_boottime_ns;
10844 break;
10845
10846 case CLOCK_TAI:
10847 event->clock = &ktime_get_clocktai_ns;
10848 break;
10849
10850 default:
10851 return -EINVAL;
10852 }
10853
10854 if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
10855 return -EINVAL;
10856
10857 return 0;
10858 }
10859
10860
10861
10862
10863
10864 static struct perf_event_context *
10865 __perf_event_ctx_lock_double(struct perf_event *group_leader,
10866 struct perf_event_context *ctx)
10867 {
10868 struct perf_event_context *gctx;
10869
10870 again:
10871 rcu_read_lock();
10872 gctx = READ_ONCE(group_leader->ctx);
10873 if (!refcount_inc_not_zero(&gctx->refcount)) {
10874 rcu_read_unlock();
10875 goto again;
10876 }
10877 rcu_read_unlock();
10878
10879 mutex_lock_double(&gctx->mutex, &ctx->mutex);
10880
10881 if (group_leader->ctx != gctx) {
10882 mutex_unlock(&ctx->mutex);
10883 mutex_unlock(&gctx->mutex);
10884 put_ctx(gctx);
10885 goto again;
10886 }
10887
10888 return gctx;
10889 }
10890
10891
10892
10893
10894
10895
10896
10897
10898
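/**
 * sys_perf_event_open - open a performance event, associate it to a task/cpu
 *
 * @attr_uptr:	event_id type attributes for monitoring/sampling
 * @pid:		target pid
 * @cpu:		target cpu
 * @group_fd:		group leader event fd
 * @flags:		PERF_FLAG_* open flags
 */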
10899 SYSCALL_DEFINE5(perf_event_open,
10900 struct perf_event_attr __user *, attr_uptr,
10901 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
10902 {
10903 struct perf_event *group_leader = NULL, *output_event = NULL;
10904 struct perf_event *event, *sibling;
10905 struct perf_event_attr attr;
10906 struct perf_event_context *ctx, *uninitialized_var(gctx);
10907 struct file *event_file = NULL;
10908 struct fd group = {NULL, 0};
10909 struct task_struct *task = NULL;
10910 struct pmu *pmu;
10911 int event_fd;
10912 int move_group = 0;
10913 int err;
10914 int f_flags = O_RDWR;
10915 int cgroup_fd = -1;
10916
10917
10918 if (flags & ~PERF_FLAG_ALL)
10919 return -EINVAL;
10920
10921 err = perf_copy_attr(attr_uptr, &attr);
10922 if (err)
10923 return err;
10924
10925 if (!attr.exclude_kernel) {
10926 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
10927 return -EACCES;
10928 }
10929
10930 if (attr.namespaces) {
10931 if (!capable(CAP_SYS_ADMIN))
10932 return -EACCES;
10933 }
10934
10935 if (attr.freq) {
10936 if (attr.sample_freq > sysctl_perf_event_sample_rate)
10937 return -EINVAL;
10938 } else {
10939 if (attr.sample_period & (1ULL << 63))
10940 return -EINVAL;
10941 }
10942
10943
10944 if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
10945 perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
10946 return -EACCES;
10947
10948 err = security_locked_down(LOCKDOWN_PERF);
10949 if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
10950
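/* REGS_INTR can leak kernel register state, so lockdown must prevent it */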
10951 return err;
10952
10953 err = 0;
10954
10955
10956
10957
10958
10959
10960
10961 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
10962 return -EINVAL;
10963
10964 if (flags & PERF_FLAG_FD_CLOEXEC)
10965 f_flags |= O_CLOEXEC;
10966
10967 event_fd = get_unused_fd_flags(f_flags);
10968 if (event_fd < 0)
10969 return event_fd;
10970
10971 if (group_fd != -1) {
10972 err = perf_fget_light(group_fd, &group);
10973 if (err)
10974 goto err_fd;
10975 group_leader = group.file->private_data;
10976 if (flags & PERF_FLAG_FD_OUTPUT)
10977 output_event = group_leader;
10978 if (flags & PERF_FLAG_FD_NO_GROUP)
10979 group_leader = NULL;
10980 }
10981
10982 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
10983 task = find_lively_task_by_vpid(pid);
10984 if (IS_ERR(task)) {
10985 err = PTR_ERR(task);
10986 goto err_group_fd;
10987 }
10988 }
10989
10990 if (task && group_leader &&
10991 group_leader->attr.inherit != attr.inherit) {
10992 err = -EINVAL;
10993 goto err_task;
10994 }
10995
10996 if (task) {
10997 err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
10998 if (err)
10999 goto err_task;
11000
11001
11002
11003
11004
11005
11006
11007
11008
11009 err = -EACCES;
11010 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
11011 goto err_cred;
11012 }
11013
11014 if (flags & PERF_FLAG_PID_CGROUP)
11015 cgroup_fd = pid;
11016
11017 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
11018 NULL, NULL, cgroup_fd);
11019 if (IS_ERR(event)) {
11020 err = PTR_ERR(event);
11021 goto err_cred;
11022 }
11023
11024 if (is_sampling_event(event)) {
11025 if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
11026 err = -EOPNOTSUPP;
11027 goto err_alloc;
11028 }
11029 }
11030
11031
11032
11033
11034
11035 pmu = event->pmu;
11036
11037 if (attr.use_clockid) {
11038 err = perf_event_set_clock(event, attr.clockid);
11039 if (err)
11040 goto err_alloc;
11041 }
11042
11043 if (pmu->task_ctx_nr == perf_sw_context)
11044 event->event_caps |= PERF_EV_CAP_SOFTWARE;
11045
11046 if (group_leader) {
11047 if (is_software_event(event) &&
11048 !in_software_context(group_leader)) {
11049
11050
11051
11052
11053
11054
11055
11056
11057 pmu = group_leader->ctx->pmu;
11058 } else if (!is_software_event(event) &&
11059 is_software_event(group_leader) &&
11060 (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
11061
11062
11063
11064
11065
11066 move_group = 1;
11067 }
11068 }
11069
11070
11071
11072
11073 ctx = find_get_context(pmu, task, event);
11074 if (IS_ERR(ctx)) {
11075 err = PTR_ERR(ctx);
11076 goto err_alloc;
11077 }
11078
11079
11080
11081
11082 if (group_leader) {
11083 err = -EINVAL;
11084
11085
11086
11087
11088
11089 if (group_leader->group_leader != group_leader)
11090 goto err_context;
11091
11092
11093 if (group_leader->clock != event->clock)
11094 goto err_context;
11095
11096
11097
11098
11099
11100
11101 if (group_leader->cpu != event->cpu)
11102 goto err_context;
11103
11104
11105
11106
11107
11108 if (group_leader->ctx->task != ctx->task)
11109 goto err_context;
11110
11111
11112
11113
11114
11115
11116 if (!move_group && group_leader->ctx != ctx)
11117 goto err_context;
11118
11119
11120
11121
11122 if (attr.exclusive || attr.pinned)
11123 goto err_context;
11124 }
11125
11126 if (output_event) {
11127 err = perf_event_set_output(event, output_event);
11128 if (err)
11129 goto err_context;
11130 }
11131
11132 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event,
11133 f_flags);
11134 if (IS_ERR(event_file)) {
11135 err = PTR_ERR(event_file);
11136 event_file = NULL;
11137 goto err_context;
11138 }
11139
11140 if (move_group) {
11141 gctx = __perf_event_ctx_lock_double(group_leader, ctx);
11142
11143 if (gctx->task == TASK_TOMBSTONE) {
11144 err = -ESRCH;
11145 goto err_locked;
11146 }
11147
11148
11149
11150
11151
11152 if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
11153
11154
11155
11156
11157
11158 if (gctx != ctx) {
11159 err = -EINVAL;
11160 goto err_locked;
11161 } else {
11162 perf_event_ctx_unlock(group_leader, gctx);
11163 move_group = 0;
11164 }
11165 }
11166
11167
11168
11169
11170 err = -EBUSY;
11171 if (!exclusive_event_installable(group_leader, ctx))
11172 goto err_locked;
11173
11174 for_each_sibling_event(sibling, group_leader) {
11175 if (!exclusive_event_installable(sibling, ctx))
11176 goto err_locked;
11177 }
11178 } else {
11179 mutex_lock(&ctx->mutex);
11180 }
11181
11182 if (ctx->task == TASK_TOMBSTONE) {
11183 err = -ESRCH;
11184 goto err_locked;
11185 }
11186
11187 if (!perf_event_validate_size(event)) {
11188 err = -E2BIG;
11189 goto err_locked;
11190 }
11191
11192 if (!task) {
11193
11194
11195
11196
11197
11198
11199 struct perf_cpu_context *cpuctx =
11200 container_of(ctx, struct perf_cpu_context, ctx);
11201
11202 if (!cpuctx->online) {
11203 err = -ENODEV;
11204 goto err_locked;
11205 }
11206 }
11207
11208 if (event->attr.aux_output && !perf_get_aux_event(event, group_leader)) {
11209 err = -EINVAL;
11210 goto err_locked;
11211 }
11212
11213
11214
11215
11216
11217 if (!exclusive_event_installable(event, ctx)) {
11218 err = -EBUSY;
11219 goto err_locked;
11220 }
11221
11222 WARN_ON_ONCE(ctx->parent_ctx);
11223
11224
11225
11226
11227
11228
11229 if (move_group) {
11230
11231
11232
11233
11234 perf_remove_from_context(group_leader, 0);
11235 put_ctx(gctx);
11236
11237 for_each_sibling_event(sibling, group_leader) {
11238 perf_remove_from_context(sibling, 0);
11239 put_ctx(gctx);
11240 }
11241
11242
11243
11244
11245
11246 synchronize_rcu();
11247
11248
11249
11250
11251
11252
11253
11254
11255
11256
11257
11258 for_each_sibling_event(sibling, group_leader) {
11259 perf_event__state_init(sibling);
11260 perf_install_in_context(ctx, sibling, sibling->cpu);
11261 get_ctx(ctx);
11262 }
11263
11264
11265
11266
11267
11268
11269 perf_event__state_init(group_leader);
11270 perf_install_in_context(ctx, group_leader, group_leader->cpu);
11271 get_ctx(ctx);
11272 }
11273
11274
11275
11276
11277
11278
11279
11280 perf_event__header_size(event);
11281 perf_event__id_header_size(event);
11282
11283 event->owner = current;
11284
11285 perf_install_in_context(ctx, event, event->cpu);
11286 perf_unpin_context(ctx);
11287
11288 if (move_group)
11289 perf_event_ctx_unlock(group_leader, gctx);
11290 mutex_unlock(&ctx->mutex);
11291
11292 if (task) {
11293 mutex_unlock(&task->signal->cred_guard_mutex);
11294 put_task_struct(task);
11295 }
11296
11297 mutex_lock(¤t->perf_event_mutex);
11298 list_add_tail(&event->owner_entry, ¤t->perf_event_list);
11299 mutex_unlock(¤t->perf_event_mutex);
11300
11301
11302
11303
11304
11305
11306
11307 fdput(group);
11308 fd_install(event_fd, event_file);
11309 return event_fd;
11310
11311 err_locked:
11312 if (move_group)
11313 perf_event_ctx_unlock(group_leader, gctx);
11314 mutex_unlock(&ctx->mutex);
11315
11316 fput(event_file);
11317 err_context:
11318 perf_unpin_context(ctx);
11319 put_ctx(ctx);
11320 err_alloc:
11321
11322
11323
11324
11325 if (!event_file)
11326 free_event(event);
11327 err_cred:
11328 if (task)
11329 mutex_unlock(&task->signal->cred_guard_mutex);
11330 err_task:
11331 if (task)
11332 put_task_struct(task);
11333 err_group_fd:
11334 fdput(group);
11335 err_fd:
11336 put_unused_fd(event_fd);
11337 return err;
11338 }
11339
11340
11341
11342
11343
11344
11345
11346
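/**
 * perf_event_create_kernel_counter - create an in-kernel performance event
 *
 * @attr: attributes of the counter to create
 * @cpu: cpu on which the counter is bound
 * @task: task to profile (NULL for per-cpu)
 * @overflow_handler: callback invoked on overflow
 * @context: opaque data passed to the overflow handler
 */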
11347 struct perf_event *
11348 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
11349 struct task_struct *task,
11350 perf_overflow_handler_t overflow_handler,
11351 void *context)
11352 {
11353 struct perf_event_context *ctx;
11354 struct perf_event *event;
11355 int err;
11356
11357
11358
11359
11360
11361 if (attr->aux_output)
11362 return ERR_PTR(-EINVAL);
11363
11364 event = perf_event_alloc(attr, cpu, task, NULL, NULL,
11365 overflow_handler, context, -1);
11366 if (IS_ERR(event)) {
11367 err = PTR_ERR(event);
11368 goto err;
11369 }
11370
11371
11372 event->owner = TASK_TOMBSTONE;
11373
11374
11375
11376
11377 ctx = find_get_context(event->pmu, task, event);
11378 if (IS_ERR(ctx)) {
11379 err = PTR_ERR(ctx);
11380 goto err_free;
11381 }
11382
11383 WARN_ON_ONCE(ctx->parent_ctx);
11384 mutex_lock(&ctx->mutex);
11385 if (ctx->task == TASK_TOMBSTONE) {
11386 err = -ESRCH;
11387 goto err_unlock;
11388 }
11389
11390 if (!task) {
11391
11392
11393
11394
11395
11396
11397 struct perf_cpu_context *cpuctx =
11398 container_of(ctx, struct perf_cpu_context, ctx);
11399 if (!cpuctx->online) {
11400 err = -ENODEV;
11401 goto err_unlock;
11402 }
11403 }
11404
11405 if (!exclusive_event_installable(event, ctx)) {
11406 err = -EBUSY;
11407 goto err_unlock;
11408 }
11409
11410 perf_install_in_context(ctx, event, event->cpu);
11411 perf_unpin_context(ctx);
11412 mutex_unlock(&ctx->mutex);
11413
11414 return event;
11415
11416 err_unlock:
11417 mutex_unlock(&ctx->mutex);
11418 perf_unpin_context(ctx);
11419 put_ctx(ctx);
11420 err_free:
11421 free_event(event);
11422 err:
11423 return ERR_PTR(err);
11424 }
11425 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
11426
11427 void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
11428 {
11429 struct perf_event_context *src_ctx;
11430 struct perf_event_context *dst_ctx;
11431 struct perf_event *event, *tmp;
11432 LIST_HEAD(events);
11433
11434 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
11435 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
11436
11437
11438
11439
11440
11441 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
11442 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
11443 event_entry) {
11444 perf_remove_from_context(event, 0);
11445 unaccount_event_cpu(event, src_cpu);
11446 put_ctx(src_ctx);
11447 list_add(&event->migrate_entry, &events);
11448 }
11449
11450
11451
11452
11453 synchronize_rcu();
11454
11455
11456
11457
11458
11459
11460
11461
11462
11463 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
11464 if (event->group_leader == event)
11465 continue;
11466
11467 list_del(&event->migrate_entry);
11468 if (event->state >= PERF_EVENT_STATE_OFF)
11469 event->state = PERF_EVENT_STATE_INACTIVE;
11470 account_event_cpu(event, dst_cpu);
11471 perf_install_in_context(dst_ctx, event, dst_cpu);
11472 get_ctx(dst_ctx);
11473 }
11474
11475
11476
11477
11478
11479 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
11480 list_del(&event->migrate_entry);
11481 if (event->state >= PERF_EVENT_STATE_OFF)
11482 event->state = PERF_EVENT_STATE_INACTIVE;
11483 account_event_cpu(event, dst_cpu);
11484 perf_install_in_context(dst_ctx, event, dst_cpu);
11485 get_ctx(dst_ctx);
11486 }
11487 mutex_unlock(&dst_ctx->mutex);
11488 mutex_unlock(&src_ctx->mutex);
11489 }
11490 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
11491
11492 static void sync_child_event(struct perf_event *child_event,
11493 struct task_struct *child)
11494 {
11495 struct perf_event *parent_event = child_event->parent;
11496 u64 child_val;
11497
11498 if (child_event->attr.inherit_stat)
11499 perf_event_read_event(child_event, child);
11500
11501 child_val = perf_event_count(child_event);
11502
11503
11504
11505
11506 atomic64_add(child_val, &parent_event->child_count);
11507 atomic64_add(child_event->total_time_enabled,
11508 &parent_event->child_total_time_enabled);
11509 atomic64_add(child_event->total_time_running,
11510 &parent_event->child_total_time_running);
11511 }
11512
11513 static void
11514 perf_event_exit_event(struct perf_event *child_event,
11515 struct perf_event_context *child_ctx,
11516 struct task_struct *child)
11517 {
11518 struct perf_event *parent_event = child_event->parent;
11519
11520
11521
11522
11523
11524
11525
11526
11527
11528
11529
11530
11531
11532 raw_spin_lock_irq(&child_ctx->lock);
11533 WARN_ON_ONCE(child_ctx->is_active);
11534
11535 if (parent_event)
11536 perf_group_detach(child_event);
11537 list_del_event(child_event, child_ctx);
11538 perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT);
11539 raw_spin_unlock_irq(&child_ctx->lock);
11540
11541
11542
11543
11544 if (!parent_event) {
11545 perf_event_wakeup(child_event);
11546 return;
11547 }
11548
11549
11550
11551
11552 sync_child_event(child_event, child);
11553
11554
11555
11556
11557 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
11558 mutex_lock(&parent_event->child_mutex);
11559 list_del_init(&child_event->child_list);
11560 mutex_unlock(&parent_event->child_mutex);
11561
11562
11563
11564
11565 perf_event_wakeup(parent_event);
11566 free_event(child_event);
11567 put_event(parent_event);
11568 }
11569
11570 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
11571 {
11572 struct perf_event_context *child_ctx, *clone_ctx = NULL;
11573 struct perf_event *child_event, *next;
11574
11575 WARN_ON_ONCE(child != current);
11576
11577 child_ctx = perf_pin_task_context(child, ctxn);
11578 if (!child_ctx)
11579 return;
11580
11581
11582
11583
11584
11585
11586
11587
11588
11589
11590
11591 mutex_lock(&child_ctx->mutex);
11592
11593
11594
11595
11596
11597
11598 raw_spin_lock_irq(&child_ctx->lock);
11599 task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx, EVENT_ALL);
11600
11601
11602
11603
11604
11605 RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
11606 put_ctx(child_ctx);
11607 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
11608 put_task_struct(current);
11609
11610 clone_ctx = unclone_ctx(child_ctx);
11611 raw_spin_unlock_irq(&child_ctx->lock);
11612
11613 if (clone_ctx)
11614 put_ctx(clone_ctx);
11615
11616
11617
11618
11619
11620
11621 perf_event_task(child, child_ctx, 0);
11622
11623 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
11624 perf_event_exit_event(child_event, child_ctx, child);
11625
11626 mutex_unlock(&child_ctx->mutex);
11627
11628 put_ctx(child_ctx);
11629 }
11630
11631
11632
11633
11634
11635
11636
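/*
 * When a task exits: detach the events it owns, tear down each of its perf
 * contexts (feeding counts back to any parent events), and emit a task-exit
 * side-band record.
 */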
11637 void perf_event_exit_task(struct task_struct *child)
11638 {
11639 struct perf_event *event, *tmp;
11640 int ctxn;
11641
11642 mutex_lock(&child->perf_event_mutex);
11643 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
11644 owner_entry) {
11645 list_del_init(&event->owner_entry);
11646
11647
11648
11649
11650
11651
11652 smp_store_release(&event->owner, NULL);
11653 }
11654 mutex_unlock(&child->perf_event_mutex);
11655
11656 for_each_task_context_nr(ctxn)
11657 perf_event_exit_task_context(child, ctxn);
11658
11659
11660
11661
11662
11663
11664
11665 perf_event_task(child, NULL, 0);
11666 }
11667
11668 static void perf_free_event(struct perf_event *event,
11669 struct perf_event_context *ctx)
11670 {
11671 struct perf_event *parent = event->parent;
11672
11673 if (WARN_ON_ONCE(!parent))
11674 return;
11675
11676 mutex_lock(&parent->child_mutex);
11677 list_del_init(&event->child_list);
11678 mutex_unlock(&parent->child_mutex);
11679
11680 put_event(parent);
11681
11682 raw_spin_lock_irq(&ctx->lock);
11683 perf_group_detach(event);
11684 list_del_event(event, ctx);
11685 raw_spin_unlock_irq(&ctx->lock);
11686 free_event(event);
11687 }
11688
11689
11690
11691
11692
11693
11694
11695
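/*
 * Free the perf contexts created by inheritance in perf_event_init_task();
 * used on the fork() failure path.  The child never ran, but its events are
 * already linked into the parents' child_lists and must be unlinked.
 */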
11696 void perf_event_free_task(struct task_struct *task)
11697 {
11698 struct perf_event_context *ctx;
11699 struct perf_event *event, *tmp;
11700 int ctxn;
11701
11702 for_each_task_context_nr(ctxn) {
11703 ctx = task->perf_event_ctxp[ctxn];
11704 if (!ctx)
11705 continue;
11706
11707 mutex_lock(&ctx->mutex);
11708 raw_spin_lock_irq(&ctx->lock);
11709
11710
11711
11712
11713
11714
11715 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
11716 WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
11717 put_task_struct(task);
11718 raw_spin_unlock_irq(&ctx->lock);
11719
11720 list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
11721 perf_free_event(event, ctx);
11722
11723 mutex_unlock(&ctx->mutex);
11724
11725
11726
11727
11728
11729
11730
11731
11732
11733
11734
11735
11736
11737
11738
11739 wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
11740 put_ctx(ctx);
11741 }
11742 }
11743
11744 void perf_event_delayed_put(struct task_struct *task)
11745 {
11746 int ctxn;
11747
11748 for_each_task_context_nr(ctxn)
11749 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
11750 }
11751
11752 struct file *perf_event_get(unsigned int fd)
11753 {
11754 struct file *file = fget(fd);
11755 if (!file)
11756 return ERR_PTR(-EBADF);
11757
11758 if (file->f_op != &perf_fops) {
11759 fput(file);
11760 return ERR_PTR(-EBADF);
11761 }
11762
11763 return file;
11764 }
11765
11766 const struct perf_event *perf_get_event(struct file *file)
11767 {
11768 if (file->f_op != &perf_fops)
11769 return ERR_PTR(-EINVAL);
11770
11771 return file->private_data;
11772 }
11773
11774 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
11775 {
11776 if (!event)
11777 return ERR_PTR(-EINVAL);
11778
11779 return &event->attr;
11780 }
11781
11782
11783
11784
11785
11786
11787
11788
11789
11790 static struct perf_event *
11791 inherit_event(struct perf_event *parent_event,
11792 struct task_struct *parent,
11793 struct perf_event_context *parent_ctx,
11794 struct task_struct *child,
11795 struct perf_event *group_leader,
11796 struct perf_event_context *child_ctx)
11797 {
11798 enum perf_event_state parent_state = parent_event->state;
11799 struct perf_event *child_event;
11800 unsigned long flags;
11801
11802
11803
11804
11805
11806
11807
11808 if (parent_event->parent)
11809 parent_event = parent_event->parent;
11810
11811 child_event = perf_event_alloc(&parent_event->attr,
11812 parent_event->cpu,
11813 child,
11814 group_leader, parent_event,
11815 NULL, NULL, -1);
11816 if (IS_ERR(child_event))
11817 return child_event;
11818
11819
11820 if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
11821 !child_ctx->task_ctx_data) {
11822 struct pmu *pmu = child_event->pmu;
11823
11824 child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
11825 GFP_KERNEL);
11826 if (!child_ctx->task_ctx_data) {
11827 free_event(child_event);
11828 return ERR_PTR(-ENOMEM);
11829 }
11830 }
11831
11832
11833
11834
11835
11836
11837
11838 mutex_lock(&parent_event->child_mutex);
11839 if (is_orphaned_event(parent_event) ||
11840 !atomic_long_inc_not_zero(&parent_event->refcount)) {
11841 mutex_unlock(&parent_event->child_mutex);
11842
11843 free_event(child_event);
11844 return NULL;
11845 }
11846
11847 get_ctx(child_ctx);
11848
11849
11850
11851
11852
11853
11854 if (parent_state >= PERF_EVENT_STATE_INACTIVE)
11855 child_event->state = PERF_EVENT_STATE_INACTIVE;
11856 else
11857 child_event->state = PERF_EVENT_STATE_OFF;
11858
11859 if (parent_event->attr.freq) {
11860 u64 sample_period = parent_event->hw.sample_period;
11861 struct hw_perf_event *hwc = &child_event->hw;
11862
11863 hwc->sample_period = sample_period;
11864 hwc->last_period = sample_period;
11865
11866 local64_set(&hwc->period_left, sample_period);
11867 }
11868
11869 child_event->ctx = child_ctx;
11870 child_event->overflow_handler = parent_event->overflow_handler;
11871 child_event->overflow_handler_context
11872 = parent_event->overflow_handler_context;
11873
11874
11875
11876
11877 perf_event__header_size(child_event);
11878 perf_event__id_header_size(child_event);
11879
11880
11881
11882
11883 raw_spin_lock_irqsave(&child_ctx->lock, flags);
11884 add_event_to_ctx(child_event, child_ctx);
11885 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
11886
11887
11888
11889
11890 list_add_tail(&child_event->child_list, &parent_event->child_list);
11891 mutex_unlock(&parent_event->child_mutex);
11892
11893 return child_event;
11894 }
11895
11896
11897
11898
11899
11900
11901
11902
11903
11904
11905
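/*
 * Inherit an event group: create child copies of the group leader and each
 * sibling.  A NULL return from inherit_event() (orphaned parent) is skipped
 * silently rather than treated as an error.
 */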
11906 static int inherit_group(struct perf_event *parent_event,
11907 struct task_struct *parent,
11908 struct perf_event_context *parent_ctx,
11909 struct task_struct *child,
11910 struct perf_event_context *child_ctx)
11911 {
11912 struct perf_event *leader;
11913 struct perf_event *sub;
11914 struct perf_event *child_ctr;
11915
11916 leader = inherit_event(parent_event, parent, parent_ctx,
11917 child, NULL, child_ctx);
11918 if (IS_ERR(leader))
11919 return PTR_ERR(leader);
11920
11921
11922
11923
11924
11925 for_each_sibling_event(sub, parent_event) {
11926 child_ctr = inherit_event(sub, parent, parent_ctx,
11927 child, leader, child_ctx);
11928 if (IS_ERR(child_ctr))
11929 return PTR_ERR(child_ctr);
11930
11931 if (sub->aux_event == parent_event && child_ctr &&
11932 !perf_get_aux_event(child_ctr, leader))
11933 return -EINVAL;
11934 }
11935 return 0;
11936 }
11937
11938
11939
11940
11941
11942
11943
11944
11945
11946
11947
11948
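/*
 * Inherit one event group from the parent into @child's context for @ctxn,
 * allocating the child context on first use.  Clears *inherited_all for
 * non-inherited events and on failure so the parent context is not treated
 * as fully cloned.
 */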
11949 static int
11950 inherit_task_group(struct perf_event *event, struct task_struct *parent,
11951 struct perf_event_context *parent_ctx,
11952 struct task_struct *child, int ctxn,
11953 int *inherited_all)
11954 {
11955 int ret;
11956 struct perf_event_context *child_ctx;
11957
11958 if (!event->attr.inherit) {
11959 *inherited_all = 0;
11960 return 0;
11961 }
11962
11963 child_ctx = child->perf_event_ctxp[ctxn];
11964 if (!child_ctx) {
11965
11966
11967
11968
11969
11970
11971 child_ctx = alloc_perf_context(parent_ctx->pmu, child);
11972 if (!child_ctx)
11973 return -ENOMEM;
11974
11975 child->perf_event_ctxp[ctxn] = child_ctx;
11976 }
11977
11978 ret = inherit_group(event, parent, parent_ctx,
11979 child, child_ctx);
11980
11981 if (ret)
11982 *inherited_all = 0;
11983
11984 return ret;
11985 }
11986
11987
11988
11989
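/*
 * Clone the parent's perf context (pinned and flexible groups) into the
 * child for one context number, as part of fork().
 */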
11990 static int perf_event_init_context(struct task_struct *child, int ctxn)
11991 {
11992 struct perf_event_context *child_ctx, *parent_ctx;
11993 struct perf_event_context *cloned_ctx;
11994 struct perf_event *event;
11995 struct task_struct *parent = current;
11996 int inherited_all = 1;
11997 unsigned long flags;
11998 int ret = 0;
11999
12000 if (likely(!parent->perf_event_ctxp[ctxn]))
12001 return 0;
12002
12003
12004
12005
12006
12007 parent_ctx = perf_pin_task_context(parent, ctxn);
12008 if (!parent_ctx)
12009 return 0;
12010
12011
12012
12013
12014
12015
12016
12017
12018
12019
12020
12021
12022 mutex_lock(&parent_ctx->mutex);
12023
12024
12025
12026
12027
12028 perf_event_groups_for_each(event, &parent_ctx->pinned_groups) {
12029 ret = inherit_task_group(event, parent, parent_ctx,
12030 child, ctxn, &inherited_all);
12031 if (ret)
12032 goto out_unlock;
12033 }
12034
12035
12036
12037
12038
12039
12040 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
12041 parent_ctx->rotate_disable = 1;
12042 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
12043
12044 perf_event_groups_for_each(event, &parent_ctx->flexible_groups) {
12045 ret = inherit_task_group(event, parent, parent_ctx,
12046 child, ctxn, &inherited_all);
12047 if (ret)
12048 goto out_unlock;
12049 }
12050
12051 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
12052 parent_ctx->rotate_disable = 0;
12053
12054 child_ctx = child->perf_event_ctxp[ctxn];
12055
12056 if (child_ctx && inherited_all) {
12057
12058
12059
12060
12061
12062
12063
12064 cloned_ctx = parent_ctx->parent_ctx;
12065 if (cloned_ctx) {
12066 child_ctx->parent_ctx = cloned_ctx;
12067 child_ctx->parent_gen = parent_ctx->parent_gen;
12068 } else {
12069 child_ctx->parent_ctx = parent_ctx;
12070 child_ctx->parent_gen = parent_ctx->generation;
12071 }
12072 get_ctx(child_ctx->parent_ctx);
12073 }
12074
12075 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
12076 out_unlock:
12077 mutex_unlock(&parent_ctx->mutex);
12078
12079 perf_unpin_context(parent_ctx);
12080 put_ctx(parent_ctx);
12081
12082 return ret;
12083 }
12084
12085
12086
12087
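/*
 * Initialize the perf_event context in task_struct.
 */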
12088 int perf_event_init_task(struct task_struct *child)
12089 {
12090 int ctxn, ret;
12091
12092 memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
12093 mutex_init(&child->perf_event_mutex);
12094 INIT_LIST_HEAD(&child->perf_event_list);
12095
12096 for_each_task_context_nr(ctxn) {
12097 ret = perf_event_init_context(child, ctxn);
12098 if (ret) {
12099 perf_event_free_task(child);
12100 return ret;
12101 }
12102 }
12103
12104 return 0;
12105 }
12106
12107 static void __init perf_event_init_all_cpus(void)
12108 {
12109 struct swevent_htable *swhash;
12110 int cpu;
12111
12112 zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
12113
12114 for_each_possible_cpu(cpu) {
12115 swhash = &per_cpu(swevent_htable, cpu);
12116 mutex_init(&swhash->hlist_mutex);
12117 INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
12118
12119 INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
12120 raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
12121
12122 #ifdef CONFIG_CGROUP_PERF
12123 INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
12124 #endif
12125 INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
12126 }
12127 }
12128
12129 static void perf_swevent_init_cpu(unsigned int cpu)
12130 {
12131 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
12132
12133 mutex_lock(&swhash->hlist_mutex);
12134 if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) {
12135 struct swevent_hlist *hlist;
12136
12137 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
12138 WARN_ON(!hlist);
12139 rcu_assign_pointer(swhash->swevent_hlist, hlist);
12140 }
12141 mutex_unlock(&swhash->hlist_mutex);
12142 }
12143
12144 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
12145 static void __perf_event_exit_context(void *__info)
12146 {
12147 struct perf_event_context *ctx = __info;
12148 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
12149 struct perf_event *event;
12150
12151 raw_spin_lock(&ctx->lock);
12152 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
12153 list_for_each_entry(event, &ctx->event_list, event_entry)
12154 __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
12155 raw_spin_unlock(&ctx->lock);
12156 }
12157
12158 static void perf_event_exit_cpu_context(int cpu)
12159 {
12160 struct perf_cpu_context *cpuctx;
12161 struct perf_event_context *ctx;
12162 struct pmu *pmu;
12163
12164 mutex_lock(&pmus_lock);
12165 list_for_each_entry(pmu, &pmus, entry) {
12166 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
12167 ctx = &cpuctx->ctx;
12168
12169 mutex_lock(&ctx->mutex);
12170 smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
12171 cpuctx->online = 0;
12172 mutex_unlock(&ctx->mutex);
12173 }
12174 cpumask_clear_cpu(cpu, perf_online_mask);
12175 mutex_unlock(&pmus_lock);
12176 }
12177 #else
12178
12179 static void perf_event_exit_cpu_context(int cpu) { }
12180
12181 #endif
12182
12183 int perf_event_init_cpu(unsigned int cpu)
12184 {
12185 struct perf_cpu_context *cpuctx;
12186 struct perf_event_context *ctx;
12187 struct pmu *pmu;
12188
12189 perf_swevent_init_cpu(cpu);
12190
12191 mutex_lock(&pmus_lock);
12192 cpumask_set_cpu(cpu, perf_online_mask);
12193 list_for_each_entry(pmu, &pmus, entry) {
12194 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
12195 ctx = &cpuctx->ctx;
12196
12197 mutex_lock(&ctx->mutex);
12198 cpuctx->online = 1;
12199 mutex_unlock(&ctx->mutex);
12200 }
12201 mutex_unlock(&pmus_lock);
12202
12203 return 0;
12204 }
12205
12206 int perf_event_exit_cpu(unsigned int cpu)
12207 {
12208 perf_event_exit_cpu_context(cpu);
12209 return 0;
12210 }
12211
12212 static int
12213 perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
12214 {
12215 int cpu;
12216
12217 for_each_online_cpu(cpu)
12218 perf_event_exit_cpu(cpu);
12219
12220 return NOTIFY_OK;
12221 }
12222
12223
12224
12225
12226
12227 static struct notifier_block perf_reboot_notifier = {
12228 .notifier_call = perf_reboot,
12229 .priority = INT_MIN,
12230 };
12231
12232 void __init perf_event_init(void)
12233 {
12234 int ret;
12235
12236 idr_init(&pmu_idr);
12237
12238 perf_event_init_all_cpus();
12239 init_srcu_struct(&pmus_srcu);
12240 perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
12241 perf_pmu_register(&perf_cpu_clock, NULL, -1);
12242 perf_pmu_register(&perf_task_clock, NULL, -1);
12243 perf_tp_register();
12244 perf_event_init_cpu(smp_processor_id());
12245 register_reboot_notifier(&perf_reboot_notifier);
12246
12247 ret = init_hw_breakpoint();
12248 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
12249
12250
12251
12252
12253
12254 BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
12255 != 1024);
12256 }
12257
12258 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
12259 char *page)
12260 {
12261 struct perf_pmu_events_attr *pmu_attr =
12262 container_of(attr, struct perf_pmu_events_attr, attr);
12263
12264 if (pmu_attr->event_str)
12265 return sprintf(page, "%s\n", pmu_attr->event_str);
12266
12267 return 0;
12268 }
12269 EXPORT_SYMBOL_GPL(perf_event_sysfs_show);
12270
12271 static int __init perf_event_sysfs_init(void)
12272 {
12273 struct pmu *pmu;
12274 int ret;
12275
12276 mutex_lock(&pmus_lock);
12277
12278 ret = bus_register(&pmu_bus);
12279 if (ret)
12280 goto unlock;
12281
12282 list_for_each_entry(pmu, &pmus, entry) {
12283 if (!pmu->name || pmu->type < 0)
12284 continue;
12285
12286 ret = pmu_dev_alloc(pmu);
12287 WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
12288 }
12289 pmu_bus_running = 1;
12290 ret = 0;
12291
12292 unlock:
12293 mutex_unlock(&pmus_lock);
12294
12295 return ret;
12296 }
12297 device_initcall(perf_event_sysfs_init);
12298
12299 #ifdef CONFIG_CGROUP_PERF
12300 static struct cgroup_subsys_state *
12301 perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
12302 {
12303 struct perf_cgroup *jc;
12304
12305 jc = kzalloc(sizeof(*jc), GFP_KERNEL);
12306 if (!jc)
12307 return ERR_PTR(-ENOMEM);
12308
12309 jc->info = alloc_percpu(struct perf_cgroup_info);
12310 if (!jc->info) {
12311 kfree(jc);
12312 return ERR_PTR(-ENOMEM);
12313 }
12314
12315 return &jc->css;
12316 }
12317
12318 static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
12319 {
12320 struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);
12321
12322 free_percpu(jc->info);
12323 kfree(jc);
12324 }
12325
12326 static int __perf_cgroup_move(void *info)
12327 {
12328 struct task_struct *task = info;
12329 rcu_read_lock();
12330 perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
12331 rcu_read_unlock();
12332 return 0;
12333 }
12334
12335 static void perf_cgroup_attach(struct cgroup_taskset *tset)
12336 {
12337 struct task_struct *task;
12338 struct cgroup_subsys_state *css;
12339
12340 cgroup_taskset_for_each(task, css, tset)
12341 task_function_call(task, __perf_cgroup_move, task);
12342 }
12343
12344 struct cgroup_subsys perf_event_cgrp_subsys = {
12345 .css_alloc = perf_cgroup_css_alloc,
12346 .css_free = perf_cgroup_css_free,
12347 .attach = perf_cgroup_attach,
12348
12349
12350
12351
12352
12353 .implicit_on_dfl = true,
12354 .threaded = true,
12355 };
12356 #endif