This source file includes the following definitions.
- __tp_field__init_uint
- tp_field__init_uint
- tp_field__ptr
- __tp_field__init_ptr
- tp_field__init_ptr
- perf_evsel__init_tp_uint_field
- perf_evsel__init_tp_ptr_field
- evsel__delete_priv
- perf_evsel__init_syscall_tp
- perf_evsel__init_augmented_syscall_tp
- perf_evsel__init_augmented_syscall_tp_args
- perf_evsel__init_augmented_syscall_tp_ret
- perf_evsel__init_raw_syscall_tp
- perf_evsel__raw_syscall_newtp
- strarray__scnprintf
- __syscall_arg__scnprintf_strarray
- syscall_arg__scnprintf_strarray
- syscall_arg__scnprintf_strarray_flags
- strarrays__scnprintf
- syscall_arg__scnprintf_strarrays
- syscall_arg__scnprintf_fd_at
- syscall_arg__scnprintf_hex
- syscall_arg__scnprintf_ptr
- syscall_arg__scnprintf_int
- syscall_arg__scnprintf_long
- syscall_arg__scnprintf_access_mode
- syscall_arg__scnprintf_pipe_flags
- syscall_arg__scnprintf_getrandom_flags
- syscall_fmt__cmp
- syscall_fmt__find
- syscall_fmt__find_by_alias
- fprintf_duration
- thread_trace__new
- thread__trace
- syscall_arg__set_ret_scnprintf
- thread_trace__files_entry
- thread__files_entry
- trace__set_fd_pathname
- thread__read_fd_path
- thread__fd_path
- syscall_arg__scnprintf_fd
- pid__scnprintf_fd
- syscall_arg__scnprintf_close_fd
- thread__set_filename_pos
- syscall_arg__scnprintf_augmented_string
- syscall_arg__scnprintf_filename
- trace__filter_duration
- __trace__fprintf_tstamp
- trace__fprintf_tstamp
- sig_handler
- trace__fprintf_comm_tid
- trace__fprintf_entry_head
- trace__process_event
- trace__tool_process
- trace__machine__resolve_kernel_addr
- trace__symbols_init
- trace__symbols__exit
- syscall__alloc_arg_fmts
- syscall__set_arg_fmts
- trace__read_syscall_info
- intcmp
- trace__validate_ev_qualifier
- trace__syscall_enabled
- syscall_arg__val
- syscall__scnprintf_name
- syscall__mask_val
- syscall__scnprintf_val
- syscall__scnprintf_args
- trace__syscall_info
- thread__update_stats
- trace__printf_interrupted_entry
- trace__fprintf_sample
- syscall__augmented_args
- trace__sys_enter
- trace__fprintf_sys_enter
- trace__resolve_callchain
- trace__fprintf_callchain
- errno_to_name
- trace__sys_exit
- trace__vfs_getname
- trace__sched_stat_runtime
- bpf_output__printer
- bpf_output__fprintf
- trace__event_handler
- print_location
- trace__pgfault
- trace__set_base_time
- trace__process_sample
- trace__record
- evlist__add_vfs_getname
- perf_evsel__new_pgfault
- trace__handle_event
- trace__add_syscall_newtp
- trace__set_ev_qualifier_tp_filter
- trace__find_bpf_program_by_title
- trace__find_syscall_bpf_prog
- trace__init_syscall_bpf_progs
- trace__bpf_prog_sys_enter_fd
- trace__bpf_prog_sys_exit_fd
- trace__init_bpf_map_syscall_args
- trace__set_ev_qualifier_bpf_filter
- __trace__init_syscalls_bpf_map
- trace__init_syscalls_bpf_map
- trace__find_usable_bpf_prog_entry
- trace__init_syscalls_bpf_prog_array_maps
- trace__set_ev_qualifier_bpf_filter
- trace__init_syscalls_bpf_map
- trace__find_bpf_program_by_title
- trace__init_syscalls_bpf_prog_array_maps
- trace__set_ev_qualifier_filter
- bpf_map__set_filter_pids
- trace__set_filter_loop_pids
- trace__set_filter_pids
- __trace__deliver_event
- __trace__flush_events
- trace__flush_events
- trace__deliver_event
- ordered_events__deliver_event
- trace__run
- trace__replay
- trace__fprintf_threads_header
- DEFINE_RESORT_RB
- thread__dump_stats
- trace__fprintf_thread
- thread__nr_events
- DEFINE_RESORT_RB
- trace__fprintf_thread_summary
- trace__set_duration
- trace__set_filter_pids_from_option
- trace__open_output
- parse_pagefaults
- evlist__set_evsel_handler
- evlist__set_syscall_tp_fields
- trace__parse_events_option
- trace__parse_cgroups
- trace__find_bpf_map_by_name
- trace__set_bpf_map_filtered_pids
- trace__set_bpf_map_syscalls
- trace__config
- cmd_trace
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 #include "util/record.h"
18 #include <traceevent/event-parse.h>
19 #include <api/fs/tracing_path.h>
20 #include <bpf/bpf.h>
21 #include "util/bpf_map.h"
22 #include "util/rlimit.h"
23 #include "builtin.h"
24 #include "util/cgroup.h"
25 #include "util/color.h"
26 #include "util/config.h"
27 #include "util/debug.h"
28 #include "util/dso.h"
29 #include "util/env.h"
30 #include "util/event.h"
31 #include "util/evsel.h"
32 #include "util/evsel_fprintf.h"
33 #include "util/synthetic-events.h"
34 #include "util/evlist.h"
35 #include "util/evswitch.h"
36 #include "util/mmap.h"
37 #include <subcmd/pager.h>
38 #include <subcmd/exec-cmd.h>
39 #include "util/machine.h"
40 #include "util/map.h"
41 #include "util/symbol.h"
42 #include "util/path.h"
43 #include "util/session.h"
44 #include "util/thread.h"
45 #include <subcmd/parse-options.h>
46 #include "util/strlist.h"
47 #include "util/intlist.h"
48 #include "util/thread_map.h"
49 #include "util/stat.h"
50 #include "util/tool.h"
51 #include "util/util.h"
52 #include "trace/beauty/beauty.h"
53 #include "trace-event.h"
54 #include "util/parse-events.h"
55 #include "util/bpf-loader.h"
56 #include "callchain.h"
57 #include "print_binary.h"
58 #include "string2.h"
59 #include "syscalltbl.h"
60 #include "rb_resort.h"
61 #include "../perf.h"
62
63 #include <errno.h>
64 #include <inttypes.h>
65 #include <poll.h>
66 #include <signal.h>
67 #include <stdlib.h>
68 #include <string.h>
69 #include <linux/err.h>
70 #include <linux/filter.h>
71 #include <linux/kernel.h>
72 #include <linux/random.h>
73 #include <linux/stringify.h>
74 #include <linux/time64.h>
75 #include <linux/zalloc.h>
76 #include <fcntl.h>
77 #include <sys/sysmacros.h>
78
79 #include <linux/ctype.h>
80
/* Fallback for build environments whose headers do not define O_CLOEXEC. */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC 02000000
#endif

/* Fallback for the value at which the Linux-specific fcntl() commands start. */
#if !defined(F_LINUX_SPECIFIC_BASE)
# define F_LINUX_SPECIFIC_BASE 1024
#endif
88
89 struct trace {
90 struct perf_tool tool;
91 struct syscalltbl *sctbl;
92 struct {
93 struct syscall *table;
94 struct bpf_map *map;
95 struct {
96 struct bpf_map *sys_enter,
97 *sys_exit;
98 } prog_array;
99 struct {
100 struct evsel *sys_enter,
101 *sys_exit,
102 *augmented;
103 } events;
104 struct bpf_program *unaugmented_prog;
105 } syscalls;
106 struct {
107 struct bpf_map *map;
108 } dump;
109 struct record_opts opts;
110 struct evlist *evlist;
111 struct machine *host;
112 struct thread *current;
113 struct bpf_object *bpf_obj;
114 struct cgroup *cgroup;
115 u64 base_time;
116 FILE *output;
117 unsigned long nr_events;
118 unsigned long nr_events_printed;
119 unsigned long max_events;
120 struct evswitch evswitch;
121 struct strlist *ev_qualifier;
122 struct {
123 size_t nr;
124 int *entries;
125 } ev_qualifier_ids;
126 struct {
127 size_t nr;
128 pid_t *entries;
129 struct bpf_map *map;
130 } filter_pids;
131 double duration_filter;
132 double runtime_ms;
133 struct {
134 u64 vfs_getname,
135 proc_getname;
136 } stats;
137 unsigned int max_stack;
138 unsigned int min_stack;
139 int raw_augmented_syscalls_args_size;
140 bool raw_augmented_syscalls;
141 bool fd_path_disabled;
142 bool sort_events;
143 bool not_ev_qualifier;
144 bool live;
145 bool full_time;
146 bool sched;
147 bool multiple_threads;
148 bool summary;
149 bool summary_only;
150 bool failure_only;
151 bool show_comm;
152 bool print_sample;
153 bool show_tool_stats;
154 bool trace_syscalls;
155 bool kernel_syscallchains;
156 s16 args_alignment;
157 bool show_tstamp;
158 bool show_duration;
159 bool show_zeros;
160 bool show_arg_names;
161 bool show_string_prefix;
162 bool force;
163 bool vfs_getname;
164 int trace_pgfaults;
165 struct {
166 struct ordered_events data;
167 u64 last;
168 } oe;
169 };
170
171 struct tp_field {
172 int offset;
173 union {
174 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
175 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
176 };
177 };
178
179 #define TP_UINT_FIELD(bits) \
180 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
181 { \
182 u##bits value; \
183 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
184 return value; \
185 }
186
187 TP_UINT_FIELD(8);
188 TP_UINT_FIELD(16);
189 TP_UINT_FIELD(32);
190 TP_UINT_FIELD(64);
191
192 #define TP_UINT_FIELD__SWAPPED(bits) \
193 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
194 { \
195 u##bits value; \
196 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
197 return bswap_##bits(value);\
198 }
199
200 TP_UINT_FIELD__SWAPPED(16);
201 TP_UINT_FIELD__SWAPPED(32);
202 TP_UINT_FIELD__SWAPPED(64);
203
204 static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
205 {
206 field->offset = offset;
207
208 switch (size) {
209 case 1:
210 field->integer = tp_field__u8;
211 break;
212 case 2:
213 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
214 break;
215 case 4:
216 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
217 break;
218 case 8:
219 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
220 break;
221 default:
222 return -1;
223 }
224
225 return 0;
226 }
227
228 static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
229 {
230 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
231 }
232
233 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
234 {
235 return sample->raw_data + field->offset;
236 }
237
238 static int __tp_field__init_ptr(struct tp_field *field, int offset)
239 {
240 field->offset = offset;
241 field->pointer = tp_field__ptr;
242 return 0;
243 }
244
245 static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
246 {
247 return __tp_field__init_ptr(field, format_field->offset);
248 }
249
250 struct syscall_tp {
251 struct tp_field id;
252 union {
253 struct tp_field args, ret;
254 };
255 };
256
257 static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
258 struct tp_field *field,
259 const char *name)
260 {
261 struct tep_format_field *format_field = perf_evsel__field(evsel, name);
262
263 if (format_field == NULL)
264 return -1;
265
266 return tp_field__init_uint(field, format_field, evsel->needs_swap);
267 }
268
/*
 * Initialize the syscall_tp member @name of evsel->priv as an unsigned
 * integer field, looking it up under the tracepoint field name "@name".
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc_tp_ = evsel->priv; \
		perf_evsel__init_tp_uint_field(evsel, &sc_tp_->name, #name); \
	})
272
/*
 * Look up the tracepoint field called @name in @evsel and set up @field to
 * return a pointer to it. Returns -1 if there is no such field.
 */
static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct tep_format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
284
/*
 * Initialize the syscall_tp member @name of evsel->priv as a pointer field,
 * looking it up under the tracepoint field name "@name".
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc_tp_ = evsel->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc_tp_->name, #name); \
	})
288
289 static void evsel__delete_priv(struct evsel *evsel)
290 {
291 zfree(&evsel->priv);
292 evsel__delete(evsel);
293 }
294
295 static int perf_evsel__init_syscall_tp(struct evsel *evsel)
296 {
297 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
298
299 if (evsel->priv != NULL) {
300 if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
301 perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
302 goto out_delete;
303 return 0;
304 }
305
306 return -ENOMEM;
307 out_delete:
308 zfree(&evsel->priv);
309 return -ENOENT;
310 }
311
312 static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
313 {
314 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
315
316 if (evsel->priv != NULL) {
317 struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
318 if (syscall_id == NULL)
319 syscall_id = perf_evsel__field(tp, "__syscall_nr");
320 if (syscall_id == NULL)
321 goto out_delete;
322 if (__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
323 goto out_delete;
324
325 return 0;
326 }
327
328 return -ENOMEM;
329 out_delete:
330 zfree(&evsel->priv);
331 return -EINVAL;
332 }
333
334 static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
335 {
336 struct syscall_tp *sc = evsel->priv;
337
338 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
339 }
340
341 static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
342 {
343 struct syscall_tp *sc = evsel->priv;
344
345 return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
346 }
347
348 static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
349 {
350 evsel->priv = malloc(sizeof(struct syscall_tp));
351 if (evsel->priv != NULL) {
352 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
353 goto out_delete;
354
355 evsel->handler = handler;
356 return 0;
357 }
358
359 return -ENOMEM;
360
361 out_delete:
362 zfree(&evsel->priv);
363 return -ENOENT;
364 }
365
/*
 * Create the evsel for the "raw_syscalls:@direction" tracepoint, falling
 * back to "syscalls:@direction", and initialize it with @handler.
 *
 * Returns the new evsel, or NULL when neither tracepoint can be created or
 * the initialization fails (the evsel is deleted in that case).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_raw_syscall_tp(evsel, handler) == 0)
		return evsel;

	evsel__delete_priv(evsel);
	return NULL;
}
386
/* Read the syscall_tp member @name of evsel->priv from @sample as an integer. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *sc_fields_ = evsel->priv; \
		sc_fields_->name.integer(&sc_fields_->name, sample); \
	})

/* Get a pointer to the syscall_tp member @name of evsel->priv inside @sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *sc_fields_ = evsel->priv; \
		sc_fields_->name.pointer(&sc_fields_->name, sample); \
	})
394
395 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
396 {
397 int idx = val - sa->offset;
398
399 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
400 size_t printed = scnprintf(bf, size, intfmt, val);
401 if (show_prefix)
402 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
403 return printed;
404 }
405
406 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
407 }
408
409 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
410 const char *intfmt,
411 struct syscall_arg *arg)
412 {
413 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
414 }
415
/* strarray formatter that prints values without a name as decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char unnamed_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
423
424 size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
425 {
426 return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
427 }
428
429 size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
430 {
431 size_t printed;
432 int i;
433
434 for (i = 0; i < sas->nr_entries; ++i) {
435 struct strarray *sa = sas->entries[i];
436 int idx = val - sa->offset;
437
438 if (idx >= 0 && idx < sa->nr_entries) {
439 if (sa->entries[idx] == NULL)
440 break;
441 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
442 }
443 }
444
445 printed = scnprintf(bf, size, intfmt, val);
446 if (show_prefix)
447 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
448 return printed;
449 }
450
451 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
452 struct syscall_arg *arg)
453 {
454 return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
455 }
456
457 #ifndef AT_FDCWD
458 #define AT_FDCWD -100
459 #endif
460
461 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
462 struct syscall_arg *arg)
463 {
464 int fd = arg->val;
465 const char *prefix = "AT_FD";
466
467 if (fd == AT_FDCWD)
468 return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
469
470 return syscall_arg__scnprintf_fd(bf, size, arg);
471 }
472
473 #define SCA_FDAT syscall_arg__scnprintf_fd_at
474
/* Forward declaration: the formatter table below refers to it via SCA_CLOSE_FD. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
479
480 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
481 {
482 return scnprintf(bf, size, "%#lx", arg->val);
483 }
484
485 size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
486 {
487 if (arg->val == 0)
488 return scnprintf(bf, size, "NULL");
489 return syscall_arg__scnprintf_hex(bf, size, arg);
490 }
491
492 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
493 {
494 return scnprintf(bf, size, "%d", arg->val);
495 }
496
497 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
498 {
499 return scnprintf(bf, size, "%ld", arg->val);
500 }
501
502 static const char *bpf_cmd[] = {
503 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
504 "MAP_GET_NEXT_KEY", "PROG_LOAD",
505 };
506 static DEFINE_STRARRAY(bpf_cmd, "BPF_");
507
508 static const char *fsmount_flags[] = {
509 [1] = "CLOEXEC",
510 };
511 static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
512
513 #include "trace/beauty/generated/fsconfig_arrays.c"
514
515 static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
516
517 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
518 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
519
520 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
521 static DEFINE_STRARRAY(itimers, "ITIMER_");
522
523 static const char *keyctl_options[] = {
524 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
525 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
526 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
527 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
528 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
529 };
530 static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
531
532 static const char *whences[] = { "SET", "CUR", "END",
533 #ifdef SEEK_DATA
534 "DATA",
535 #endif
536 #ifdef SEEK_HOLE
537 "HOLE",
538 #endif
539 };
540 static DEFINE_STRARRAY(whences, "SEEK_");
541
542 static const char *fcntl_cmds[] = {
543 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
544 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
545 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
546 "GETOWNER_UIDS",
547 };
548 static DEFINE_STRARRAY(fcntl_cmds, "F_");
549
550 static const char *fcntl_linux_specific_cmds[] = {
551 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
552 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
553 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
554 };
555
556 static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
557
558 static struct strarray *fcntl_cmds_arrays[] = {
559 &strarray__fcntl_cmds,
560 &strarray__fcntl_linux_specific_cmds,
561 };
562
563 static DEFINE_STRARRAYS(fcntl_cmds_arrays);
564
565 static const char *rlimit_resources[] = {
566 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
567 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
568 "RTTIME",
569 };
570 static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
571
572 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
573 static DEFINE_STRARRAY(sighow, "SIG_");
574
575 static const char *clockid[] = {
576 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
577 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
578 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
579 };
580 static DEFINE_STRARRAY(clockid, "CLOCK_");
581
582 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
583 struct syscall_arg *arg)
584 {
585 bool show_prefix = arg->show_string_prefix;
586 const char *suffix = "_OK";
587 size_t printed = 0;
588 int mode = arg->val;
589
590 if (mode == F_OK)
591 return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
592 #define P_MODE(n) \
593 if (mode & n##_OK) { \
594 printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
595 mode &= ~n##_OK; \
596 }
597
598 P_MODE(R);
599 P_MODE(W);
600 P_MODE(X);
601 #undef P_MODE
602
603 if (mode)
604 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
605
606 return printed;
607 }
608
609 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
610
/* Forward declaration: the formatter table below refers to it via SCA_FILENAME. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
615
616 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
617 struct syscall_arg *arg)
618 {
619 bool show_prefix = arg->show_string_prefix;
620 const char *prefix = "O_";
621 int printed = 0, flags = arg->val;
622
623 #define P_FLAG(n) \
624 if (flags & O_##n) { \
625 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
626 flags &= ~O_##n; \
627 }
628
629 P_FLAG(CLOEXEC);
630 P_FLAG(NONBLOCK);
631 #undef P_FLAG
632
633 if (flags)
634 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
635
636 return printed;
637 }
638
639 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
640
641 #ifndef GRND_NONBLOCK
642 #define GRND_NONBLOCK 0x0001
643 #endif
644 #ifndef GRND_RANDOM
645 #define GRND_RANDOM 0x0002
646 #endif
647
648 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
649 struct syscall_arg *arg)
650 {
651 bool show_prefix = arg->show_string_prefix;
652 const char *prefix = "GRND_";
653 int printed = 0, flags = arg->val;
654
655 #define P_FLAG(n) \
656 if (flags & GRND_##n) { \
657 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
658 flags &= ~GRND_##n; \
659 }
660
661 P_FLAG(RANDOM);
662 P_FLAG(NONBLOCK);
663 #undef P_FLAG
664
665 if (flags)
666 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
667
668 return printed;
669 }
670
671 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
672
/*
 * Initializers for a syscall_arg_fmt entry backed by strarray__<array>.
 * The first parameter is not used by the expansion; it only labels the
 * argument at the point of use.
 */
#define STRARRAY(name, array) { \
	.scnprintf = SCA_STRARRAY, \
	.parm	   = &strarray__##array, \
}

#define STRARRAY_FLAGS(name, array) { \
	.scnprintf = SCA_STRARRAY_FLAGS, \
	.parm	   = &strarray__##array, \
}
680
681 #include "trace/beauty/arch_errno_names.c"
682 #include "trace/beauty/eventfd.c"
683 #include "trace/beauty/futex_op.c"
684 #include "trace/beauty/futex_val3.c"
685 #include "trace/beauty/mmap.c"
686 #include "trace/beauty/mode_t.c"
687 #include "trace/beauty/msg_flags.c"
688 #include "trace/beauty/open_flags.c"
689 #include "trace/beauty/perf_event_open.c"
690 #include "trace/beauty/pid.c"
691 #include "trace/beauty/sched_policy.c"
692 #include "trace/beauty/seccomp.c"
693 #include "trace/beauty/signum.c"
694 #include "trace/beauty/socket_type.c"
695 #include "trace/beauty/waitid_options.c"
696
/*
 * How to print one syscall argument: an optional formatter, an optional
 * value-masking hook, a formatter-specific parameter, an optional name and
 * a show_zero flag.
 */
struct syscall_arg_fmt {
	size_t	      (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void	      *parm;
	const char    *name;
	bool	      show_zero;
};
704
705 static struct syscall_fmt {
706 const char *name;
707 const char *alias;
708 struct {
709 const char *sys_enter,
710 *sys_exit;
711 } bpf_prog_name;
712 struct syscall_arg_fmt arg[6];
713 u8 nr_args;
714 bool errpid;
715 bool timeout;
716 bool hexret;
717 } syscall_fmts[] = {
718 { .name = "access",
719 .arg = { [1] = { .scnprintf = SCA_ACCMODE, }, }, },
720 { .name = "arch_prctl",
721 .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, },
722 [1] = { .scnprintf = SCA_PTR, }, }, },
723 { .name = "bind",
724 .arg = { [0] = { .scnprintf = SCA_INT, },
725 [1] = { .scnprintf = SCA_SOCKADDR, },
726 [2] = { .scnprintf = SCA_INT, }, }, },
727 { .name = "bpf",
728 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
729 { .name = "brk", .hexret = true,
730 .arg = { [0] = { .scnprintf = SCA_PTR, }, }, },
731 { .name = "clock_gettime",
732 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
733 { .name = "clone", .errpid = true, .nr_args = 5,
734 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
735 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
736 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
737 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
738 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
739 { .name = "close",
740 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, }, }, },
741 { .name = "connect",
742 .arg = { [0] = { .scnprintf = SCA_INT, },
743 [1] = { .scnprintf = SCA_SOCKADDR, },
744 [2] = { .scnprintf = SCA_INT, }, }, },
745 { .name = "epoll_ctl",
746 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
747 { .name = "eventfd2",
748 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, }, }, },
749 { .name = "fchmodat",
750 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
751 { .name = "fchownat",
752 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
753 { .name = "fcntl",
754 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,
755 .parm = &strarrays__fcntl_cmds_arrays,
756 .show_zero = true, },
757 [2] = { .scnprintf = SCA_FCNTL_ARG, }, }, },
758 { .name = "flock",
759 .arg = { [1] = { .scnprintf = SCA_FLOCK, }, }, },
760 { .name = "fsconfig",
761 .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
762 { .name = "fsmount",
763 .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
764 [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, }, }, },
765 { .name = "fspick",
766 .arg = { [0] = { .scnprintf = SCA_FDAT, },
767 [1] = { .scnprintf = SCA_FILENAME, },
768 [2] = { .scnprintf = SCA_FSPICK_FLAGS, }, }, },
769 { .name = "fstat", .alias = "newfstat", },
770 { .name = "fstatat", .alias = "newfstatat", },
771 { .name = "futex",
772 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, },
773 [5] = { .scnprintf = SCA_FUTEX_VAL3, }, }, },
774 { .name = "futimesat",
775 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
776 { .name = "getitimer",
777 .arg = { [0] = STRARRAY(which, itimers), }, },
778 { .name = "getpid", .errpid = true, },
779 { .name = "getpgid", .errpid = true, },
780 { .name = "getppid", .errpid = true, },
781 { .name = "getrandom",
782 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, }, }, },
783 { .name = "getrlimit",
784 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
785 { .name = "gettid", .errpid = true, },
786 { .name = "ioctl",
787 .arg = {
788 #if defined(__i386__) || defined(__x86_64__)
789
790
791
792 [1] = { .scnprintf = SCA_IOCTL_CMD, },
793 [2] = { .scnprintf = SCA_HEX, }, }, },
794 #else
795 [2] = { .scnprintf = SCA_HEX, }, }, },
796 #endif
797 { .name = "kcmp", .nr_args = 5,
798 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
799 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
800 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
801 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
802 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
803 { .name = "keyctl",
804 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
805 { .name = "kill",
806 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
807 { .name = "linkat",
808 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
809 { .name = "lseek",
810 .arg = { [2] = STRARRAY(whence, whences), }, },
811 { .name = "lstat", .alias = "newlstat", },
812 { .name = "madvise",
813 .arg = { [0] = { .scnprintf = SCA_HEX, },
814 [2] = { .scnprintf = SCA_MADV_BHV, }, }, },
815 { .name = "mkdirat",
816 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
817 { .name = "mknodat",
818 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
819 { .name = "mmap", .hexret = true,
820
821 #if defined(__s390x__)
822 .alias = "old_mmap",
823 #endif
824 .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, },
825 [3] = { .scnprintf = SCA_MMAP_FLAGS, },
826 [5] = { .scnprintf = SCA_HEX, }, }, },
827 { .name = "mount",
828 .arg = { [0] = { .scnprintf = SCA_FILENAME, },
829 [3] = { .scnprintf = SCA_MOUNT_FLAGS,
830 .mask_val = SCAMV_MOUNT_FLAGS, }, }, },
831 { .name = "move_mount",
832 .arg = { [0] = { .scnprintf = SCA_FDAT, },
833 [1] = { .scnprintf = SCA_FILENAME, },
834 [2] = { .scnprintf = SCA_FDAT, },
835 [3] = { .scnprintf = SCA_FILENAME, },
836 [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, }, }, },
837 { .name = "mprotect",
838 .arg = { [0] = { .scnprintf = SCA_HEX, },
839 [2] = { .scnprintf = SCA_MMAP_PROT, }, }, },
840 { .name = "mq_unlink",
841 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
842 { .name = "mremap", .hexret = true,
843 .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, }, }, },
844 { .name = "name_to_handle_at",
845 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
846 { .name = "newfstatat",
847 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
848 { .name = "open",
849 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
850 { .name = "open_by_handle_at",
851 .arg = { [0] = { .scnprintf = SCA_FDAT, },
852 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
853 { .name = "openat",
854 .arg = { [0] = { .scnprintf = SCA_FDAT, },
855 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
856 { .name = "perf_event_open",
857 .arg = { [2] = { .scnprintf = SCA_INT, },
858 [3] = { .scnprintf = SCA_FD, },
859 [4] = { .scnprintf = SCA_PERF_FLAGS, }, }, },
860 { .name = "pipe2",
861 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, }, }, },
862 { .name = "pkey_alloc",
863 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, }, }, },
864 { .name = "pkey_free",
865 .arg = { [0] = { .scnprintf = SCA_INT, }, }, },
866 { .name = "pkey_mprotect",
867 .arg = { [0] = { .scnprintf = SCA_HEX, },
868 [2] = { .scnprintf = SCA_MMAP_PROT, },
869 [3] = { .scnprintf = SCA_INT, }, }, },
870 { .name = "poll", .timeout = true, },
871 { .name = "ppoll", .timeout = true, },
872 { .name = "prctl",
873 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, },
874 [1] = { .scnprintf = SCA_PRCTL_ARG2, },
875 [2] = { .scnprintf = SCA_PRCTL_ARG3, }, }, },
876 { .name = "pread", .alias = "pread64", },
877 { .name = "preadv", .alias = "pread", },
878 { .name = "prlimit64",
879 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
880 { .name = "pwrite", .alias = "pwrite64", },
881 { .name = "readlinkat",
882 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
883 { .name = "recvfrom",
884 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
885 { .name = "recvmmsg",
886 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
887 { .name = "recvmsg",
888 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
889 { .name = "renameat",
890 .arg = { [0] = { .scnprintf = SCA_FDAT, },
891 [2] = { .scnprintf = SCA_FDAT, }, }, },
892 { .name = "renameat2",
893 .arg = { [0] = { .scnprintf = SCA_FDAT, },
894 [2] = { .scnprintf = SCA_FDAT, },
895 [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, }, }, },
896 { .name = "rt_sigaction",
897 .arg = { [0] = { .scnprintf = SCA_SIGNUM, }, }, },
898 { .name = "rt_sigprocmask",
899 .arg = { [0] = STRARRAY(how, sighow), }, },
900 { .name = "rt_sigqueueinfo",
901 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
902 { .name = "rt_tgsigqueueinfo",
903 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
904 { .name = "sched_setscheduler",
905 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, }, }, },
906 { .name = "seccomp",
907 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, },
908 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, }, }, },
909 { .name = "select", .timeout = true, },
910 { .name = "sendfile", .alias = "sendfile64", },
911 { .name = "sendmmsg",
912 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
913 { .name = "sendmsg",
914 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
915 { .name = "sendto",
916 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, },
917 [4] = { .scnprintf = SCA_SOCKADDR, }, }, },
918 { .name = "set_tid_address", .errpid = true, },
919 { .name = "setitimer",
920 .arg = { [0] = STRARRAY(which, itimers), }, },
921 { .name = "setrlimit",
922 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
923 { .name = "socket",
924 .arg = { [0] = STRARRAY(family, socket_families),
925 [1] = { .scnprintf = SCA_SK_TYPE, },
926 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
927 { .name = "socketpair",
928 .arg = { [0] = STRARRAY(family, socket_families),
929 [1] = { .scnprintf = SCA_SK_TYPE, },
930 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
931 { .name = "stat", .alias = "newstat", },
932 { .name = "statx",
933 .arg = { [0] = { .scnprintf = SCA_FDAT, },
934 [2] = { .scnprintf = SCA_STATX_FLAGS, } ,
935 [3] = { .scnprintf = SCA_STATX_MASK, }, }, },
936 { .name = "swapoff",
937 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
938 { .name = "swapon",
939 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
940 { .name = "symlinkat",
941 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
942 { .name = "sync_file_range",
943 .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, }, }, },
944 { .name = "tgkill",
945 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
946 { .name = "tkill",
947 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
948 { .name = "umount2", .alias = "umount",
949 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
950 { .name = "uname", .alias = "newuname", },
951 { .name = "unlinkat",
952 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
953 { .name = "utimensat",
954 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
955 { .name = "wait4", .errpid = true,
956 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
957 { .name = "waitid", .errpid = true,
958 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
959 };
960
961 static int syscall_fmt__cmp(const void *name, const void *fmtp)
962 {
963 const struct syscall_fmt *fmt = fmtp;
964 return strcmp(name, fmt->name);
965 }
966
967 static struct syscall_fmt *syscall_fmt__find(const char *name)
968 {
969 const int nmemb = ARRAY_SIZE(syscall_fmts);
970 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
971 }
972
973 static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
974 {
975 int i, nmemb = ARRAY_SIZE(syscall_fmts);
976
977 for (i = 0; i < nmemb; ++i) {
978 if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
979 return &syscall_fmts[i];
980 }
981
982 return NULL;
983 }
984
985
986
987
988
989
990
/*
 * Per syscall-id state, filled in lazily by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" format; may hold an ERR_PTR, test with IS_ERR() */
	struct tep_event    *tp_format;
	/* number of arguments; excludes a leading "__syscall_nr"/"nr" field */
	int		    nr_args;
	/* offset + size of the last tracepoint field, see syscall__set_arg_fmts() */
	int		    args_size;
	/* NOTE(review): not referenced in this part of the file; presumably per-syscall BPF programs - confirm */
	struct {
		struct bpf_program *sys_enter,
				   *sys_exit;
	}		    bpf_prog;
	/* name is "exit" or "exit_group" */
	bool		    is_exit;
	/* name is "open" or "openat" */
	bool		    is_open;
	/* the syscall table has no name for this id */
	bool		    nonexistent;
	/* tracepoint fields describing the arguments, NULL if there are none */
	struct tep_format_field *args;
	const char	    *name;
	/* entry from syscall_fmts, NULL when the syscall has no special formatting */
	struct syscall_fmt  *fmt;
	/* per argument formatters, allocated by syscall__alloc_arg_fmts() */
	struct syscall_arg_fmt *arg_fmt;
};
1007
1008
1009
1010
1011
1012
/*
 * NOTE(review): not used in this part of the file; presumably the value type
 * of a BPF map indexed by syscall id, in which case the layout must match the
 * BPF side - confirm before changing any field.
 */
struct bpf_map_syscall_entry {
	bool	enabled;
	/* one slot per syscall argument (6 of them) */
	u16	string_args_len[6];
};
1017
1018
1019
1020
1021
1022
1023
1024
1025 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1026 {
1027 double duration = (double)t / NSEC_PER_MSEC;
1028 size_t printed = fprintf(fp, "(");
1029
1030 if (!calculated)
1031 printed += fprintf(fp, " ");
1032 else if (duration >= 1.0)
1033 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1034 else if (duration >= 0.01)
1035 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1036 else
1037 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1038 return printed + fprintf(fp, "): ");
1039 }
1040
1041
1042
1043
1044
1045
1046
1047
/*
 * Per thread tracing state, stored in the thread's priv pointer
 * (see thread__trace()).
 */
struct thread_trace {
	/* sample->time of the last sys_enter seen for this thread */
	u64		  entry_time;
	/* a sys_enter was formatted into entry_str but not printed yet */
	bool		  entry_pending;
	/* bumped on every thread__trace() call */
	unsigned long	  nr_events;
	unsigned long	  pfmaj, pfmin;
	/* buffer of trace__entry_str_size bytes holding the formatted sys_enter */
	char		  *entry_str;
	double		  runtime_ms;
	/* optional formatter for the return value, reset for each syscall entry */
	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	struct {
		/* value of the filename pointer argument */
		unsigned long ptr;
		/* offset into entry_str where the filename is to be placed */
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		/* highest valid index in table, -1 while table is empty */
		int	      max;
		/* indexed by fd, grown on demand by thread_trace__files_entry() */
		struct file   *table;
	} files;

	/* syscall id -> struct stats (in ->priv), see thread__update_stats() */
	struct intlist *syscall_stats;
};
1070
1071 static struct thread_trace *thread_trace__new(void)
1072 {
1073 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1074
1075 if (ttrace) {
1076 ttrace->files.max = -1;
1077 ttrace->syscall_stats = intlist__new(NULL);
1078 }
1079
1080 return ttrace;
1081 }
1082
1083 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1084 {
1085 struct thread_trace *ttrace;
1086
1087 if (thread == NULL)
1088 goto fail;
1089
1090 if (thread__priv(thread) == NULL)
1091 thread__set_priv(thread, thread_trace__new());
1092
1093 if (thread__priv(thread) == NULL)
1094 goto fail;
1095
1096 ttrace = thread__priv(thread);
1097 ++ttrace->nr_events;
1098
1099 return ttrace;
1100 fail:
1101 color_fprintf(fp, PERF_COLOR_RED,
1102 "WARNING: not enough memory, dropping samples!\n");
1103 return NULL;
1104 }
1105
1106
1107 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1108 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1109 {
1110 struct thread_trace *ttrace = thread__priv(arg->thread);
1111
1112 ttrace->ret_scnprintf = ret_scnprintf;
1113 }
1114
/* NOTE(review): bit flags, presumably selecting major/minor page fault tracing - confirm at the users */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per thread entry_str buffer, see trace__sys_enter() */
static const size_t trace__entry_str_size = 2048;
1119
1120 static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1121 {
1122 if (fd < 0)
1123 return NULL;
1124
1125 if (fd > ttrace->files.max) {
1126 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1127
1128 if (nfiles == NULL)
1129 return NULL;
1130
1131 if (ttrace->files.max != -1) {
1132 memset(nfiles + ttrace->files.max + 1, 0,
1133 (fd - ttrace->files.max) * sizeof(struct file));
1134 } else {
1135 memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1136 }
1137
1138 ttrace->files.table = nfiles;
1139 ttrace->files.max = fd;
1140 }
1141
1142 return ttrace->files.table + fd;
1143 }
1144
/*
 * Same as thread_trace__files_entry(), but starting from the thread: looks up
 * the fd table slot for @fd in the tracing state attached to @thread.
 */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1149
1150 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1151 {
1152 struct thread_trace *ttrace = thread__priv(thread);
1153 struct file *file = thread_trace__files_entry(ttrace, fd);
1154
1155 if (file != NULL) {
1156 struct stat st;
1157 if (stat(pathname, &st) == 0)
1158 file->dev_maj = major(st.st_rdev);
1159 file->pathname = strdup(pathname);
1160 if (file->pathname)
1161 return 0;
1162 }
1163
1164 return -1;
1165 }
1166
1167 static int thread__read_fd_path(struct thread *thread, int fd)
1168 {
1169 char linkname[PATH_MAX], pathname[PATH_MAX];
1170 struct stat st;
1171 int ret;
1172
1173 if (thread->pid_ == thread->tid) {
1174 scnprintf(linkname, sizeof(linkname),
1175 "/proc/%d/fd/%d", thread->pid_, fd);
1176 } else {
1177 scnprintf(linkname, sizeof(linkname),
1178 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1179 }
1180
1181 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1182 return -1;
1183
1184 ret = readlink(linkname, pathname, sizeof(pathname));
1185
1186 if (ret < 0 || ret > st.st_size)
1187 return -1;
1188
1189 pathname[ret] = '\0';
1190 return trace__set_fd_pathname(thread, fd, pathname);
1191 }
1192
1193 static const char *thread__fd_path(struct thread *thread, int fd,
1194 struct trace *trace)
1195 {
1196 struct thread_trace *ttrace = thread__priv(thread);
1197
1198 if (ttrace == NULL || trace->fd_path_disabled)
1199 return NULL;
1200
1201 if (fd < 0)
1202 return NULL;
1203
1204 if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1205 if (!trace->live)
1206 return NULL;
1207 ++trace->stats.proc_getname;
1208 if (thread__read_fd_path(thread, fd))
1209 return NULL;
1210 }
1211
1212 return ttrace->files.table[fd].pathname;
1213 }
1214
1215 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
1216 {
1217 int fd = arg->val;
1218 size_t printed = scnprintf(bf, size, "%d", fd);
1219 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1220
1221 if (path)
1222 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1223
1224 return printed;
1225 }
1226
1227 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1228 {
1229 size_t printed = scnprintf(bf, size, "%d", fd);
1230 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1231
1232 if (thread) {
1233 const char *path = thread__fd_path(thread, fd, trace);
1234
1235 if (path)
1236 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1237
1238 thread__put(thread);
1239 }
1240
1241 return printed;
1242 }
1243
1244 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1245 struct syscall_arg *arg)
1246 {
1247 int fd = arg->val;
1248 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1249 struct thread_trace *ttrace = thread__priv(arg->thread);
1250
1251 if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1252 zfree(&ttrace->files.table[fd].pathname);
1253
1254 return printed;
1255 }
1256
1257 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1258 unsigned long ptr)
1259 {
1260 struct thread_trace *ttrace = thread__priv(thread);
1261
1262 ttrace->filename.ptr = ptr;
1263 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1264 }
1265
1266 static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1267 {
1268 struct augmented_arg *augmented_arg = arg->augmented.args;
1269 size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1270
1271
1272
1273
1274 int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1275
1276 arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1277 arg->augmented.size -= consumed;
1278
1279 return printed;
1280 }
1281
1282 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1283 struct syscall_arg *arg)
1284 {
1285 unsigned long ptr = arg->val;
1286
1287 if (arg->augmented.args)
1288 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1289
1290 if (!arg->trace->vfs_getname)
1291 return scnprintf(bf, size, "%#x", ptr);
1292
1293 thread__set_filename_pos(arg->thread, bf, ptr);
1294 return 0;
1295 }
1296
1297 static bool trace__filter_duration(struct trace *trace, double t)
1298 {
1299 return t < (trace->duration_filter * NSEC_PER_MSEC);
1300 }
1301
1302 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1303 {
1304 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1305
1306 return fprintf(fp, "%10.3f ", ts);
1307 }
1308
1309
1310
1311
1312
1313
1314
1315 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1316 {
1317 if (tstamp > 0)
1318 return __trace__fprintf_tstamp(trace, tstamp, fp);
1319
1320 return fprintf(fp, " ? ");
1321 }
1322
/*
 * Set asynchronously from sig_handler() and read from normal context, hence
 * volatile sig_atomic_t: the only object type the C standard guarantees can
 * be safely written from a signal handler.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Request termination. 'interrupted' additionally tells whether the request
 * came from SIGINT as opposed to any other signal this handler is installed
 * for.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1331
1332 static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
1333 {
1334 size_t printed = 0;
1335
1336 if (trace->multiple_threads) {
1337 if (trace->show_comm)
1338 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1339 printed += fprintf(fp, "%d ", thread->tid);
1340 }
1341
1342 return printed;
1343 }
1344
1345 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1346 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1347 {
1348 size_t printed = 0;
1349
1350 if (trace->show_tstamp)
1351 printed = trace__fprintf_tstamp(trace, tstamp, fp);
1352 if (trace->show_duration)
1353 printed += fprintf_duration(duration, duration_calculated, fp);
1354 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1355 }
1356
1357 static int trace__process_event(struct trace *trace, struct machine *machine,
1358 union perf_event *event, struct perf_sample *sample)
1359 {
1360 int ret = 0;
1361
1362 switch (event->header.type) {
1363 case PERF_RECORD_LOST:
1364 color_fprintf(trace->output, PERF_COLOR_RED,
1365 "LOST %" PRIu64 " events!\n", event->lost.lost);
1366 ret = machine__process_lost_event(machine, event, sample);
1367 break;
1368 default:
1369 ret = machine__process_event(machine, event, sample);
1370 break;
1371 }
1372
1373 return ret;
1374 }
1375
1376 static int trace__tool_process(struct perf_tool *tool,
1377 union perf_event *event,
1378 struct perf_sample *sample,
1379 struct machine *machine)
1380 {
1381 struct trace *trace = container_of(tool, struct trace, tool);
1382 return trace__process_event(trace, machine, event, sample);
1383 }
1384
1385 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1386 {
1387 struct machine *machine = vmachine;
1388
1389 if (machine->kptr_restrict_warned)
1390 return NULL;
1391
1392 if (symbol_conf.kptr_restrict) {
1393 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1394 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1395 "Kernel samples will not be resolved.\n");
1396 machine->kptr_restrict_warned = true;
1397 return NULL;
1398 }
1399
1400 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1401 }
1402
1403 static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1404 {
1405 int err = symbol__init(NULL);
1406
1407 if (err)
1408 return err;
1409
1410 trace->host = machine__new_host();
1411 if (trace->host == NULL)
1412 return -ENOMEM;
1413
1414 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1415 if (err < 0)
1416 goto out;
1417
1418 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1419 evlist->core.threads, trace__tool_process, false,
1420 1);
1421 out:
1422 if (err)
1423 symbol__exit();
1424
1425 return err;
1426 }
1427
1428 static void trace__symbols__exit(struct trace *trace)
1429 {
1430 machine__exit(trace->host);
1431 trace->host = NULL;
1432
1433 symbol__exit();
1434 }
1435
1436 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1437 {
1438 int idx;
1439
1440 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1441 nr_args = sc->fmt->nr_args;
1442
1443 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1444 if (sc->arg_fmt == NULL)
1445 return -1;
1446
1447 for (idx = 0; idx < nr_args; ++idx) {
1448 if (sc->fmt)
1449 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1450 }
1451
1452 sc->nr_args = nr_args;
1453 return 0;
1454 }
1455
1456 static int syscall__set_arg_fmts(struct syscall *sc)
1457 {
1458 struct tep_format_field *field, *last_field = NULL;
1459 int idx = 0, len;
1460
1461 for (field = sc->args; field; field = field->next, ++idx) {
1462 last_field = field;
1463
1464 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1465 continue;
1466
1467 len = strlen(field->name);
1468
1469 if (strcmp(field->type, "const char *") == 0 &&
1470 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1471 strstr(field->name, "path") != NULL))
1472 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1473 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1474 sc->arg_fmt[idx].scnprintf = SCA_PTR;
1475 else if (strcmp(field->type, "pid_t") == 0)
1476 sc->arg_fmt[idx].scnprintf = SCA_PID;
1477 else if (strcmp(field->type, "umode_t") == 0)
1478 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
1479 else if ((strcmp(field->type, "int") == 0 ||
1480 strcmp(field->type, "unsigned int") == 0 ||
1481 strcmp(field->type, "long") == 0) &&
1482 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1483
1484
1485
1486
1487
1488
1489
1490 sc->arg_fmt[idx].scnprintf = SCA_FD;
1491 }
1492 }
1493
1494 if (last_field)
1495 sc->args_size = last_field->offset + last_field->size;
1496
1497 return 0;
1498 }
1499
1500 static int trace__read_syscall_info(struct trace *trace, int id)
1501 {
1502 char tp_name[128];
1503 struct syscall *sc;
1504 const char *name = syscalltbl__name(trace->sctbl, id);
1505
1506 if (trace->syscalls.table == NULL) {
1507 trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1508 if (trace->syscalls.table == NULL)
1509 return -ENOMEM;
1510 }
1511
1512 sc = trace->syscalls.table + id;
1513 if (sc->nonexistent)
1514 return 0;
1515
1516 if (name == NULL) {
1517 sc->nonexistent = true;
1518 return 0;
1519 }
1520
1521 sc->name = name;
1522 sc->fmt = syscall_fmt__find(sc->name);
1523
1524 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1525 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1526
1527 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1528 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1529 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1530 }
1531
1532 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1533 return -ENOMEM;
1534
1535 if (IS_ERR(sc->tp_format))
1536 return PTR_ERR(sc->tp_format);
1537
1538 sc->args = sc->tp_format->format.fields;
1539
1540
1541
1542
1543
1544 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1545 sc->args = sc->args->next;
1546 --sc->nr_args;
1547 }
1548
1549 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1550 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
1551
1552 return syscall__set_arg_fmts(sc);
1553 }
1554
/*
 * qsort()/bsearch() comparator for arrays of int.
 *
 * Returns a negative, zero or positive value when *a is respectively less
 * than, equal to or greater than *b. The comparison is done without
 * subtracting the operands, which would overflow (undefined behaviour) for
 * values far enough apart, e.g. INT_MIN and 1.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a, another = *(const int *)b;

	return (one > another) - (one < another);
}
1561
1562 static int trace__validate_ev_qualifier(struct trace *trace)
1563 {
1564 int err = 0;
1565 bool printed_invalid_prefix = false;
1566 struct str_node *pos;
1567 size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1568
1569 trace->ev_qualifier_ids.entries = malloc(nr_allocated *
1570 sizeof(trace->ev_qualifier_ids.entries[0]));
1571
1572 if (trace->ev_qualifier_ids.entries == NULL) {
1573 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1574 trace->output);
1575 err = -EINVAL;
1576 goto out;
1577 }
1578
1579 strlist__for_each_entry(pos, trace->ev_qualifier) {
1580 const char *sc = pos->s;
1581 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1582
1583 if (id < 0) {
1584 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1585 if (id >= 0)
1586 goto matches;
1587
1588 if (!printed_invalid_prefix) {
1589 pr_debug("Skipping unknown syscalls: ");
1590 printed_invalid_prefix = true;
1591 } else {
1592 pr_debug(", ");
1593 }
1594
1595 pr_debug("%s", sc);
1596 continue;
1597 }
1598 matches:
1599 trace->ev_qualifier_ids.entries[nr_used++] = id;
1600 if (match_next == -1)
1601 continue;
1602
1603 while (1) {
1604 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1605 if (id < 0)
1606 break;
1607 if (nr_allocated == nr_used) {
1608 void *entries;
1609
1610 nr_allocated += 8;
1611 entries = realloc(trace->ev_qualifier_ids.entries,
1612 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1613 if (entries == NULL) {
1614 err = -ENOMEM;
1615 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1616 goto out_free;
1617 }
1618 trace->ev_qualifier_ids.entries = entries;
1619 }
1620 trace->ev_qualifier_ids.entries[nr_used++] = id;
1621 }
1622 }
1623
1624 trace->ev_qualifier_ids.nr = nr_used;
1625 qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
1626 out:
1627 if (printed_invalid_prefix)
1628 pr_debug("\n");
1629 return err;
1630 out_free:
1631 zfree(&trace->ev_qualifier_ids.entries);
1632 trace->ev_qualifier_ids.nr = 0;
1633 goto out;
1634 }
1635
1636 static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1637 {
1638 bool in_ev_qualifier;
1639
1640 if (trace->ev_qualifier_ids.nr == 0)
1641 return true;
1642
1643 in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1644 trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1645
1646 if (in_ev_qualifier)
1647 return !trace->not_ev_qualifier;
1648
1649 return trace->not_ev_qualifier;
1650 }
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1661 {
1662 unsigned long val;
1663 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1664
1665 memcpy(&val, p, sizeof(val));
1666 return val;
1667 }
1668
1669 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1670 struct syscall_arg *arg)
1671 {
1672 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1673 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1674
1675 return scnprintf(bf, size, "arg%d: ", arg->idx);
1676 }
1677
1678
1679
1680
1681
1682
1683 static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
1684 {
1685 if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
1686 return sc->arg_fmt[arg->idx].mask_val(arg, val);
1687
1688 return val;
1689 }
1690
1691 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1692 struct syscall_arg *arg, unsigned long val)
1693 {
1694 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1695 arg->val = val;
1696 if (sc->arg_fmt[arg->idx].parm)
1697 arg->parm = sc->arg_fmt[arg->idx].parm;
1698 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1699 }
1700 return scnprintf(bf, size, "%ld", val);
1701 }
1702
/*
 * Format all the arguments of a syscall entry into @bf, comma separated.
 *
 * @sc:                  description of the syscall.
 * @bf, @size:           output buffer.
 * @args:                raw argument payload, read as unsigned longs.
 * @augmented_args:      extra payload handed to the formatters via
 *                       arg.augmented, may be NULL.
 * @augmented_args_size: size of that extra payload.
 * @thread:              thread doing the syscall, its tracing state must be
 *                       already attached.
 *
 * Returns the number of characters written to @bf. Note that nothing at all
 * is written, not even a terminating NUL, when no argument gets printed.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, void *augmented_args, int augmented_args_size,
				      struct trace *trace, struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	/* mask bit for the argument at arg.idx, kept in step with it */
	u8 bit = 1;
	struct syscall_arg arg = {
		.args	= args,
		.augmented = {
			.size = augmented_args_size,
			.args = augmented_args,
		},
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
		.show_string_prefix = trace->show_string_prefix,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Start with no return value formatter, an argument formatter may
	 * install one via syscall_arg__set_ret_scnprintf().
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct tep_format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/*
			 * Argument suppressed via arg.mask, presumably set by
			 * the formatter of an earlier argument.
			 */
			if (arg.mask & bit)
				continue;

			val = syscall_arg__val(&arg, arg.idx);
			/* Apply the argument's mask_val hook, if it has one. */
			val = syscall__mask_val(sc, &arg, val);

			/*
			 * Skip zero valued arguments, unless showing zeros
			 * was asked for or the argument has a parm and is
			 * either flagged show_zero or uses a strarray
			 * formatter.
			 */
			if (val == 0 &&
			    !trace->show_zeros &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			/* Separator, except before the first printed argument. */
			printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");

			if (trace->show_arg_names)
				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);

			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * The tracepoint format could not be read, so there are no
		 * field descriptions: print sc->nr_args raw arguments, always
		 * with a name.
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}
1789
/*
 * Signature of the per tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1793
1794 static struct syscall *trace__syscall_info(struct trace *trace,
1795 struct evsel *evsel, int id)
1796 {
1797 int err = 0;
1798
1799 if (id < 0) {
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811 if (verbose > 1) {
1812 static u64 n;
1813 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1814 id, perf_evsel__name(evsel), ++n);
1815 }
1816 return NULL;
1817 }
1818
1819 err = -EINVAL;
1820
1821 if (id > trace->sctbl->syscalls.max_id)
1822 goto out_cant_read;
1823
1824 if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
1825 (err = trace__read_syscall_info(trace, id)) != 0)
1826 goto out_cant_read;
1827
1828 if (trace->syscalls.table[id].name == NULL) {
1829 if (trace->syscalls.table[id].nonexistent)
1830 return NULL;
1831 goto out_cant_read;
1832 }
1833
1834 return &trace->syscalls.table[id];
1835
1836 out_cant_read:
1837 if (verbose > 0) {
1838 char sbuf[STRERR_BUFSIZE];
1839 fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
1840 if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
1841 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1842 fputs(" information\n", trace->output);
1843 }
1844 return NULL;
1845 }
1846
1847 static void thread__update_stats(struct thread_trace *ttrace,
1848 int id, struct perf_sample *sample)
1849 {
1850 struct int_node *inode;
1851 struct stats *stats;
1852 u64 duration = 0;
1853
1854 inode = intlist__findnew(ttrace->syscall_stats, id);
1855 if (inode == NULL)
1856 return;
1857
1858 stats = inode->priv;
1859 if (stats == NULL) {
1860 stats = malloc(sizeof(struct stats));
1861 if (stats == NULL)
1862 return;
1863 init_stats(stats);
1864 inode->priv = stats;
1865 }
1866
1867 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1868 duration = sample->time - ttrace->entry_time;
1869
1870 update_stats(stats, duration);
1871 }
1872
1873 static int trace__printf_interrupted_entry(struct trace *trace)
1874 {
1875 struct thread_trace *ttrace;
1876 size_t printed;
1877 int len;
1878
1879 if (trace->failure_only || trace->current == NULL)
1880 return 0;
1881
1882 ttrace = thread__priv(trace->current);
1883
1884 if (!ttrace->entry_pending)
1885 return 0;
1886
1887 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
1888 printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
1889
1890 if (len < trace->args_alignment - 4)
1891 printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
1892
1893 printed += fprintf(trace->output, " ...\n");
1894
1895 ttrace->entry_pending = false;
1896 ++trace->nr_events_printed;
1897
1898 return printed;
1899 }
1900
1901 static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
1902 struct perf_sample *sample, struct thread *thread)
1903 {
1904 int printed = 0;
1905
1906 if (trace->print_sample) {
1907 double ts = (double)sample->time / NSEC_PER_MSEC;
1908
1909 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1910 perf_evsel__name(evsel), ts,
1911 thread__comm_str(thread),
1912 sample->pid, sample->tid, sample->cpu);
1913 }
1914
1915 return printed;
1916 }
1917
1918 static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
1919 {
1920 void *augmented_args = NULL;
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935 int args_size = raw_augmented_args_size ?: sc->args_size;
1936
1937 *augmented_args_size = sample->raw_size - args_size;
1938 if (*augmented_args_size > 0)
1939 augmented_args = sample->raw_data + args_size;
1940
1941 return augmented_args;
1942 }
1943
/*
 * Handle a syscall entry sample.
 *
 * The syscall name and arguments are formatted into the thread's entry_str.
 * For syscalls that don't return (exit, exit_group) the line is printed right
 * away, otherwise it is left pending (ttrace->entry_pending).
 *
 * Returns 0 on success, -1 if the syscall is unknown or memory could not be
 * allocated.
 */
static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	int printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	int augmented_args_size = 0;
	void *augmented_args = NULL;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	/* copes with a NULL thread, returning NULL as well */
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Allocated once per thread and then reused for every entry. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush a still pending entry of the current thread as interrupted. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace);

	/*
	 * Only look for an augmented payload if the sample didn't come from
	 * the trace->syscalls.events.sys_enter evsel.
	 */
	if (evsel != trace->syscalls.events.sys_enter)
		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, augmented_args, augmented_args_size, trace, thread);

	if (sc->is_exit) {
		/* No exit sample will follow, print it now with "?" as the result. */
		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
			int alignment = 0;

			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			printed = fprintf(trace->output, "%s)", ttrace->entry_str);
			if (trace->args_alignment > printed)
				alignment = trace->args_alignment - printed;
			fprintf(trace->output, "%*s= ?\n", alignment, " ");
		}
	} else {
		ttrace->entry_pending = true;
		/* reset here, presumably set again by the code that resolves the filename */
		ttrace->filename.pending_open = false;
	}

	/* Make this the current thread, moving the reference held by trace->current. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2022
2023 static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2024 struct perf_sample *sample)
2025 {
2026 struct thread_trace *ttrace;
2027 struct thread *thread;
2028 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2029 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2030 char msg[1024];
2031 void *args, *augmented_args = NULL;
2032 int augmented_args_size;
2033
2034 if (sc == NULL)
2035 return -1;
2036
2037 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2038 ttrace = thread__trace(thread, trace->output);
2039
2040
2041
2042
2043 if (ttrace == NULL)
2044 goto out_put;
2045
2046 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2047 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2048 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2049 fprintf(trace->output, "%s", msg);
2050 err = 0;
2051 out_put:
2052 thread__put(thread);
2053 return err;
2054 }
2055
2056 static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2057 struct perf_sample *sample,
2058 struct callchain_cursor *cursor)
2059 {
2060 struct addr_location al;
2061 int max_stack = evsel->core.attr.sample_max_stack ?
2062 evsel->core.attr.sample_max_stack :
2063 trace->max_stack;
2064 int err;
2065
2066 if (machine__resolve(trace->host, &al, sample) < 0)
2067 return -1;
2068
2069 err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2070 addr_location__put(&al);
2071 return err;
2072 }
2073
2074 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2075 {
2076
2077 const unsigned int print_opts = EVSEL__PRINT_SYM |
2078 EVSEL__PRINT_DSO |
2079 EVSEL__PRINT_UNKNOWN_AS_ADDR;
2080
2081 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2082 }
2083
/*
 * Translate errno value @err into its name, using the architecture of the
 * environment associated with @evsel.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	return arch_syscalls__strerrno(perf_env__arch(perf_evsel__env(evsel)), err);
}
2091
2092 static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
2093 union perf_event *event __maybe_unused,
2094 struct perf_sample *sample)
2095 {
2096 long ret;
2097 u64 duration = 0;
2098 bool duration_calculated = false;
2099 struct thread *thread;
2100 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2101 int alignment = trace->args_alignment;
2102 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2103 struct thread_trace *ttrace;
2104
2105 if (sc == NULL)
2106 return -1;
2107
2108 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2109 ttrace = thread__trace(thread, trace->output);
2110 if (ttrace == NULL)
2111 goto out_put;
2112
2113 trace__fprintf_sample(trace, evsel, sample, thread);
2114
2115 if (trace->summary)
2116 thread__update_stats(ttrace, id, sample);
2117
2118 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2119
2120 if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2121 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2122 ttrace->filename.pending_open = false;
2123 ++trace->stats.vfs_getname;
2124 }
2125
2126 if (ttrace->entry_time) {
2127 duration = sample->time - ttrace->entry_time;
2128 if (trace__filter_duration(trace, duration))
2129 goto out;
2130 duration_calculated = true;
2131 } else if (trace->duration_filter)
2132 goto out;
2133
2134 if (sample->callchain) {
2135 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2136 if (callchain_ret == 0) {
2137 if (callchain_cursor.nr < trace->min_stack)
2138 goto out;
2139 callchain_ret = 1;
2140 }
2141 }
2142
2143 if (trace->summary_only || (ret >= 0 && trace->failure_only))
2144 goto out;
2145
2146 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
2147
2148 if (ttrace->entry_pending) {
2149 printed = fprintf(trace->output, "%s", ttrace->entry_str);
2150 } else {
2151 printed += fprintf(trace->output, " ... [");
2152 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2153 printed += 9;
2154 printed += fprintf(trace->output, "]: %s()", sc->name);
2155 }
2156
2157 printed++;
2158
2159 if (alignment > printed)
2160 alignment -= printed;
2161 else
2162 alignment = 0;
2163
2164 fprintf(trace->output, ")%*s= ", alignment, " ");
2165
2166 if (sc->fmt == NULL) {
2167 if (ret < 0)
2168 goto errno_print;
2169 signed_print:
2170 fprintf(trace->output, "%ld", ret);
2171 } else if (ret < 0) {
2172 errno_print: {
2173 char bf[STRERR_BUFSIZE];
2174 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
2175 *e = errno_to_name(evsel, -ret);
2176
2177 fprintf(trace->output, "-1 %s (%s)", e, emsg);
2178 }
2179 } else if (ret == 0 && sc->fmt->timeout)
2180 fprintf(trace->output, "0 (Timeout)");
2181 else if (ttrace->ret_scnprintf) {
2182 char bf[1024];
2183 struct syscall_arg arg = {
2184 .val = ret,
2185 .thread = thread,
2186 .trace = trace,
2187 };
2188 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
2189 ttrace->ret_scnprintf = NULL;
2190 fprintf(trace->output, "%s", bf);
2191 } else if (sc->fmt->hexret)
2192 fprintf(trace->output, "%#lx", ret);
2193 else if (sc->fmt->errpid) {
2194 struct thread *child = machine__find_thread(trace->host, ret, ret);
2195
2196 if (child != NULL) {
2197 fprintf(trace->output, "%ld", ret);
2198 if (child->comm_set)
2199 fprintf(trace->output, " (%s)", thread__comm_str(child));
2200 thread__put(child);
2201 }
2202 } else
2203 goto signed_print;
2204
2205 fputc('\n', trace->output);
2206
2207
2208
2209
2210
2211 if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2212 interrupted = true;
2213
2214 if (callchain_ret > 0)
2215 trace__fprintf_callchain(trace, sample);
2216 else if (callchain_ret < 0)
2217 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2218 out:
2219 ttrace->entry_pending = false;
2220 err = 0;
2221 out_put:
2222 thread__put(thread);
2223 return err;
2224 }
2225
/*
 * Handler for the vfs_getname probe: remembers the "pathname" payload of the
 * sample in the thread's ttrace->filename.name and flags it as a pending
 * open. When ttrace->filename.ptr is set, the pathname is also spliced into
 * the saved ttrace->entry_str at ttrace->filename.entry_str_pos.
 *
 * Always returns 0; failures (no thread, no per-thread state, empty
 * pathname, allocation failure, no room left) just skip the work.
 */
static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	/* Uses existing per-thread state only; nothing is allocated here. */
	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	/* Grow the per-thread name buffer only when this name is longer. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No splice requested for the saved entry string. */
	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* Not enough room: keep only the tail of the pathname. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Shift the rest of entry_str (including its \0) right and drop the name in the gap. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
2286
2287 static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2288 union perf_event *event __maybe_unused,
2289 struct perf_sample *sample)
2290 {
2291 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2292 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2293 struct thread *thread = machine__findnew_thread(trace->host,
2294 sample->pid,
2295 sample->tid);
2296 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2297
2298 if (ttrace == NULL)
2299 goto out_dump;
2300
2301 ttrace->runtime_ms += runtime_ms;
2302 trace->runtime_ms += runtime_ms;
2303 out_put:
2304 thread__put(thread);
2305 return 0;
2306
2307 out_dump:
2308 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2309 evsel->name,
2310 perf_evsel__strval(evsel, sample, "comm"),
2311 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2312 runtime,
2313 perf_evsel__intval(evsel, sample, "vruntime"));
2314 goto out_put;
2315 }
2316
2317 static int bpf_output__printer(enum binary_printer_ops op,
2318 unsigned int val, void *extra __maybe_unused, FILE *fp)
2319 {
2320 unsigned char ch = (unsigned char)val;
2321
2322 switch (op) {
2323 case BINARY_PRINT_CHAR_DATA:
2324 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
2325 case BINARY_PRINT_DATA_BEGIN:
2326 case BINARY_PRINT_LINE_BEGIN:
2327 case BINARY_PRINT_ADDR:
2328 case BINARY_PRINT_NUM_DATA:
2329 case BINARY_PRINT_NUM_PAD:
2330 case BINARY_PRINT_SEP:
2331 case BINARY_PRINT_CHAR_PAD:
2332 case BINARY_PRINT_LINE_END:
2333 case BINARY_PRINT_DATA_END:
2334 default:
2335 break;
2336 }
2337
2338 return 0;
2339 }
2340
2341 static void bpf_output__fprintf(struct trace *trace,
2342 struct perf_sample *sample)
2343 {
2344 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2345 bpf_output__printer, NULL, trace->output);
2346 ++trace->nr_events_printed;
2347 }
2348
/*
 * Generic handler for non-syscall events (and for the augmented syscalls
 * evsel): prints timestamp, optional comm/tid, then either the syscall name
 * and arguments, the BPF output payload, or the tracepoint fields, followed
 * by the callchain when one was resolved. Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	struct thread *thread;
	int callchain_ret = 0;

	/*
	 * Ignore samples for evsels marked disabled.
	 * NOTE(review): presumably these are events still in flight after the
	 * evsel__disable() call below, confirm.
	 */
	if (evsel->disabled)
		return 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Too shallow: drop the whole event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with syscall lines, which carry a duration. */
	if (trace->trace_syscalls && trace->show_duration)
		fprintf(trace->output, "(         ): ");

	if (thread)
		trace__fprintf_comm_tid(trace, thread, trace->output);

	if (evsel == trace->syscalls.events.augmented) {
		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
		struct syscall *sc = trace__syscall_info(trace, evsel, id);

		if (sc) {
			fprintf(trace->output, "%s(", sc->name);
			trace__fprintf_sys_enter(trace, evsel, sample);
			fputc(')', trace->output);
			goto newline;
		}

		/* Unknown syscall id: fall through and print it like any other event. */
	}

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		/*
		 * sys_enter_* tracepoints are first tried with the syscall
		 * argument formatter; anything else, or a failure there,
		 * gets the generic tracepoint field dump.
		 */
		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
		    trace__fprintf_sys_enter(trace, evsel, sample)) {
			event_format__fprintf(evsel->tp_format, sample->cpu,
					      sample->raw_data, sample->raw_size,
					      trace->output);
			++trace->nr_events_printed;

			/* Per-event limit reached: stop receiving this event. */
			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
				evsel__disable(evsel);
				evsel__close(evsel);
			}
		}
	}

newline:
	fprintf(trace->output, "\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	thread__put(thread);
	return 0;
}
2432
2433 static void print_location(FILE *f, struct perf_sample *sample,
2434 struct addr_location *al,
2435 bool print_dso, bool print_sym)
2436 {
2437
2438 if ((verbose > 0 || print_dso) && al->map)
2439 fprintf(f, "%s@", al->map->dso->long_name);
2440
2441 if ((verbose > 0 || print_sym) && al->sym)
2442 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2443 al->addr - al->sym->start);
2444 else if (al->map)
2445 fprintf(f, "0x%" PRIx64, al->addr);
2446 else
2447 fprintf(f, "0x%" PRIx64, sample->addr);
2448 }
2449
/*
 * Handler for page fault software events: bumps the thread's major/minor
 * fault counter and, unless only a summary was requested, prints a line of
 * the form "<maj|min>fault [<code location>] => <data location> (<type><level>)"
 * followed by the callchain when one was resolved.
 *
 * Returns 0 when the fault was accounted, -1 when the per-thread state is
 * unavailable or the callchain is shallower than trace->min_stack.
 */
static int trace__pgfault(struct trace *trace,
			  struct evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction lives. */
	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* What the faulting address points at. */
	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

	if (!al.map) {
		/*
		 * NOTE(review): this repeats the lookup above with identical
		 * arguments, so the 'x' outcome looks unreachable unless the
		 * first call has side effects, confirm whether a different
		 * lookup was intended.
		 */
		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));

	++trace->nr_events_printed;
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2524
2525 static void trace__set_base_time(struct trace *trace,
2526 struct evsel *evsel,
2527 struct perf_sample *sample)
2528 {
2529
2530
2531
2532
2533
2534
2535
2536
2537 if (trace->base_time == 0 && !trace->full_time &&
2538 (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2539 trace->base_time = sample->time;
2540 }
2541
2542 static int trace__process_sample(struct perf_tool *tool,
2543 union perf_event *event,
2544 struct perf_sample *sample,
2545 struct evsel *evsel,
2546 struct machine *machine __maybe_unused)
2547 {
2548 struct trace *trace = container_of(tool, struct trace, tool);
2549 struct thread *thread;
2550 int err = 0;
2551
2552 tracepoint_handler handler = evsel->handler;
2553
2554 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2555 if (thread && thread__is_filtered(thread))
2556 goto out;
2557
2558 trace__set_base_time(trace, evsel, sample);
2559
2560 if (handler) {
2561 ++trace->nr_events;
2562 handler(trace, evsel, event, sample);
2563 }
2564 out:
2565 thread__put(thread);
2566 return err;
2567 }
2568
2569 static int trace__record(struct trace *trace, int argc, const char **argv)
2570 {
2571 unsigned int rec_argc, i, j;
2572 const char **rec_argv;
2573 const char * const record_args[] = {
2574 "record",
2575 "-R",
2576 "-m", "1024",
2577 "-c", "1",
2578 };
2579
2580 const char * const sc_args[] = { "-e", };
2581 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2582 const char * const majpf_args[] = { "-e", "major-faults" };
2583 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2584 const char * const minpf_args[] = { "-e", "minor-faults" };
2585 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2586
2587
2588 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2589 majpf_args_nr + minpf_args_nr + argc;
2590 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2591
2592 if (rec_argv == NULL)
2593 return -ENOMEM;
2594
2595 j = 0;
2596 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2597 rec_argv[j++] = record_args[i];
2598
2599 if (trace->trace_syscalls) {
2600 for (i = 0; i < sc_args_nr; i++)
2601 rec_argv[j++] = sc_args[i];
2602
2603
2604 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2605 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2606 else if (is_valid_tracepoint("syscalls:sys_enter"))
2607 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2608 else {
2609 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2610 free(rec_argv);
2611 return -1;
2612 }
2613 }
2614
2615 if (trace->trace_pgfaults & TRACE_PFMAJ)
2616 for (i = 0; i < majpf_args_nr; i++)
2617 rec_argv[j++] = majpf_args[i];
2618
2619 if (trace->trace_pgfaults & TRACE_PFMIN)
2620 for (i = 0; i < minpf_args_nr; i++)
2621 rec_argv[j++] = minpf_args[i];
2622
2623 for (i = 0; i < (unsigned int)argc; i++)
2624 rec_argv[j++] = argv[i];
2625
2626 return cmd_record(j, rec_argv);
2627 }
2628
2629 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2630
2631 static bool evlist__add_vfs_getname(struct evlist *evlist)
2632 {
2633 bool found = false;
2634 struct evsel *evsel, *tmp;
2635 struct parse_events_error err = { .idx = 0, };
2636 int ret = parse_events(evlist, "probe:vfs_getname*", &err);
2637
2638 if (ret)
2639 return false;
2640
2641 evlist__for_each_entry_safe(evlist, evsel, tmp) {
2642 if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
2643 continue;
2644
2645 if (perf_evsel__field(evsel, "pathname")) {
2646 evsel->handler = trace__vfs_getname;
2647 found = true;
2648 continue;
2649 }
2650
2651 list_del_init(&evsel->core.node);
2652 evsel->evlist = NULL;
2653 evsel__delete(evsel);
2654 }
2655
2656 return found;
2657 }
2658
2659 static struct evsel *perf_evsel__new_pgfault(u64 config)
2660 {
2661 struct evsel *evsel;
2662 struct perf_event_attr attr = {
2663 .type = PERF_TYPE_SOFTWARE,
2664 .mmap_data = 1,
2665 };
2666
2667 attr.config = config;
2668 attr.sample_period = 1;
2669
2670 event_attr_init(&attr);
2671
2672 evsel = evsel__new(&attr);
2673 if (evsel)
2674 evsel->handler = trace__pgfault;
2675
2676 return evsel;
2677 }
2678
2679 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2680 {
2681 const u32 type = event->header.type;
2682 struct evsel *evsel;
2683
2684 if (type != PERF_RECORD_SAMPLE) {
2685 trace__process_event(trace, trace->host, event, sample);
2686 return;
2687 }
2688
2689 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2690 if (evsel == NULL) {
2691 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2692 return;
2693 }
2694
2695 if (evswitch__discard(&trace->evswitch, evsel))
2696 return;
2697
2698 trace__set_base_time(trace, evsel, sample);
2699
2700 if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
2701 sample->raw_data == NULL) {
2702 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2703 perf_evsel__name(evsel), sample->tid,
2704 sample->cpu, sample->raw_size);
2705 } else {
2706 tracepoint_handler handler = evsel->handler;
2707 handler(trace, evsel, event, sample);
2708 }
2709
2710 if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
2711 interrupted = true;
2712 }
2713
2714 static int trace__add_syscall_newtp(struct trace *trace)
2715 {
2716 int ret = -1;
2717 struct evlist *evlist = trace->evlist;
2718 struct evsel *sys_enter, *sys_exit;
2719
2720 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
2721 if (sys_enter == NULL)
2722 goto out;
2723
2724 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2725 goto out_delete_sys_enter;
2726
2727 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
2728 if (sys_exit == NULL)
2729 goto out_delete_sys_enter;
2730
2731 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2732 goto out_delete_sys_exit;
2733
2734 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2735 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2736
2737 evlist__add(evlist, sys_enter);
2738 evlist__add(evlist, sys_exit);
2739
2740 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2741
2742
2743
2744
2745
2746 sys_exit->core.attr.exclude_callchain_kernel = 1;
2747 }
2748
2749 trace->syscalls.events.sys_enter = sys_enter;
2750 trace->syscalls.events.sys_exit = sys_exit;
2751
2752 ret = 0;
2753 out:
2754 return ret;
2755
2756 out_delete_sys_exit:
2757 evsel__delete_priv(sys_exit);
2758 out_delete_sys_enter:
2759 evsel__delete_priv(sys_enter);
2760 goto out;
2761 }
2762
2763 static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
2764 {
2765 int err = -1;
2766 struct evsel *sys_exit;
2767 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2768 trace->ev_qualifier_ids.nr,
2769 trace->ev_qualifier_ids.entries);
2770
2771 if (filter == NULL)
2772 goto out_enomem;
2773
2774 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2775 filter)) {
2776 sys_exit = trace->syscalls.events.sys_exit;
2777 err = perf_evsel__append_tp_filter(sys_exit, filter);
2778 }
2779
2780 free(filter);
2781 out:
2782 return err;
2783 out_enomem:
2784 errno = ENOMEM;
2785 goto out;
2786 }
2787
2788 #ifdef HAVE_LIBBPF_SUPPORT
2789 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
2790 {
2791 if (trace->bpf_obj == NULL)
2792 return NULL;
2793
2794 return bpf_object__find_program_by_title(trace->bpf_obj, name);
2795 }
2796
2797 static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
2798 const char *prog_name, const char *type)
2799 {
2800 struct bpf_program *prog;
2801
2802 if (prog_name == NULL) {
2803 char default_prog_name[256];
2804 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
2805 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
2806 if (prog != NULL)
2807 goto out_found;
2808 if (sc->fmt && sc->fmt->alias) {
2809 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
2810 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
2811 if (prog != NULL)
2812 goto out_found;
2813 }
2814 goto out_unaugmented;
2815 }
2816
2817 prog = trace__find_bpf_program_by_title(trace, prog_name);
2818
2819 if (prog != NULL) {
2820 out_found:
2821 return prog;
2822 }
2823
2824 pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
2825 prog_name, type, sc->name);
2826 out_unaugmented:
2827 return trace->syscalls.unaugmented_prog;
2828 }
2829
2830 static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
2831 {
2832 struct syscall *sc = trace__syscall_info(trace, NULL, id);
2833
2834 if (sc == NULL)
2835 return;
2836
2837 sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
2838 sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
2839 }
2840
2841 static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
2842 {
2843 struct syscall *sc = trace__syscall_info(trace, NULL, id);
2844 return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
2845 }
2846
2847 static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
2848 {
2849 struct syscall *sc = trace__syscall_info(trace, NULL, id);
2850 return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
2851 }
2852
2853 static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
2854 {
2855 struct syscall *sc = trace__syscall_info(trace, NULL, id);
2856 int arg = 0;
2857
2858 if (sc == NULL)
2859 goto out;
2860
2861 for (; arg < sc->nr_args; ++arg) {
2862 entry->string_args_len[arg] = 0;
2863 if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
2864
2865 entry->string_args_len[arg] = PATH_MAX;
2866 }
2867 }
2868 out:
2869 for (; arg < 6; ++arg)
2870 entry->string_args_len[arg] = 0;
2871 }
2872 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
2873 {
2874 int fd = bpf_map__fd(trace->syscalls.map);
2875 struct bpf_map_syscall_entry value = {
2876 .enabled = !trace->not_ev_qualifier,
2877 };
2878 int err = 0;
2879 size_t i;
2880
2881 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
2882 int key = trace->ev_qualifier_ids.entries[i];
2883
2884 if (value.enabled) {
2885 trace__init_bpf_map_syscall_args(trace, key, &value);
2886 trace__init_syscall_bpf_progs(trace, key);
2887 }
2888
2889 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
2890 if (err)
2891 break;
2892 }
2893
2894 return err;
2895 }
2896
2897 static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
2898 {
2899 int fd = bpf_map__fd(trace->syscalls.map);
2900 struct bpf_map_syscall_entry value = {
2901 .enabled = enabled,
2902 };
2903 int err = 0, key;
2904
2905 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
2906 if (enabled)
2907 trace__init_bpf_map_syscall_args(trace, key, &value);
2908
2909 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
2910 if (err)
2911 break;
2912 }
2913
2914 return err;
2915 }
2916
2917 static int trace__init_syscalls_bpf_map(struct trace *trace)
2918 {
2919 bool enabled = true;
2920
2921 if (trace->ev_qualifier_ids.nr)
2922 enabled = trace->not_ev_qualifier;
2923
2924 return __trace__init_syscalls_bpf_map(trace, enabled);
2925 }
2926
2927 static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
2928 {
2929 struct tep_format_field *field, *candidate_field;
2930 int id;
2931
2932
2933
2934
2935 for (field = sc->args; field; field = field->next) {
2936 if (field->flags & TEP_FIELD_IS_POINTER)
2937 goto try_to_find_pair;
2938 }
2939
2940 return NULL;
2941
2942 try_to_find_pair:
2943 for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
2944 struct syscall *pair = trace__syscall_info(trace, NULL, id);
2945 struct bpf_program *pair_prog;
2946 bool is_candidate = false;
2947
2948 if (pair == NULL || pair == sc ||
2949 pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
2950 continue;
2951
2952 for (field = sc->args, candidate_field = pair->args;
2953 field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
2954 bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
2955 candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
2956
2957 if (is_pointer) {
2958 if (!candidate_is_pointer) {
2959
2960 continue;
2961 }
2962 } else {
2963 if (candidate_is_pointer) {
2964
2965 goto next_candidate;
2966 }
2967 continue;
2968 }
2969
2970 if (strcmp(field->type, candidate_field->type))
2971 goto next_candidate;
2972
2973 is_candidate = true;
2974 }
2975
2976 if (!is_candidate)
2977 goto next_candidate;
2978
2979
2980
2981
2982
2983
2984 if (candidate_field) {
2985 for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
2986 if (candidate_field->flags & TEP_FIELD_IS_POINTER)
2987 goto next_candidate;
2988 }
2989
2990 pair_prog = pair->bpf_prog.sys_enter;
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000 if (pair_prog == NULL) {
3001 pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3002 if (pair_prog == trace->syscalls.unaugmented_prog)
3003 goto next_candidate;
3004 }
3005
3006 pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
3007 return pair_prog;
3008 next_candidate:
3009 continue;
3010 }
3011
3012 return NULL;
3013 }
3014
3015 static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3016 {
3017 int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3018 map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3019 int err = 0, key;
3020
3021 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3022 int prog_fd;
3023
3024 if (!trace__syscall_enabled(trace, key))
3025 continue;
3026
3027 trace__init_syscall_bpf_progs(trace, key);
3028
3029
3030 prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3031 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3032 if (err)
3033 break;
3034 prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3035 err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3036 if (err)
3037 break;
3038 }
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3069 struct syscall *sc = trace__syscall_info(trace, NULL, key);
3070 struct bpf_program *pair_prog;
3071 int prog_fd;
3072
3073 if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3074 continue;
3075
3076
3077
3078
3079
3080 if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3081 continue;
3082
3083
3084
3085
3086
3087 pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3088 if (pair_prog == NULL)
3089 continue;
3090
3091 sc->bpf_prog.sys_enter = pair_prog;
3092
3093
3094
3095
3096
3097 prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3098 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3099 if (err)
3100 break;
3101 }
3102
3103
3104 return err;
3105 }
3106 #else
/* Stub used when HAVE_LIBBPF_SUPPORT is not defined: nothing to do, reports success. */
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
	return 0;
}
3111
/* Stub used when HAVE_LIBBPF_SUPPORT is not defined: nothing to do, reports success. */
static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
{
	return 0;
}
3116
/* Stub used when HAVE_LIBBPF_SUPPORT is not defined: no program can ever be found. */
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
							    const char *name __maybe_unused)
{
	return NULL;
}
3122
/* Stub used when HAVE_LIBBPF_SUPPORT is not defined: nothing to do, reports success. */
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
{
	return 0;
}
3127 #endif
3128
3129 static int trace__set_ev_qualifier_filter(struct trace *trace)
3130 {
3131 if (trace->syscalls.map)
3132 return trace__set_ev_qualifier_bpf_filter(trace);
3133 if (trace->syscalls.events.sys_enter)
3134 return trace__set_ev_qualifier_tp_filter(trace);
3135 return 0;
3136 }
3137
3138 static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3139 size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3140 {
3141 int err = 0;
3142 #ifdef HAVE_LIBBPF_SUPPORT
3143 bool value = true;
3144 int map_fd = bpf_map__fd(map);
3145 size_t i;
3146
3147 for (i = 0; i < npids; ++i) {
3148 err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3149 if (err)
3150 break;
3151 }
3152 #endif
3153 return err;
3154 }
3155
3156 static int trace__set_filter_loop_pids(struct trace *trace)
3157 {
3158 unsigned int nr = 1, err;
3159 pid_t pids[32] = {
3160 getpid(),
3161 };
3162 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3163
3164 while (thread && nr < ARRAY_SIZE(pids)) {
3165 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3166
3167 if (parent == NULL)
3168 break;
3169
3170 if (!strcmp(thread__comm_str(parent), "sshd") ||
3171 strstarts(thread__comm_str(parent), "gnome-terminal")) {
3172 pids[nr++] = parent->tid;
3173 break;
3174 }
3175 thread = parent;
3176 }
3177
3178 err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids);
3179 if (!err && trace->filter_pids.map)
3180 err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3181
3182 return err;
3183 }
3184
3185 static int trace__set_filter_pids(struct trace *trace)
3186 {
3187 int err = 0;
3188
3189
3190
3191
3192
3193
3194 if (trace->filter_pids.nr > 0) {
3195 err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3196 trace->filter_pids.entries);
3197 if (!err && trace->filter_pids.map) {
3198 err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3199 trace->filter_pids.entries);
3200 }
3201 } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3202 err = trace__set_filter_loop_pids(trace);
3203 }
3204
3205 return err;
3206 }
3207
3208 static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3209 {
3210 struct evlist *evlist = trace->evlist;
3211 struct perf_sample sample;
3212 int err;
3213
3214 err = perf_evlist__parse_sample(evlist, event, &sample);
3215 if (err)
3216 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3217 else
3218 trace__handle_event(trace, event, &sample);
3219
3220 return 0;
3221 }
3222
3223 static int __trace__flush_events(struct trace *trace)
3224 {
3225 u64 first = ordered_events__first_time(&trace->oe.data);
3226 u64 flush = trace->oe.last - NSEC_PER_SEC;
3227
3228
3229 if (first && first < flush)
3230 return ordered_events__flush_time(&trace->oe.data, flush);
3231
3232 return 0;
3233 }
3234
3235 static int trace__flush_events(struct trace *trace)
3236 {
3237 return !trace->sort_events ? 0 : __trace__flush_events(trace);
3238 }
3239
3240 static int trace__deliver_event(struct trace *trace, union perf_event *event)
3241 {
3242 int err;
3243
3244 if (!trace->sort_events)
3245 return __trace__deliver_event(trace, event);
3246
3247 err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3248 if (err && err != -1)
3249 return err;
3250
3251 err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
3252 if (err)
3253 return err;
3254
3255 return trace__flush_events(trace);
3256 }
3257
3258 static int ordered_events__deliver_event(struct ordered_events *oe,
3259 struct ordered_event *event)
3260 {
3261 struct trace *trace = container_of(oe, struct trace, oe.data);
3262
3263 return __trace__deliver_event(trace, event->event);
3264 }
3265
/*
 * Live tracing: add the requested events to trace->evlist, create the target
 * maps, optionally prepare a workload (when argc > 0), open and mmap the
 * events, then read events from the ring buffers until told to stop.  On the
 * way out it optionally prints the summary and tool stats, and always deletes
 * the evlist.
 *
 * Returns 0 on success, or the error left in 'err' by the step that failed
 * (initially -1).
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct evlist *evlist = trace->evlist;
	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	/* A command line was given, so a workload will be prepared and started. */
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	/* The syscall tracepoints are only added here when not using raw augmented syscalls. */
	if (!trace->raw_augmented_syscalls) {
		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
			goto out_error_raw_syscalls;

		if (trace->trace_syscalls)
			trace->vfs_getname = evlist__add_vfs_getname(evlist);
	}

	/* Major page faults requested: add a software event for them. */
	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_maj);
	}

	/* Same for minor page faults. */
	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	/*
	 * A cgroup was recorded in trace->cgroup: hand it to the evlist as the
	 * default cgroup (presumably for events that did not get one of their
	 * own -- confirm against evlist__set_default_cgroup()).
	 */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, &callchain_param);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	err = trace__set_filter_pids(trace);
	if (err < 0)
		goto out_error_mem;

	/* BPF maps are only initialized when the corresponding map was found. */
	if (trace->syscalls.map)
		trace__init_syscalls_bpf_map(trace);

	if (trace->syscalls.prog_array.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		if (trace->syscalls.events.sys_exit) {
			pr_debug("event qualifier tracepoint filter: %s\n",
				 trace->syscalls.events.sys_exit->filter);
		}
	}

	/*
	 * fd -> pathname resolution is switched off when the 'close' syscall is
	 * not enabled.  NOTE(review): presumably because without seeing close()
	 * stale fd entries could not be invalidated -- confirm.
	 */
	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

	/* On failure 'evsel' is used by the error path below to name the offending event. */
	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	if (trace->dump.map)
		bpf_map__fprintf(trace->dump.map, trace->output);

	err = evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	/* With a target and no initial delay, events are enabled before the workload starts. */
	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* initial_delay is multiplied by 1000 for usleep(), i.e. it is treated as milliseconds. */
	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		evlist__enable(evlist);
	}

	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
				  evlist->core.threads->nr > 1 ||
				  evlist__first(evlist)->core.attr.inherit;

	/*
	 * Events that have callchains but no per-event sample_max_stack get the
	 * session wide trace->max_stack.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->core.attr.sample_max_stack == 0)
			evsel->core.attr.sample_max_stack = trace->max_stack;
	}
again:
	/* Remember the event count to detect whether this pass read anything. */
	before = trace->nr_events;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		union perf_event *event;
		struct mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(md) < 0)
			continue;

		while ((event = perf_mmap__read_event(md)) != NULL) {
			++trace->nr_events;

			err = trace__deliver_event(trace, event);
			if (err)
				goto out_disable;

			perf_mmap__consume(md);

			if (interrupted)
				goto out_disable;

			/* First time 'done' is seen: stop the events and just drain what is left. */
			if (done && !draining) {
				evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(md);
	}

	if (trace->nr_events == before) {
		/* Nothing new was read: block in poll unless 'done' is set, then wait at most 100. */
		int timeout = done ? 100 : -1;

		if (!draining && evlist__poll(evlist, timeout) > 0) {
			/* No pollable fds left after filtering out error/hangup ones: start draining. */
			if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
				draining = true;

			goto again;
		} else {
			if (trace__flush_events(trace))
				goto out_disable;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	evlist__disable(evlist);

	/* Deliver whatever is still queued when sorting events. */
	if (trace->sort_events)
		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);

	evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error labels: only reachable through goto.  The braces give the labels that
 * format a message a shared errbuf; every path ends in out_delete_evlist.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
3558
3559 static int trace__replay(struct trace *trace)
3560 {
3561 const struct evsel_str_handler handlers[] = {
3562 { "probe:vfs_getname", trace__vfs_getname, },
3563 };
3564 struct perf_data data = {
3565 .path = input_name,
3566 .mode = PERF_DATA_MODE_READ,
3567 .force = trace->force,
3568 };
3569 struct perf_session *session;
3570 struct evsel *evsel;
3571 int err = -1;
3572
3573 trace->tool.sample = trace__process_sample;
3574 trace->tool.mmap = perf_event__process_mmap;
3575 trace->tool.mmap2 = perf_event__process_mmap2;
3576 trace->tool.comm = perf_event__process_comm;
3577 trace->tool.exit = perf_event__process_exit;
3578 trace->tool.fork = perf_event__process_fork;
3579 trace->tool.attr = perf_event__process_attr;
3580 trace->tool.tracing_data = perf_event__process_tracing_data;
3581 trace->tool.build_id = perf_event__process_build_id;
3582 trace->tool.namespaces = perf_event__process_namespaces;
3583
3584 trace->tool.ordered_events = true;
3585 trace->tool.ordering_requires_timestamps = true;
3586
3587
3588 trace->multiple_threads = true;
3589
3590 session = perf_session__new(&data, false, &trace->tool);
3591 if (IS_ERR(session))
3592 return PTR_ERR(session);
3593
3594 if (trace->opts.target.pid)
3595 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
3596
3597 if (trace->opts.target.tid)
3598 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
3599
3600 if (symbol__init(&session->header.env) < 0)
3601 goto out;
3602
3603 trace->host = &session->machines.host;
3604
3605 err = perf_session__set_tracepoints_handlers(session, handlers);
3606 if (err)
3607 goto out;
3608
3609 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3610 "raw_syscalls:sys_enter");
3611
3612 if (evsel == NULL)
3613 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3614 "syscalls:sys_enter");
3615
3616 if (evsel &&
3617 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
3618 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
3619 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
3620 goto out;
3621 }
3622
3623 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3624 "raw_syscalls:sys_exit");
3625 if (evsel == NULL)
3626 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3627 "syscalls:sys_exit");
3628 if (evsel &&
3629 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
3630 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
3631 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
3632 goto out;
3633 }
3634
3635 evlist__for_each_entry(session->evlist, evsel) {
3636 if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
3637 (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
3638 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
3639 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
3640 evsel->handler = trace__pgfault;
3641 }
3642
3643 setup_pager();
3644
3645 err = perf_session__process_events(session);
3646 if (err)
3647 pr_err("Failed to process events, error %d", err);
3648
3649 else if (trace->summary)
3650 trace__fprintf_thread_summary(trace, trace->output);
3651
3652 out:
3653 perf_session__delete(session);
3654
3655 return err;
3656 }
3657
/*
 * Print the heading of the per thread summary to fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
3666
3667 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
3668 struct stats *stats;
3669 double msecs;
3670 int syscall;
3671 )
3672 {
3673 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
3674 struct stats *stats = source->priv;
3675
3676 entry->syscall = source->i;
3677 entry->stats = stats;
3678 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
3679 }
3680
3681 static size_t thread__dump_stats(struct thread_trace *ttrace,
3682 struct trace *trace, FILE *fp)
3683 {
3684 size_t printed = 0;
3685 struct syscall *sc;
3686 struct rb_node *nd;
3687 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
3688
3689 if (syscall_stats == NULL)
3690 return 0;
3691
3692 printed += fprintf(fp, "\n");
3693
3694 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
3695 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
3696 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
3697
3698 resort_rb__for_each_entry(nd, syscall_stats) {
3699 struct stats *stats = syscall_stats_entry->stats;
3700 if (stats) {
3701 double min = (double)(stats->min) / NSEC_PER_MSEC;
3702 double max = (double)(stats->max) / NSEC_PER_MSEC;
3703 double avg = avg_stats(stats);
3704 double pct;
3705 u64 n = (u64) stats->n;
3706
3707 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
3708 avg /= NSEC_PER_MSEC;
3709
3710 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
3711 printed += fprintf(fp, " %-15s", sc->name);
3712 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
3713 n, syscall_stats_entry->msecs, min, avg);
3714 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
3715 }
3716 }
3717
3718 resort_rb__delete(syscall_stats);
3719 printed += fprintf(fp, "\n\n");
3720
3721 return printed;
3722 }
3723
3724 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
3725 {
3726 size_t printed = 0;
3727 struct thread_trace *ttrace = thread__priv(thread);
3728 double ratio;
3729
3730 if (ttrace == NULL)
3731 return 0;
3732
3733 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
3734
3735 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
3736 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
3737 printed += fprintf(fp, "%.1f%%", ratio);
3738 if (ttrace->pfmaj)
3739 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
3740 if (ttrace->pfmin)
3741 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
3742 if (trace->sched)
3743 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
3744 else if (fputc('\n', fp) != EOF)
3745 ++printed;
3746
3747 printed += thread__dump_stats(ttrace, trace, fp);
3748
3749 return printed;
3750 }
3751
3752 static unsigned long thread__nr_events(struct thread_trace *ttrace)
3753 {
3754 return ttrace ? ttrace->nr_events : 0;
3755 }
3756
3757 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
3758 struct thread *thread;
3759 )
3760 {
3761 entry->thread = rb_entry(nd, struct thread, rb_node);
3762 }
3763
3764 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
3765 {
3766 size_t printed = trace__fprintf_threads_header(fp);
3767 struct rb_node *nd;
3768 int i;
3769
3770 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
3771 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
3772
3773 if (threads == NULL) {
3774 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
3775 return 0;
3776 }
3777
3778 resort_rb__for_each_entry(nd, threads)
3779 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
3780
3781 resort_rb__delete(threads);
3782 }
3783 return printed;
3784 }
3785
3786 static int trace__set_duration(const struct option *opt, const char *str,
3787 int unset __maybe_unused)
3788 {
3789 struct trace *trace = opt->value;
3790
3791 trace->duration_filter = atof(str);
3792 return 0;
3793 }
3794
3795 static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
3796 int unset __maybe_unused)
3797 {
3798 int ret = -1;
3799 size_t i;
3800 struct trace *trace = opt->value;
3801
3802
3803
3804
3805 struct intlist *list = intlist__new(str);
3806
3807 if (list == NULL)
3808 return -1;
3809
3810 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3811 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3812
3813 if (trace->filter_pids.entries == NULL)
3814 goto out;
3815
3816 trace->filter_pids.entries[0] = getpid();
3817
3818 for (i = 1; i < trace->filter_pids.nr; ++i)
3819 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3820
3821 intlist__delete(list);
3822 ret = 0;
3823 out:
3824 return ret;
3825 }
3826
3827 static int trace__open_output(struct trace *trace, const char *filename)
3828 {
3829 struct stat st;
3830
3831 if (!stat(filename, &st) && st.st_size) {
3832 char oldname[PATH_MAX];
3833
3834 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3835 unlink(oldname);
3836 rename(filename, oldname);
3837 }
3838
3839 trace->output = fopen(filename, "w");
3840
3841 return trace->output == NULL ? -errno : 0;
3842 }
3843
3844 static int parse_pagefaults(const struct option *opt, const char *str,
3845 int unset __maybe_unused)
3846 {
3847 int *trace_pgfaults = opt->value;
3848
3849 if (strcmp(str, "all") == 0)
3850 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3851 else if (strcmp(str, "maj") == 0)
3852 *trace_pgfaults |= TRACE_PFMAJ;
3853 else if (strcmp(str, "min") == 0)
3854 *trace_pgfaults |= TRACE_PFMIN;
3855 else
3856 return -1;
3857
3858 return 0;
3859 }
3860
3861 static void evlist__set_evsel_handler(struct evlist *evlist, void *handler)
3862 {
3863 struct evsel *evsel;
3864
3865 evlist__for_each_entry(evlist, evsel)
3866 evsel->handler = handler;
3867 }
3868
3869 static int evlist__set_syscall_tp_fields(struct evlist *evlist)
3870 {
3871 struct evsel *evsel;
3872
3873 evlist__for_each_entry(evlist, evsel) {
3874 if (evsel->priv || !evsel->tp_format)
3875 continue;
3876
3877 if (strcmp(evsel->tp_format->system, "syscalls"))
3878 continue;
3879
3880 if (perf_evsel__init_syscall_tp(evsel))
3881 return -1;
3882
3883 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3884 struct syscall_tp *sc = evsel->priv;
3885
3886 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
3887 return -1;
3888 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3889 struct syscall_tp *sc = evsel->priv;
3890
3891 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
3892 return -1;
3893 }
3894 }
3895
3896 return 0;
3897 }
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907 static int trace__parse_events_option(const struct option *opt, const char *str,
3908 int unset __maybe_unused)
3909 {
3910 struct trace *trace = (struct trace *)opt->value;
3911 const char *s = str;
3912 char *sep = NULL, *lists[2] = { NULL, NULL, };
3913 int len = strlen(str) + 1, err = -1, list, idx;
3914 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
3915 char group_name[PATH_MAX];
3916 struct syscall_fmt *fmt;
3917
3918 if (strace_groups_dir == NULL)
3919 return -1;
3920
3921 if (*s == '!') {
3922 ++s;
3923 trace->not_ev_qualifier = true;
3924 }
3925
3926 while (1) {
3927 if ((sep = strchr(s, ',')) != NULL)
3928 *sep = '\0';
3929
3930 list = 0;
3931 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
3932 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
3933 list = 1;
3934 goto do_concat;
3935 }
3936
3937 fmt = syscall_fmt__find_by_alias(s);
3938 if (fmt != NULL) {
3939 list = 1;
3940 s = fmt->name;
3941 } else {
3942 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
3943 if (access(group_name, R_OK) == 0)
3944 list = 1;
3945 }
3946 do_concat:
3947 if (lists[list]) {
3948 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
3949 } else {
3950 lists[list] = malloc(len);
3951 if (lists[list] == NULL)
3952 goto out;
3953 strcpy(lists[list], s);
3954 }
3955
3956 if (!sep)
3957 break;
3958
3959 *sep = ',';
3960 s = sep + 1;
3961 }
3962
3963 if (lists[1] != NULL) {
3964 struct strlist_config slist_config = {
3965 .dirname = strace_groups_dir,
3966 };
3967
3968 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
3969 if (trace->ev_qualifier == NULL) {
3970 fputs("Not enough memory to parse event qualifier", trace->output);
3971 goto out;
3972 }
3973
3974 if (trace__validate_ev_qualifier(trace))
3975 goto out;
3976 trace->trace_syscalls = true;
3977 }
3978
3979 err = 0;
3980
3981 if (lists[0]) {
3982 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3983 "event selector. use 'perf list' to list available events",
3984 parse_events_option);
3985 err = parse_events_option(&o, lists[0], 0);
3986 }
3987 out:
3988 if (sep)
3989 *sep = ',';
3990
3991 return err;
3992 }
3993
3994 static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3995 {
3996 struct trace *trace = opt->value;
3997
3998 if (!list_empty(&trace->evlist->core.entries))
3999 return parse_cgroups(opt, str, unset);
4000
4001 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4002
4003 return 0;
4004 }
4005
4006 static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
4007 {
4008 if (trace->bpf_obj == NULL)
4009 return NULL;
4010
4011 return bpf_object__find_map_by_name(trace->bpf_obj, name);
4012 }
4013
4014 static void trace__set_bpf_map_filtered_pids(struct trace *trace)
4015 {
4016 trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
4017 }
4018
4019 static void trace__set_bpf_map_syscalls(struct trace *trace)
4020 {
4021 trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
4022 trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
4023 trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
4024 }
4025
4026 static int trace__config(const char *var, const char *value, void *arg)
4027 {
4028 struct trace *trace = arg;
4029 int err = 0;
4030
4031 if (!strcmp(var, "trace.add_events")) {
4032 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
4033 "event selector. use 'perf list' to list available events",
4034 parse_events_option);
4035
4036
4037
4038
4039 if (parse_events_option(&o, value, 0))
4040 err = -1;
4041 } else if (!strcmp(var, "trace.show_timestamp")) {
4042 trace->show_tstamp = perf_config_bool(var, value);
4043 } else if (!strcmp(var, "trace.show_duration")) {
4044 trace->show_duration = perf_config_bool(var, value);
4045 } else if (!strcmp(var, "trace.show_arg_names")) {
4046 trace->show_arg_names = perf_config_bool(var, value);
4047 if (!trace->show_arg_names)
4048 trace->show_zeros = true;
4049 } else if (!strcmp(var, "trace.show_zeros")) {
4050 bool new_show_zeros = perf_config_bool(var, value);
4051 if (!trace->show_arg_names && !new_show_zeros) {
4052 pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4053 goto out;
4054 }
4055 trace->show_zeros = new_show_zeros;
4056 } else if (!strcmp(var, "trace.show_prefix")) {
4057 trace->show_string_prefix = perf_config_bool(var, value);
4058 } else if (!strcmp(var, "trace.no_inherit")) {
4059 trace->opts.no_inherit = perf_config_bool(var, value);
4060 } else if (!strcmp(var, "trace.args_alignment")) {
4061 int args_alignment = 0;
4062 if (perf_config_int(&args_alignment, var, value) == 0)
4063 trace->args_alignment = args_alignment;
4064 }
4065 out:
4066 return err;
4067 }
4068
4069 int cmd_trace(int argc, const char **argv)
4070 {
4071 const char *trace_usage[] = {
4072 "perf trace [<options>] [<command>]",
4073 "perf trace [<options>] -- <command> [<options>]",
4074 "perf trace record [<options>] [<command>]",
4075 "perf trace record [<options>] -- <command> [<options>]",
4076 NULL
4077 };
4078 struct trace trace = {
4079 .opts = {
4080 .target = {
4081 .uid = UINT_MAX,
4082 .uses_mmap = true,
4083 },
4084 .user_freq = UINT_MAX,
4085 .user_interval = ULLONG_MAX,
4086 .no_buffering = true,
4087 .mmap_pages = UINT_MAX,
4088 },
4089 .output = stderr,
4090 .show_comm = true,
4091 .show_tstamp = true,
4092 .show_duration = true,
4093 .show_arg_names = true,
4094 .args_alignment = 70,
4095 .trace_syscalls = false,
4096 .kernel_syscallchains = false,
4097 .max_stack = UINT_MAX,
4098 .max_events = ULONG_MAX,
4099 };
4100 const char *map_dump_str = NULL;
4101 const char *output_name = NULL;
4102 const struct option trace_options[] = {
4103 OPT_CALLBACK('e', "event", &trace, "event",
4104 "event/syscall selector. use 'perf list' to list available events",
4105 trace__parse_events_option),
4106 OPT_BOOLEAN(0, "comm", &trace.show_comm,
4107 "show the thread COMM next to its id"),
4108 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
4109 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
4110 trace__parse_events_option),
4111 OPT_STRING('o', "output", &output_name, "file", "output file name"),
4112 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
4113 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
4114 "trace events on existing process id"),
4115 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
4116 "trace events on existing thread id"),
4117 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
4118 "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
4119 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
4120 "system-wide collection from all CPUs"),
4121 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
4122 "list of cpus to monitor"),
4123 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
4124 "child tasks do not inherit counters"),
4125 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
4126 "number of mmap data pages",
4127 perf_evlist__parse_mmap_pages),
4128 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
4129 "user to profile"),
4130 OPT_CALLBACK(0, "duration", &trace, "float",
4131 "show only events with duration > N.M ms",
4132 trace__set_duration),
4133 #ifdef HAVE_LIBBPF_SUPPORT
4134 OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4135 #endif
4136 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
4137 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4138 OPT_BOOLEAN('T', "time", &trace.full_time,
4139 "Show full timestamp, not time relative to first start"),
4140 OPT_BOOLEAN(0, "failure", &trace.failure_only,
4141 "Show only syscalls that failed"),
4142 OPT_BOOLEAN('s', "summary", &trace.summary_only,
4143 "Show only syscall summary with statistics"),
4144 OPT_BOOLEAN('S', "with-summary", &trace.summary,
4145 "Show all syscalls and summary with statistics"),
4146 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
4147 "Trace pagefaults", parse_pagefaults, "maj"),
4148 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
4149 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
4150 OPT_CALLBACK(0, "call-graph", &trace.opts,
4151 "record_mode[,record_size]", record_callchain_help,
4152 &record_parse_callchain_opt),
4153 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
4154 "Show the kernel callchains on the syscall exit path"),
4155 OPT_ULONG(0, "max-events", &trace.max_events,
4156 "Set the maximum number of events to print, exit after that is reached. "),
4157 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
4158 "Set the minimum stack depth when parsing the callchain, "
4159 "anything below the specified depth will be ignored."),
4160 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
4161 "Set the maximum stack depth when parsing the callchain, "
4162 "anything beyond the specified depth will be ignored. "
4163 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4164 OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4165 "Sort batch of events before processing, use if getting out of order events"),
4166 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
4167 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
4168 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
4169 "per thread proc mmap processing timeout in ms"),
4170 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
4171 trace__parse_cgroups),
4172 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
4173 "ms to wait before starting measurement after program "
4174 "start"),
4175 OPTS_EVSWITCH(&trace.evswitch),
4176 OPT_END()
4177 };
4178 bool __maybe_unused max_stack_user_set = true;
4179 bool mmap_pages_user_set = true;
4180 struct evsel *evsel;
4181 const char * const trace_subcommands[] = { "record", NULL };
4182 int err = -1;
4183 char bf[BUFSIZ];
4184
4185 signal(SIGSEGV, sighandler_dump_stack);
4186 signal(SIGFPE, sighandler_dump_stack);
4187
4188 trace.evlist = evlist__new();
4189 trace.sctbl = syscalltbl__new();
4190
4191 if (trace.evlist == NULL || trace.sctbl == NULL) {
4192 pr_err("Not enough memory to run!\n");
4193 err = -ENOMEM;
4194 goto out;
4195 }
4196
4197
4198
4199
4200
4201
4202
4203
4204 rlimit__bump_memlock();
4205
4206 err = perf_config(trace__config, &trace);
4207 if (err)
4208 goto out;
4209
4210 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
4211 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4212
4213 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
4214 usage_with_options_msg(trace_usage, trace_options,
4215 "cgroup monitoring only available in system-wide mode");
4216 }
4217
4218 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
4219 if (IS_ERR(evsel)) {
4220 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
4221 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
4222 goto out;
4223 }
4224
4225 if (evsel) {
4226 trace.syscalls.events.augmented = evsel;
4227
4228 evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4229 if (evsel == NULL) {
4230 pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4231 goto out;
4232 }
4233
4234 if (evsel->bpf_obj == NULL) {
4235 pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4236 goto out;
4237 }
4238
4239 trace.bpf_obj = evsel->bpf_obj;
4240
4241 trace__set_bpf_map_filtered_pids(&trace);
4242 trace__set_bpf_map_syscalls(&trace);
4243 trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
4244 }
4245
4246 err = bpf__setup_stdout(trace.evlist);
4247 if (err) {
4248 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
4249 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
4250 goto out;
4251 }
4252
4253 err = -1;
4254
4255 if (map_dump_str) {
4256 trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
4257 if (trace.dump.map == NULL) {
4258 pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
4259 goto out;
4260 }
4261 }
4262
4263 if (trace.trace_pgfaults) {
4264 trace.opts.sample_address = true;
4265 trace.opts.sample_time = true;
4266 }
4267
4268 if (trace.opts.mmap_pages == UINT_MAX)
4269 mmap_pages_user_set = false;
4270
4271 if (trace.max_stack == UINT_MAX) {
4272 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
4273 max_stack_user_set = false;
4274 }
4275
4276 #ifdef HAVE_DWARF_UNWIND_SUPPORT
4277 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
4278 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
4279 }
4280 #endif
4281
4282 if (callchain_param.enabled) {
4283 if (!mmap_pages_user_set && geteuid() == 0)
4284 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
4285
4286 symbol_conf.use_callchain = true;
4287 }
4288
4289 if (trace.evlist->core.nr_entries > 0) {
4290 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
4291 if (evlist__set_syscall_tp_fields(trace.evlist)) {
4292 perror("failed to set syscalls:* tracepoint fields");
4293 goto out;
4294 }
4295 }
4296
4297 if (trace.sort_events) {
4298 ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
4299 ordered_events__set_copy_on_queue(&trace.oe.data, true);
4300 }
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313 if (trace.syscalls.events.augmented) {
4314 evlist__for_each_entry(trace.evlist, evsel) {
4315 bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
4316
4317 if (raw_syscalls_sys_exit) {
4318 trace.raw_augmented_syscalls = true;
4319 goto init_augmented_syscall_tp;
4320 }
4321
4322 if (trace.syscalls.events.augmented->priv == NULL &&
4323 strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
4324 struct evsel *augmented = trace.syscalls.events.augmented;
4325 if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
4326 perf_evsel__init_augmented_syscall_tp_args(augmented))
4327 goto out;
4328
4329
4330
4331
4332
4333 augmented->handler = trace__sys_enter;
4334
4335
4336
4337
4338
4339
4340 if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
4341 perf_evsel__init_augmented_syscall_tp_args(evsel))
4342 goto out;
4343 evsel->handler = trace__sys_enter;
4344 }
4345
4346 if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
4347 struct syscall_tp *sc;
4348 init_augmented_syscall_tp:
4349 if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
4350 goto out;
4351 sc = evsel->priv;
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371 if (trace.raw_augmented_syscalls)
4372 trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
4373 perf_evsel__init_augmented_syscall_tp_ret(evsel);
4374 evsel->handler = trace__sys_exit;
4375 }
4376 }
4377 }
4378
4379 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
4380 return trace__record(&trace, argc-1, &argv[1]);
4381
4382
4383 if (trace.summary_only)
4384 trace.summary = trace.summary_only;
4385
4386 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4387 trace.evlist->core.nr_entries == 0 ) {
4388 trace.trace_syscalls = true;
4389 }
4390
4391 if (output_name != NULL) {
4392 err = trace__open_output(&trace, output_name);
4393 if (err < 0) {
4394 perror("failed to create output file");
4395 goto out;
4396 }
4397 }
4398
4399 err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
4400 if (err)
4401 goto out_close;
4402
4403 err = target__validate(&trace.opts.target);
4404 if (err) {
4405 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
4406 fprintf(trace.output, "%s", bf);
4407 goto out_close;
4408 }
4409
4410 err = target__parse_uid(&trace.opts.target);
4411 if (err) {
4412 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
4413 fprintf(trace.output, "%s", bf);
4414 goto out_close;
4415 }
4416
4417 if (!argc && target__none(&trace.opts.target))
4418 trace.opts.target.system_wide = true;
4419
4420 if (input_name)
4421 err = trace__replay(&trace);
4422 else
4423 err = trace__run(&trace, argc, argv);
4424
4425 out_close:
4426 if (output_name != NULL)
4427 fclose(trace.output);
4428 out:
4429 return err;
4430 }