1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc.  Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19#include <traceevent/event-parse.h>
20#include <api/fs/tracing_path.h>
21#include "builtin.h"
22#include "util/color.h"
23#include "util/debug.h"
24#include "util/evlist.h"
25#include "util/exec_cmd.h"
26#include "util/machine.h"
27#include "util/session.h"
28#include "util/thread.h"
29#include "util/parse-options.h"
30#include "util/strlist.h"
31#include "util/intlist.h"
32#include "util/thread_map.h"
33#include "util/stat.h"
34#include "trace-event.h"
35#include "util/parse-events.h"
36
37#include <libaudit.h>
38#include <stdlib.h>
39#include <sys/mman.h>
40#include <linux/futex.h>
41#include <linux/err.h>
42
/*
 * For older distros: fallback definitions for constants that the system
 * headers may not provide. Each one is only defined here when the headers
 * included above did not already define it.
 */

/* mmap() flags */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

/* madvise() behaviors */
#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100

#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

/* eventfd flags */
#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC		02000000
#endif

/* open()/pipe2() flags */
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

/* socket() types and type flags */
#ifndef SOCK_DCCP
# define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK		00004000
#endif

/* recvmsg()/sendmsg() flags */
#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() flags */
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
108
109
/*
 * Accessor for one field of a tracepoint sample's raw payload: the field's
 * byte offset plus the function used to read it.
 */
struct tp_field {
	int offset;	/* byte offset of the field inside sample->raw_data */
	union {
		/* reads the field as an unsigned integer, returned as u64 */
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		/* returns the address of the field inside the raw payload */
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
117
118#define TP_UINT_FIELD(bits) \
119static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120{ \
121	u##bits value; \
122	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123	return value;  \
124}
125
126TP_UINT_FIELD(8);
127TP_UINT_FIELD(16);
128TP_UINT_FIELD(32);
129TP_UINT_FIELD(64);
130
131#define TP_UINT_FIELD__SWAPPED(bits) \
132static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
134	u##bits value; \
135	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136	return bswap_##bits(value);\
137}
138
139TP_UINT_FIELD__SWAPPED(16);
140TP_UINT_FIELD__SWAPPED(32);
141TP_UINT_FIELD__SWAPPED(64);
142
143static int tp_field__init_uint(struct tp_field *field,
144			       struct format_field *format_field,
145			       bool needs_swap)
146{
147	field->offset = format_field->offset;
148
149	switch (format_field->size) {
150	case 1:
151		field->integer = tp_field__u8;
152		break;
153	case 2:
154		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155		break;
156	case 4:
157		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158		break;
159	case 8:
160		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161		break;
162	default:
163		return -1;
164	}
165
166	return 0;
167}
168
169static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170{
171	return sample->raw_data + field->offset;
172}
173
174static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175{
176	field->offset = format_field->offset;
177	field->pointer = tp_field__ptr;
178	return 0;
179}
180
/*
 * Field accessors for a syscall tracepoint, allocated and stored in
 * evsel->priv by perf_evsel__init_syscall_tp().
 */
struct syscall_tp {
	struct tp_field id;	/* accessor for the tracepoint's "id" field */
	union {
		/*
		 * 'args' and 'ret' share storage, so a given evsel uses only one.
		 * NOTE(review): presumably args for the enter tracepoint and ret
		 * for the exit one - confirm against the callers.
		 */
		struct tp_field args, ret;
	};
};
187
188static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189					  struct tp_field *field,
190					  const char *name)
191{
192	struct format_field *format_field = perf_evsel__field(evsel, name);
193
194	if (format_field == NULL)
195		return -1;
196
197	return tp_field__init_uint(field, format_field, evsel->needs_swap);
198}
199
/*
 * Initialize the integer accessor for member 'name' of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field with that same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 *
 * The 'evsel' argument is parenthesized so that any pointer expression can
 * be passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
203
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as
 * a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
215
/*
 * Initialize the pointer accessor for member 'name' of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field with that same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 *
 * The 'evsel' argument is parenthesized so that any pointer expression can
 * be passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
219
/*
 * Destroy an evsel whose ->priv was allocated by this file: release ->priv
 * first, then hand the evsel itself to perf_evsel__delete().
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
225
226static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227{
228	evsel->priv = malloc(sizeof(struct syscall_tp));
229	if (evsel->priv != NULL) {
230		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231			goto out_delete;
232
233		evsel->handler = handler;
234		return 0;
235	}
236
237	return -ENOMEM;
238
239out_delete:
240	zfree(&evsel->priv);
241	return -ENOENT;
242}
243
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp(), installing @handler.
 *
 * Returns the new evsel, or NULL if the tracepoint could not be created or
 * initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	/* Older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
	static const char * const subsystems[] = { "raw_syscalls", "syscalls" };
	struct perf_evsel *evsel = NULL;
	size_t i;

	for (i = 0; i < ARRAY_SIZE(subsystems); i++) {
		evsel = perf_evsel__newtp(subsystems[i], direction);
		if (!IS_ERR(evsel))
			break;
	}

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
264
/*
 * Read integer member 'name' of the struct syscall_tp stored in evsel->priv
 * from 'sample', by calling that member's ->integer accessor.
 *
 * The 'evsel' argument is parenthesized so that any pointer expression can
 * be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
268
/*
 * Get the address of member 'name' of the struct syscall_tp stored in
 * evsel->priv inside 'sample', by calling that member's ->pointer accessor.
 *
 * The 'evsel' argument is parenthesized so that any pointer expression can
 * be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
272
/* Context handed to the syscall_arg__scnprintf_*() argument formatters. */
struct syscall_arg {
	unsigned long val;	/* raw value of the argument being formatted */
	struct thread *thread;	/* NOTE(review): presumably the thread that made the syscall - confirm */
	struct trace  *trace;	/* NOTE(review): presumably the owning trace session - confirm */
	void	      *parm;	/* formatter specific parameter, e.g. a struct strarray */
	u8	      idx;	/* index of this argument in the syscall's argument list */
	u8	      mask;	/* formatters set bit N here to mark argument N as masked */
};
281
/* Table used to translate an integer argument into a name. */
struct strarray {
	int	    offset;	/* value that maps to entries[0]; subtracted before indexing */
	int	    nr_entries;	/* number of slots in entries[] */
	const char **entries;	/* names, indexed by (value - offset) */
};
287
/*
 * Define 'struct strarray strarray__<array>' describing the string table
 * 'array', which must be a real array (ARRAY_SIZE() is applied to it).
 * The offset is left at zero, so entries[0] names the value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same as DEFINE_STRARRAY(), but entries[0] names the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
298
299static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300						const char *intfmt,
301					        struct syscall_arg *arg)
302{
303	struct strarray *sa = arg->parm;
304	int idx = arg->val - sa->offset;
305
306	if (idx < 0 || idx >= sa->nr_entries)
307		return scnprintf(bf, size, intfmt, arg->val);
308
309	return scnprintf(bf, size, "%s", sa->entries[idx]);
310}
311
/*
 * strarray formatter that prints values missing from the table in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints values missing from the table in hex.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333
/*
 * Formatter for file descriptor arguments. Only declared here, so that the
 * formatters and tables that follow can refer to it before its definition.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
338
339static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340					   struct syscall_arg *arg)
341{
342	int fd = arg->val;
343
344	if (fd == AT_FDCWD)
345		return scnprintf(bf, size, "CWD");
346
347	return syscall_arg__scnprintf_fd(bf, size, arg);
348}
349
350#define SCA_FDAT syscall_arg__scnprintf_fd_at
351
/*
 * Formatter for the fd argument of close(). Only declared here, so that the
 * syscall table can refer to it before its definition.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
357static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358					 struct syscall_arg *arg)
359{
360	return scnprintf(bf, size, "%#lx", arg->val);
361}
362
363#define SCA_HEX syscall_arg__scnprintf_hex
364
365static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366					 struct syscall_arg *arg)
367{
368	return scnprintf(bf, size, "%d", arg->val);
369}
370
371#define SCA_INT syscall_arg__scnprintf_int
372
373static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374					       struct syscall_arg *arg)
375{
376	int printed = 0, prot = arg->val;
377
378	if (prot == PROT_NONE)
379		return scnprintf(bf, size, "NONE");
380#define	P_MMAP_PROT(n) \
381	if (prot & PROT_##n) { \
382		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383		prot &= ~PROT_##n; \
384	}
385
386	P_MMAP_PROT(EXEC);
387	P_MMAP_PROT(READ);
388	P_MMAP_PROT(WRITE);
389#ifdef PROT_SEM
390	P_MMAP_PROT(SEM);
391#endif
392	P_MMAP_PROT(GROWSDOWN);
393	P_MMAP_PROT(GROWSUP);
394#undef P_MMAP_PROT
395
396	if (prot)
397		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398
399	return printed;
400}
401
402#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403
404static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405						struct syscall_arg *arg)
406{
407	int printed = 0, flags = arg->val;
408
409#define	P_MMAP_FLAG(n) \
410	if (flags & MAP_##n) { \
411		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412		flags &= ~MAP_##n; \
413	}
414
415	P_MMAP_FLAG(SHARED);
416	P_MMAP_FLAG(PRIVATE);
417#ifdef MAP_32BIT
418	P_MMAP_FLAG(32BIT);
419#endif
420	P_MMAP_FLAG(ANONYMOUS);
421	P_MMAP_FLAG(DENYWRITE);
422	P_MMAP_FLAG(EXECUTABLE);
423	P_MMAP_FLAG(FILE);
424	P_MMAP_FLAG(FIXED);
425	P_MMAP_FLAG(GROWSDOWN);
426#ifdef MAP_HUGETLB
427	P_MMAP_FLAG(HUGETLB);
428#endif
429	P_MMAP_FLAG(LOCKED);
430	P_MMAP_FLAG(NONBLOCK);
431	P_MMAP_FLAG(NORESERVE);
432	P_MMAP_FLAG(POPULATE);
433	P_MMAP_FLAG(STACK);
434#ifdef MAP_UNINITIALIZED
435	P_MMAP_FLAG(UNINITIALIZED);
436#endif
437#undef P_MMAP_FLAG
438
439	if (flags)
440		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441
442	return printed;
443}
444
445#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446
447static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448						  struct syscall_arg *arg)
449{
450	int printed = 0, flags = arg->val;
451
452#define P_MREMAP_FLAG(n) \
453	if (flags & MREMAP_##n) { \
454		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455		flags &= ~MREMAP_##n; \
456	}
457
458	P_MREMAP_FLAG(MAYMOVE);
459#ifdef MREMAP_FIXED
460	P_MREMAP_FLAG(FIXED);
461#endif
462#undef P_MREMAP_FLAG
463
464	if (flags)
465		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466
467	return printed;
468}
469
470#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471
472static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473						      struct syscall_arg *arg)
474{
475	int behavior = arg->val;
476
477	switch (behavior) {
478#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479	P_MADV_BHV(NORMAL);
480	P_MADV_BHV(RANDOM);
481	P_MADV_BHV(SEQUENTIAL);
482	P_MADV_BHV(WILLNEED);
483	P_MADV_BHV(DONTNEED);
484	P_MADV_BHV(REMOVE);
485	P_MADV_BHV(DONTFORK);
486	P_MADV_BHV(DOFORK);
487	P_MADV_BHV(HWPOISON);
488#ifdef MADV_SOFT_OFFLINE
489	P_MADV_BHV(SOFT_OFFLINE);
490#endif
491	P_MADV_BHV(MERGEABLE);
492	P_MADV_BHV(UNMERGEABLE);
493#ifdef MADV_HUGEPAGE
494	P_MADV_BHV(HUGEPAGE);
495#endif
496#ifdef MADV_NOHUGEPAGE
497	P_MADV_BHV(NOHUGEPAGE);
498#endif
499#ifdef MADV_DONTDUMP
500	P_MADV_BHV(DONTDUMP);
501#endif
502#ifdef MADV_DODUMP
503	P_MADV_BHV(DODUMP);
504#endif
505#undef P_MADV_PHV
506	default: break;
507	}
508
509	return scnprintf(bf, size, "%#x", behavior);
510}
511
512#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513
514static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515					   struct syscall_arg *arg)
516{
517	int printed = 0, op = arg->val;
518
519	if (op == 0)
520		return scnprintf(bf, size, "NONE");
521#define	P_CMD(cmd) \
522	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524		op &= ~LOCK_##cmd; \
525	}
526
527	P_CMD(SH);
528	P_CMD(EX);
529	P_CMD(NB);
530	P_CMD(UN);
531	P_CMD(MAND);
532	P_CMD(RW);
533	P_CMD(READ);
534	P_CMD(WRITE);
535#undef P_OP
536
537	if (op)
538		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539
540	return printed;
541}
542
543#define SCA_FLOCK syscall_arg__scnprintf_flock
544
545static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546{
547	enum syscall_futex_args {
548		SCF_UADDR   = (1 << 0),
549		SCF_OP	    = (1 << 1),
550		SCF_VAL	    = (1 << 2),
551		SCF_TIMEOUT = (1 << 3),
552		SCF_UADDR2  = (1 << 4),
553		SCF_VAL3    = (1 << 5),
554	};
555	int op = arg->val;
556	int cmd = op & FUTEX_CMD_MASK;
557	size_t printed = 0;
558
559	switch (cmd) {
560#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
562	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
565	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
566	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
567	P_FUTEX_OP(WAKE_OP);							  break;
568	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
571	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
572	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
573	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
574	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
575	}
576
577	if (op & FUTEX_PRIVATE_FLAG)
578		printed += scnprintf(bf + printed, size - printed, "|PRIV");
579
580	if (op & FUTEX_CLOCK_REALTIME)
581		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582
583	return printed;
584}
585
586#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
587
/* Names for the 'cmd' argument of bpf(), indexed by the command value. */
static const char *bpf_cmd[] = {
	"MAP_CREATE",
	"MAP_LOOKUP_ELEM",
	"MAP_UPDATE_ELEM",
	"MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY",
	"PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
593
594static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
596
/* Names for the 'which' argument of getitimer(), indexed by its value. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
599
/* Names for the 'option' argument of keyctl(), indexed by its value. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID",
	"JOIN_SESSION_KEYRING",
	"UPDATE",
	"REVOKE",
	"CHOWN",
	"SETPERM",
	"DESCRIBE",
	"CLEAR",
	"LINK",
	"UNLINK",
	"SEARCH",
	"READ",
	"INSTANTIATE",
	"NEGATE",
	"SET_REQKEY_KEYRING",
	"SET_TIMEOUT",
	"ASSUME_AUTHORITY",
	"GET_SECURITY",
	"SESSION_TO_PARENT",
	"REJECT",
	"INSTANTIATE_IOV",
	"INVALIDATE",
	"GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
608
/*
 * Names for the 'whence' argument of lseek(), indexed by its value. The
 * DATA and HOLE entries are only present when the headers define
 * SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
618
/*
 * Names for the 'cmd' argument of fcntl(), indexed by the command value.
 *
 * All names are spelled without the "F_" prefix, so that every command is
 * printed in the same style.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
/* Defines strarray__fcntl_cmds, the lookup table handed to SCA_STRARRAY. */
static DEFINE_STRARRAY(fcntl_cmds);
626
/* Names for the 'resource' argument of getrlimit(), indexed by its value. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
633
/*
 * Names for a signal mask 'how' value, indexed by the value.
 * NOTE(review): presumably used for rt_sigprocmask() - confirm in the
 * syscall table.
 */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
636
/* Names for the 'clk_id' argument of clock_gettime(), indexed by its value. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
	"BOOTTIME",
	"REALTIME_ALARM",
	"BOOTTIME_ALARM",
	"SGI_CYCLE",
	"TAI",
};
static DEFINE_STRARRAY(clockid);
643
/*
 * Names of the socket address families, indexed by the AF_* value and
 * spelled without the "AF_" prefix.
 *
 * The table is positional, so it must have one entry per value with no
 * gaps: "MPLS" (28) sits between "IB" (27) and "CAN" (29). Leaving it out
 * shifts every later name down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* Defines strarray__socket_families, the lookup table for socket_families[]. */
static DEFINE_STRARRAY(socket_families);
653
654#ifndef SOCK_TYPE_MASK
655#define SOCK_TYPE_MASK 0xf
656#endif
657
658static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659						      struct syscall_arg *arg)
660{
661	size_t printed;
662	int type = arg->val,
663	    flags = type & ~SOCK_TYPE_MASK;
664
665	type &= SOCK_TYPE_MASK;
666	/*
667 	 * Can't use a strarray, MIPS may override for ABI reasons.
668 	 */
669	switch (type) {
670#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671	P_SK_TYPE(STREAM);
672	P_SK_TYPE(DGRAM);
673	P_SK_TYPE(RAW);
674	P_SK_TYPE(RDM);
675	P_SK_TYPE(SEQPACKET);
676	P_SK_TYPE(DCCP);
677	P_SK_TYPE(PACKET);
678#undef P_SK_TYPE
679	default:
680		printed = scnprintf(bf, size, "%#x", type);
681	}
682
683#define	P_SK_FLAG(n) \
684	if (flags & SOCK_##n) { \
685		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686		flags &= ~SOCK_##n; \
687	}
688
689	P_SK_FLAG(CLOEXEC);
690	P_SK_FLAG(NONBLOCK);
691#undef P_SK_FLAG
692
693	if (flags)
694		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
695
696	return printed;
697}
698
699#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
700
701#ifndef MSG_PROBE
702#define MSG_PROBE	     0x10
703#endif
704#ifndef MSG_WAITFORONE
705#define MSG_WAITFORONE	0x10000
706#endif
707#ifndef MSG_SENDPAGE_NOTLAST
708#define MSG_SENDPAGE_NOTLAST 0x20000
709#endif
710#ifndef MSG_FASTOPEN
711#define MSG_FASTOPEN	     0x20000000
712#endif
713
714static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715					       struct syscall_arg *arg)
716{
717	int printed = 0, flags = arg->val;
718
719	if (flags == 0)
720		return scnprintf(bf, size, "NONE");
721#define	P_MSG_FLAG(n) \
722	if (flags & MSG_##n) { \
723		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724		flags &= ~MSG_##n; \
725	}
726
727	P_MSG_FLAG(OOB);
728	P_MSG_FLAG(PEEK);
729	P_MSG_FLAG(DONTROUTE);
730	P_MSG_FLAG(TRYHARD);
731	P_MSG_FLAG(CTRUNC);
732	P_MSG_FLAG(PROBE);
733	P_MSG_FLAG(TRUNC);
734	P_MSG_FLAG(DONTWAIT);
735	P_MSG_FLAG(EOR);
736	P_MSG_FLAG(WAITALL);
737	P_MSG_FLAG(FIN);
738	P_MSG_FLAG(SYN);
739	P_MSG_FLAG(CONFIRM);
740	P_MSG_FLAG(RST);
741	P_MSG_FLAG(ERRQUEUE);
742	P_MSG_FLAG(NOSIGNAL);
743	P_MSG_FLAG(MORE);
744	P_MSG_FLAG(WAITFORONE);
745	P_MSG_FLAG(SENDPAGE_NOTLAST);
746	P_MSG_FLAG(FASTOPEN);
747	P_MSG_FLAG(CMSG_CLOEXEC);
748#undef P_MSG_FLAG
749
750	if (flags)
751		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753	return printed;
754}
755
756#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
757
758static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759						 struct syscall_arg *arg)
760{
761	size_t printed = 0;
762	int mode = arg->val;
763
764	if (mode == F_OK) /* 0 */
765		return scnprintf(bf, size, "F");
766#define	P_MODE(n) \
767	if (mode & n##_OK) { \
768		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769		mode &= ~n##_OK; \
770	}
771
772	P_MODE(R);
773	P_MODE(W);
774	P_MODE(X);
775#undef P_MODE
776
777	if (mode)
778		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
779
780	return printed;
781}
782
783#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
784
/*
 * Formatter for pathname arguments. Only declared here, so that the syscall
 * table can refer to it before its definition.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
789
790static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791					       struct syscall_arg *arg)
792{
793	int printed = 0, flags = arg->val;
794
795	if (!(flags & O_CREAT))
796		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
797
798	if (flags == 0)
799		return scnprintf(bf, size, "RDONLY");
800#define	P_FLAG(n) \
801	if (flags & O_##n) { \
802		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803		flags &= ~O_##n; \
804	}
805
806	P_FLAG(APPEND);
807	P_FLAG(ASYNC);
808	P_FLAG(CLOEXEC);
809	P_FLAG(CREAT);
810	P_FLAG(DIRECT);
811	P_FLAG(DIRECTORY);
812	P_FLAG(EXCL);
813	P_FLAG(LARGEFILE);
814	P_FLAG(NOATIME);
815	P_FLAG(NOCTTY);
816#ifdef O_NONBLOCK
817	P_FLAG(NONBLOCK);
818#elif O_NDELAY
819	P_FLAG(NDELAY);
820#endif
821#ifdef O_PATH
822	P_FLAG(PATH);
823#endif
824	P_FLAG(RDWR);
825#ifdef O_DSYNC
826	if ((flags & O_SYNC) == O_SYNC)
827		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828	else {
829		P_FLAG(DSYNC);
830	}
831#else
832	P_FLAG(SYNC);
833#endif
834	P_FLAG(TRUNC);
835	P_FLAG(WRONLY);
836#undef P_FLAG
837
838	if (flags)
839		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841	return printed;
842}
843
844#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
845
846static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847						struct syscall_arg *arg)
848{
849	int printed = 0, flags = arg->val;
850
851	if (flags == 0)
852		return 0;
853
854#define	P_FLAG(n) \
855	if (flags & PERF_FLAG_##n) { \
856		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857		flags &= ~PERF_FLAG_##n; \
858	}
859
860	P_FLAG(FD_NO_GROUP);
861	P_FLAG(FD_OUTPUT);
862	P_FLAG(PID_CGROUP);
863	P_FLAG(FD_CLOEXEC);
864#undef P_FLAG
865
866	if (flags)
867		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
868
869	return printed;
870}
871
872#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
873
874static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875						   struct syscall_arg *arg)
876{
877	int printed = 0, flags = arg->val;
878
879	if (flags == 0)
880		return scnprintf(bf, size, "NONE");
881#define	P_FLAG(n) \
882	if (flags & EFD_##n) { \
883		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884		flags &= ~EFD_##n; \
885	}
886
887	P_FLAG(SEMAPHORE);
888	P_FLAG(CLOEXEC);
889	P_FLAG(NONBLOCK);
890#undef P_FLAG
891
892	if (flags)
893		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
894
895	return printed;
896}
897
898#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
899
900static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901						struct syscall_arg *arg)
902{
903	int printed = 0, flags = arg->val;
904
905#define	P_FLAG(n) \
906	if (flags & O_##n) { \
907		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908		flags &= ~O_##n; \
909	}
910
911	P_FLAG(CLOEXEC);
912	P_FLAG(NONBLOCK);
913#undef P_FLAG
914
915	if (flags)
916		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
917
918	return printed;
919}
920
921#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
922
923static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
924{
925	int sig = arg->val;
926
927	switch (sig) {
928#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929	P_SIGNUM(HUP);
930	P_SIGNUM(INT);
931	P_SIGNUM(QUIT);
932	P_SIGNUM(ILL);
933	P_SIGNUM(TRAP);
934	P_SIGNUM(ABRT);
935	P_SIGNUM(BUS);
936	P_SIGNUM(FPE);
937	P_SIGNUM(KILL);
938	P_SIGNUM(USR1);
939	P_SIGNUM(SEGV);
940	P_SIGNUM(USR2);
941	P_SIGNUM(PIPE);
942	P_SIGNUM(ALRM);
943	P_SIGNUM(TERM);
944	P_SIGNUM(CHLD);
945	P_SIGNUM(CONT);
946	P_SIGNUM(STOP);
947	P_SIGNUM(TSTP);
948	P_SIGNUM(TTIN);
949	P_SIGNUM(TTOU);
950	P_SIGNUM(URG);
951	P_SIGNUM(XCPU);
952	P_SIGNUM(XFSZ);
953	P_SIGNUM(VTALRM);
954	P_SIGNUM(PROF);
955	P_SIGNUM(WINCH);
956	P_SIGNUM(IO);
957	P_SIGNUM(PWR);
958	P_SIGNUM(SYS);
959#ifdef SIGEMT
960	P_SIGNUM(EMT);
961#endif
962#ifdef SIGSTKFLT
963	P_SIGNUM(STKFLT);
964#endif
965#ifdef SIGSWI
966	P_SIGNUM(SWI);
967#endif
968	default: break;
969	}
970
971	return scnprintf(bf, size, "%#x", sig);
972}
973
974#define SCA_SIGNUM syscall_arg__scnprintf_signum
975
976#if defined(__i386__) || defined(__x86_64__)
977/*
978 * FIXME: Make this available to all arches.
979 */
/* ioctl command named by the first entry of the tioctls[] table below. */
#define TCGETS		0x5401
981
/*
 * Names of the tty ioctl commands, indexed by (cmd - 0x5401): the table is
 * registered with an offset of 0x5401, so slot 0 is TCGETS.
 *
 * The designators that skip over unused command numbers must use that same
 * base: TIOCSBRK is 0x5427 (slot 0x26), FIONCLEX is 0x5450 (slot 0x4f) and
 * FIOQSIZE is 0x5460 (slot 0x5f). Slots that are skipped stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x26] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x4f] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x5f] = "FIOQSIZE",
};
999
/* Defines strarray__tioctls; the table's first entry names TCGETS. */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);
1001#endif /* defined(__i386__) || defined(__x86_64__) */
1002
/*
 * Shorthand for a syscall_fmts[] entry: format argument number 'arg' with
 * SCA_STRARRAY, using strarray__<array> as its lookup table.
 * 'name' is not used in the expansion; it only documents the argument's
 * name at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1006
1007static struct syscall_fmt {
1008	const char *name;
1009	const char *alias;
1010	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011	void	   *arg_parm[6];
1012	bool	   errmsg;
1013	bool	   timeout;
1014	bool	   hexret;
1015} syscall_fmts[] = {
1016	{ .name	    = "access",	    .errmsg = true,
1017	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1018			     [1] = SCA_ACCMODE,  /* mode */ }, },
1019	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021	{ .name	    = "brk",	    .hexret = true,
1022	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1023	{ .name	    = "chdir",	    .errmsg = true,
1024	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1025	{ .name	    = "chmod",	    .errmsg = true,
1026	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1027	{ .name	    = "chroot",	    .errmsg = true,
1028	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1029	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030	{ .name	    = "close",	    .errmsg = true,
1031	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1032	{ .name	    = "connect",    .errmsg = true, },
1033	{ .name	    = "creat",	    .errmsg = true,
1034	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1035	{ .name	    = "dup",	    .errmsg = true,
1036	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037	{ .name	    = "dup2",	    .errmsg = true,
1038	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039	{ .name	    = "dup3",	    .errmsg = true,
1040	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042	{ .name	    = "eventfd2",   .errmsg = true,
1043	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1044	{ .name	    = "faccessat",  .errmsg = true,
1045	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046			     [1] = SCA_FILENAME, /* filename */ }, },
1047	{ .name	    = "fadvise64",  .errmsg = true,
1048	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1049	{ .name	    = "fallocate",  .errmsg = true,
1050	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051	{ .name	    = "fchdir",	    .errmsg = true,
1052	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053	{ .name	    = "fchmod",	    .errmsg = true,
1054	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055	{ .name	    = "fchmodat",   .errmsg = true,
1056	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1057			     [1] = SCA_FILENAME, /* filename */ }, },
1058	{ .name	    = "fchown",	    .errmsg = true,
1059	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060	{ .name	    = "fchownat",   .errmsg = true,
1061	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1062			     [1] = SCA_FILENAME, /* filename */ }, },
1063	{ .name	    = "fcntl",	    .errmsg = true,
1064	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1065			     [1] = SCA_STRARRAY, /* cmd */ },
1066	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1067	{ .name	    = "fdatasync",  .errmsg = true,
1068	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069	{ .name	    = "flock",	    .errmsg = true,
1070	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1071			     [1] = SCA_FLOCK, /* cmd */ }, },
1072	{ .name	    = "fsetxattr",  .errmsg = true,
1073	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1075	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1077	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1078			     [1] = SCA_FILENAME, /* filename */ }, },
1079	{ .name	    = "fstatfs",    .errmsg = true,
1080	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1081	{ .name	    = "fsync",    .errmsg = true,
1082	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083	{ .name	    = "ftruncate", .errmsg = true,
1084	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085	{ .name	    = "futex",	    .errmsg = true,
1086	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1087	{ .name	    = "futimesat", .errmsg = true,
1088	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1089			     [1] = SCA_FILENAME, /* filename */ }, },
1090	{ .name	    = "getdents",   .errmsg = true,
1091	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1092	{ .name	    = "getdents64", .errmsg = true,
1093	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1094	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1095	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096	{ .name	    = "getxattr",    .errmsg = true,
1097	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1099	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1100	{ .name	    = "ioctl",	    .errmsg = true,
1101	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1102#if defined(__i386__) || defined(__x86_64__)
1103/*
1104 * FIXME: Make this available to all arches.
1105 */
1106			     [1] = SCA_STRHEXARRAY, /* cmd */
1107			     [2] = SCA_HEX, /* arg */ },
1108	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1109#else
1110			     [2] = SCA_HEX, /* arg */ }, },
1111#endif
1112	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113	{ .name	    = "kill",	    .errmsg = true,
1114	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1115	{ .name	    = "lchown",    .errmsg = true,
1116	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1117	{ .name	    = "lgetxattr",  .errmsg = true,
1118	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1119	{ .name	    = "linkat",	    .errmsg = true,
1120	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1121	{ .name	    = "listxattr",  .errmsg = true,
1122	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123	{ .name	    = "llistxattr", .errmsg = true,
1124	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1125	{ .name	    = "lremovexattr",  .errmsg = true,
1126	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127	{ .name	    = "lseek",	    .errmsg = true,
1128	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1129			     [2] = SCA_STRARRAY, /* whence */ },
1130	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1131	{ .name	    = "lsetxattr",  .errmsg = true,
1132	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1134	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1135	{ .name	    = "lsxattr",    .errmsg = true,
1136	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1137	{ .name     = "madvise",    .errmsg = true,
1138	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1139			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1140	{ .name	    = "mkdir",    .errmsg = true,
1141	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1142	{ .name	    = "mkdirat",    .errmsg = true,
1143	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1144			     [1] = SCA_FILENAME, /* pathname */ }, },
1145	{ .name	    = "mknod",      .errmsg = true,
1146	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1147	{ .name	    = "mknodat",    .errmsg = true,
1148	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1149			     [1] = SCA_FILENAME, /* filename */ }, },
1150	{ .name	    = "mlock",	    .errmsg = true,
1151	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1152	{ .name	    = "mlockall",   .errmsg = true,
1153	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1154	{ .name	    = "mmap",	    .hexret = true,
1155	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1156			     [2] = SCA_MMAP_PROT, /* prot */
1157			     [3] = SCA_MMAP_FLAGS, /* flags */
1158			     [4] = SCA_FD, 	  /* fd */ }, },
1159	{ .name	    = "mprotect",   .errmsg = true,
1160	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1161			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1162	{ .name	    = "mq_unlink", .errmsg = true,
1163	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1164	{ .name	    = "mremap",	    .hexret = true,
1165	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1166			     [3] = SCA_MREMAP_FLAGS, /* flags */
1167			     [4] = SCA_HEX, /* new_addr */ }, },
1168	{ .name	    = "munlock",    .errmsg = true,
1169	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1170	{ .name	    = "munmap",	    .errmsg = true,
1171	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1172	{ .name	    = "name_to_handle_at", .errmsg = true,
1173	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1174	{ .name	    = "newfstatat", .errmsg = true,
1175	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1176			     [1] = SCA_FILENAME, /* filename */ }, },
1177	{ .name	    = "open",	    .errmsg = true,
1178	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1179			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1180	{ .name	    = "open_by_handle_at", .errmsg = true,
1181	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1182			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1183	{ .name	    = "openat",	    .errmsg = true,
1184	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1185			     [1] = SCA_FILENAME, /* filename */
1186			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1187	{ .name	    = "perf_event_open", .errmsg = true,
1188	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1189			     [2] = SCA_INT, /* cpu */
1190			     [3] = SCA_FD,  /* group_fd */
1191			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1192	{ .name	    = "pipe2",	    .errmsg = true,
1193	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1194	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1195	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1196	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1197	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1199	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1200	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1201	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1202	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1203	{ .name	    = "pwritev",    .errmsg = true,
1204	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205	{ .name	    = "read",	    .errmsg = true,
1206	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207	{ .name	    = "readlink",   .errmsg = true,
1208	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1209	{ .name	    = "readlinkat", .errmsg = true,
1210	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1211			     [1] = SCA_FILENAME, /* pathname */ }, },
1212	{ .name	    = "readv",	    .errmsg = true,
1213	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1214	{ .name	    = "recvfrom",   .errmsg = true,
1215	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1216			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1217	{ .name	    = "recvmmsg",   .errmsg = true,
1218	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1219			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1220	{ .name	    = "recvmsg",    .errmsg = true,
1221	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1222			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1223	{ .name	    = "removexattr", .errmsg = true,
1224	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1225	{ .name	    = "renameat",   .errmsg = true,
1226	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1227	{ .name	    = "rmdir",    .errmsg = true,
1228	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1229	{ .name	    = "rt_sigaction", .errmsg = true,
1230	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1231	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1232	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1233	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1234	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1235	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1236	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1237	{ .name	    = "sendmmsg",    .errmsg = true,
1238	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1239			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1240	{ .name	    = "sendmsg",    .errmsg = true,
1241	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1242			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1243	{ .name	    = "sendto",	    .errmsg = true,
1244	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1245			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1246	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1247	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1248	{ .name	    = "setxattr",   .errmsg = true,
1249	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1250	{ .name	    = "shutdown",   .errmsg = true,
1251	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1252	{ .name	    = "socket",	    .errmsg = true,
1253	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1254			     [1] = SCA_SK_TYPE, /* type */ },
1255	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1256	{ .name	    = "socketpair", .errmsg = true,
1257	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1258			     [1] = SCA_SK_TYPE, /* type */ },
1259	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1260	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1261	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1262	{ .name	    = "statfs",	    .errmsg = true,
1263	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1264	{ .name	    = "swapoff",    .errmsg = true,
1265	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1266	{ .name	    = "swapon",	    .errmsg = true,
1267	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1268	{ .name	    = "symlinkat",  .errmsg = true,
1269	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1270	{ .name	    = "tgkill",	    .errmsg = true,
1271	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1272	{ .name	    = "tkill",	    .errmsg = true,
1273	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1274	{ .name	    = "truncate",   .errmsg = true,
1275	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1276	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1277	{ .name	    = "unlinkat",   .errmsg = true,
1278	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1279			     [1] = SCA_FILENAME, /* pathname */ }, },
1280	{ .name	    = "utime",  .errmsg = true,
1281	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1282	{ .name	    = "utimensat",  .errmsg = true,
1283	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1284			     [1] = SCA_FILENAME, /* filename */ }, },
1285	{ .name	    = "utimes",  .errmsg = true,
1286	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1287	{ .name	    = "vmsplice",  .errmsg = true,
1288	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1289	{ .name	    = "write",	    .errmsg = true,
1290	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1291	{ .name	    = "writev",	    .errmsg = true,
1292	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1293};
1294
1295static int syscall_fmt__cmp(const void *name, const void *fmtp)
1296{
1297	const struct syscall_fmt *fmt = fmtp;
1298	return strcmp(name, fmt->name);
1299}
1300
1301static struct syscall_fmt *syscall_fmt__find(const char *name)
1302{
1303	const int nmemb = ARRAY_SIZE(syscall_fmts);
1304	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1305}
1306
/*
 * Cached description of one syscall, stored in trace->syscalls.table and
 * indexed by syscall id.  Filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of tracepoint fields, not counting a leading "nr" field */
	int		    nr_args;
	/* first argument field of tp_format (a leading "nr" field is skipped) */
	struct format_field *args;
	/* name as returned by audit_syscall_to_name(); NULL means "not read yet" */
	const char	    *name;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* entry in syscall_fmts for this syscall, NULL if there is none */
	struct syscall_fmt  *fmt;
	/* calloc'ed array with one optional formatter per argument */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument formatter parameter, borrowed from fmt->arg_parm */
	void		    **arg_parm;
};
1317
1318static size_t fprintf_duration(unsigned long t, FILE *fp)
1319{
1320	double duration = (double)t / NSEC_PER_MSEC;
1321	size_t printed = fprintf(fp, "(");
1322
1323	if (duration >= 1.0)
1324		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1325	else if (duration >= 0.01)
1326		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1327	else
1328		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1329	return printed + fprintf(fp, "): ");
1330}
1331
/**
 * Per thread tracing state, attached to a struct thread through
 * thread__set_priv() and retrieved with thread__priv().
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
	/* sample time of the last sys_enter seen for this thread */
	u64		  entry_time;
	/* sample time of the last sys_exit seen for this thread */
	u64		  exit_time;
	/* a sys_enter line was formatted into entry_str but not printed yet */
	bool		  entry_pending;
	/* bumped on every thread__trace() call for this thread */
	unsigned long	  nr_events;
	/* NOTE(review): presumably major/minor page fault counters - confirm */
	unsigned long	  pfmaj, pfmin;
	/* buffer of trace__entry_str_size bytes holding the formatted sys_enter */
	char		  *entry_str;
	/* NOTE(review): not used in this part of the file - presumably sched runtime */
	double		  runtime_ms;
        struct {
		unsigned long ptr;
		short int     entry_str_pos;
		/* set while an open is waiting for its fd; cleared in sys_enter/sys_exit */
		bool	      pending_open;
		unsigned int  namelen;
		/* pathname recorded for the fd returned by a successful open */
		char	      *name;
	} filename;
	/* fd -> pathname cache: table has max + 1 slots, max is -1 while empty */
	struct {
		int	  max;
		char	  **table;
	} paths;

	/* keyed by syscall id, each node's ->priv is a struct stats */
	struct intlist *syscall_stats;
};
1359
1360static struct thread_trace *thread_trace__new(void)
1361{
1362	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1363
1364	if (ttrace)
1365		ttrace->paths.max = -1;
1366
1367	ttrace->syscall_stats = intlist__new(NULL);
1368
1369	return ttrace;
1370}
1371
1372static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1373{
1374	struct thread_trace *ttrace;
1375
1376	if (thread == NULL)
1377		goto fail;
1378
1379	if (thread__priv(thread) == NULL)
1380		thread__set_priv(thread, thread_trace__new());
1381
1382	if (thread__priv(thread) == NULL)
1383		goto fail;
1384
1385	ttrace = thread__priv(thread);
1386	++ttrace->nr_events;
1387
1388	return ttrace;
1389fail:
1390	color_fprintf(fp, PERF_COLOR_RED,
1391		      "WARNING: not enough memory, dropping samples!\n");
1392	return NULL;
1393}
1394
/*
 * Page fault selection bits.
 * NOTE(review): presumably OR-ed into trace->trace_pgfaults - confirm.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per thread entry_str buffer, see trace__sys_enter() */
static const size_t trace__entry_str_size = 2048;
1399
/*
 * State of one 'perf trace' session.
 */
struct trace {
	/* embedded so that callbacks can get back here with container_of() */
	struct perf_tool	tool;
	struct {
		/* machine type handed to the libaudit name <-> id helpers */
		int		machine;
		/* syscall id compared against in trace__sys_exit() to track opens */
		int		open_id;
	}			audit;
	struct {
		/* highest id that fits in table, -1 while table is empty */
		int		max;
		/* syscall descriptions indexed by id, read on first use */
		struct syscall  *table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	/* created by machine__new_host() in trace__symbols_init() */
	struct machine		*host;
	/* thread of the last sys_enter, a reference is held on it */
	struct thread		*current;
	/* subtracted from sample timestamps before printing them */
	u64			base_time;
	/* where all the trace output goes */
	FILE			*output;
	unsigned long		nr_events;
	/* syscall names the user asked for */
	struct strlist		*ev_qualifier;
	/* ev_qualifier resolved to ids by trace__validate_ev_qualifier() */
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;
	}			filter_pids;
	/* in milliseconds, see trace__filter_duration() */
	double			duration_filter;
	double			runtime_ms;
	/* how many fd pathnames came from vfs_getname and from /proc */
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	bool			not_ev_qualifier;
	/* only when set are fd pathnames looked up in /proc */
	bool			live;
	bool			full_time;
	bool			sched;
	/* print the tid (and comm, if show_comm) in front of each line */
	bool			multiple_threads;
	/* collect per syscall statistics in trace__sys_exit() */
	bool			summary;
	/* suppress the per syscall lines */
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			force;
	/* when false, filename arguments are printed as raw pointers */
	bool			vfs_getname;
	int			trace_pgfaults;
};
1452
1453static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1454{
1455	struct thread_trace *ttrace = thread__priv(thread);
1456
1457	if (fd > ttrace->paths.max) {
1458		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1459
1460		if (npath == NULL)
1461			return -1;
1462
1463		if (ttrace->paths.max != -1) {
1464			memset(npath + ttrace->paths.max + 1, 0,
1465			       (fd - ttrace->paths.max) * sizeof(char *));
1466		} else {
1467			memset(npath, 0, (fd + 1) * sizeof(char *));
1468		}
1469
1470		ttrace->paths.table = npath;
1471		ttrace->paths.max   = fd;
1472	}
1473
1474	ttrace->paths.table[fd] = strdup(pathname);
1475
1476	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1477}
1478
1479static int thread__read_fd_path(struct thread *thread, int fd)
1480{
1481	char linkname[PATH_MAX], pathname[PATH_MAX];
1482	struct stat st;
1483	int ret;
1484
1485	if (thread->pid_ == thread->tid) {
1486		scnprintf(linkname, sizeof(linkname),
1487			  "/proc/%d/fd/%d", thread->pid_, fd);
1488	} else {
1489		scnprintf(linkname, sizeof(linkname),
1490			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1491	}
1492
1493	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1494		return -1;
1495
1496	ret = readlink(linkname, pathname, sizeof(pathname));
1497
1498	if (ret < 0 || ret > st.st_size)
1499		return -1;
1500
1501	pathname[ret] = '\0';
1502	return trace__set_fd_pathname(thread, fd, pathname);
1503}
1504
1505static const char *thread__fd_path(struct thread *thread, int fd,
1506				   struct trace *trace)
1507{
1508	struct thread_trace *ttrace = thread__priv(thread);
1509
1510	if (ttrace == NULL)
1511		return NULL;
1512
1513	if (fd < 0)
1514		return NULL;
1515
1516	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1517		if (!trace->live)
1518			return NULL;
1519		++trace->stats.proc_getname;
1520		if (thread__read_fd_path(thread, fd))
1521			return NULL;
1522	}
1523
1524	return ttrace->paths.table[fd];
1525}
1526
1527static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1528					struct syscall_arg *arg)
1529{
1530	int fd = arg->val;
1531	size_t printed = scnprintf(bf, size, "%d", fd);
1532	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1533
1534	if (path)
1535		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1536
1537	return printed;
1538}
1539
1540static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1541					      struct syscall_arg *arg)
1542{
1543	int fd = arg->val;
1544	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1545	struct thread_trace *ttrace = thread__priv(arg->thread);
1546
1547	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1548		zfree(&ttrace->paths.table[fd]);
1549
1550	return printed;
1551}
1552
1553static void thread__set_filename_pos(struct thread *thread, const char *bf,
1554				     unsigned long ptr)
1555{
1556	struct thread_trace *ttrace = thread__priv(thread);
1557
1558	ttrace->filename.ptr = ptr;
1559	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1560}
1561
1562static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1563					      struct syscall_arg *arg)
1564{
1565	unsigned long ptr = arg->val;
1566
1567	if (!arg->trace->vfs_getname)
1568		return scnprintf(bf, size, "%#x", ptr);
1569
1570	thread__set_filename_pos(arg->thread, bf, ptr);
1571	return 0;
1572}
1573
1574static bool trace__filter_duration(struct trace *trace, double t)
1575{
1576	return t < (trace->duration_filter * NSEC_PER_MSEC);
1577}
1578
1579static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1580{
1581	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1582
1583	return fprintf(fp, "%10.3f ", ts);
1584}
1585
/*
 * Flags set from sig_handler().  They are written asynchronously from a
 * signal handler, so they have to be volatile sig_atomic_t objects: that
 * is the only kind of static storage a handler may portably assign to.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination and remember whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1594
1595static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1596					u64 duration, u64 tstamp, FILE *fp)
1597{
1598	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1599	printed += fprintf_duration(duration, fp);
1600
1601	if (trace->multiple_threads) {
1602		if (trace->show_comm)
1603			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1604		printed += fprintf(fp, "%d ", thread->tid);
1605	}
1606
1607	return printed;
1608}
1609
1610static int trace__process_event(struct trace *trace, struct machine *machine,
1611				union perf_event *event, struct perf_sample *sample)
1612{
1613	int ret = 0;
1614
1615	switch (event->header.type) {
1616	case PERF_RECORD_LOST:
1617		color_fprintf(trace->output, PERF_COLOR_RED,
1618			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1619		ret = machine__process_lost_event(machine, event, sample);
1620	default:
1621		ret = machine__process_event(machine, event, sample);
1622		break;
1623	}
1624
1625	return ret;
1626}
1627
1628static int trace__tool_process(struct perf_tool *tool,
1629			       union perf_event *event,
1630			       struct perf_sample *sample,
1631			       struct machine *machine)
1632{
1633	struct trace *trace = container_of(tool, struct trace, tool);
1634	return trace__process_event(trace, machine, event, sample);
1635}
1636
1637static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1638{
1639	int err = symbol__init(NULL);
1640
1641	if (err)
1642		return err;
1643
1644	trace->host = machine__new_host();
1645	if (trace->host == NULL)
1646		return -ENOMEM;
1647
1648	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1649		return -errno;
1650
1651	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1652					    evlist->threads, trace__tool_process, false,
1653					    trace->opts.proc_map_timeout);
1654	if (err)
1655		symbol__exit();
1656
1657	return err;
1658}
1659
1660static int syscall__set_arg_fmts(struct syscall *sc)
1661{
1662	struct format_field *field;
1663	int idx = 0;
1664
1665	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1666	if (sc->arg_scnprintf == NULL)
1667		return -1;
1668
1669	if (sc->fmt)
1670		sc->arg_parm = sc->fmt->arg_parm;
1671
1672	for (field = sc->args; field; field = field->next) {
1673		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1674			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1675		else if (field->flags & FIELD_IS_POINTER)
1676			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1677		++idx;
1678	}
1679
1680	return 0;
1681}
1682
1683static int trace__read_syscall_info(struct trace *trace, int id)
1684{
1685	char tp_name[128];
1686	struct syscall *sc;
1687	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1688
1689	if (name == NULL)
1690		return -1;
1691
1692	if (id > trace->syscalls.max) {
1693		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1694
1695		if (nsyscalls == NULL)
1696			return -1;
1697
1698		if (trace->syscalls.max != -1) {
1699			memset(nsyscalls + trace->syscalls.max + 1, 0,
1700			       (id - trace->syscalls.max) * sizeof(*sc));
1701		} else {
1702			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1703		}
1704
1705		trace->syscalls.table = nsyscalls;
1706		trace->syscalls.max   = id;
1707	}
1708
1709	sc = trace->syscalls.table + id;
1710	sc->name = name;
1711
1712	sc->fmt  = syscall_fmt__find(sc->name);
1713
1714	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1715	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1716
1717	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1718		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1719		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1720	}
1721
1722	if (IS_ERR(sc->tp_format))
1723		return -1;
1724
1725	sc->args = sc->tp_format->format.fields;
1726	sc->nr_args = sc->tp_format->format.nr_fields;
1727	/* drop nr field - not relevant here; does not exist on older kernels */
1728	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1729		sc->args = sc->args->next;
1730		--sc->nr_args;
1731	}
1732
1733	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1734
1735	return syscall__set_arg_fmts(sc);
1736}
1737
1738static int trace__validate_ev_qualifier(struct trace *trace)
1739{
1740	int err = 0, i;
1741	struct str_node *pos;
1742
1743	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1744	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1745						 sizeof(trace->ev_qualifier_ids.entries[0]));
1746
1747	if (trace->ev_qualifier_ids.entries == NULL) {
1748		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1749		       trace->output);
1750		err = -EINVAL;
1751		goto out;
1752	}
1753
1754	i = 0;
1755
1756	strlist__for_each(pos, trace->ev_qualifier) {
1757		const char *sc = pos->s;
1758		int id = audit_name_to_syscall(sc, trace->audit.machine);
1759
1760		if (id < 0) {
1761			if (err == 0) {
1762				fputs("Error:\tInvalid syscall ", trace->output);
1763				err = -EINVAL;
1764			} else {
1765				fputs(", ", trace->output);
1766			}
1767
1768			fputs(sc, trace->output);
1769		}
1770
1771		trace->ev_qualifier_ids.entries[i++] = id;
1772	}
1773
1774	if (err < 0) {
1775		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1776		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1777		zfree(&trace->ev_qualifier_ids.entries);
1778		trace->ev_qualifier_ids.nr = 0;
1779	}
1780out:
1781	return err;
1782}
1783
/*
 * Format the arguments of syscall @sc into @bf (at most @size bytes) as a
 * comma separated "name: value" list and return the number of characters
 * written.
 *
 * args is to be interpreted as a series of longs but we need to handle
 * 8-byte unaligned accesses. args points to raw_data within the event
 * and raw_data is guaranteed to be 8-byte unaligned because it is
 * preceded by raw_size which is a u32. So we need to copy args to a temp
 * variable to read it. Most notably this avoids extended load instructions
 * on unaligned addresses.
 *
 * When the tracepoint format is known (sc->args != NULL) each field is
 * printed with its per argument formatter, or as "%ld" if it has none.
 * Otherwise six raw values are printed as arg0..arg5.
 */

static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		/* mask bit that corresponds to argument arg.idx */
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/*
			 * Skip arguments whose bit is set in arg.mask.
			 * NOTE(review): presumably set by a formatter of an
			 * earlier argument through &arg - confirm.
			 */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in a
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			/* ", " goes in front of everything but the first printed argument */
			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else {
		int i = 0;

		/* no format available: dump six raw values */
		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1860
/*
 * Signature of the per tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1864
1865static struct syscall *trace__syscall_info(struct trace *trace,
1866					   struct perf_evsel *evsel, int id)
1867{
1868
1869	if (id < 0) {
1870
1871		/*
1872		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1873		 * before that, leaving at a higher verbosity level till that is
1874		 * explained. Reproduced with plain ftrace with:
1875		 *
1876		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1877		 * grep "NR -1 " /t/trace_pipe
1878		 *
1879		 * After generating some load on the machine.
1880 		 */
1881		if (verbose > 1) {
1882			static u64 n;
1883			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1884				id, perf_evsel__name(evsel), ++n);
1885		}
1886		return NULL;
1887	}
1888
1889	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1890	    trace__read_syscall_info(trace, id))
1891		goto out_cant_read;
1892
1893	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1894		goto out_cant_read;
1895
1896	return &trace->syscalls.table[id];
1897
1898out_cant_read:
1899	if (verbose) {
1900		fprintf(trace->output, "Problems reading syscall %d", id);
1901		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1902			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1903		fputs(" information\n", trace->output);
1904	}
1905	return NULL;
1906}
1907
1908static void thread__update_stats(struct thread_trace *ttrace,
1909				 int id, struct perf_sample *sample)
1910{
1911	struct int_node *inode;
1912	struct stats *stats;
1913	u64 duration = 0;
1914
1915	inode = intlist__findnew(ttrace->syscall_stats, id);
1916	if (inode == NULL)
1917		return;
1918
1919	stats = inode->priv;
1920	if (stats == NULL) {
1921		stats = malloc(sizeof(struct stats));
1922		if (stats == NULL)
1923			return;
1924		init_stats(stats);
1925		inode->priv = stats;
1926	}
1927
1928	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1929		duration = sample->time - ttrace->entry_time;
1930
1931	update_stats(stats, duration);
1932}
1933
1934static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1935{
1936	struct thread_trace *ttrace;
1937	u64 duration;
1938	size_t printed;
1939
1940	if (trace->current == NULL)
1941		return 0;
1942
1943	ttrace = thread__priv(trace->current);
1944
1945	if (!ttrace->entry_pending)
1946		return 0;
1947
1948	duration = sample->time - ttrace->entry_time;
1949
1950	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1951	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1952	ttrace->entry_pending = false;
1953
1954	return printed;
1955}
1956
/*
 * Handler for the sys_enter tracepoint.
 *
 * Formats "<name>(<args>" into the thread's entry_str.  For exit and
 * exit_group, which have no matching sys_exit, the line is printed right
 * away; for all other syscalls it is left pending so that it can be
 * completed when the sys_exit arrives.
 *
 * Returns 0 on success, -1 if the syscall is unknown or memory can't be
 * allocated.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	/* NOTE(review): presumably returns a reference, dropped at out_put - confirm */
	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* the buffer is allocated on the first sys_enter and then reused */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* flush a still pending entry of the current thread before starting a new line */
	if (!trace->summary_only)
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* there will be no sys_exit to complete this line, print it now */
		if (!trace->duration_filter && !trace->summary_only) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* remember the thread of the last sys_enter, holding a reference on it */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2015
2016static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2017			   union perf_event *event __maybe_unused,
2018			   struct perf_sample *sample)
2019{
2020	long ret;
2021	u64 duration = 0;
2022	struct thread *thread;
2023	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2024	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2025	struct thread_trace *ttrace;
2026
2027	if (sc == NULL)
2028		return -1;
2029
2030	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2031	ttrace = thread__trace(thread, trace->output);
2032	if (ttrace == NULL)
2033		goto out_put;
2034
2035	if (trace->summary)
2036		thread__update_stats(ttrace, id, sample);
2037
2038	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2039
2040	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2041		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2042		ttrace->filename.pending_open = false;
2043		++trace->stats.vfs_getname;
2044	}
2045
2046	ttrace->exit_time = sample->time;
2047
2048	if (ttrace->entry_time) {
2049		duration = sample->time - ttrace->entry_time;
2050		if (trace__filter_duration(trace, duration))
2051			goto out;
2052	} else if (trace->duration_filter)
2053		goto out;
2054
2055	if (trace->summary_only)
2056		goto out;
2057
2058	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2059
2060	if (ttrace->entry_pending) {
2061		fprintf(trace->output, "%-70s", ttrace->entry_str);
2062	} else {
2063		fprintf(trace->output, " ... [");
2064		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2065		fprintf(trace->output, "]: %s()", sc->name);
2066	}
2067
2068	if (sc->fmt == NULL) {
2069signed_print:
2070		fprintf(trace->output, ") = %ld", ret);
2071	} else if (ret < 0 && sc->fmt->errmsg) {
2072		char bf[STRERR_BUFSIZE];
2073		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2074			   *e = audit_errno_to_name(-ret);
2075
2076		fprintf(trace->output, ") = -1 %s %s", e, emsg);
2077	} else if (ret == 0 && sc->fmt->timeout)
2078		fprintf(trace->output, ") = 0 Timeout");
2079	else if (sc->fmt->hexret)
2080		fprintf(trace->output, ") = %#lx", ret);
2081	else
2082		goto signed_print;
2083
2084	fputc('\n', trace->output);
2085out:
2086	ttrace->entry_pending = false;
2087	err = 0;
2088out_put:
2089	thread__put(thread);
2090	return err;
2091}
2092
2093static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2094			      union perf_event *event __maybe_unused,
2095			      struct perf_sample *sample)
2096{
2097	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2098	struct thread_trace *ttrace;
2099	size_t filename_len, entry_str_len, to_move;
2100	ssize_t remaining_space;
2101	char *pos;
2102	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2103
2104	if (!thread)
2105		goto out;
2106
2107	ttrace = thread__priv(thread);
2108	if (!ttrace)
2109		goto out;
2110
2111	filename_len = strlen(filename);
2112
2113	if (ttrace->filename.namelen < filename_len) {
2114		char *f = realloc(ttrace->filename.name, filename_len + 1);
2115
2116		if (f == NULL)
2117				goto out;
2118
2119		ttrace->filename.namelen = filename_len;
2120		ttrace->filename.name = f;
2121	}
2122
2123	strcpy(ttrace->filename.name, filename);
2124	ttrace->filename.pending_open = true;
2125
2126	if (!ttrace->filename.ptr)
2127		goto out;
2128
2129	entry_str_len = strlen(ttrace->entry_str);
2130	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2131	if (remaining_space <= 0)
2132		goto out;
2133
2134	if (filename_len > (size_t)remaining_space) {
2135		filename += filename_len - remaining_space;
2136		filename_len = remaining_space;
2137	}
2138
2139	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2140	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2141	memmove(pos + filename_len, pos, to_move);
2142	memcpy(pos, filename, filename_len);
2143
2144	ttrace->filename.ptr = 0;
2145	ttrace->filename.entry_str_pos = 0;
2146out:
2147	return 0;
2148}
2149
2150static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2151				     union perf_event *event __maybe_unused,
2152				     struct perf_sample *sample)
2153{
2154        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2155	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2156	struct thread *thread = machine__findnew_thread(trace->host,
2157							sample->pid,
2158							sample->tid);
2159	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2160
2161	if (ttrace == NULL)
2162		goto out_dump;
2163
2164	ttrace->runtime_ms += runtime_ms;
2165	trace->runtime_ms += runtime_ms;
2166	thread__put(thread);
2167	return 0;
2168
2169out_dump:
2170	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2171	       evsel->name,
2172	       perf_evsel__strval(evsel, sample, "comm"),
2173	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2174	       runtime,
2175	       perf_evsel__intval(evsel, sample, "vruntime"));
2176	thread__put(thread);
2177	return 0;
2178}
2179
2180static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2181				union perf_event *event __maybe_unused,
2182				struct perf_sample *sample)
2183{
2184	trace__printf_interrupted_entry(trace, sample);
2185	trace__fprintf_tstamp(trace, sample->time, trace->output);
2186
2187	if (trace->trace_syscalls)
2188		fprintf(trace->output, "(         ): ");
2189
2190	fprintf(trace->output, "%s:", evsel->name);
2191
2192	if (evsel->tp_format) {
2193		event_format__fprintf(evsel->tp_format, sample->cpu,
2194				      sample->raw_data, sample->raw_size,
2195				      trace->output);
2196	}
2197
2198	fprintf(trace->output, ")\n");
2199	return 0;
2200}
2201
2202static void print_location(FILE *f, struct perf_sample *sample,
2203			   struct addr_location *al,
2204			   bool print_dso, bool print_sym)
2205{
2206
2207	if ((verbose || print_dso) && al->map)
2208		fprintf(f, "%s@", al->map->dso->long_name);
2209
2210	if ((verbose || print_sym) && al->sym)
2211		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2212			al->addr - al->sym->start);
2213	else if (al->map)
2214		fprintf(f, "0x%" PRIx64, al->addr);
2215	else
2216		fprintf(f, "0x%" PRIx64, sample->addr);
2217}
2218
2219static int trace__pgfault(struct trace *trace,
2220			  struct perf_evsel *evsel,
2221			  union perf_event *event,
2222			  struct perf_sample *sample)
2223{
2224	struct thread *thread;
2225	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2226	struct addr_location al;
2227	char map_type = 'd';
2228	struct thread_trace *ttrace;
2229	int err = -1;
2230
2231	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2232	ttrace = thread__trace(thread, trace->output);
2233	if (ttrace == NULL)
2234		goto out_put;
2235
2236	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2237		ttrace->pfmaj++;
2238	else
2239		ttrace->pfmin++;
2240
2241	if (trace->summary_only)
2242		goto out;
2243
2244	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2245			      sample->ip, &al);
2246
2247	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2248
2249	fprintf(trace->output, "%sfault [",
2250		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2251		"maj" : "min");
2252
2253	print_location(trace->output, sample, &al, false, true);
2254
2255	fprintf(trace->output, "] => ");
2256
2257	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2258				   sample->addr, &al);
2259
2260	if (!al.map) {
2261		thread__find_addr_location(thread, cpumode,
2262					   MAP__FUNCTION, sample->addr, &al);
2263
2264		if (al.map)
2265			map_type = 'x';
2266		else
2267			map_type = '?';
2268	}
2269
2270	print_location(trace->output, sample, &al, true, false);
2271
2272	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2273out:
2274	err = 0;
2275out_put:
2276	thread__put(thread);
2277	return err;
2278}
2279
2280static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2281{
2282	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2283	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2284		return false;
2285
2286	if (trace->pid_list || trace->tid_list)
2287		return true;
2288
2289	return false;
2290}
2291
2292static int trace__process_sample(struct perf_tool *tool,
2293				 union perf_event *event,
2294				 struct perf_sample *sample,
2295				 struct perf_evsel *evsel,
2296				 struct machine *machine __maybe_unused)
2297{
2298	struct trace *trace = container_of(tool, struct trace, tool);
2299	int err = 0;
2300
2301	tracepoint_handler handler = evsel->handler;
2302
2303	if (skip_sample(trace, sample))
2304		return 0;
2305
2306	if (!trace->full_time && trace->base_time == 0)
2307		trace->base_time = sample->time;
2308
2309	if (handler) {
2310		++trace->nr_events;
2311		handler(trace, evsel, event, sample);
2312	}
2313
2314	return err;
2315}
2316
2317static int parse_target_str(struct trace *trace)
2318{
2319	if (trace->opts.target.pid) {
2320		trace->pid_list = intlist__new(trace->opts.target.pid);
2321		if (trace->pid_list == NULL) {
2322			pr_err("Error parsing process id string\n");
2323			return -EINVAL;
2324		}
2325	}
2326
2327	if (trace->opts.target.tid) {
2328		trace->tid_list = intlist__new(trace->opts.target.tid);
2329		if (trace->tid_list == NULL) {
2330			pr_err("Error parsing thread id string\n");
2331			return -EINVAL;
2332		}
2333	}
2334
2335	return 0;
2336}
2337
2338static int trace__record(struct trace *trace, int argc, const char **argv)
2339{
2340	unsigned int rec_argc, i, j;
2341	const char **rec_argv;
2342	const char * const record_args[] = {
2343		"record",
2344		"-R",
2345		"-m", "1024",
2346		"-c", "1",
2347	};
2348
2349	const char * const sc_args[] = { "-e", };
2350	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2351	const char * const majpf_args[] = { "-e", "major-faults" };
2352	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2353	const char * const minpf_args[] = { "-e", "minor-faults" };
2354	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2355
2356	/* +1 is for the event string below */
2357	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2358		majpf_args_nr + minpf_args_nr + argc;
2359	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2360
2361	if (rec_argv == NULL)
2362		return -ENOMEM;
2363
2364	j = 0;
2365	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2366		rec_argv[j++] = record_args[i];
2367
2368	if (trace->trace_syscalls) {
2369		for (i = 0; i < sc_args_nr; i++)
2370			rec_argv[j++] = sc_args[i];
2371
2372		/* event string may be different for older kernels - e.g., RHEL6 */
2373		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2374			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2375		else if (is_valid_tracepoint("syscalls:sys_enter"))
2376			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2377		else {
2378			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2379			return -1;
2380		}
2381	}
2382
2383	if (trace->trace_pgfaults & TRACE_PFMAJ)
2384		for (i = 0; i < majpf_args_nr; i++)
2385			rec_argv[j++] = majpf_args[i];
2386
2387	if (trace->trace_pgfaults & TRACE_PFMIN)
2388		for (i = 0; i < minpf_args_nr; i++)
2389			rec_argv[j++] = minpf_args[i];
2390
2391	for (i = 0; i < (unsigned int)argc; i++)
2392		rec_argv[j++] = argv[i];
2393
2394	return cmd_record(j, rec_argv, NULL);
2395}
2396
2397static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2398
2399static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2400{
2401	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2402
2403	if (IS_ERR(evsel))
2404		return false;
2405
2406	if (perf_evsel__field(evsel, "pathname") == NULL) {
2407		perf_evsel__delete(evsel);
2408		return false;
2409	}
2410
2411	evsel->handler = trace__vfs_getname;
2412	perf_evlist__add(evlist, evsel);
2413	return true;
2414}
2415
2416static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2417				    u64 config)
2418{
2419	struct perf_evsel *evsel;
2420	struct perf_event_attr attr = {
2421		.type = PERF_TYPE_SOFTWARE,
2422		.mmap_data = 1,
2423	};
2424
2425	attr.config = config;
2426	attr.sample_period = 1;
2427
2428	event_attr_init(&attr);
2429
2430	evsel = perf_evsel__new(&attr);
2431	if (!evsel)
2432		return -ENOMEM;
2433
2434	evsel->handler = trace__pgfault;
2435	perf_evlist__add(evlist, evsel);
2436
2437	return 0;
2438}
2439
2440static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2441{
2442	const u32 type = event->header.type;
2443	struct perf_evsel *evsel;
2444
2445	if (!trace->full_time && trace->base_time == 0)
2446		trace->base_time = sample->time;
2447
2448	if (type != PERF_RECORD_SAMPLE) {
2449		trace__process_event(trace, trace->host, event, sample);
2450		return;
2451	}
2452
2453	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2454	if (evsel == NULL) {
2455		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2456		return;
2457	}
2458
2459	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2460	    sample->raw_data == NULL) {
2461		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2462		       perf_evsel__name(evsel), sample->tid,
2463		       sample->cpu, sample->raw_size);
2464	} else {
2465		tracepoint_handler handler = evsel->handler;
2466		handler(trace, evsel, event, sample);
2467	}
2468}
2469
2470static int trace__add_syscall_newtp(struct trace *trace)
2471{
2472	int ret = -1;
2473	struct perf_evlist *evlist = trace->evlist;
2474	struct perf_evsel *sys_enter, *sys_exit;
2475
2476	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2477	if (sys_enter == NULL)
2478		goto out;
2479
2480	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2481		goto out_delete_sys_enter;
2482
2483	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2484	if (sys_exit == NULL)
2485		goto out_delete_sys_enter;
2486
2487	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2488		goto out_delete_sys_exit;
2489
2490	perf_evlist__add(evlist, sys_enter);
2491	perf_evlist__add(evlist, sys_exit);
2492
2493	trace->syscalls.events.sys_enter = sys_enter;
2494	trace->syscalls.events.sys_exit  = sys_exit;
2495
2496	ret = 0;
2497out:
2498	return ret;
2499
2500out_delete_sys_exit:
2501	perf_evsel__delete_priv(sys_exit);
2502out_delete_sys_enter:
2503	perf_evsel__delete_priv(sys_enter);
2504	goto out;
2505}
2506
2507static int trace__set_ev_qualifier_filter(struct trace *trace)
2508{
2509	int err = -1;
2510	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2511						trace->ev_qualifier_ids.nr,
2512						trace->ev_qualifier_ids.entries);
2513
2514	if (filter == NULL)
2515		goto out_enomem;
2516
2517	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2518		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2519
2520	free(filter);
2521out:
2522	return err;
2523out_enomem:
2524	errno = ENOMEM;
2525	goto out;
2526}
2527
2528static int trace__run(struct trace *trace, int argc, const char **argv)
2529{
2530	struct perf_evlist *evlist = trace->evlist;
2531	struct perf_evsel *evsel;
2532	int err = -1, i;
2533	unsigned long before;
2534	const bool forks = argc > 0;
2535	bool draining = false;
2536
2537	trace->live = true;
2538
2539	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2540		goto out_error_raw_syscalls;
2541
2542	if (trace->trace_syscalls)
2543		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2544
2545	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2546	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2547		goto out_error_mem;
2548	}
2549
2550	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2551	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2552		goto out_error_mem;
2553
2554	if (trace->sched &&
2555	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2556				   trace__sched_stat_runtime))
2557		goto out_error_sched_stat_runtime;
2558
2559	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2560	if (err < 0) {
2561		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2562		goto out_delete_evlist;
2563	}
2564
2565	err = trace__symbols_init(trace, evlist);
2566	if (err < 0) {
2567		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2568		goto out_delete_evlist;
2569	}
2570
2571	perf_evlist__config(evlist, &trace->opts);
2572
2573	signal(SIGCHLD, sig_handler);
2574	signal(SIGINT, sig_handler);
2575
2576	if (forks) {
2577		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2578						    argv, false, NULL);
2579		if (err < 0) {
2580			fprintf(trace->output, "Couldn't run the workload!\n");
2581			goto out_delete_evlist;
2582		}
2583	}
2584
2585	err = perf_evlist__open(evlist);
2586	if (err < 0)
2587		goto out_error_open;
2588
2589	/*
2590	 * Better not use !target__has_task() here because we need to cover the
2591	 * case where no threads were specified in the command line, but a
2592	 * workload was, and in that case we will fill in the thread_map when
2593	 * we fork the workload in perf_evlist__prepare_workload.
2594	 */
2595	if (trace->filter_pids.nr > 0)
2596		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2597	else if (thread_map__pid(evlist->threads, 0) == -1)
2598		err = perf_evlist__set_filter_pid(evlist, getpid());
2599
2600	if (err < 0)
2601		goto out_error_mem;
2602
2603	if (trace->ev_qualifier_ids.nr > 0) {
2604		err = trace__set_ev_qualifier_filter(trace);
2605		if (err < 0)
2606			goto out_errno;
2607
2608		pr_debug("event qualifier tracepoint filter: %s\n",
2609			 trace->syscalls.events.sys_exit->filter);
2610	}
2611
2612	err = perf_evlist__apply_filters(evlist, &evsel);
2613	if (err < 0)
2614		goto out_error_apply_filters;
2615
2616	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2617	if (err < 0)
2618		goto out_error_mmap;
2619
2620	if (!target__none(&trace->opts.target))
2621		perf_evlist__enable(evlist);
2622
2623	if (forks)
2624		perf_evlist__start_workload(evlist);
2625
2626	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2627				  evlist->threads->nr > 1 ||
2628				  perf_evlist__first(evlist)->attr.inherit;
2629again:
2630	before = trace->nr_events;
2631
2632	for (i = 0; i < evlist->nr_mmaps; i++) {
2633		union perf_event *event;
2634
2635		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2636			struct perf_sample sample;
2637
2638			++trace->nr_events;
2639
2640			err = perf_evlist__parse_sample(evlist, event, &sample);
2641			if (err) {
2642				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2643				goto next_event;
2644			}
2645
2646			trace__handle_event(trace, event, &sample);
2647next_event:
2648			perf_evlist__mmap_consume(evlist, i);
2649
2650			if (interrupted)
2651				goto out_disable;
2652
2653			if (done && !draining) {
2654				perf_evlist__disable(evlist);
2655				draining = true;
2656			}
2657		}
2658	}
2659
2660	if (trace->nr_events == before) {
2661		int timeout = done ? 100 : -1;
2662
2663		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2664			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2665				draining = true;
2666
2667			goto again;
2668		}
2669	} else {
2670		goto again;
2671	}
2672
2673out_disable:
2674	thread__zput(trace->current);
2675
2676	perf_evlist__disable(evlist);
2677
2678	if (!err) {
2679		if (trace->summary)
2680			trace__fprintf_thread_summary(trace, trace->output);
2681
2682		if (trace->show_tool_stats) {
2683			fprintf(trace->output, "Stats:\n "
2684					       " vfs_getname : %" PRIu64 "\n"
2685					       " proc_getname: %" PRIu64 "\n",
2686				trace->stats.vfs_getname,
2687				trace->stats.proc_getname);
2688		}
2689	}
2690
2691out_delete_evlist:
2692	perf_evlist__delete(evlist);
2693	trace->evlist = NULL;
2694	trace->live = false;
2695	return err;
2696{
2697	char errbuf[BUFSIZ];
2698
2699out_error_sched_stat_runtime:
2700	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2701	goto out_error;
2702
2703out_error_raw_syscalls:
2704	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2705	goto out_error;
2706
2707out_error_mmap:
2708	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2709	goto out_error;
2710
2711out_error_open:
2712	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2713
2714out_error:
2715	fprintf(trace->output, "%s\n", errbuf);
2716	goto out_delete_evlist;
2717
2718out_error_apply_filters:
2719	fprintf(trace->output,
2720		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
2721		evsel->filter, perf_evsel__name(evsel), errno,
2722		strerror_r(errno, errbuf, sizeof(errbuf)));
2723	goto out_delete_evlist;
2724}
2725out_error_mem:
2726	fprintf(trace->output, "Not enough memory to run!\n");
2727	goto out_delete_evlist;
2728
2729out_errno:
2730	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2731	goto out_delete_evlist;
2732}
2733
2734static int trace__replay(struct trace *trace)
2735{
2736	const struct perf_evsel_str_handler handlers[] = {
2737		{ "probe:vfs_getname",	     trace__vfs_getname, },
2738	};
2739	struct perf_data_file file = {
2740		.path  = input_name,
2741		.mode  = PERF_DATA_MODE_READ,
2742		.force = trace->force,
2743	};
2744	struct perf_session *session;
2745	struct perf_evsel *evsel;
2746	int err = -1;
2747
2748	trace->tool.sample	  = trace__process_sample;
2749	trace->tool.mmap	  = perf_event__process_mmap;
2750	trace->tool.mmap2	  = perf_event__process_mmap2;
2751	trace->tool.comm	  = perf_event__process_comm;
2752	trace->tool.exit	  = perf_event__process_exit;
2753	trace->tool.fork	  = perf_event__process_fork;
2754	trace->tool.attr	  = perf_event__process_attr;
2755	trace->tool.tracing_data = perf_event__process_tracing_data;
2756	trace->tool.build_id	  = perf_event__process_build_id;
2757
2758	trace->tool.ordered_events = true;
2759	trace->tool.ordering_requires_timestamps = true;
2760
2761	/* add tid to output */
2762	trace->multiple_threads = true;
2763
2764	session = perf_session__new(&file, false, &trace->tool);
2765	if (session == NULL)
2766		return -1;
2767
2768	if (symbol__init(&session->header.env) < 0)
2769		goto out;
2770
2771	trace->host = &session->machines.host;
2772
2773	err = perf_session__set_tracepoints_handlers(session, handlers);
2774	if (err)
2775		goto out;
2776
2777	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2778						     "raw_syscalls:sys_enter");
2779	/* older kernels have syscalls tp versus raw_syscalls */
2780	if (evsel == NULL)
2781		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2782							     "syscalls:sys_enter");
2783
2784	if (evsel &&
2785	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2786	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2787		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2788		goto out;
2789	}
2790
2791	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2792						     "raw_syscalls:sys_exit");
2793	if (evsel == NULL)
2794		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2795							     "syscalls:sys_exit");
2796	if (evsel &&
2797	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2798	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2799		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2800		goto out;
2801	}
2802
2803	evlist__for_each(session->evlist, evsel) {
2804		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2805		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2806		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2807		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2808			evsel->handler = trace__pgfault;
2809	}
2810
2811	err = parse_target_str(trace);
2812	if (err != 0)
2813		goto out;
2814
2815	setup_pager();
2816
2817	err = perf_session__process_events(session);
2818	if (err)
2819		pr_err("Failed to process events, error %d", err);
2820
2821	else if (trace->summary)
2822		trace__fprintf_thread_summary(trace, trace->output);
2823
2824out:
2825	perf_session__delete(session);
2826
2827	return err;
2828}
2829
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2838
2839static size_t thread__dump_stats(struct thread_trace *ttrace,
2840				 struct trace *trace, FILE *fp)
2841{
2842	struct stats *stats;
2843	size_t printed = 0;
2844	struct syscall *sc;
2845	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2846
2847	if (inode == NULL)
2848		return 0;
2849
2850	printed += fprintf(fp, "\n");
2851
2852	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2853	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2854	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2855
2856	/* each int_node is a syscall */
2857	while (inode) {
2858		stats = inode->priv;
2859		if (stats) {
2860			double min = (double)(stats->min) / NSEC_PER_MSEC;
2861			double max = (double)(stats->max) / NSEC_PER_MSEC;
2862			double avg = avg_stats(stats);
2863			double pct;
2864			u64 n = (u64) stats->n;
2865
2866			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2867			avg /= NSEC_PER_MSEC;
2868
2869			sc = &trace->syscalls.table[inode->i];
2870			printed += fprintf(fp, "   %-15s", sc->name);
2871			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2872					   n, avg * n, min, avg);
2873			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2874		}
2875
2876		inode = intlist__next(inode);
2877	}
2878
2879	printed += fprintf(fp, "\n\n");
2880
2881	return printed;
2882}
2883
/*
 * Context handed to trace__fprintf_one_thread() through the opaque pointer
 * of machine__for_each_thread().
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace whose threads are being summarized */
	size_t printed;		/* running count of characters printed */
};
2890
2891static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2892{
2893	struct summary_data *data = priv;
2894	FILE *fp = data->fp;
2895	size_t printed = data->printed;
2896	struct trace *trace = data->trace;
2897	struct thread_trace *ttrace = thread__priv(thread);
2898	double ratio;
2899
2900	if (ttrace == NULL)
2901		return 0;
2902
2903	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2904
2905	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2906	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2907	printed += fprintf(fp, "%.1f%%", ratio);
2908	if (ttrace->pfmaj)
2909		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2910	if (ttrace->pfmin)
2911		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2912	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2913	printed += thread__dump_stats(ttrace, trace, fp);
2914
2915	data->printed += printed;
2916
2917	return 0;
2918}
2919
2920static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2921{
2922	struct summary_data data = {
2923		.fp = fp,
2924		.trace = trace
2925	};
2926	data.printed = trace__fprintf_threads_header(fp);
2927
2928	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2929
2930	return data.printed;
2931}
2932
2933static int trace__set_duration(const struct option *opt, const char *str,
2934			       int unset __maybe_unused)
2935{
2936	struct trace *trace = opt->value;
2937
2938	trace->duration_filter = atof(str);
2939	return 0;
2940}
2941
2942static int trace__set_filter_pids(const struct option *opt, const char *str,
2943				  int unset __maybe_unused)
2944{
2945	int ret = -1;
2946	size_t i;
2947	struct trace *trace = opt->value;
2948	/*
2949	 * FIXME: introduce a intarray class, plain parse csv and create a
2950	 * { int nr, int entries[] } struct...
2951	 */
2952	struct intlist *list = intlist__new(str);
2953
2954	if (list == NULL)
2955		return -1;
2956
2957	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2958	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2959
2960	if (trace->filter_pids.entries == NULL)
2961		goto out;
2962
2963	trace->filter_pids.entries[0] = getpid();
2964
2965	for (i = 1; i < trace->filter_pids.nr; ++i)
2966		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2967
2968	intlist__delete(list);
2969	ret = 0;
2970out:
2971	return ret;
2972}
2973
2974static int trace__open_output(struct trace *trace, const char *filename)
2975{
2976	struct stat st;
2977
2978	if (!stat(filename, &st) && st.st_size) {
2979		char oldname[PATH_MAX];
2980
2981		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2982		unlink(oldname);
2983		rename(filename, oldname);
2984	}
2985
2986	trace->output = fopen(filename, "w");
2987
2988	return trace->output == NULL ? -errno : 0;
2989}
2990
2991static int parse_pagefaults(const struct option *opt, const char *str,
2992			    int unset __maybe_unused)
2993{
2994	int *trace_pgfaults = opt->value;
2995
2996	if (strcmp(str, "all") == 0)
2997		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2998	else if (strcmp(str, "maj") == 0)
2999		*trace_pgfaults |= TRACE_PFMAJ;
3000	else if (strcmp(str, "min") == 0)
3001		*trace_pgfaults |= TRACE_PFMIN;
3002	else
3003		return -1;
3004
3005	return 0;
3006}
3007
3008static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3009{
3010	struct perf_evsel *evsel;
3011
3012	evlist__for_each(evlist, evsel)
3013		evsel->handler = handler;
3014}
3015
3016int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3017{
3018	const char *trace_usage[] = {
3019		"perf trace [<options>] [<command>]",
3020		"perf trace [<options>] -- <command> [<options>]",
3021		"perf trace record [<options>] [<command>]",
3022		"perf trace record [<options>] -- <command> [<options>]",
3023		NULL
3024	};
3025	struct trace trace = {
3026		.audit = {
3027			.machine = audit_detect_machine(),
3028			.open_id = audit_name_to_syscall("open", trace.audit.machine),
3029		},
3030		.syscalls = {
3031			. max = -1,
3032		},
3033		.opts = {
3034			.target = {
3035				.uid	   = UINT_MAX,
3036				.uses_mmap = true,
3037			},
3038			.user_freq     = UINT_MAX,
3039			.user_interval = ULLONG_MAX,
3040			.no_buffering  = true,
3041			.mmap_pages    = UINT_MAX,
3042			.proc_map_timeout  = 500,
3043		},
3044		.output = stderr,
3045		.show_comm = true,
3046		.trace_syscalls = true,
3047	};
3048	const char *output_name = NULL;
3049	const char *ev_qualifier_str = NULL;
3050	const struct option trace_options[] = {
3051	OPT_CALLBACK(0, "event", &trace.evlist, "event",
3052		     "event selector. use 'perf list' to list available events",
3053		     parse_events_option),
3054	OPT_BOOLEAN(0, "comm", &trace.show_comm,
3055		    "show the thread COMM next to its id"),
3056	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3057	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3058	OPT_STRING('o', "output", &output_name, "file", "output file name"),
3059	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3060	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3061		    "trace events on existing process id"),
3062	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3063		    "trace events on existing thread id"),
3064	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3065		     "pids to filter (by the kernel)", trace__set_filter_pids),
3066	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3067		    "system-wide collection from all CPUs"),
3068	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3069		    "list of cpus to monitor"),
3070	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3071		    "child tasks do not inherit counters"),
3072	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3073		     "number of mmap data pages",
3074		     perf_evlist__parse_mmap_pages),
3075	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3076		   "user to profile"),
3077	OPT_CALLBACK(0, "duration", &trace, "float",
3078		     "show only events with duration > N.M ms",
3079		     trace__set_duration),
3080	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3081	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3082	OPT_BOOLEAN('T', "time", &trace.full_time,
3083		    "Show full timestamp, not time relative to first start"),
3084	OPT_BOOLEAN('s', "summary", &trace.summary_only,
3085		    "Show only syscall summary with statistics"),
3086	OPT_BOOLEAN('S', "with-summary", &trace.summary,
3087		    "Show all syscalls and summary with statistics"),
3088	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3089		     "Trace pagefaults", parse_pagefaults, "maj"),
3090	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3091	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3092	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3093			"per thread proc mmap processing timeout in ms"),
3094	OPT_END()
3095	};
3096	const char * const trace_subcommands[] = { "record", NULL };
3097	int err;
3098	char bf[BUFSIZ];
3099
3100	signal(SIGSEGV, sighandler_dump_stack);
3101	signal(SIGFPE, sighandler_dump_stack);
3102
3103	trace.evlist = perf_evlist__new();
3104
3105	if (trace.evlist == NULL) {
3106		pr_err("Not enough memory to run!\n");
3107		err = -ENOMEM;
3108		goto out;
3109	}
3110
3111	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3112				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3113
3114	if (trace.trace_pgfaults) {
3115		trace.opts.sample_address = true;
3116		trace.opts.sample_time = true;
3117	}
3118
3119	if (trace.evlist->nr_entries > 0)
3120		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3121
3122	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3123		return trace__record(&trace, argc-1, &argv[1]);
3124
3125	/* summary_only implies summary option, but don't overwrite summary if set */
3126	if (trace.summary_only)
3127		trace.summary = trace.summary_only;
3128
3129	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3130	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
3131		pr_err("Please specify something to trace.\n");
3132		return -1;
3133	}
3134
3135	if (output_name != NULL) {
3136		err = trace__open_output(&trace, output_name);
3137		if (err < 0) {
3138			perror("failed to create output file");
3139			goto out;
3140		}
3141	}
3142
3143	if (ev_qualifier_str != NULL) {
3144		const char *s = ev_qualifier_str;
3145		struct strlist_config slist_config = {
3146			.dirname = system_path(STRACE_GROUPS_DIR),
3147		};
3148
3149		trace.not_ev_qualifier = *s == '!';
3150		if (trace.not_ev_qualifier)
3151			++s;
3152		trace.ev_qualifier = strlist__new(s, &slist_config);
3153		if (trace.ev_qualifier == NULL) {
3154			fputs("Not enough memory to parse event qualifier",
3155			      trace.output);
3156			err = -ENOMEM;
3157			goto out_close;
3158		}
3159
3160		err = trace__validate_ev_qualifier(&trace);
3161		if (err)
3162			goto out_close;
3163	}
3164
3165	err = target__validate(&trace.opts.target);
3166	if (err) {
3167		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3168		fprintf(trace.output, "%s", bf);
3169		goto out_close;
3170	}
3171
3172	err = target__parse_uid(&trace.opts.target);
3173	if (err) {
3174		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3175		fprintf(trace.output, "%s", bf);
3176		goto out_close;
3177	}
3178
3179	if (!argc && target__none(&trace.opts.target))
3180		trace.opts.target.system_wide = true;
3181
3182	if (input_name)
3183		err = trace__replay(&trace);
3184	else
3185		err = trace__run(&trace, argc, argv);
3186
3187out_close:
3188	if (output_name != NULL)
3189		fclose(trace.output);
3190out:
3191	return err;
3192}
3193