1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9#include "util.h"
10#include <api/fs/fs.h>
11#include <poll.h>
12#include "cpumap.h"
13#include "thread_map.h"
14#include "target.h"
15#include "evlist.h"
16#include "evsel.h"
17#include "debug.h"
18#include <unistd.h>
19
20#include "parse-events.h"
21#include "parse-options.h"
22
23#include <sys/mman.h>
24
25#include <linux/bitops.h>
26#include <linux/hash.h>
27#include <linux/log2.h>
28#include <linux/err.h>
29
30static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
31static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
32
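/*
 * Index the per-cpu x per-thread xyarrays attached to each evsel: FD()
 * yields the event file descriptor and SID() the struct perf_sample_id
 * slot for a given (cpu, thread) pair.
 */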
33#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
34#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
35
36void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
37		       struct thread_map *threads)
38{
39	int i;
40
41	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
42		INIT_HLIST_HEAD(&evlist->heads[i]);
43	INIT_LIST_HEAD(&evlist->entries);
44	perf_evlist__set_maps(evlist, cpus, threads);
45	fdarray__init(&evlist->pollfd, 64);
46	evlist->workload.pid = -1;
47}
48
49struct perf_evlist *perf_evlist__new(void)
50{
51	struct perf_evlist *evlist = zalloc(sizeof(*evlist));
52
53	if (evlist != NULL)
54		perf_evlist__init(evlist, NULL, NULL);
55
56	return evlist;
57}
58
59struct perf_evlist *perf_evlist__new_default(void)
60{
61	struct perf_evlist *evlist = perf_evlist__new();
62
63	if (evlist && perf_evlist__add_default(evlist)) {
64		perf_evlist__delete(evlist);
65		evlist = NULL;
66	}
67
68	return evlist;
69}
70
71/**
72 * perf_evlist__set_id_pos - set the positions of event ids.
73 * @evlist: selected event list
74 *
75 * Events with compatible sample types all have the same id_pos
76 * and is_pos.  For convenience, put a copy on evlist.
77 */
78void perf_evlist__set_id_pos(struct perf_evlist *evlist)
79{
80	struct perf_evsel *first = perf_evlist__first(evlist);
81
82	evlist->id_pos = first->id_pos;
83	evlist->is_pos = first->is_pos;
84}
85
86static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
87{
88	struct perf_evsel *evsel;
89
90	evlist__for_each(evlist, evsel)
91		perf_evsel__calc_id_pos(evsel);
92
93	perf_evlist__set_id_pos(evlist);
94}
95
96static void perf_evlist__purge(struct perf_evlist *evlist)
97{
98	struct perf_evsel *pos, *n;
99
100	evlist__for_each_safe(evlist, n, pos) {
101		list_del_init(&pos->node);
102		pos->evlist = NULL;
103		perf_evsel__delete(pos);
104	}
105
106	evlist->nr_entries = 0;
107}
108
109void perf_evlist__exit(struct perf_evlist *evlist)
110{
111	zfree(&evlist->mmap);
112	fdarray__exit(&evlist->pollfd);
113}
114
115void perf_evlist__delete(struct perf_evlist *evlist)
116{
117	perf_evlist__munmap(evlist);
118	perf_evlist__close(evlist);
119	cpu_map__put(evlist->cpus);
120	thread_map__put(evlist->threads);
121	evlist->cpus = NULL;
122	evlist->threads = NULL;
123	perf_evlist__purge(evlist);
124	perf_evlist__exit(evlist);
125	free(evlist);
126}
127
128static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
129					  struct perf_evsel *evsel)
130{
131	/*
132	 * We already have cpus for evsel (via PMU sysfs) so
133	 * keep it, if there's no target cpu list defined.
134	 */
135	if (!evsel->own_cpus || evlist->has_user_cpus) {
136		cpu_map__put(evsel->cpus);
137		evsel->cpus = cpu_map__get(evlist->cpus);
138	} else if (evsel->cpus != evsel->own_cpus) {
139		cpu_map__put(evsel->cpus);
140		evsel->cpus = cpu_map__get(evsel->own_cpus);
141	}
142
143	thread_map__put(evsel->threads);
144	evsel->threads = thread_map__get(evlist->threads);
145}
146
147static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
148{
149	struct perf_evsel *evsel;
150
151	evlist__for_each(evlist, evsel)
152		__perf_evlist__propagate_maps(evlist, evsel);
153}
154
155void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
156{
157	entry->evlist = evlist;
158	list_add_tail(&entry->node, &evlist->entries);
159	entry->idx = evlist->nr_entries;
160	entry->tracking = !entry->idx;
161
162	if (!evlist->nr_entries++)
163		perf_evlist__set_id_pos(evlist);
164
165	__perf_evlist__propagate_maps(evlist, entry);
166}
167
168void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
169{
170	evsel->evlist = NULL;
171	list_del_init(&evsel->node);
172	evlist->nr_entries -= 1;
173}
174
175void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
176				   struct list_head *list)
177{
178	struct perf_evsel *evsel, *temp;
179
180	__evlist__for_each_safe(list, temp, evsel) {
181		list_del_init(&evsel->node);
182		perf_evlist__add(evlist, evsel);
183	}
184}
185
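/*
 * The list is expected to hold the members of a single group: the first
 * entry becomes everybody's leader and nr_members is derived from the idx
 * span between the first and last entries, so idx values are assumed to be
 * contiguous within the group.
 */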
186void __perf_evlist__set_leader(struct list_head *list)
187{
188	struct perf_evsel *evsel, *leader;
189
190	leader = list_entry(list->next, struct perf_evsel, node);
191	evsel = list_entry(list->prev, struct perf_evsel, node);
192
193	leader->nr_members = evsel->idx - leader->idx + 1;
194
195	__evlist__for_each(list, evsel) {
196		evsel->leader = leader;
197	}
198}
199
200void perf_evlist__set_leader(struct perf_evlist *evlist)
201{
202	if (evlist->nr_entries) {
203		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
204		__perf_evlist__set_leader(&evlist->entries);
205	}
206}
207
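/*
 * Probe for the deepest precise_ip level the kernel/PMU accepts: start at
 * the maximum and keep lowering it until a throwaway sys_perf_event_open()
 * on the current task succeeds (or the level reaches zero).
 */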
208void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
209{
210	attr->precise_ip = 3;
211
212	while (attr->precise_ip != 0) {
213		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
214		if (fd != -1) {
215			close(fd);
216			break;
217		}
218		--attr->precise_ip;
219	}
220}
221
222int perf_evlist__add_default(struct perf_evlist *evlist)
223{
224	struct perf_event_attr attr = {
225		.type = PERF_TYPE_HARDWARE,
226		.config = PERF_COUNT_HW_CPU_CYCLES,
227	};
228	struct perf_evsel *evsel;
229
230	event_attr_init(&attr);
231
232	perf_event_attr__set_max_precise_ip(&attr);
233
234	evsel = perf_evsel__new(&attr);
235	if (evsel == NULL)
236		goto error;
237
238	/* use asprintf() because free(evsel) assumes name is allocated */
239	if (asprintf(&evsel->name, "cycles%.*s",
240		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
241		goto error_free;
242
243	perf_evlist__add(evlist, evsel);
244	return 0;
245error_free:
246	perf_evsel__delete(evsel);
247error:
248	return -ENOMEM;
249}
250
251static int perf_evlist__add_attrs(struct perf_evlist *evlist,
252				  struct perf_event_attr *attrs, size_t nr_attrs)
253{
254	struct perf_evsel *evsel, *n;
255	LIST_HEAD(head);
256	size_t i;
257
258	for (i = 0; i < nr_attrs; i++) {
259		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
260		if (evsel == NULL)
261			goto out_delete_partial_list;
262		list_add_tail(&evsel->node, &head);
263	}
264
265	perf_evlist__splice_list_tail(evlist, &head);
266
267	return 0;
268
269out_delete_partial_list:
270	__evlist__for_each_safe(&head, n, evsel)
271		perf_evsel__delete(evsel);
272	return -1;
273}
274
275int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
276				     struct perf_event_attr *attrs, size_t nr_attrs)
277{
278	size_t i;
279
280	for (i = 0; i < nr_attrs; i++)
281		event_attr_init(attrs + i);
282
283	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
284}
285
286struct perf_evsel *
287perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
288{
289	struct perf_evsel *evsel;
290
291	evlist__for_each(evlist, evsel) {
292		if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
293		    (int)evsel->attr.config == id)
294			return evsel;
295	}
296
297	return NULL;
298}
299
300struct perf_evsel *
301perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
302				     const char *name)
303{
304	struct perf_evsel *evsel;
305
306	evlist__for_each(evlist, evsel) {
307		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
308		    (strcmp(evsel->name, name) == 0))
309			return evsel;
310	}
311
312	return NULL;
313}
314
315int perf_evlist__add_newtp(struct perf_evlist *evlist,
316			   const char *sys, const char *name, void *handler)
317{
318	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
319
320	if (IS_ERR(evsel))
321		return -1;
322
323	evsel->handler = handler;
324	perf_evlist__add(evlist, evsel);
325	return 0;
326}
327
328static int perf_evlist__nr_threads(struct perf_evlist *evlist,
329				   struct perf_evsel *evsel)
330{
331	if (evsel->system_wide)
332		return 1;
333	else
334		return thread_map__nr(evlist->threads);
335}
336
337void perf_evlist__disable(struct perf_evlist *evlist)
338{
339	int cpu, thread;
340	struct perf_evsel *pos;
341	int nr_cpus = cpu_map__nr(evlist->cpus);
342	int nr_threads;
343
344	for (cpu = 0; cpu < nr_cpus; cpu++) {
345		evlist__for_each(evlist, pos) {
346			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
347				continue;
348			nr_threads = perf_evlist__nr_threads(evlist, pos);
349			for (thread = 0; thread < nr_threads; thread++)
350				ioctl(FD(pos, cpu, thread),
351				      PERF_EVENT_IOC_DISABLE, 0);
352		}
353	}
354
355	evlist->enabled = false;
356}
357
358void perf_evlist__enable(struct perf_evlist *evlist)
359{
360	int cpu, thread;
361	struct perf_evsel *pos;
362	int nr_cpus = cpu_map__nr(evlist->cpus);
363	int nr_threads;
364
365	for (cpu = 0; cpu < nr_cpus; cpu++) {
366		evlist__for_each(evlist, pos) {
367			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
368				continue;
369			nr_threads = perf_evlist__nr_threads(evlist, pos);
370			for (thread = 0; thread < nr_threads; thread++)
371				ioctl(FD(pos, cpu, thread),
372				      PERF_EVENT_IOC_ENABLE, 0);
373		}
374	}
375
376	evlist->enabled = true;
377}
378
379void perf_evlist__toggle_enable(struct perf_evlist *evlist)
380{
381	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
382}
383
384int perf_evlist__disable_event(struct perf_evlist *evlist,
385			       struct perf_evsel *evsel)
386{
387	int cpu, thread, err;
388	int nr_cpus = cpu_map__nr(evlist->cpus);
389	int nr_threads = perf_evlist__nr_threads(evlist, evsel);
390
391	if (!evsel->fd)
392		return 0;
393
394	for (cpu = 0; cpu < nr_cpus; cpu++) {
395		for (thread = 0; thread < nr_threads; thread++) {
396			err = ioctl(FD(evsel, cpu, thread),
397				    PERF_EVENT_IOC_DISABLE, 0);
398			if (err)
399				return err;
400		}
401	}
402	return 0;
403}
404
405int perf_evlist__enable_event(struct perf_evlist *evlist,
406			      struct perf_evsel *evsel)
407{
408	int cpu, thread, err;
409	int nr_cpus = cpu_map__nr(evlist->cpus);
410	int nr_threads = perf_evlist__nr_threads(evlist, evsel);
411
412	if (!evsel->fd)
413		return -EINVAL;
414
415	for (cpu = 0; cpu < nr_cpus; cpu++) {
416		for (thread = 0; thread < nr_threads; thread++) {
417			err = ioctl(FD(evsel, cpu, thread),
418				    PERF_EVENT_IOC_ENABLE, 0);
419			if (err)
420				return err;
421		}
422	}
423	return 0;
424}
425
426static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
427					 struct perf_evsel *evsel, int cpu)
428{
429	int thread, err;
430	int nr_threads = perf_evlist__nr_threads(evlist, evsel);
431
432	if (!evsel->fd)
433		return -EINVAL;
434
435	for (thread = 0; thread < nr_threads; thread++) {
436		err = ioctl(FD(evsel, cpu, thread),
437			    PERF_EVENT_IOC_ENABLE, 0);
438		if (err)
439			return err;
440	}
441	return 0;
442}
443
444static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
445					    struct perf_evsel *evsel,
446					    int thread)
447{
448	int cpu, err;
449	int nr_cpus = cpu_map__nr(evlist->cpus);
450
451	if (!evsel->fd)
452		return -EINVAL;
453
454	for (cpu = 0; cpu < nr_cpus; cpu++) {
455		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
456		if (err)
457			return err;
458	}
459	return 0;
460}
461
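/*
 * Here idx is a mmap index: with per-cpu mmaps it selects a cpu, with
 * per-thread mmaps (dummy cpu map) it selects a thread.
 */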
462int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
463				  struct perf_evsel *evsel, int idx)
464{
465	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);
466
467	if (per_cpu_mmaps)
468		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
469	else
470		return perf_evlist__enable_event_thread(evlist, evsel, idx);
471}
472
473int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
474{
475	int nr_cpus = cpu_map__nr(evlist->cpus);
476	int nr_threads = thread_map__nr(evlist->threads);
477	int nfds = 0;
478	struct perf_evsel *evsel;
479
480	evlist__for_each(evlist, evsel) {
481		if (evsel->system_wide)
482			nfds += nr_cpus;
483		else
484			nfds += nr_cpus * nr_threads;
485	}
486
487	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
488	    fdarray__grow(&evlist->pollfd, nfds) < 0)
489		return -ENOMEM;
490
491	return 0;
492}
493
494static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
495{
496	int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out POLLHUP'ed fds we can
	 * close the associated evlist->mmap[] entry.
	 */
501	if (pos >= 0) {
502		evlist->pollfd.priv[pos].idx = idx;
503
504		fcntl(fd, F_SETFL, O_NONBLOCK);
505	}
506
507	return pos;
508}
509
510int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
511{
512	return __perf_evlist__add_pollfd(evlist, fd, -1);
513}
514
515static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
516{
517	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
518
519	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
520}
521
522int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
523{
524	return fdarray__filter(&evlist->pollfd, revents_and_mask,
525			       perf_evlist__munmap_filtered);
526}
527
528int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
529{
530	return fdarray__poll(&evlist->pollfd, timeout);
531}
532
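/*
 * Sample ids are hashed into evlist->heads so that perf_evlist__id2sid()
 * and perf_evlist__id2evsel() can map the id carried in a record back to
 * the evsel (and cpu/tid) that produced it.
 */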
533static void perf_evlist__id_hash(struct perf_evlist *evlist,
534				 struct perf_evsel *evsel,
535				 int cpu, int thread, u64 id)
536{
537	int hash;
538	struct perf_sample_id *sid = SID(evsel, cpu, thread);
539
540	sid->id = id;
541	sid->evsel = evsel;
542	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
543	hlist_add_head(&sid->node, &evlist->heads[hash]);
544}
545
546void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
547			 int cpu, int thread, u64 id)
548{
549	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
550	evsel->id[evsel->ids++] = id;
551}
552
553static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
554				  struct perf_evsel *evsel,
555				  int cpu, int thread, int fd)
556{
557	u64 read_data[4] = { 0, };
558	int id_idx = 1; /* The first entry is the counter value */
559	u64 id;
560	int ret;
561
562	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
563	if (!ret)
564		goto add;
565
566	if (errno != ENOTTY)
567		return -1;
568
	/* Legacy way to get the event id... All hail to old kernels! */
570
571	/*
572	 * This way does not work with group format read, so bail
573	 * out in that case.
574	 */
575	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
576		return -1;
577
578	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
579	    read(fd, &read_data, sizeof(read_data)) == -1)
580		return -1;
581
582	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
583		++id_idx;
584	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
585		++id_idx;
586
587	id = read_data[id_idx];
588
589 add:
590	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
591	return 0;
592}
593
594static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
595				     struct perf_evsel *evsel, int idx, int cpu,
596				     int thread)
597{
598	struct perf_sample_id *sid = SID(evsel, cpu, thread);
599	sid->idx = idx;
600	if (evlist->cpus && cpu >= 0)
601		sid->cpu = evlist->cpus->map[cpu];
602	else
603		sid->cpu = -1;
604	if (!evsel->system_wide && evlist->threads && thread >= 0)
605		sid->tid = thread_map__pid(evlist->threads, thread);
606	else
607		sid->tid = -1;
608}
609
610struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
611{
612	struct hlist_head *head;
613	struct perf_sample_id *sid;
614	int hash;
615
616	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
617	head = &evlist->heads[hash];
618
619	hlist_for_each_entry(sid, head, node)
620		if (sid->id == id)
621			return sid;
622
623	return NULL;
624}
625
626struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
627{
628	struct perf_sample_id *sid;
629
630	if (evlist->nr_entries == 1 || !id)
631		return perf_evlist__first(evlist);
632
633	sid = perf_evlist__id2sid(evlist, id);
634	if (sid)
635		return sid->evsel;
636
637	if (!perf_evlist__sample_id_all(evlist))
638		return perf_evlist__first(evlist);
639
640	return NULL;
641}
642
643struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
644						u64 id)
645{
646	struct perf_sample_id *sid;
647
648	if (!id)
649		return NULL;
650
651	sid = perf_evlist__id2sid(evlist, id);
652	if (sid)
653		return sid->evsel;
654
655	return NULL;
656}
657
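/*
 * Extract the sample id from a raw event: PERF_RECORD_SAMPLE records are
 * indexed from the front using id_pos, while all other record types carry
 * the id in the sample_id_all trailer, is_pos u64 slots back from the end
 * of the record.
 */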
658static int perf_evlist__event2id(struct perf_evlist *evlist,
659				 union perf_event *event, u64 *id)
660{
661	const u64 *array = event->sample.array;
662	ssize_t n;
663
664	n = (event->header.size - sizeof(event->header)) >> 3;
665
666	if (event->header.type == PERF_RECORD_SAMPLE) {
667		if (evlist->id_pos >= n)
668			return -1;
669		*id = array[evlist->id_pos];
670	} else {
671		if (evlist->is_pos > n)
672			return -1;
673		n -= evlist->is_pos;
674		*id = array[n];
675	}
676	return 0;
677}
678
679static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
680						   union perf_event *event)
681{
682	struct perf_evsel *first = perf_evlist__first(evlist);
683	struct hlist_head *head;
684	struct perf_sample_id *sid;
685	int hash;
686	u64 id;
687
688	if (evlist->nr_entries == 1)
689		return first;
690
691	if (!first->attr.sample_id_all &&
692	    event->header.type != PERF_RECORD_SAMPLE)
693		return first;
694
695	if (perf_evlist__event2id(evlist, event, &id))
696		return NULL;
697
698	/* Synthesized events have an id of zero */
699	if (!id)
700		return first;
701
702	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
703	head = &evlist->heads[hash];
704
705	hlist_for_each_entry(sid, head, node) {
706		if (sid->id == id)
707			return sid->evsel;
708	}
709	return NULL;
710}
711
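/**
 * perf_evlist__mmap_read - Fetch the next event from a mmap'ed ring buffer.
 * @evlist: list of events
 * @idx: index of the mmap to read from
 *
 * Returns a pointer to the next unconsumed event, or NULL if the buffer is
 * empty or was already unmapped after a POLLHUP/POLLERR.  Events straddling
 * the end of the ring are copied into md->event_copy so callers always see
 * a contiguous record.  A typical consumer loop looks roughly like this
 * (sketch):
 *
 *	while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
 *		... deliver/parse the event ...
 *		perf_evlist__mmap_consume(evlist, idx);
 *	}
 */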
712union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
713{
714	struct perf_mmap *md = &evlist->mmap[idx];
715	u64 head;
716	u64 old = md->prev;
717	unsigned char *data = md->base + page_size;
718	union perf_event *event = NULL;
719
720	/*
721	 * Check if event was unmapped due to a POLLHUP/POLLERR.
722	 */
723	if (!atomic_read(&md->refcnt))
724		return NULL;
725
726	head = perf_mmap__read_head(md);
727	if (evlist->overwrite) {
728		/*
729		 * If we're further behind than half the buffer, there's a chance
730		 * the writer will bite our tail and mess up the samples under us.
731		 *
732		 * If we somehow ended up ahead of the head, we got messed up.
733		 *
734		 * In either case, truncate and restart at head.
735		 */
736		int diff = head - old;
737		if (diff > md->mask / 2 || diff < 0) {
738			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
739
740			/*
741			 * head points to a known good entry, start there.
742			 */
743			old = head;
744		}
745	}
746
747	if (old != head) {
748		size_t size;
749
750		event = (union perf_event *)&data[old & md->mask];
751		size = event->header.size;
752
753		/*
754		 * Event straddles the mmap boundary -- header should always
755		 * be inside due to u64 alignment of output.
756		 */
757		if ((old & md->mask) + size != ((old + size) & md->mask)) {
758			unsigned int offset = old;
759			unsigned int len = min(sizeof(*event), size), cpy;
760			void *dst = md->event_copy;
761
762			do {
763				cpy = min(md->mask + 1 - (offset & md->mask), len);
764				memcpy(dst, &data[offset & md->mask], cpy);
765				offset += cpy;
766				dst += cpy;
767				len -= cpy;
768			} while (len);
769
770			event = (union perf_event *) md->event_copy;
771		}
772
773		old += size;
774	}
775
776	md->prev = old;
777
778	return event;
779}
780
781static bool perf_mmap__empty(struct perf_mmap *md)
782{
783	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
784}
785
786static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
787{
788	atomic_inc(&evlist->mmap[idx].refcnt);
789}
790
791static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
792{
793	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
794
795	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
796		__perf_evlist__munmap(evlist, idx);
797}
798
799void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
800{
801	struct perf_mmap *md = &evlist->mmap[idx];
802
803	if (!evlist->overwrite) {
804		u64 old = md->prev;
805
806		perf_mmap__write_tail(md, old);
807	}
808
809	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
810		perf_evlist__mmap_put(evlist, idx);
811}
812
813int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
814			       struct auxtrace_mmap_params *mp __maybe_unused,
815			       void *userpg __maybe_unused,
816			       int fd __maybe_unused)
817{
818	return 0;
819}
820
821void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
822{
823}
824
825void __weak auxtrace_mmap_params__init(
826			struct auxtrace_mmap_params *mp __maybe_unused,
827			off_t auxtrace_offset __maybe_unused,
828			unsigned int auxtrace_pages __maybe_unused,
829			bool auxtrace_overwrite __maybe_unused)
830{
831}
832
833void __weak auxtrace_mmap_params__set_idx(
834			struct auxtrace_mmap_params *mp __maybe_unused,
835			struct perf_evlist *evlist __maybe_unused,
836			int idx __maybe_unused,
837			bool per_cpu __maybe_unused)
838{
839}
840
841static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
842{
843	if (evlist->mmap[idx].base != NULL) {
844		munmap(evlist->mmap[idx].base, evlist->mmap_len);
845		evlist->mmap[idx].base = NULL;
846		atomic_set(&evlist->mmap[idx].refcnt, 0);
847	}
848	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
849}
850
851void perf_evlist__munmap(struct perf_evlist *evlist)
852{
853	int i;
854
855	if (evlist->mmap == NULL)
856		return;
857
858	for (i = 0; i < evlist->nr_mmaps; i++)
859		__perf_evlist__munmap(evlist, i);
860
861	zfree(&evlist->mmap);
862}
863
864static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
865{
866	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
867	if (cpu_map__empty(evlist->cpus))
868		evlist->nr_mmaps = thread_map__nr(evlist->threads);
869	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
870	return evlist->mmap != NULL ? 0 : -ENOMEM;
871}
872
873struct mmap_params {
874	int prot;
875	int mask;
876	struct auxtrace_mmap_params auxtrace_mp;
877};
878
879static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
880			       struct mmap_params *mp, int fd)
881{
882	/*
883	 * The last one will be done at perf_evlist__mmap_consume(), so that we
884	 * make sure we don't prevent tools from consuming every last event in
885	 * the ring buffer.
886	 *
887	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
888	 * anymore, but the last events for it are still in the ring buffer,
889	 * waiting to be consumed.
890	 *
	 * Tools can choose to ignore this at their own discretion, but the
892	 * evlist layer can't just drop it when filtering events in
893	 * perf_evlist__filter_pollfd().
894	 */
895	atomic_set(&evlist->mmap[idx].refcnt, 2);
896	evlist->mmap[idx].prev = 0;
897	evlist->mmap[idx].mask = mp->mask;
898	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
899				      MAP_SHARED, fd, 0);
900	if (evlist->mmap[idx].base == MAP_FAILED) {
901		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
902			  errno);
903		evlist->mmap[idx].base = NULL;
904		return -1;
905	}
906
907	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
908				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
909		return -1;
910
911	return 0;
912}
913
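/*
 * Only the first fd seen for a given mmap idx is actually mmap'ed; every
 * other evsel's fd for the same (cpu, thread) is redirected into that
 * buffer with PERF_EVENT_IOC_SET_OUTPUT, so all events for one cpu (or
 * thread) share a single ring buffer.
 */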
914static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
915				       struct mmap_params *mp, int cpu,
916				       int thread, int *output)
917{
918	struct perf_evsel *evsel;
919
920	evlist__for_each(evlist, evsel) {
921		int fd;
922
923		if (evsel->system_wide && thread)
924			continue;
925
926		fd = FD(evsel, cpu, thread);
927
928		if (*output == -1) {
929			*output = fd;
930			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
931				return -1;
932		} else {
933			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
934				return -1;
935
936			perf_evlist__mmap_get(evlist, idx);
937		}
938
939		/*
940		 * The system_wide flag causes a selected event to be opened
941		 * always without a pid.  Consequently it will never get a
942		 * POLLHUP, but it is used for tracking in combination with
943		 * other events, so it should not need to be polled anyway.
944		 * Therefore don't add it for polling.
945		 */
946		if (!evsel->system_wide &&
947		    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
948			perf_evlist__mmap_put(evlist, idx);
949			return -1;
950		}
951
952		if (evsel->attr.read_format & PERF_FORMAT_ID) {
953			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
954						   fd) < 0)
955				return -1;
956			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
957						 thread);
958		}
959	}
960
961	return 0;
962}
963
964static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
965				     struct mmap_params *mp)
966{
967	int cpu, thread;
968	int nr_cpus = cpu_map__nr(evlist->cpus);
969	int nr_threads = thread_map__nr(evlist->threads);
970
971	pr_debug2("perf event ring buffer mmapped per cpu\n");
972	for (cpu = 0; cpu < nr_cpus; cpu++) {
973		int output = -1;
974
975		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
976					      true);
977
978		for (thread = 0; thread < nr_threads; thread++) {
979			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
980							thread, &output))
981				goto out_unmap;
982		}
983	}
984
985	return 0;
986
987out_unmap:
988	for (cpu = 0; cpu < nr_cpus; cpu++)
989		__perf_evlist__munmap(evlist, cpu);
990	return -1;
991}
992
993static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
994					struct mmap_params *mp)
995{
996	int thread;
997	int nr_threads = thread_map__nr(evlist->threads);
998
999	pr_debug2("perf event ring buffer mmapped per thread\n");
1000	for (thread = 0; thread < nr_threads; thread++) {
1001		int output = -1;
1002
1003		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
1004					      false);
1005
1006		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
1007						&output))
1008			goto out_unmap;
1009	}
1010
1011	return 0;
1012
1013out_unmap:
1014	for (thread = 0; thread < nr_threads; thread++)
1015		__perf_evlist__munmap(evlist, thread);
1016	return -1;
1017}
1018
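/*
 * The mapping consists of 2^n data pages plus one extra page for the
 * kernel's control/header page, hence the (pages + 1) below.  When pages is
 * UINT_MAX a default is derived from the kernel/perf_event_mlock_kb sysctl
 * (minus the control page), rounded down to a power of two.
 */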
1019static size_t perf_evlist__mmap_size(unsigned long pages)
1020{
1021	if (pages == UINT_MAX) {
1022		int max;
1023
1024		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
			/*
			 * Strange, we couldn't read the sysctl value, so
			 * pick a once-upon-a-time good value and let's not
			 * die just yet...
			 */
1030			max = 512;
1031		} else {
1032			max -= (page_size / 1024);
1033		}
1034
1035		pages = (max * 1024) / page_size;
1036		if (!is_power_of_2(pages))
1037			pages = rounddown_pow_of_two(pages);
1038	} else if (!is_power_of_2(pages))
1039		return 0;
1040
1041	return (pages + 1) * page_size;
1042}
1043
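/*
 * Accept either a plain page count or a size with a B/K/M/G suffix; sizes
 * are converted to a page count.  Unless zero is explicitly allowed (min ==
 * 0), the result is rounded up to the next power of two, and anything above
 * max is rejected.
 */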
1044static long parse_pages_arg(const char *str, unsigned long min,
1045			    unsigned long max)
1046{
1047	unsigned long pages, val;
1048	static struct parse_tag tags[] = {
1049		{ .tag  = 'B', .mult = 1       },
1050		{ .tag  = 'K', .mult = 1 << 10 },
1051		{ .tag  = 'M', .mult = 1 << 20 },
1052		{ .tag  = 'G', .mult = 1 << 30 },
1053		{ .tag  = 0 },
1054	};
1055
1056	if (str == NULL)
1057		return -EINVAL;
1058
1059	val = parse_tag_value(str, tags);
1060	if (val != (unsigned long) -1) {
1061		/* we got file size value */
1062		pages = PERF_ALIGN(val, page_size) / page_size;
1063	} else {
1064		/* we got pages count value */
1065		char *eptr;
1066		pages = strtoul(str, &eptr, 10);
1067		if (*eptr != '\0')
1068			return -EINVAL;
1069	}
1070
1071	if (pages == 0 && min == 0) {
1072		/* leave number of pages at 0 */
1073	} else if (!is_power_of_2(pages)) {
1074		/* round pages up to next power of 2 */
1075		pages = roundup_pow_of_two(pages);
1076		if (!pages)
1077			return -EINVAL;
1078		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
1079			pages * page_size, pages);
1080	}
1081
1082	if (pages > max)
1083		return -EINVAL;
1084
1085	return pages;
1086}
1087
1088int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
1089{
1090	unsigned long max = UINT_MAX;
1091	long pages;
1092
1093	if (max > SIZE_MAX / page_size)
1094		max = SIZE_MAX / page_size;
1095
1096	pages = parse_pages_arg(str, 1, max);
1097	if (pages < 0) {
1098		pr_err("Invalid argument for --mmap_pages/-m\n");
1099		return -1;
1100	}
1101
1102	*mmap_pages = pages;
1103	return 0;
1104}
1105
1106int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1107				  int unset __maybe_unused)
1108{
1109	return __perf_evlist__parse_mmap_pages(opt->value, str);
1110}
1111
1112/**
1113 * perf_evlist__mmap_ex - Create mmaps to receive events.
1114 * @evlist: list of events
1115 * @pages: map length in pages
1116 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
1119 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Calling perf_evlist__mmap_consume() after
 * perf_evlist__mmap_read() does this automatically.
1123 *
1124 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
1125 * consumption using auxtrace_mmap__write_tail().
1126 *
1127 * Return: %0 on success, negative error code otherwise.
1128 */
1129int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1130			 bool overwrite, unsigned int auxtrace_pages,
1131			 bool auxtrace_overwrite)
1132{
1133	struct perf_evsel *evsel;
1134	const struct cpu_map *cpus = evlist->cpus;
1135	const struct thread_map *threads = evlist->threads;
1136	struct mmap_params mp = {
1137		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
1138	};
1139
1140	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
1141		return -ENOMEM;
1142
1143	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
1144		return -ENOMEM;
1145
1146	evlist->overwrite = overwrite;
1147	evlist->mmap_len = perf_evlist__mmap_size(pages);
1148	pr_debug("mmap size %zuB\n", evlist->mmap_len);
1149	mp.mask = evlist->mmap_len - page_size - 1;
1150
1151	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
1152				   auxtrace_pages, auxtrace_overwrite);
1153
1154	evlist__for_each(evlist, evsel) {
1155		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1156		    evsel->sample_id == NULL &&
1157		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
1158			return -ENOMEM;
1159	}
1160
1161	if (cpu_map__empty(cpus))
1162		return perf_evlist__mmap_per_thread(evlist, &mp);
1163
1164	return perf_evlist__mmap_per_cpu(evlist, &mp);
1165}
1166
1167int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1168		      bool overwrite)
1169{
1170	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
1171}
1172
1173int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1174{
1175	struct cpu_map *cpus;
1176	struct thread_map *threads;
1177
1178	threads = thread_map__new_str(target->pid, target->tid, target->uid);
1179
1180	if (!threads)
1181		return -1;
1182
1183	if (target__uses_dummy_map(target))
1184		cpus = cpu_map__dummy_new();
1185	else
1186		cpus = cpu_map__new(target->cpu_list);
1187
1188	if (!cpus)
1189		goto out_delete_threads;
1190
1191	evlist->has_user_cpus = !!target->cpu_list;
1192
1193	perf_evlist__set_maps(evlist, cpus, threads);
1194
1195	return 0;
1196
1197out_delete_threads:
1198	thread_map__put(threads);
1199	return -1;
1200}
1201
1202void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
1203			   struct thread_map *threads)
1204{
1205	/*
1206	 * Allow for the possibility that one or another of the maps isn't being
1207	 * changed i.e. don't put it.  Note we are assuming the maps that are
1208	 * being applied are brand new and evlist is taking ownership of the
1209	 * original reference count of 1.  If that is not the case it is up to
1210	 * the caller to increase the reference count.
1211	 */
1212	if (cpus != evlist->cpus) {
1213		cpu_map__put(evlist->cpus);
1214		evlist->cpus = cpu_map__get(cpus);
1215	}
1216
1217	if (threads != evlist->threads) {
1218		thread_map__put(evlist->threads);
1219		evlist->threads = thread_map__get(threads);
1220	}
1221
1222	perf_evlist__propagate_maps(evlist);
1223}
1224
1225int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
1226{
1227	struct perf_evsel *evsel;
1228	int err = 0;
1229	const int ncpus = cpu_map__nr(evlist->cpus),
1230		  nthreads = thread_map__nr(evlist->threads);
1231
1232	evlist__for_each(evlist, evsel) {
1233		if (evsel->filter == NULL)
1234			continue;
1235
		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel maps should always be
		 * the same.
		 */
1240		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
1241		if (err) {
1242			*err_evsel = evsel;
1243			break;
1244		}
1245	}
1246
1247	return err;
1248}
1249
1250int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
1251{
1252	struct perf_evsel *evsel;
1253	int err = 0;
1254
1255	evlist__for_each(evlist, evsel) {
1256		err = perf_evsel__set_filter(evsel, filter);
1257		if (err)
1258			break;
1259	}
1260
1261	return err;
1262}
1263
1264int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
1265{
1266	char *filter;
1267	int ret = -1;
1268	size_t i;
1269
1270	for (i = 0; i < npids; ++i) {
1271		if (i == 0) {
1272			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
1273				return -1;
1274		} else {
1275			char *tmp;
1276
1277			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
1278				goto out_free;
1279
1280			free(filter);
1281			filter = tmp;
1282		}
1283	}
1284
1285	ret = perf_evlist__set_filter(evlist, filter);
1286out_free:
1287	free(filter);
1288	return ret;
1289}
1290
1291int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
1292{
1293	return perf_evlist__set_filter_pids(evlist, 1, &pid);
1294}
1295
1296bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
1297{
1298	struct perf_evsel *pos;
1299
1300	if (evlist->nr_entries == 1)
1301		return true;
1302
1303	if (evlist->id_pos < 0 || evlist->is_pos < 0)
1304		return false;
1305
1306	evlist__for_each(evlist, pos) {
1307		if (pos->id_pos != evlist->id_pos ||
1308		    pos->is_pos != evlist->is_pos)
1309			return false;
1310	}
1311
1312	return true;
1313}
1314
1315u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1316{
1317	struct perf_evsel *evsel;
1318
1319	if (evlist->combined_sample_type)
1320		return evlist->combined_sample_type;
1321
1322	evlist__for_each(evlist, evsel)
1323		evlist->combined_sample_type |= evsel->attr.sample_type;
1324
1325	return evlist->combined_sample_type;
1326}
1327
1328u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1329{
1330	evlist->combined_sample_type = 0;
1331	return __perf_evlist__combined_sample_type(evlist);
1332}
1333
1334u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
1335{
1336	struct perf_evsel *evsel;
1337	u64 branch_type = 0;
1338
1339	evlist__for_each(evlist, evsel)
1340		branch_type |= evsel->attr.branch_sample_type;
1341	return branch_type;
1342}
1343
1344bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
1345{
1346	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1347	u64 read_format = first->attr.read_format;
1348	u64 sample_type = first->attr.sample_type;
1349
1350	evlist__for_each(evlist, pos) {
1351		if (read_format != pos->attr.read_format)
1352			return false;
1353	}
1354
	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
1356	if ((sample_type & PERF_SAMPLE_READ) &&
1357	    !(read_format & PERF_FORMAT_ID)) {
1358		return false;
1359	}
1360
1361	return true;
1362}
1363
1364u64 perf_evlist__read_format(struct perf_evlist *evlist)
1365{
1366	struct perf_evsel *first = perf_evlist__first(evlist);
1367	return first->attr.read_format;
1368}
1369
1370u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
1371{
1372	struct perf_evsel *first = perf_evlist__first(evlist);
1373	struct perf_sample *data;
1374	u64 sample_type;
1375	u16 size = 0;
1376
1377	if (!first->attr.sample_id_all)
1378		goto out;
1379
1380	sample_type = first->attr.sample_type;
1381
1382	if (sample_type & PERF_SAMPLE_TID)
1383		size += sizeof(data->tid) * 2;
1384
	if (sample_type & PERF_SAMPLE_TIME)
1386		size += sizeof(data->time);
1387
1388	if (sample_type & PERF_SAMPLE_ID)
1389		size += sizeof(data->id);
1390
1391	if (sample_type & PERF_SAMPLE_STREAM_ID)
1392		size += sizeof(data->stream_id);
1393
1394	if (sample_type & PERF_SAMPLE_CPU)
1395		size += sizeof(data->cpu) * 2;
1396
1397	if (sample_type & PERF_SAMPLE_IDENTIFIER)
1398		size += sizeof(data->id);
1399out:
1400	return size;
1401}
1402
1403bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
1404{
1405	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1406
1407	evlist__for_each_continue(evlist, pos) {
1408		if (first->attr.sample_id_all != pos->attr.sample_id_all)
1409			return false;
1410	}
1411
1412	return true;
1413}
1414
1415bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
1416{
1417	struct perf_evsel *first = perf_evlist__first(evlist);
1418	return first->attr.sample_id_all;
1419}
1420
1421void perf_evlist__set_selected(struct perf_evlist *evlist,
1422			       struct perf_evsel *evsel)
1423{
1424	evlist->selected = evsel;
1425}
1426
1427void perf_evlist__close(struct perf_evlist *evlist)
1428{
1429	struct perf_evsel *evsel;
1430	int ncpus = cpu_map__nr(evlist->cpus);
1431	int nthreads = thread_map__nr(evlist->threads);
1432	int n;
1433
1434	evlist__for_each_reverse(evlist, evsel) {
1435		n = evsel->cpus ? evsel->cpus->nr : ncpus;
1436		perf_evsel__close(evsel, n, nthreads);
1437	}
1438}
1439
1440static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
1441{
1442	struct cpu_map	  *cpus;
1443	struct thread_map *threads;
1444	int err = -ENOMEM;
1445
1446	/*
1447	 * Try reading /sys/devices/system/cpu/online to get
1448	 * an all cpus map.
1449	 *
1450	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
1451	 * code needs an overhaul to properly forward the
1452	 * error, and we may not want to do that fallback to a
1453	 * default cpu identity map :-\
1454	 */
1455	cpus = cpu_map__new(NULL);
1456	if (!cpus)
1457		goto out;
1458
1459	threads = thread_map__new_dummy();
1460	if (!threads)
1461		goto out_put;
1462
	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;
out:
1465	return err;
1466out_put:
1467	cpu_map__put(cpus);
1468	goto out;
1469}
1470
1471int perf_evlist__open(struct perf_evlist *evlist)
1472{
1473	struct perf_evsel *evsel;
1474	int err;
1475
1476	/*
1477	 * Default: one fd per CPU, all threads, aka systemwide
1478	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
1479	 */
1480	if (evlist->threads == NULL && evlist->cpus == NULL) {
1481		err = perf_evlist__create_syswide_maps(evlist);
1482		if (err < 0)
1483			goto out_err;
1484	}
1485
1486	perf_evlist__update_id_pos(evlist);
1487
1488	evlist__for_each(evlist, evsel) {
1489		err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
1490		if (err < 0)
1491			goto out_err;
1492	}
1493
1494	return 0;
1495out_err:
1496	perf_evlist__close(evlist);
1497	errno = -err;
1498	return err;
1499}
1500
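/**
 * perf_evlist__prepare_workload - fork the workload, but keep it corked.
 * @evlist: list of events
 * @target: target description, only used to fix up the thread map for
 *          'target is none' (i.e. workload-only) sessions
 * @argv: NULL-terminated argument vector passed to execvp()
 * @pipe_output: redirect the child's stdout to stderr
 * @exec_error: SIGUSR1 handler invoked in the parent if the execvp() fails
 *
 * The child signals readiness by closing the 'ready' pipe and then blocks
 * reading the 'go' pipe; the exec happens only when the parent writes one
 * byte via perf_evlist__start_workload(), so events can be set up on
 * evlist->workload.pid before the workload executes its first instruction.
 */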
1501int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
1502				  const char *argv[], bool pipe_output,
1503				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1504{
1505	int child_ready_pipe[2], go_pipe[2];
1506	char bf;
1507
1508	if (pipe(child_ready_pipe) < 0) {
1509		perror("failed to create 'ready' pipe");
1510		return -1;
1511	}
1512
1513	if (pipe(go_pipe) < 0) {
1514		perror("failed to create 'go' pipe");
1515		goto out_close_ready_pipe;
1516	}
1517
1518	evlist->workload.pid = fork();
1519	if (evlist->workload.pid < 0) {
1520		perror("failed to fork");
1521		goto out_close_pipes;
1522	}
1523
1524	if (!evlist->workload.pid) {
1525		int ret;
1526
1527		if (pipe_output)
1528			dup2(2, 1);
1529
1530		signal(SIGTERM, SIG_DFL);
1531
1532		close(child_ready_pipe[0]);
1533		close(go_pipe[1]);
1534		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
1535
1536		/*
1537		 * Tell the parent we're ready to go
1538		 */
1539		close(child_ready_pipe[1]);
1540
1541		/*
1542		 * Wait until the parent tells us to go.
1543		 */
1544		ret = read(go_pipe[0], &bf, 1);
1545		/*
1546		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte to workload.cork_fd, usually via
1548		 * perf_evlist__start_workload().
1549		 *
1550		 * For cancelling the workload without actually running it,
1551		 * the parent will just close workload.cork_fd, without writing
1552		 * anything, i.e. read will return zero and we just exit()
1553		 * here.
1554		 */
1555		if (ret != 1) {
1556			if (ret == -1)
1557				perror("unable to read pipe");
1558			exit(ret);
1559		}
1560
1561		execvp(argv[0], (char **)argv);
1562
1563		if (exec_error) {
1564			union sigval val;
1565
1566			val.sival_int = errno;
1567			if (sigqueue(getppid(), SIGUSR1, val))
1568				perror(argv[0]);
1569		} else
1570			perror(argv[0]);
1571		exit(-1);
1572	}
1573
1574	if (exec_error) {
1575		struct sigaction act = {
1576			.sa_flags     = SA_SIGINFO,
1577			.sa_sigaction = exec_error,
1578		};
1579		sigaction(SIGUSR1, &act, NULL);
1580	}
1581
1582	if (target__none(target)) {
1583		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
1585				__func__, __LINE__);
1586			goto out_close_pipes;
1587		}
1588		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
1589	}
1590
1591	close(child_ready_pipe[1]);
1592	close(go_pipe[0]);
1593	/*
1594	 * wait for child to settle
1595	 */
1596	if (read(child_ready_pipe[0], &bf, 1) == -1) {
1597		perror("unable to read pipe");
1598		goto out_close_pipes;
1599	}
1600
1601	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1602	evlist->workload.cork_fd = go_pipe[1];
1603	close(child_ready_pipe[0]);
1604	return 0;
1605
1606out_close_pipes:
1607	close(go_pipe[0]);
1608	close(go_pipe[1]);
1609out_close_ready_pipe:
1610	close(child_ready_pipe[0]);
1611	close(child_ready_pipe[1]);
1612	return -1;
1613}
1614
1615int perf_evlist__start_workload(struct perf_evlist *evlist)
1616{
1617	if (evlist->workload.cork_fd > 0) {
1618		char bf = 0;
1619		int ret;
1620		/*
1621		 * Remove the cork, let it rip!
1622		 */
1623		ret = write(evlist->workload.cork_fd, &bf, 1);
1624		if (ret < 0)
			perror("unable to write to pipe");
1626
1627		close(evlist->workload.cork_fd);
1628		return ret;
1629	}
1630
1631	return 0;
1632}
1633
1634int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
1635			      struct perf_sample *sample)
1636{
1637	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1638
1639	if (!evsel)
1640		return -EFAULT;
1641	return perf_evsel__parse_sample(evsel, event, sample);
1642}
1643
1644size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
1645{
1646	struct perf_evsel *evsel;
1647	size_t printed = 0;
1648
1649	evlist__for_each(evlist, evsel) {
1650		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
1651				   perf_evsel__name(evsel));
1652	}
1653
1654	return printed + fprintf(fp, "\n");
1655}
1656
1657int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
1658			       int err, char *buf, size_t size)
1659{
1660	int printed, value;
1661	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1662
1663	switch (err) {
1664	case EACCES:
1665	case EPERM:
1666		printed = scnprintf(buf, size,
1667				    "Error:\t%s.\n"
1668				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1669
1670		value = perf_event_paranoid();
1671
1672		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1673
1674		if (value >= 2) {
1675			printed += scnprintf(buf + printed, size - printed,
1676					     "For your workloads it needs to be <= 1\nHint:\t");
1677		}
1678		printed += scnprintf(buf + printed, size - printed,
1679				     "For system wide tracing it needs to be set to -1.\n");
1680
1681		printed += scnprintf(buf + printed, size - printed,
1682				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1683				    "Hint:\tThe current value is %d.", value);
1684		break;
1685	default:
1686		scnprintf(buf, size, "%s", emsg);
1687		break;
1688	}
1689
1690	return 0;
1691}
1692
1693int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
1694{
1695	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1696	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
1697
1698	switch (err) {
1699	case EPERM:
1700		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1701		printed += scnprintf(buf + printed, size - printed,
1702				     "Error:\t%s.\n"
1703				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1704				     "Hint:\tTried using %zd kB.\n",
1705				     emsg, pages_max_per_user, pages_attempted);
1706
1707		if (pages_attempted >= pages_max_per_user) {
1708			printed += scnprintf(buf + printed, size - printed,
1709					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1710					     pages_max_per_user + pages_attempted);
1711		}
1712
1713		printed += scnprintf(buf + printed, size - printed,
1714				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
1715		break;
1716	default:
1717		scnprintf(buf, size, "%s", emsg);
1718		break;
1719	}
1720
1721	return 0;
1722}
1723
1724void perf_evlist__to_front(struct perf_evlist *evlist,
1725			   struct perf_evsel *move_evsel)
1726{
1727	struct perf_evsel *evsel, *n;
1728	LIST_HEAD(move);
1729
1730	if (move_evsel == perf_evlist__first(evlist))
1731		return;
1732
1733	evlist__for_each_safe(evlist, n, evsel) {
1734		if (evsel->leader == move_evsel->leader)
1735			list_move_tail(&evsel->node, &move);
1736	}
1737
1738	list_splice(&move, &evlist->entries);
1739}
1740
1741void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
1742				     struct perf_evsel *tracking_evsel)
1743{
1744	struct perf_evsel *evsel;
1745
1746	if (tracking_evsel->tracking)
1747		return;
1748
1749	evlist__for_each(evlist, evsel) {
1750		if (evsel != tracking_evsel)
1751			evsel->tracking = false;
1752	}
1753
1754	tracking_evsel->tracking = true;
1755}
1756