1#include "builtin.h"
2#include "perf.h"
3
4#include "util/evlist.h"
5#include "util/evsel.h"
6#include "util/util.h"
7#include "util/cache.h"
8#include "util/symbol.h"
9#include "util/thread.h"
10#include "util/header.h"
11#include "util/session.h"
12#include "util/tool.h"
13#include "util/callchain.h"
14
15#include "util/parse-options.h"
16#include "util/trace-event.h"
17#include "util/data.h"
18#include "util/cpumap.h"
19
20#include "util/debug.h"
21
22#include <linux/rbtree.h>
23#include <linux/string.h>
24#include <locale.h>
25#include <regex.h>
26
27static int	kmem_slab;
28static int	kmem_page;
29
30static long	kmem_page_size;
31static enum {
32	KMEM_SLAB,
33	KMEM_PAGE,
34} kmem_default = KMEM_SLAB;  /* for backward compatibility */
35
36struct alloc_stat;
37typedef int (*sort_fn_t)(void *, void *);
38
39static int			alloc_flag;
40static int			caller_flag;
41
42static int			alloc_lines = -1;
43static int			caller_lines = -1;
44
45static bool			raw_ip;
46
47struct alloc_stat {
48	u64	call_site;
49	u64	ptr;
50	u64	bytes_req;
51	u64	bytes_alloc;
52	u32	hit;
53	u32	pingpong;
54
55	short	alloc_cpu;
56
57	struct rb_node node;
58};
59
60static struct rb_root root_alloc_stat;
61static struct rb_root root_alloc_sorted;
62static struct rb_root root_caller_stat;
63static struct rb_root root_caller_sorted;
64
65static unsigned long total_requested, total_allocated;
66static unsigned long nr_allocs, nr_cross_allocs;
67
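/*
 * Slab allocation stats are kept in two rbtrees: root_alloc_stat keyed
 * by object address and root_caller_stat keyed by call site.  The same
 * struct alloc_stat is used for both; only the key differs.
 */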
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	return 0;
}

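/*
 * Same bookkeeping as insert_alloc_stat(), but aggregated per call
 * site so per-caller totals survive address reuse.
 */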
static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
		      call_site = perf_evsel__intval(evsel, sample, "call_site");
	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
	    bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	nr_allocs++;
	return 0;
}

static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	int ret = perf_evsel__process_alloc_event(evsel, sample);

	if (!ret) {
		int node1 = cpu__get_node(sample->cpu),
		    node2 = perf_evsel__intval(evsel, sample, "node");

		if (node1 != node2)
			nr_cross_allocs++;
	}

	return ret;
}

static int ptr_cmp(void *, void *);
static int slab_callsite_cmp(void *, void *);

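/*
 * Generic lookup over either stat tree: 'key' carries only the field
 * the compare callback inspects (ptr or call_site); the other argument
 * is passed as 0 by the callers.
 */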
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

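/*
 * A kfree/kmem_cache_free on a different CPU than the allocation is
 * counted as a "ping-pong" against both the object and its call site;
 * such cross-CPU traffic is bad for cache locality.
 */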
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat,
					     slab_callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

static u64 total_page_alloc_bytes;
static u64 total_page_free_bytes;
static u64 total_page_nomatch_bytes;
static u64 total_page_fail_bytes;
static unsigned long nr_page_allocs;
static unsigned long nr_page_frees;
static unsigned long nr_page_fails;
static unsigned long nr_page_nomatch;

static bool use_pfn;
static bool live_page;
static struct perf_session *kmem_session;

#define MAX_MIGRATE_TYPES  6
#define MAX_PAGE_ORDER     11

static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];

struct page_stat {
	struct rb_node	node;
	u64		page;
	u64		callsite;
	int		order;
	unsigned	gfp_flags;
	unsigned	migrate_type;
	u64		alloc_bytes;
	u64		free_bytes;
	int		nr_alloc;
	int		nr_free;
};

static struct rb_root page_live_tree;
static struct rb_root page_alloc_tree;
static struct rb_root page_alloc_sorted;
static struct rb_root page_caller_tree;
static struct rb_root page_caller_sorted;

struct alloc_func {
	u64 start;
	u64 end;
	char *name;
};

static int nr_alloc_funcs;
static struct alloc_func *alloc_func_list;

static int funcmp(const void *a, const void *b)
{
	const struct alloc_func *fa = a;
	const struct alloc_func *fb = b;

	if (fa->start > fb->start)
		return 1;
	else
		return -1;
}

static int callcmp(const void *a, const void *b)
{
	const struct alloc_func *fa = a;
	const struct alloc_func *fb = b;

	if (fb->start <= fa->start && fa->end < fb->end)
		return 0;

	if (fa->start > fb->start)
		return 1;
	else
		return -1;
}

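/*
 * Collect the kernel's page allocation entry points (__alloc_pages*,
 * __get_free_pages*, get_zeroed_page*, ...) from the kernel map so
 * find_callsite() can skip them while walking the callchain.
 */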
static int build_alloc_func_list(void)
{
	int ret;
	struct map *kernel_map;
	struct symbol *sym;
	struct rb_node *node;
	struct alloc_func *func;
	struct machine *machine = &kmem_session->machines.host;
	regex_t alloc_func_regex;
	const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";

	ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
	if (ret) {
		char err[BUFSIZ];

		regerror(ret, &alloc_func_regex, err, sizeof(err));
		pr_err("Invalid regex: %s\n%s", pattern, err);
		return -EINVAL;
	}

	kernel_map = machine__kernel_map(machine);
	if (map__load(kernel_map, NULL) < 0) {
		pr_err("cannot load kernel map\n");
		return -ENOENT;
	}

	map__for_each_symbol(kernel_map, sym, node) {
		if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
			continue;

		func = realloc(alloc_func_list,
			       (nr_alloc_funcs + 1) * sizeof(*func));
		if (func == NULL)
			return -ENOMEM;

		pr_debug("alloc func: %s\n", sym->name);
		func[nr_alloc_funcs].start = sym->start;
		func[nr_alloc_funcs].end   = sym->end;
		func[nr_alloc_funcs].name  = sym->name;

		alloc_func_list = func;
		nr_alloc_funcs++;
	}

	qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);

	regfree(&alloc_func_regex);
	return 0;
}

/*
 * Find first non-memory allocation function from callchain.
 * The allocation functions are in the 'alloc_func_list'.
 */
static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
{
	struct addr_location al;
	struct machine *machine = &kmem_session->machines.host;
	struct callchain_cursor_node *node;

	if (alloc_func_list == NULL) {
		if (build_alloc_func_list() < 0)
			goto out;
	}

	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	sample__resolve_callchain(sample, NULL, evsel, &al, 16);

	callchain_cursor_commit(&callchain_cursor);
	while (true) {
		struct alloc_func key, *caller;
		u64 addr;

		node = callchain_cursor_current(&callchain_cursor);
		if (node == NULL)
			break;

		key.start = key.end = node->ip;
		caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
				 sizeof(key), callcmp);
		if (!caller) {
			/* found */
			if (node->map)
				addr = map__unmap_ip(node->map, node->ip);
			else
				addr = node->ip;

			return addr;
		} else
			pr_debug3("skipping alloc function: %s\n", caller->name);

		callchain_cursor_advance(&callchain_cursor);
	}

out:
	pr_debug2("unknown callsite: %"PRIx64"\n", sample->ip);
	return sample->ip;
}

struct sort_dimension {
	const char		name[20];
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(page_alloc_sort_input);
static LIST_HEAD(page_caller_sort_input);

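/*
 * Page stats live in three trees: page_live_tree tracks currently
 * allocated pages (keyed by pfn or struct page) so frees can be
 * matched to their allocation; page_alloc_tree accumulates per-page
 * totals when not in live mode; page_caller_tree accumulates
 * per-callsite totals.  The __page_stat__findnew_*() helpers below
 * share a find-or-insert pattern, with 'create' selecting pure lookup
 * versus insertion of a zero-filled node.
 */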
static struct page_stat *
__page_stat__findnew_page(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_live_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;

	while (*node) {
		s64 cmp;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		cmp = data->page - pstat->page;
		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->page = pstat->page;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_live_tree);
	}

	return data;
}

static struct page_stat *page_stat__find_page(struct page_stat *pstat)
{
	return __page_stat__findnew_page(pstat, false);
}

static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
{
	return __page_stat__findnew_page(pstat, true);
}

static struct page_stat *
__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_alloc_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;
	struct sort_dimension *sort;

	while (*node) {
		int cmp = 0;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		list_for_each_entry(sort, &page_alloc_sort_input, list) {
			cmp = sort->cmp(pstat, data);
			if (cmp)
				break;
		}

		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->page = pstat->page;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_alloc_tree);
	}

	return data;
}

static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
{
	return __page_stat__findnew_alloc(pstat, false);
}

static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
{
	return __page_stat__findnew_alloc(pstat, true);
}

static struct page_stat *
__page_stat__findnew_caller(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_caller_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;
	struct sort_dimension *sort;

	while (*node) {
		int cmp = 0;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		list_for_each_entry(sort, &page_caller_sort_input, list) {
			cmp = sort->cmp(pstat, data);
			if (cmp)
				break;
		}

		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->callsite = pstat->callsite;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_caller_tree);
	}

	return data;
}

static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
{
	return __page_stat__findnew_caller(pstat, false);
}

static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
{
	return __page_stat__findnew_caller(pstat, true);
}

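/*
 * The tracepoint reports a failed allocation as pfn == -1 or a NULL
 * struct page pointer; treat either form as an allocation failure.
 */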
static bool valid_page(u64 pfn_or_page)
{
	if (use_pfn && pfn_or_page == -1UL)
		return false;
	if (!use_pfn && pfn_or_page == 0)
		return false;
	return true;
}

struct gfp_flag {
	unsigned int flags;
	char *compact_str;
	char *human_readable;
};

static struct gfp_flag *gfps;
static int nr_gfps;

static int gfpcmp(const void *a, const void *b)
{
	const struct gfp_flag *fa = a;
	const struct gfp_flag *fb = b;

	return fa->flags - fb->flags;
}

/* see include/trace/events/gfpflags.h */
static const struct {
	const char *original;
	const char *compact;
} gfp_compact_table[] = {
	{ "GFP_TRANSHUGE",		"THP" },
	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
	{ "GFP_HIGHUSER",		"HU" },
	{ "GFP_USER",			"U" },
	{ "GFP_TEMPORARY",		"TMP" },
	{ "GFP_KERNEL",			"K" },
	{ "GFP_NOFS",			"NF" },
	{ "GFP_ATOMIC",			"A" },
	{ "GFP_NOIO",			"NI" },
	{ "GFP_HIGH",			"H" },
	{ "GFP_WAIT",			"W" },
	{ "GFP_IO",			"I" },
	{ "GFP_COLD",			"CO" },
	{ "GFP_NOWARN",			"NWR" },
	{ "GFP_REPEAT",			"R" },
	{ "GFP_NOFAIL",			"NF" },
	{ "GFP_NORETRY",		"NR" },
	{ "GFP_COMP",			"C" },
	{ "GFP_ZERO",			"Z" },
	{ "GFP_NOMEMALLOC",		"NMA" },
	{ "GFP_MEMALLOC",		"MA" },
	{ "GFP_HARDWALL",		"HW" },
	{ "GFP_THISNODE",		"TN" },
	{ "GFP_RECLAIMABLE",		"RC" },
	{ "GFP_MOVABLE",		"M" },
	{ "GFP_NOTRACK",		"NT" },
	{ "GFP_NO_KSWAPD",		"NK" },
	{ "GFP_OTHER_NODE",		"ON" },
	{ "GFP_NOWAIT",			"NW" },
};

static size_t max_gfp_len;

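/*
 * Convert a "GFP_KERNEL|GFP_ZERO"-style flag string from the event
 * into its compact form ("K|Z") via gfp_compact_table, and track the
 * widest compact string seen for report column sizing.
 */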
static char *compact_gfp_flags(char *gfp_flags)
{
	char *orig_flags = strdup(gfp_flags);
	char *new_flags = NULL;
	char *str, *pos = NULL;
	size_t len = 0;

	if (orig_flags == NULL)
		return NULL;

	str = strtok_r(orig_flags, "|", &pos);
	while (str) {
		size_t i;
		char *new;
		const char *cpt;

		for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
			if (strcmp(gfp_compact_table[i].original, str))
				continue;

			cpt = gfp_compact_table[i].compact;
			new = realloc(new_flags, len + strlen(cpt) + 2);
			if (new == NULL) {
				free(new_flags);
				free(orig_flags);
				return NULL;
			}

			new_flags = new;

			if (!len) {
				strcpy(new_flags, cpt);
			} else {
				strcat(new_flags, "|");
				strcat(new_flags, cpt);
				len++;
			}

			len += strlen(cpt);
		}

		str = strtok_r(NULL, "|", &pos);
	}

	if (max_gfp_len < len)
		max_gfp_len = len;

	free(orig_flags);
	return new_flags;
}

static char *compact_gfp_string(unsigned long gfp_flags)
{
	struct gfp_flag key = {
		.flags = gfp_flags,
	};
	struct gfp_flag *gfp;

	gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
	if (gfp)
		return gfp->compact_str;

	return NULL;
}

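/*
 * Build the gfp flags table lazily: the first time a flag combination
 * is seen, pretty-print the raw event to recover its human-readable
 * flag string, then cache both the full and compact forms, keeping
 * the array sorted for bsearch().
 */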
static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
			   unsigned int gfp_flags)
{
	struct pevent_record record = {
		.cpu = sample->cpu,
		.data = sample->raw_data,
		.size = sample->raw_size,
	};
	struct trace_seq seq;
	char *str, *pos = NULL;

	if (nr_gfps) {
		struct gfp_flag key = {
			.flags = gfp_flags,
		};

		if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
			return 0;
	}

	trace_seq_init(&seq);
	pevent_event_info(&seq, evsel->tp_format, &record);

	str = strtok_r(seq.buffer, " ", &pos);
	while (str) {
		if (!strncmp(str, "gfp_flags=", 10)) {
			struct gfp_flag *new;

			new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
			if (new == NULL)
				return -ENOMEM;

			gfps = new;
			new += nr_gfps++;

			new->flags = gfp_flags;
			new->human_readable = strdup(str + 10);
			new->compact_str = compact_gfp_flags(str + 10);
			if (!new->human_readable || !new->compact_str)
				return -ENOMEM;

			qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
		}

		str = strtok_r(NULL, " ", &pos);
	}

	trace_seq_destroy(&seq);
	return 0;
}

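/*
 * mm_page_alloc handler: account the allocation in the live tree, in
 * the cumulative alloc tree (unless --live), and in the per-callsite
 * tree, then bump the order/migrate-type histogram.
 */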
static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	u64 page;
	unsigned int order = perf_evsel__intval(evsel, sample, "order");
	unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
						       "migratetype");
	u64 bytes = kmem_page_size << order;
	u64 callsite;
	struct page_stat *pstat;
	struct page_stat this = {
		.order = order,
		.gfp_flags = gfp_flags,
		.migrate_type = migrate_type,
	};

	if (use_pfn)
		page = perf_evsel__intval(evsel, sample, "pfn");
	else
		page = perf_evsel__intval(evsel, sample, "page");

	nr_page_allocs++;
	total_page_alloc_bytes += bytes;

	if (!valid_page(page)) {
		nr_page_fails++;
		total_page_fail_bytes += bytes;

		return 0;
	}

	if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
		return -1;

	callsite = find_callsite(evsel, sample);

	/*
	 * This is to find the current page (with correct gfp flags and
	 * migrate type) at free event.
	 */
	this.page = page;
	pstat = page_stat__findnew_page(&this);
	if (pstat == NULL)
		return -ENOMEM;

	pstat->nr_alloc++;
	pstat->alloc_bytes += bytes;
	pstat->callsite = callsite;

	if (!live_page) {
		pstat = page_stat__findnew_alloc(&this);
		if (pstat == NULL)
			return -ENOMEM;

		pstat->nr_alloc++;
		pstat->alloc_bytes += bytes;
		pstat->callsite = callsite;
	}

	this.callsite = callsite;
	pstat = page_stat__findnew_caller(&this);
	if (pstat == NULL)
		return -ENOMEM;

	pstat->nr_alloc++;
	pstat->alloc_bytes += bytes;

	order_stats[order][migrate_type]++;

	return 0;
}

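/*
 * mm_page_free handler: the free event carries no gfp flags, migrate
 * type or callsite, so they are recovered from the live-tree entry
 * recorded at allocation time before that entry is dropped.
 */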
static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	u64 page;
	unsigned int order = perf_evsel__intval(evsel, sample, "order");
	u64 bytes = kmem_page_size << order;
	struct page_stat *pstat;
	struct page_stat this = {
		.order = order,
	};

	if (use_pfn)
		page = perf_evsel__intval(evsel, sample, "pfn");
	else
		page = perf_evsel__intval(evsel, sample, "page");

	nr_page_frees++;
	total_page_free_bytes += bytes;

	this.page = page;
	pstat = page_stat__find_page(&this);
	if (pstat == NULL) {
		pr_debug2("free without matching alloc: page %"PRIx64" (order: %d)\n",
			  page, order);

		nr_page_nomatch++;
		total_page_nomatch_bytes += bytes;

		return 0;
	}

	this.gfp_flags = pstat->gfp_flags;
	this.migrate_type = pstat->migrate_type;
	this.callsite = pstat->callsite;

	rb_erase(&pstat->node, &page_live_tree);
	free(pstat);

	if (live_page) {
		order_stats[this.order][this.migrate_type]--;
	} else {
		pstat = page_stat__find_alloc(&this);
		if (pstat == NULL)
			return -ENOMEM;

		pstat->nr_free++;
		pstat->free_bytes += bytes;
	}

	pstat = page_stat__find_caller(&this);
	if (pstat == NULL)
		return -ENOENT;

	pstat->nr_free++;
	pstat->free_bytes += bytes;

	if (live_page) {
		pstat->nr_alloc--;
		pstat->alloc_bytes -= bytes;

		if (pstat->nr_alloc == 0) {
			rb_erase(&pstat->node, &page_caller_tree);
			free(pstat);
		}
	}

	return 0;
}

typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static struct perf_tool perf_kmem = {
	.sample		 = process_sample_event,
	.comm		 = perf_event__process_comm,
	.mmap		 = perf_event__process_mmap,
	.mmap2		 = perf_event__process_mmap2,
	.ordered_events	 = true,
};

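/*
 * Internal fragmentation of an allocation, in percent:
 * 100 * (n_alloc - n_req) / n_alloc.
 */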
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_slab_result(struct rb_root *root,
				struct perf_session *session,
				int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine = &session->machines.host;

	printf("%.105s\n", graph_dotted_line);
	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.105s\n", graph_dotted_line);

	next = rb_first(root);

	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");

	printf("%.105s\n", graph_dotted_line);
}

static const char * const migrate_type_str[] = {
	"UNMOVABL",
	"RECLAIM",
	"MOVABLE",
	"RESERVED",
	"CMA/ISLT",
	"UNKNOWN",
};

static void __print_page_alloc_result(struct perf_session *session, int n_lines)
{
	struct rb_node *next = rb_first(&page_alloc_sorted);
	struct machine *machine = &session->machines.host;
	const char *format;
	int gfp_len = max(strlen("GFP flags"), max_gfp_len);

	printf("\n%.105s\n", graph_dotted_line);
	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
	       gfp_len, "GFP flags");
	printf("%.105s\n", graph_dotted_line);

	if (use_pfn)
		format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
	else
		format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";

	while (next && n_lines--) {
		struct page_stat *data;
		struct symbol *sym;
		struct map *map;
		char buf[32];
		char *caller = buf;

		data = rb_entry(next, struct page_stat, node);
		sym = machine__find_kernel_function(machine, data->callsite,
						    &map, NULL);
		if (sym && sym->name)
			caller = sym->name;
		else
			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

		printf(format, (unsigned long long)data->page,
		       (unsigned long long)data->alloc_bytes / 1024,
		       data->nr_alloc, data->order,
		       migrate_type_str[data->migrate_type],
		       gfp_len, compact_gfp_string(data->gfp_flags), caller);

		next = rb_next(next);
	}

	if (n_lines == -1) {
		printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
		       gfp_len, "...");
	}

	printf("%.105s\n", graph_dotted_line);
}

static void __print_page_caller_result(struct perf_session *session, int n_lines)
{
	struct rb_node *next = rb_first(&page_caller_sorted);
	struct machine *machine = &session->machines.host;
	int gfp_len = max(strlen("GFP flags"), max_gfp_len);

	printf("\n%.105s\n", graph_dotted_line);
	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
	       live_page ? "Live" : "Total", gfp_len, "GFP flags");
	printf("%.105s\n", graph_dotted_line);

	while (next && n_lines--) {
		struct page_stat *data;
		struct symbol *sym;
		struct map *map;
		char buf[32];
		char *caller = buf;

		data = rb_entry(next, struct page_stat, node);
		sym = machine__find_kernel_function(machine, data->callsite,
						    &map, NULL);
		if (sym && sym->name)
			caller = sym->name;
		else
			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

		printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
		       (unsigned long long)data->alloc_bytes / 1024,
		       data->nr_alloc, data->order,
		       migrate_type_str[data->migrate_type],
		       gfp_len, compact_gfp_string(data->gfp_flags), caller);

		next = rb_next(next);
	}

	if (n_lines == -1) {
		printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
		       gfp_len, "...");
	}

	printf("%.105s\n", graph_dotted_line);
}

static void print_gfp_flags(void)
{
	int i;

	printf("#\n");
	printf("# GFP flags\n");
	printf("# ---------\n");
	for (i = 0; i < nr_gfps; i++) {
		printf("# %08x: %*s: %s\n", gfps[i].flags,
		       (int) max_gfp_len, gfps[i].compact_str,
		       gfps[i].human_readable);
	}
}

static void print_slab_summary(void)
{
	printf("\nSUMMARY (SLAB allocator)");
	printf("\n========================\n");
	printf("Total bytes requested: %'lu\n", total_requested);
	printf("Total bytes allocated: %'lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %'lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
}

static void print_page_summary(void)
{
	int o, m;
	u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
	u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;

	printf("\nSUMMARY (page allocator)");
	printf("\n========================\n");
	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
	       nr_page_allocs, total_page_alloc_bytes / 1024);
	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
	       nr_page_frees, total_page_free_bytes / 1024);
	printf("\n");

	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
	       nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
	       nr_page_allocs - nr_alloc_freed,
	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
	       nr_page_nomatch, total_page_nomatch_bytes / 1024);
	printf("\n");

	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
	       nr_page_fails, total_page_fail_bytes / 1024);
	printf("\n");

	printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
	       "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
	printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
	       graph_dotted_line, graph_dotted_line);

	for (o = 0; o < MAX_PAGE_ORDER; o++) {
		printf("%5d", o);
		for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
			if (order_stats[o][m])
				printf("  %'12d", order_stats[o][m]);
			else
				printf("  %12c", '.');
		}
		printf("\n");
	}
}

static void print_slab_result(struct perf_session *session)
{
	if (caller_flag)
		__print_slab_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_slab_summary();
}

static void print_page_result(struct perf_session *session)
{
	if (caller_flag || alloc_flag)
		print_gfp_flags();
	if (caller_flag)
		__print_page_caller_result(session, caller_lines);
	if (alloc_flag)
		__print_page_alloc_result(session, alloc_lines);
	print_page_summary();
}

static void print_result(struct perf_session *session)
{
	if (kmem_slab)
		print_slab_result(session);
	if (kmem_page)
		print_page_result(session);
}

static LIST_HEAD(slab_caller_sort);
static LIST_HEAD(slab_alloc_sort);
static LIST_HEAD(page_caller_sort);
static LIST_HEAD(page_alloc_sort);

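/*
 * Move every node from a stat tree into a result tree ordered by the
 * user's sort keys; keys are compared in list order and the first
 * non-zero comparison decides placement, larger values sorting first.
 */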
static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
			     struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
			       struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_slab_insert(root_sorted, data, sort_list);
	}
}

static void sort_page_insert(struct rb_root *root, struct page_stat *data,
			     struct list_head *sort_list)
{
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct page_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct page_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &parent->rb_left;
		else
			new = &parent->rb_right;
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
			       struct list_head *sort_list)
{
	struct rb_node *node;
	struct page_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct page_stat, node);
		sort_page_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	if (kmem_slab) {
		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
				   &slab_alloc_sort);
		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
				   &slab_caller_sort);
	}
	if (kmem_page) {
		if (live_page)
			__sort_page_result(&page_live_tree, &page_alloc_sorted,
					   &page_alloc_sort);
		else
			__sort_page_result(&page_alloc_tree, &page_alloc_sorted,
					   &page_alloc_sort);

		__sort_page_result(&page_caller_tree, &page_caller_sorted,
				   &page_caller_sort);
	}
}

static int __cmd_kmem(struct perf_session *session)
{
	int err = -EINVAL;
	struct perf_evsel *evsel;
	const struct perf_evsel_str_handler kmem_tracepoints[] = {
		/* slab allocator */
		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
		{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
		{ "kmem:kfree",			perf_evsel__process_free_event, },
		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
		/* page allocator */
		{ "kmem:mm_page_alloc",		perf_evsel__process_page_alloc_event, },
		{ "kmem:mm_page_free",		perf_evsel__process_page_free_event, },
	};

	if (!perf_session__has_traces(session, "kmem record"))
		goto out;

	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out;
	}

	evlist__for_each(session->evlist, evsel) {
		if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
		    perf_evsel__field(evsel, "pfn")) {
			use_pfn = true;
			break;
		}
	}

	setup_pager();
	err = perf_session__process_events(session);
	if (err != 0) {
		pr_err("error during process events: %d\n", err);
		goto out;
	}
	sort_result();
	print_result(session);
out:
	return err;
}

/* slab sort keys */
static int ptr_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int slab_callsite_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= slab_callsite_cmp,
};

static int hit_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(void *a, void *b)
{
	double x, y;
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

/* page sort keys */
static int page_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->page < r->page)
		return -1;
	else if (l->page > r->page)
		return 1;
	return 0;
}

static struct sort_dimension page_sort_dimension = {
	.name	= "page",
	.cmp	= page_cmp,
};

static int page_callsite_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->callsite < r->callsite)
		return -1;
	else if (l->callsite > r->callsite)
		return 1;
	return 0;
}

static struct sort_dimension page_callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= page_callsite_cmp,
};

static int page_hit_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->nr_alloc < r->nr_alloc)
		return -1;
	else if (l->nr_alloc > r->nr_alloc)
		return 1;
	return 0;
}

static struct sort_dimension page_hit_sort_dimension = {
	.name	= "hit",
	.cmp	= page_hit_cmp,
};

static int page_bytes_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->alloc_bytes < r->alloc_bytes)
		return -1;
	else if (l->alloc_bytes > r->alloc_bytes)
		return 1;
	return 0;
}

static struct sort_dimension page_bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= page_bytes_cmp,
};

static int page_order_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->order < r->order)
		return -1;
	else if (l->order > r->order)
		return 1;
	return 0;
}

static struct sort_dimension page_order_sort_dimension = {
	.name	= "order",
	.cmp	= page_order_cmp,
};

static int migrate_type_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	/* for internal use to find free'd page */
	if (l->migrate_type == -1U)
		return 0;

	if (l->migrate_type < r->migrate_type)
		return -1;
	else if (l->migrate_type > r->migrate_type)
		return 1;
	return 0;
}

static struct sort_dimension migrate_type_sort_dimension = {
	.name	= "migtype",
	.cmp	= migrate_type_cmp,
};

static int gfp_flags_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	/* for internal use to find free'd page */
	if (l->gfp_flags == -1U)
		return 0;

	if (l->gfp_flags < r->gfp_flags)
		return -1;
	else if (l->gfp_flags > r->gfp_flags)
		return 1;
	return 0;
}

static struct sort_dimension gfp_flags_sort_dimension = {
	.name	= "gfp",
	.cmp	= gfp_flags_cmp,
};

static struct sort_dimension *slab_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

static struct sort_dimension *page_sorts[] = {
	&page_sort_dimension,
	&page_callsite_sort_dimension,
	&page_hit_sort_dimension,
	&page_bytes_sort_dimension,
	&page_order_sort_dimension,
	&migrate_type_sort_dimension,
	&gfp_flags_sort_dimension,
};

static int slab_sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
		if (!strcmp(slab_sorts[i]->name, tok)) {
			sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int page_sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
		if (!strcmp(page_sorts[i]->name, tok)) {
			sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (slab_sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown slab --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int setup_page_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (page_sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown page --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (kmem_page > kmem_slab ||
	    (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
		if (caller_flag > alloc_flag)
			return setup_page_sorting(&page_caller_sort, arg);
		else
			return setup_page_sorting(&page_alloc_sort, arg);
	} else {
		if (caller_flag > alloc_flag)
			return setup_slab_sorting(&slab_caller_sort, arg);
		else
			return setup_slab_sorting(&slab_alloc_sort, arg);
	}

	return 0;
}

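/*
 * Each pair of competing options (--caller/--alloc, --slab/--page)
 * records which one came last by setting its flag to the other's value
 * plus one; later checks such as 'caller_flag > alloc_flag' then favor
 * the option named last on the command line.
 */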
static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_slab_opt(const struct option *opt __maybe_unused,
			  const char *arg __maybe_unused,
			  int unset __maybe_unused)
{
	kmem_slab = (kmem_page + 1);
	return 0;
}

static int parse_page_opt(const struct option *opt __maybe_unused,
			  const char *arg __maybe_unused,
			  int unset __maybe_unused)
{
	kmem_page = (kmem_slab + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

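/*
 * 'perf kmem record' is a thin wrapper around 'perf record': it
 * prepends the slab and/or page tracepoints (plus -g for callchains in
 * page mode) to the user's arguments and hands off to cmd_record().
 */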
static int __cmd_record(int argc, const char **argv)
{
	const char * const record_args[] = {
	"record", "-a", "-R", "-c", "1",
	};
	const char * const slab_events[] = {
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
	};
	const char * const page_events[] = {
	"-e", "kmem:mm_page_alloc",
	"-e", "kmem:mm_page_free",
	};
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	if (kmem_slab)
		rec_argc += ARRAY_SIZE(slab_events);
	if (kmem_page)
		rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */

	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	if (kmem_slab) {
		for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
			rec_argv[i] = strdup(slab_events[j]);
	}
	if (kmem_page) {
		rec_argv[i++] = strdup("-g");

		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
			rec_argv[i] = strdup(page_events[j]);
	}

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

static int kmem_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, "kmem.default")) {
		if (!strcmp(value, "slab"))
			kmem_default = KMEM_SLAB;
		else if (!strcmp(value, "page"))
			kmem_default = KMEM_PAGE;
		else
			pr_err("invalid default value ('slab' or 'page' required): %s\n",
			       value);
		return 0;
	}

	return perf_default_config(var, value, cb);
}

int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const default_slab_sort = "frag,hit,bytes";
	const char * const default_page_sort = "bytes,hit";
	struct perf_data_file file = {
		.mode = PERF_DATA_MODE_READ,
	};
	const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics", parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics", parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
		     "page, order, migtype, gfp", parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
	OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
			   parse_slab_opt),
	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
			   parse_page_opt),
	OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
	OPT_END()
	};
	const char *const kmem_subcommands[] = { "record", "stat", NULL };
	const char *kmem_usage[] = {
		NULL,
		NULL
	};
	struct perf_session *session;
	int ret = -1;
	const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";

	perf_config(kmem_config, NULL);
	argc = parse_options_subcommand(argc, argv, kmem_options,
					kmem_subcommands, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	if (kmem_slab == 0 && kmem_page == 0) {
		if (kmem_default == KMEM_SLAB)
			kmem_slab = 1;
		else
			kmem_page = 1;
	}

	if (!strncmp(argv[0], "rec", 3)) {
		symbol__init(NULL);
		return __cmd_record(argc, argv);
	}

	file.path = input_name;

	kmem_session = session = perf_session__new(&file, false, &perf_kmem);
	if (session == NULL)
		return -1;

	if (kmem_slab) {
		if (!perf_evlist__find_tracepoint_by_name(session->evlist,
							  "kmem:kmalloc")) {
			pr_err(errmsg, "slab", "slab");
			goto out_delete;
		}
	}

	if (kmem_page) {
		struct perf_evsel *evsel;

		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "kmem:mm_page_alloc");
		if (evsel == NULL) {
			pr_err(errmsg, "page", "page");
			goto out_delete;
		}

		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
		symbol_conf.use_callchain = true;
	}

	symbol__init(&session->header.env);

	if (!strcmp(argv[0], "stat")) {
		setlocale(LC_ALL, "");

		if (cpu__setup_cpunode_map())
			goto out_delete;

		if (list_empty(&slab_caller_sort))
			setup_slab_sorting(&slab_caller_sort, default_slab_sort);
		if (list_empty(&slab_alloc_sort))
			setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
		if (list_empty(&page_caller_sort))
			setup_page_sorting(&page_caller_sort, default_page_sort);
		if (list_empty(&page_alloc_sort))
			setup_page_sorting(&page_alloc_sort, default_page_sort);

		if (kmem_page) {
			setup_page_sorting(&page_alloc_sort_input,
					   "page,order,migtype,gfp");
			setup_page_sorting(&page_caller_sort_input,
					   "callsite,order,migtype,gfp");
		}
		ret = __cmd_kmem(session);
	} else
		usage_with_options(kmem_usage, kmem_options);

out_delete:
	perf_session__delete(session);

	return ret;
}